In [1]:
# -*- coding:utf-8 -*-

### 5.1.1  讀寫文本文件

In [2]:
# Write the file. The with-block closes the handle even if a write raises.
with open("tmp.txt", "w", encoding="utf-8") as f:
    f.writelines(["line1\n", "line2\n"])  # writes several strings; the "\n" must be supplied by the caller
    f.write("line3\nline4")

# Read the whole file back as a single string.
with open("tmp.txt", "r", encoding="utf-8") as f:
    print(f.read())
    # f.readlines() would instead return a list with one string per line

# Read the file one line at a time.
with open("tmp.txt", "r", encoding="utf-8") as f:
    # Iterating a file object yields successive lines and stops at end of file.
    for line in f:
        # Each line keeps its trailing "\n", so print() shows a blank line after it.
        print("line:", line)

line1
line2
line3
line4
line: line1

line: line2

line: line3

line: line4


### 5.1.2  寫日誌文件

In [3]:
import logging

# Get (or create) the logger named "mylog".
logger = logging.getLogger("mylog")
# First filter, applied to every record: let DEBUG and above through.
logger.setLevel(level=logging.DEBUG)

# getLogger() returns the same object for the same name, so running this cell
# again would attach a second pair of handlers and duplicate every message.
# Detach and close whatever handlers are already attached before adding new ones.
for old_handler in list(logger.handlers):
    logger.removeHandler(old_handler)
    old_handler.close()

# File handler with its own level: second filter, INFO and above go to log.txt.
handler = logging.FileHandler("log.txt")
handler.setLevel(logging.INFO)

# Record layout for the file handler; %(name)s is the logger name ("mylog").
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)

# Stream handler with its own level: second filter, WARNING and above only.
console = logging.StreamHandler()
console.setLevel(logging.WARNING)

# Attach both handlers to the logger.
logger.addHandler(handler)
logger.addHandler(console)

# Emit one record per level to show the two-stage filtering.
logger.info("show info")        # passes the file handler only
logger.debug("show debug")      # passes the logger but is dropped by both handlers
logger.warning("show warning")  # passes both handlers



### 5.1.3  讀寫XML文件

In [4]:
from xml.dom import minidom

dom = minidom.Document()
root_node = dom.createElement('root')  # create the root element
dom.appendChild(root_node)  # attach the root element to the document

book_node = dom.createElement('blog')  # first child element: <blog>
book_node.setAttribute('level', '3')  # add an attribute to it
root_node.appendChild(book_node)  # attach it under root

name_node = dom.createElement('addr')  # second child element: <addr>
name_text = dom.createTextNode('https://blog.csdn.net/xieyan0811')  # text content for <addr>
name_node.appendChild(name_text)
root_node.appendChild(name_node)

# toxml() serialises to a plain string; toprettyxml() produces an indented tree layout.
print(dom.toprettyxml())
# The encoding argument of writexml() only sets the name written in the XML
# declaration; the characters are encoded by the file object. Open the file as
# UTF-8 explicitly so the declaration and the bytes on disk always agree.
with open('test_dom.xml', 'w', encoding='utf-8') as fh:
    dom.writexml(fh, indent='', addindent='\t', newl='\n', encoding='UTF-8')

<?xml version="1.0" ?>
<root>
	<blog level="3"/>
	<addr>https://blog.csdn.net/xieyan0811</addr>
</root>



In [5]:
from xml.dom import minidom
# Pass the path to minidom.parse() so the parser reads the file itself and
# decodes it using the encoding named in the XML declaration, rather than
# going through a text-mode handle that uses the platform default encoding.
dom = minidom.parse('test_dom.xml')  # build the DOM object
root = dom.documentElement  # root element of the document
print("node name", root.nodeName)  # element name: root
print("node type", root.nodeType)  # numeric node type
print("child nodes", root.childNodes)  # all direct children, including whitespace text nodes
blog = root.getElementsByTagName('blog')[0]  # look up elements by tag name, take the first
print(blog.getAttribute('level'))  # read an attribute value
addr = root.getElementsByTagName('addr')[0]
print("addr's child nodes", addr.childNodes)
text_node = addr.childNodes[0]  # the text node holding <addr>'s content
print("text data", text_node.data)
print("parent", addr.parentNode.nodeName)  # name of <addr>'s parent element

node name root
node type 1
child nodes [<DOM Text node "'\n\t'">, <DOM Element: blog at 0x7f00b0586b90>, <DOM Text node "'\n\t'">, <DOM Element: addr at 0x7f00b0586d58>, <DOM Text node "'\n'">]
3
addr's child nodes [<DOM Text node "'https://bl'...">]
text data https://blog.csdn.net/xieyan0811
parent root


### 5.1.4  讀寫JSON文件

In [6]:
import json

data = [{"group": 0, "param": ["one", "two", "three"]},
        {"group": 1, "param": ["1", "2", "3"]}]

# Compact, single-line serialisation.
jsonstr = json.dumps(data)
print(jsonstr)
# Pretty-printed serialisation: sorted keys, 4-space indentation.
jsonstr = json.dumps(data, sort_keys=True,
                     indent=4, separators=(',', ': '))
print(jsonstr)
# Parse the JSON string back into Python objects.
data1 = json.loads(jsonstr)
print(data1, type(data1))

# Serialise directly to a file. The with-block closes the file on exit,
# so no explicit close() is needed.
with open('json.txt', 'w') as json_file:
    json.dump(data, json_file)

# Load the file back and print the object that was just read from disk
# (not data1, which came from the in-memory string above).
with open('json.txt', 'r') as json_file:
    data = json.load(json_file)
print(data, type(data))

[{"group": 0, "param": ["one", "two", "three"]}, {"group": 1, "param": ["1", "2", "3"]}]
[
    {
        "group": 0,
        "param": [
            "one",
            "two",
            "three"
        ]
    },
    {
        "group": 1,
        "param": [
            "1",
            "2",
            "3"
        ]
    }
]
[{'group': 0, 'param': ['one', 'two', 'three']}, {'group': 1, 'param': ['1', '2', '3']}] <class 'list'>
[{'group': 0, 'param': ['one', 'two', 'three']}, {'group': 1, 'param': ['1', '2', '3']}] <class 'list'>


### 5.1.5  讀寫CSV文件

In [7]:
import pandas as pd

df = pd.DataFrame({'Name': ['Smith', 'Lucy'], 'Age': ['25', '20'], 'Sex': ['男','女']})
# info() prints its summary itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the output.
df.info()
# Write without the index, with a header row, using the given column order.
df.to_csv("tmp.csv", index=False, header=True, columns=['Name','Sex','Age'])

df1 = pd.read_csv("tmp.csv")
# CSV stores no dtype information: Age was written as strings but is parsed
# back as integers by read_csv.
df1.info()
print(df1)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
Name    2 non-null object
Age     2 non-null object
Sex     2 non-null object
dtypes: object(3)
memory usage: 128.0+ bytes
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
Name    2 non-null object
Sex     2 non-null object
Age     2 non-null int64
dtypes: int64(1), object(2)
memory usage: 128.0+ bytes
None
    Name Sex  Age
0  Smith   男   25
1   Lucy   女   20


### 5.1.6  讀寫PKL文件

In [8]:
import pandas as pd

df = pd.DataFrame({'Name': ['Smith', 'Lucy'], 'Age': ['25', '20'], 'Sex': ['男','女']})
# info() prints its summary itself and returns None; calling it directly
# avoids the extra "None" line that print(df.info()) produces.
df.info()
df.to_pickle("tmp.pkl")

# Reload the frame from the pickle file and show its summary for comparison.
df1 = pd.read_pickle("tmp.pkl")
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
Name    2 non-null object
Age     2 non-null object
Sex     2 non-null object
dtypes: object(3)
memory usage: 128.0+ bytes
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
Name    2 non-null object
Age     2 non-null object
Sex     2 non-null object
dtypes: object(3)
memory usage: 128.0+ bytes
None


In [9]:
import pickle
data1 = {'a': [1, 2.0, 4+6j],
         'b': ('string1', u'Unicode string'),
         'c': None}
# Pickle is a binary format, hence mode 'wb'. The with-block closes the file
# even if dump() raises.
with open('tmp2.pkl', 'wb') as output:
    pickle.dump(data1, output)

# NOTE: only unpickle files you trust; pickle.load() can run arbitrary code
# embedded in the file. Here the file was written by this cell.
with open('tmp2.pkl', 'rb') as pkl_file:
    data2 = pickle.load(pkl_file)
print(data2)

{'a': [1, 2.0, (4+6j)], 'b': ('string1', 'Unicode string'), 'c': None}


In [10]:
from sklearn import svm
from sklearn import datasets
# sklearn.externals.joblib is no longer shipped with current scikit-learn
# releases. Prefer the standalone joblib package and fall back to the bundled
# copy only on old installations that still provide it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Train a support vector classifier on the iris data set.
clf = svm.SVC()
iris = datasets.load_iris()
clf.fit(iris.data, iris.target)
# Persist the fitted model to disk.
joblib.dump(clf, "tmp3.pkl")

# Load the model back and predict on the first two samples.
clf1 = joblib.load("tmp3.pkl")
print(clf1.predict(iris.data[:2]))

[0 0]




### 5.1.7  讀寫HDF5文件

In [11]:
import h5py
import numpy as np

# Create the file and store two datasets; the with-block closes the file
# even if an assignment raises.
with h5py.File('tmp.h5', 'w') as f:
    f['data'] = np.zeros((3, 3))
    f['labels'] = np.array([1, 2, 3, 4, 5])

# Reopen read-only and show every dataset's name, shape and contents.
with h5py.File('tmp.h5', 'r') as f:
    for key in f.keys():
        dataset = f[key]
        print(dataset.name)
        print(dataset.shape)
        # Dataset.value is not available in current h5py releases; indexing
        # with an empty tuple reads the whole dataset instead.
        print(dataset[()])

/data
(3, 3)
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
/labels
(5,)
[1 2 3 4 5]




### 5.1.8  讀寫Excel文件

In [12]:
import pandas as pd
import openpyxl

# Build a small frame and write it to an Excel workbook. The index is written
# too, which is why it reads back as an extra "Unnamed: 0" column.
df = pd.DataFrame({'Name': ['Smith', 'Lucy'], 'Age': ['25', '20'], 'Sex': ['男','女']})
df.to_excel("tmp.xlsx")

# Read the workbook back through pandas.
df1 = pd.read_excel("tmp.xlsx")
print(df1)

# Open the same workbook with openpyxl and walk its sheets cell by cell.
wb = openpyxl.load_workbook('tmp.xlsx')
sheets = wb.sheetnames
print(sheets)
for sheet_name in sheets:
    sheet = wb[sheet_name]
    print('title', sheet.title)
    # NOTE(review): the recorded output shows a 3x3 window, so the 0 lower
    # bounds appear to be treated as the first row/column - confirm.
    window = sheet.iter_cols(min_row=0, min_col=0, max_row=3, max_col=3)
    # Column-major traversal: every cell of a column before the next column.
    for cell_value in (cell.value for col in window for cell in col):
        print(cell_value)

   Unnamed: 0   Name  Age Sex
0           0  Smith   25   男
1           1   Lucy   20   女
['Sheet1']
title Sheet1
None
0
1
Name
Smith
Lucy
Age
25
20
