# Data Loading, Storage, and File Formats

## 6.2 Binary Data Formats

In [1]:
import pandas as pd
# Parse the example CSV into a DataFrame and display it as the cell result.
csv_path = 'files/ex1.csv'
frame = pd.read_csv(csv_path)
frame

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [2]:
# Serialize `frame` to disk in pickle format (top-level function form of
# DataFrame.to_pickle).
pd.to_pickle(frame, 'files/frame_pickle')

In [3]:
# Load the object pickled to 'files/frame_pickle' and display it.
# Unpickling can execute arbitrary code, so only read pickle files that come
# from a trusted source.
frame_restored = pd.read_pickle('files/frame_pickle')
frame_restored

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


#### Using HDF5 Format

In [4]:
import numpy as np

# Build a 100-row, single-column ('a') frame of standard-normal draws.
# No seed is set in this cell, so the values differ from run to run.
frame = pd.DataFrame(np.random.randn(100), columns=['a'])
frame

Unnamed: 0,a
0,0.005513
1,0.981831
2,0.786094
3,-1.733928
4,-0.997789
...,...
95,-1.656546
96,-0.365152
97,1.240032
98,-0.156561


In [5]:
# Open the HDF5 file as a store; mode='a' (append, creating the file if it is
# missing) is the default, written out here for clarity.
store = pd.HDFStore('files/mydata.h5', mode='a')
store

<class 'pandas.io.pytables.HDFStore'>
File path: files/mydata.h5

In [6]:
# Save the whole DataFrame under the key 'obj1' (same as item assignment on
# the store), then display the store.
store.put('obj1', frame)
store

<class 'pandas.io.pytables.HDFStore'>
File path: files/mydata.h5

In [7]:
# Save just column 'a' of the frame under the key 'obj1_col' (same as item
# assignment on the store), then display the store.
store.put('obj1_col', frame['a'])
store

<class 'pandas.io.pytables.HDFStore'>
File path: files/mydata.h5

In [8]:
# Read back the object stored under 'obj1' (same as indexing the store by key).
store.get('obj1')

Unnamed: 0,a
0,0.005513
1,0.981831
2,0.786094
3,-1.733928
4,-0.997789
...,...
95,-1.656546
96,-0.365152
97,1.240032
98,-0.156561


In [9]:
# Store the frame under 'obj2' using the 'table' format; the where-query on
# 'obj2' in the following cell is run against this entry.
store.put(key='obj2', value=frame, format='table')

In [10]:
# Query 'obj2' for the rows whose index lies in the closed range 10..15.
store.select(key='obj2', where=['index >= 10 and index <= 15'])

Unnamed: 0,a
10,0.245359
11,-0.617043
12,-1.093939
13,0.390815
14,0.94927
15,-0.451925


In [11]:
# Close the store that was opened on 'files/mydata.h5' before the file is
# accessed again through to_hdf / read_hdf.
store.close()

In [12]:
# Write `frame` into the HDF5 file under key 'obj3' in table format.
# `key` is passed by keyword: newer pandas releases deprecate positional
# arguments to to_hdf other than the path.
frame.to_hdf('files/mydata.h5', key='obj3', format='table')

# Read back only the rows whose index is below 5 and display them.
pd.read_hdf('files/mydata.h5', key='obj3', where=['index < 5'])

Unnamed: 0,a
0,0.005513
1,0.981831
2,0.786094
3,-1.733928
4,-0.997789


#### Reading Microsoft Excel Files

In [13]:
# Open the workbook once as an ExcelFile so it can be handed to read_excel.
# NOTE(review): nothing visible here closes `xlsx` — confirm whether a later
# cell does, or close it once the sheets have been read.
xlsx = pd.ExcelFile('files/ex1.xlsx')

In [14]:
# Read the sheet named 'Sheet1' from the already-opened workbook and display it.
# NOTE(review): in the output shown for this cell the column labels look like
# data values (e.g. '24-11221-0470303'), which suggests the sheet has no header
# row; if so, header=None would keep that first row as data — confirm against
# the workbook.
pd.read_excel(xlsx, sheet_name='Sheet1')

Unnamed: 0,24-11221-0470303,A-1b(03)T,1
0,24-11221-1000502,A-1b(03)T,2
1,24-11221-1002500,B2b (12)T,3
2,24-11221-1002600,B2b (12)T,4
3,24-11221-1002602,B2b (12)T,5
4,24-11221-1002604,A-1b(03)T,6
5,24-11221-1002700,A-1b(03)T,7
6,24-11221-1002701,A-1b(03)T,8
7,24-11221-1003000,B2b (12)T,9
8,24-11221-1005102,A-1b(03)T,10
9,24-11221-1005104,A-2c(06)T,11


In [15]:
# Read 'Sheet1' straight from the file path (no explicit ExcelFile object),
# rebinding `frame` to the result, and display it.
frame = pd.read_excel('files/ex1.xlsx', sheet_name='Sheet1')
frame

Unnamed: 0,24-11221-0470303,A-1b(03)T,1
0,24-11221-1000502,A-1b(03)T,2
1,24-11221-1002500,B2b (12)T,3
2,24-11221-1002600,B2b (12)T,4
3,24-11221-1002602,B2b (12)T,5
4,24-11221-1002604,A-1b(03)T,6
5,24-11221-1002700,A-1b(03)T,7
6,24-11221-1002701,A-1b(03)T,8
7,24-11221-1003000,B2b (12)T,9
8,24-11221-1005102,A-1b(03)T,10
9,24-11221-1005104,A-2c(06)T,11


In [16]:
# Write `frame` to 'Sheet1' of a new workbook through an explicit ExcelWriter.
# The context manager saves and closes the workbook on exit, replacing
# writer.save(), which was deprecated in pandas 1.5 and removed in 2.0.
with pd.ExcelWriter('files/ex2.xlsx') as writer:
    frame.to_excel(writer, sheet_name='Sheet1')

# Shortcut: passing a path lets to_excel create and close the writer itself.
frame.to_excel('files/ex3.xlsx')