# Binary Data Formats

In [1]:
import pandas as pd
import numpy as np

In [2]:
frame = pd.read_csv('ex1.csv')

In [3]:
frame

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [4]:
frame.to_pickle('frame_pickle')

In [5]:
pd.read_pickle('frame_pickle')

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


## Using HDF5 Format

In [6]:
frame = pd.DataFrame({'a':np.random.randn(100)}) # Generating data using DataFrame.

In [7]:
store = pd.HDFStore('mydata.h5') # Storing data in HDF (Hierarchical Data Format) Binary Format.

In [8]:
store['obj1'] = frame

In [10]:
store['obj1_col'] = frame['a']

In [11]:
store

<class 'pandas.io.pytables.HDFStore'>
File path: mydata.h5

In [12]:
store['obj1']

Unnamed: 0,a
0,-1.457861
1,-1.302492
2,-0.454604
3,-1.126438
4,-1.249617
...,...
95,-1.140481
96,0.523685
97,0.739555
98,0.617244


In [14]:
store.put('obj2', frame, format='table')

In [15]:
store.select('obj2', where=['index >= 10 and index <= 15'])

Unnamed: 0,a
10,0.726952
11,-0.017941
12,-1.424542
13,-0.379131
14,-0.734328
15,-1.61907


In [16]:
frame.to_hdf('mydata.h5', 'obj3', format='table')

In [17]:
pd.read_hdf('mydata.h5', 'obj3', where=['index < 5'])

ValueError: The file 'mydata.h5' is already opened, but not in read-only mode (as requested).

## Reading Microsoft Excel files

In [21]:
xlsx = pd.ExcelFile('ex1.xlsx')

In [22]:
pd.read_excel(xlsx, 'Sheet1')

Unnamed: 0.1,Unnamed: 0,a,b,c,d,message
0,0,1,2,3,4,hello
1,1,5,6,7,8,world
2,2,9,10,11,12,fooo


In [23]:
frame = pd.read_excel('ex1.xlsx', 'Sheet1')

In [24]:
frame

Unnamed: 0.1,Unnamed: 0,a,b,c,d,message
0,0,1,2,3,4,hello
1,1,5,6,7,8,world
2,2,9,10,11,12,fooo


In [25]:
frame.to_excel('ex2.xlsx')