# IO Tools

In [1]:
import pandas as pd

In [9]:
df = pd.read_csv('FBI-CRIME11.csv')

In [3]:
print(df.head())

         Year  Population  Violent Crime  Violent Crime Rate  \
0  2013-12-31   316128839        1163146               367.9   
1  2012-12-31   313873685        1217057               387.8   
2  2011-12-31   311587816        1206005               387.1   
3  2010-12-31   309330219        1251248               404.5   
4  2009-12-31   307006550        1325896               431.9   

   Murder and nonegligent manslaughter  \
0                                14196   
1                                14856   
2                                14661   
3                                14722   
4                                15399   

   Murder and nonegligent manslaughter rate  Rape (legacy definition)  \
0                                       4.5                     79770   
1                                       4.7                     85141   
2                                       4.7                     84175   
3                                       4.8                     85593 

In [4]:
df.to_csv('newFBI-Crime11CSV') # output df as csv

In [5]:
df['Violent Crime Rate'].to_csv('ViolentCrimeRate') # csv of only one column

In [10]:
df.set_index('Year', inplace = True) # NO NEED TO DO df = df.set_index('Year')
print(df.head())

            Population  Violent Crime  Violent Crime Rate  \
Year                                                        
2013-12-31   316128839        1163146               367.9   
2012-12-31   313873685        1217057               387.8   
2011-12-31   311587816        1206005               387.1   
2010-12-31   309330219        1251248               404.5   
2009-12-31   307006550        1325896               431.9   

            Murder and nonegligent manslaughter  \
Year                                              
2013-12-31                                14196   
2012-12-31                                14856   
2011-12-31                                14661   
2010-12-31                                14722   
2009-12-31                                15399   

            Murder and nonegligent manslaughter rate  \
Year                                                   
2013-12-31                                       4.5   
2012-12-31                                    

## csv IO Operations

In [12]:
df = pd.read_csv('ViolentCrimeRate')
print(df.head())

   0  367.9
0  1  387.8
1  2  387.1
2  3  404.5
3  4  431.9
4  5  458.6


In [13]:
df = pd.read_csv('ViolentCrimeRate', index_col = 0)
print(df.head())

   367.9
0       
1  387.8
2  387.1
3  404.5
4  431.9
5  458.6


In [15]:
df = pd.read_csv('ViolentCrimeRate', names = ['Date', 'Violent Crime Rate']
                 , index_col = 0)
print(df.head())

      Violent Crime Rate
Date                    
0                  367.9
1                  387.8
2                  387.1
3                  404.5
4                  431.9


In [16]:
df.to_csv('ViolentCrimeRate')

In [17]:
df.to_csv('ViolentCrimeRateWithoutHeaders', headers = False)

## ```read_hdf``` and ```to_hdf```

In [18]:
df = pd.read_csv('ViolentCrimeRateWithoutHeaders'
                , names = ['Date', 'Violent_Crime_Rate']
                , index_col = 0)

print(df.head())

      Violent_Crime_Rate
Date                    
Date  Violent Crime Rate
0                  367.9
1                  387.8
2                  387.1
3                  404.5


In [19]:
store = pd.HDFStore('hdfstore.h5') # create a HDF
print(store)

<class 'pandas.io.pytables.HDFStore'>
File path: hdfstore.h5
Empty


In [21]:
#HDF can have multiple tables
store.put('d1', df, format = 'table', data_columns = True) #d1 is 1 table

print(store['d1'].shape)
store.close()

(19, 1)


In [22]:
print(store)
store.close()

<class 'pandas.io.pytables.HDFStore'>
File path: hdfstore.h5
File is CLOSED


In [23]:
hdf = pd.read_hdf('hdfstore.h5', 'd1') # d1 is the table to load
print(hdf)

      Violent_Crime_Rate
Date                    
Date  Violent Crime Rate
0                  367.9
1                  387.8
2                  387.1
3                  404.5
4                  431.9
5                  458.6
6                  471.8
7                  479.3
8                  469.0
9                  463.2
10                 475.8
11                 494.4
12                 504.5
13                 506.5
14                 523.0
15                 567.6
16                 611.0
17                 636.6


## ```read_json``` and ```to_json```

In [24]:
df = pd.read_hdf('hdfstore.h5', 'd1')
print(df.head())

      Violent_Crime_Rate
Date                    
Date  Violent Crime Rate
0                  367.9
1                  387.8
2                  387.1
3                  404.5


In [25]:
df.to_json('exampleJSON')

In [26]:
df = pd.read_json('exampleJSON')
print(df.head())

   Violent_Crime_Rate
0               367.9
1               387.8
10              475.8
11              494.4
12              504.5


In [30]:
import urllib

In [40]:
depth_json = urllib.urlopen('https://btc-e.com/api/3/depth/btc_usd')

In [41]:
depth_df = pd.read_json(depth_json)
print(depth_json)

<addinfourl at 210964936L whose fp = <socket._fileobject object at 0x000000000C6DEF48>>


In [42]:
print(depth_df)

                                                btc_usd
asks  [[432.141, 0.011038], [432.237, 0.0396626], [4...
bids  [[431.947, 0.1903374], [431.946, 0.011045], [4...


## ```read_pickle``` and ```to_pickle```

This is serialization in Python. Can be used to save any object in Python.

In [43]:
depth_df.to_pickle('pickle')

In [44]:
newdf = pd.read_pickle('pickle')

In [45]:
print(newdf)

                                                btc_usd
asks  [[432.141, 0.011038], [432.237, 0.0396626], [4...
bids  [[431.947, 0.1903374], [431.946, 0.011045], [4...


Pickling can be done without pandas.

In [46]:
import pickle

In [47]:
pickle_out = open('newdf.pickle', 'wb') #create a new pickle file
pickle.dump(newdf, pickle_out) #dump ANY object in Python to pickle file!!!!
pickle_out.close()

In [48]:
pickle_in = open('newdf.pickle', 'rb')
super_cool = pickle.load(pickle_in)

In [49]:
print(super_cool)
print(super_cool.head())

                                                btc_usd
asks  [[432.141, 0.011038], [432.237, 0.0396626], [4...
bids  [[431.947, 0.1903374], [431.946, 0.011045], [4...
                                                btc_usd
asks  [[432.141, 0.011038], [432.237, 0.0396626], [4...
bids  [[431.947, 0.1903374], [431.946, 0.011045], [4...
