# Store US Stock Prices in HDF5 Format

## Imports

In [1]:
import pandas as pd

## Load source data

In [2]:
df = pd.read_csv('us_stocks.csv')
df.head()

Unnamed: 0,ticker,date,open,high,low,close,volume,ex-dividend,split_ratio,adj_open,adj_high,adj_low,adj_close,adj_volume
0,A,1999-11-18,45.5,50.0,40.0,44.0,44739900.0,0.0,1.0,31.041951,34.112034,27.289627,30.01859,44739900.0
1,A,1999-11-19,42.94,43.0,39.81,40.38,10897100.0,0.0,1.0,29.295415,29.33635,27.160002,27.548879,10897100.0
2,A,1999-11-22,41.31,44.0,40.06,44.0,4705200.0,0.0,1.0,28.183363,30.01859,27.330562,30.01859,4705200.0
3,A,1999-11-23,42.5,43.63,40.25,40.25,4274400.0,0.0,1.0,28.995229,29.766161,27.460188,27.460188,4274400.0
4,A,1999-11-24,40.13,41.94,40.0,41.06,3464400.0,0.0,1.0,27.378319,28.613174,27.289627,28.012803,3464400.0


## Apply required transformations

In [3]:
df['date'] = pd.to_datetime(df.iloc[:, 1])
df.set_index(['ticker', 'date'])
df.drop(['open',
         'high',
         'low',
         'close',
         'volume',
         'ex-dividend',
         'split_ratio'
        ], axis=1, inplace = True)
columnDict = {'adj_open': 'open',
              'adj_high': 'high',
              'adj_low': 'low',
              'adj_close': 'close',
              'adj_volume': 'volume'}
df.rename(columns=columnDict, inplace=True)

## Plot data

In [4]:
df.head()

Unnamed: 0,ticker,date,open,high,low,close,volume
0,A,1999-11-18,31.041951,34.112034,27.289627,30.01859,44739900.0
1,A,1999-11-19,29.295415,29.33635,27.160002,27.548879,10897100.0
2,A,1999-11-22,28.183363,30.01859,27.330562,30.01859,4705200.0
3,A,1999-11-23,28.995229,29.766161,27.460188,27.460188,4274400.0
4,A,1999-11-24,27.378319,28.613174,27.289627,28.012803,3464400.0


## Store in HDF5 format

In [5]:
df.to_hdf('us_stocks.h5', key='df', mode='w')