### Load historical SPX index prices from the .csv files provided by Hanweck, then clean and process the data

In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from datetime import datetime

In [2]:
# Root folder for the intraday data, relative to the current working directory.
# Built with os.path.join so the separator matches the host OS instead of
# hard-coding the Windows backslash.
intra_filepath = os.path.join(os.getcwd(), 'Data', 'Intraday')

In [3]:
# Names of the raw SPX files to load.
# os.listdir returns entries in arbitrary order, so sort them: the load order
# decides which row survives the later drop_duplicates on TIMESTAMP.
spx_raw_dir = os.path.join(intra_filepath, 'SPX_raw')
spx_intra_files = np.array(sorted(os.listdir(spx_raw_dir)))
spx_intra_files

array(['SPX with vols greeks hamilton 2015.csv',
       'SPX with vols greeks hamilton 201601 - 201603.csv',
       'SPX with vols greeks hamilton 201604 - 201612.csv'], 
      dtype='<U49')

#### Construct dataframe with SPX Bid/Ask for each timestamp 

In [4]:
# Read the three needed columns from every raw file and stack them into one frame.
# Frames are collected in a list and concatenated once at the end; calling
# pd.concat inside the loop would re-copy all previously loaded rows each time.
spx_frames = []

for file in spx_intra_files:
    print(file)
    filename = os.path.join(intra_filepath, 'SPX_raw', file)
    df = pd.read_csv(filename, usecols=['TIMESTAMP', 'UPRICE_BID', 'UPRICE_ASK'])
    # Vectorised parse with an explicit format. This replaces the per-string
    # strptime lambda passed through `date_parser`, which is deprecated since
    # pandas 2.0.
    df['TIMESTAMP'] = pd.to_datetime(df['TIMESTAMP'], format='%Y-%m-%d %H:%M:%S')
    spx_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# files were found. Each file's own row index is kept (no ignore_index).
spx_intra = pd.concat(spx_frames) if spx_frames else pd.DataFrame()

SPX with vols greeks hamilton 2015.csv
SPX with vols greeks hamilton 201601 - 201603.csv
SPX with vols greeks hamilton 201604 - 201612.csv


In [5]:
# Preview the first five rows of the concatenated frame.
spx_intra.head(n=5)

Unnamed: 0,TIMESTAMP,UPRICE_BID,UPRICE_ASK
0,2015-01-02 09:30:00,0.0,0.0
1,2015-01-02 09:40:00,2072.1499,2072.1499
2,2015-01-02 09:50:00,2068.86011,2068.86011
3,2015-01-02 10:00:00,2068.5,2068.5
4,2015-01-02 10:10:00,2063.32007,2063.32007


In [6]:
# Number of rows in the concatenated frame.
spx_intra.shape[0]

24266720

In [7]:
# Collapse to one row per TIMESTAMP, keeping the first row encountered.
spx_intra = spx_intra.drop_duplicates(subset='TIMESTAMP', keep='first')

In [8]:
# Number of rows left after removing repeated timestamps.
spx_intra.shape[0]

20160

In [9]:
# Preview the first five rows of the de-duplicated frame.
spx_intra.head(n=5)

Unnamed: 0,TIMESTAMP,UPRICE_BID,UPRICE_ASK
0,2015-01-02 09:30:00,0.0,0.0
1,2015-01-02 09:40:00,2072.1499,2072.1499
2,2015-01-02 09:50:00,2068.86011,2068.86011
3,2015-01-02 10:00:00,2068.5,2068.5
4,2015-01-02 10:10:00,2063.32007,2063.32007


In [10]:
# Order rows chronologically; rebinding the name instead of mutating in place.
spx_intra = spx_intra.sort_values(by='TIMESTAMP')

In [11]:
# Mid quote: the arithmetic mean of bid and ask for each row.
# NOTE(review): the displayed head shows a 0.0/0.0 quote at 2015-01-02 09:30:00;
# such rows produce a 0.0 mid and are not filtered here. Confirm that downstream
# code handles them.
spx_intra['UPRICE_MID'] = (
    spx_intra['UPRICE_BID']
    .add(spx_intra['UPRICE_ASK'])
    .div(2)
)

In [12]:
# Preview the last five rows, now including UPRICE_MID.
spx_intra.tail(n=5)

Unnamed: 0,TIMESTAMP,UPRICE_BID,UPRICE_ASK,UPRICE_MID
10859315,2016-12-30 15:20:00,2237.84009,2237.84009,2237.84009
10859316,2016-12-30 15:30:00,2239.12012,2239.12012,2239.12012
10859317,2016-12-30 15:40:00,2238.12988,2238.12988,2238.12988
10859318,2016-12-30 15:50:00,2235.6001,2235.6001,2235.6001
10859319,2016-12-30 16:00:00,2239.61011,2239.61011,2239.61011


In [13]:
# Use TIMESTAMP as the row index; rebinding the name instead of mutating in place.
spx_intra = spx_intra.set_index('TIMESTAMP')

In [15]:
# Preview the last five rows of the TIMESTAMP-indexed frame.
spx_intra.tail(n=5)

Unnamed: 0_level_0,UPRICE_BID,UPRICE_ASK,UPRICE_MID
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-12-30 15:20:00,2237.84009,2237.84009,2237.84009
2016-12-30 15:30:00,2239.12012,2239.12012,2239.12012
2016-12-30 15:40:00,2238.12988,2238.12988,2238.12988
2016-12-30 15:50:00,2235.6001,2235.6001,2235.6001
2016-12-30 16:00:00,2239.61011,2239.61011,2239.61011


#### Save final intraday SPX price dataset (bid, ask, and mid) to pickle (`spx_returns.pkl`)

In [14]:
# Persist the TIMESTAMP-indexed bid/ask/mid frame as a pickle.
# The path is built with os.path.join rather than a hard-coded backslash so the
# separator matches the host OS.
pickle_dir = os.path.join(intra_filepath, 'Pickle')
# Create the output folder if it is missing so a fresh checkout can run this cell.
os.makedirs(pickle_dir, exist_ok=True)
spx_intra.to_pickle(os.path.join(pickle_dir, 'spx_returns.pkl'))