In [233]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import math
from datetime import datetime, timedelta

# --- VIX history, indexed by the parsed DATE column ---------------------
vix = pd.read_csv('data/VIX_History.csv')
vix = vix.assign(DATEi=pd.to_datetime(vix['DATE'])).set_index('DATEi')
# Trailing 60-row mean of the VIX close (NaN until 60 rows are available).
vix_rolling = vix['CLOSE'].rolling(60).mean()

# --- Initial jobless claims, keyed by the raw DATE strings from the CSV --
claims = pd.read_csv('data/Initial_Jobless_Claims.csv').set_index('DATE')

# --- SPY prices, keyed by the raw Date strings, plus close-to-close change
spy = pd.read_csv('data/SPY.csv').set_index('Date')
spy['Change'] = spy['Close'].pct_change()

# --- Pickled object that later cells index by its keys ------------------
# NOTE(review): pickle.load can execute arbitrary code; only load this file
# if it comes from a trusted source.
with open('data/R2_STD_IVS_DFW.pkl', 'rb') as ivs_file:
    standard_ivs = pickle.load(ivs_file)

In [234]:
# Feature columns of the exogenous-variable table, in output order.
columns = ['log_spy_change', '22_IV_AVG', '5_IV_AVG', '1_IV_AVG', 'jobless_claims', 'day_of_week', 'day_of_month', 'SPY_VOL', 'VIX_level', '60_VIX_AVG']
# One row per key of standard_ivs; every feature starts out as NaN and is
# filled in by later cells.
lstm_input = pd.DataFrame(np.nan, index=list(standard_ivs.keys()), columns=columns)

## Build the Daily Jobless Claims Series

In [235]:
def date_range(start_date_str, end_date_str=None):
    """Return every calendar day from start to end (inclusive) as strings.

    Args:
        start_date_str: First day of the range, formatted ``%Y-%m-%d``.
        end_date_str: Optional last day of the range, formatted ``%Y-%m-%d``.
            When omitted, the range runs up to ``datetime.now()`` (so today is
            included), which makes the result depend on when the cell is run.
            Pass an explicit end date for a reproducible calendar.

    Returns:
        A list of ``%Y-%m-%d`` strings, one per day, in ascending order.
        The list is empty when the start lies after the end.

    Raises:
        ValueError: If a supplied date string does not match ``%Y-%m-%d``.
    """
    first_day = datetime.strptime(start_date_str, "%Y-%m-%d")
    if end_date_str is None:
        last_day = datetime.now()
    else:
        last_day = datetime.strptime(end_date_str, "%Y-%m-%d")

    # timedelta.days floors toward negative infinity, so a start after the end
    # gives a count <= 0 and therefore an empty range.
    n_days = (last_day - first_day).days + 1
    return [
        (first_day + timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(n_days)
    ]

start_date = "2010-02-01"
# Daily calendar (as '%Y-%m-%d' strings) from start_date up to today.
claims_full = pd.DataFrame(index=date_range(start_date))

# Place each ICSA value on its matching calendar day, then carry it forward so
# every following day holds the most recent value until the next one appears.
# Days before the first matching claims date stay NaN.
# This replaces the chained assignment `claims_full.loc[date]['Claims'] = ...`,
# which writes to an intermediate object and never reaches claims_full under
# pandas Copy-on-Write.
# NOTE(review): reindex requires unique labels in claims.index - confirm the
# CSV has one row per DATE.
claims_full['Claims'] = claims['ICSA'].reindex(claims_full.index).ffill()

In [236]:
# Collapse each stored entry to a single number via .mean().mean()
# (presumably each entry is a DataFrame, giving the mean of its column means
# - TODO confirm against the pickle's contents).
avgs = [surface.mean().mean() for surface in standard_ivs.values()]

# The per-entry level plus its trailing 5- and 22-row means; min_periods=1
# lets the first rows average over however many rows exist so far.
iv_level = pd.Series(avgs, index=lstm_input.index)
ivs_avgs = pd.DataFrame({
    '1_IV_AVG': iv_level,
    '5_IV_AVG': iv_level.rolling(5, min_periods=1).mean(),
    '22_IV_AVG': iv_level.rolling(22, min_periods=1).mean(),
})

### Fill DataFrame

In [237]:
# Rows of lstm_input that also appear in the SPY index; only these receive the
# market/calendar features, exactly as the per-date membership test did. Rows
# without an SPY match keep NaN in those columns.
matched = lstm_input.index[lstm_input.index.isin(spy.index)]
# vix and vix_rolling are indexed by parsed timestamps, while spy, claims_full
# and lstm_input are indexed by date strings, so keep both key forms.
stamps = pd.to_datetime(matched)

# Selecting with a list of labels via .loc raises KeyError if any label is
# missing, so a date absent from the VIX or claims data still fails loudly
# rather than silently becoming NaN.
# np.log1p is the vectorised form of math.log1p; a change <= -1 now yields
# -inf/NaN with a RuntimeWarning instead of raising ValueError.
lstm_input.loc[matched, 'log_spy_change'] = np.log1p(spy.loc[matched, 'Change'].to_numpy(dtype=float))
lstm_input.loc[matched, 'VIX_level'] = vix.loc[stamps, 'CLOSE'].to_numpy()
lstm_input.loc[matched, 'SPY_VOL'] = spy.loc[matched, 'Volume'].to_numpy(dtype=float)
# Day of month comes from the parsed date rather than the last two characters
# of the string; the two agree for 'YYYY-MM-DD' labels.
lstm_input.loc[matched, 'day_of_month'] = stamps.day.to_numpy(dtype=float)
# weekday: Monday == 0 ... Sunday == 6.
lstm_input.loc[matched, 'day_of_week'] = stamps.weekday.to_numpy(dtype=float)
lstm_input.loc[matched, 'jobless_claims'] = claims_full.loc[matched, 'Claims'].to_numpy()
lstm_input.loc[matched, '60_VIX_AVG'] = vix_rolling.loc[stamps].to_numpy()

# The IV averages share lstm_input's index, so plain index-aligned column
# assignment is enough.
for iv_col in ('1_IV_AVG', '5_IV_AVG', '22_IV_AVG'):
    lstm_input[iv_col] = ivs_avgs[iv_col]

In [260]:
# Scratch check: take an independent snapshot of the assembled frame and
# display how many rows it has.
test = lstm_input.copy(deep=True)
len(test.index)

3409

In [239]:
# Persist the assembled exogenous-variable table (index included) as CSV.
exogenous_csv_path = 'data/Exogenous_Variables.csv'
lstm_input.to_csv(exogenous_csv_path)