In [359]:
from baseline.main_model import *

In [360]:
# Show every float with four decimal places when frames/series are displayed.
pd.set_option('display.float_format', '{:.4f}'.format)

In [361]:
# Load the VIX price history; add_vix_data() reads its 'Date' and 'vix' columns.
vix = pd.read_csv('vix_prices.csv')

In [362]:
import copy

In [363]:
# Input files and the symbol / day under evaluation.
intraday_file = 'fandi_intraday_data.csv'
daily_file = 'fandi_daily.csv'
ticker = 'GOOGL'
date = '2021-12-08'
# Keep the ISO-format string under its own name: `date` is later rebound to a
# datetime.date, while the string form is still used to filter the raw CSV.
# Strings are immutable, so a plain assignment is sufficient (no deepcopy).
test_date = date

In [364]:
def add_vix_data(daily_data, vix):
    '''Attach the prior row's VIX value to each row of the daily data.

    Parameters
    ----------
    daily_data : DataFrame with a 'DATE' column used as the merge key.
    vix : DataFrame with a 'Date' column in YYYYMMDD form and a 'vix' column.
        It is NOT modified; the function works on a private copy.

    Returns
    -------
    A new DataFrame: `daily_data` left-joined with the shifted 'vix' column.
    Days without a matching VIX row get NaN.
    '''
    # Work on a copy. Parsing and shifting the caller's frame in place meant
    # that a second call shifted the VIX series by one more day.
    vix = vix.copy()
    vix['Date'] = pd.to_datetime(vix['Date'], format='%Y%m%d')
    # VIX values are close-of-day prices, so shift by one row to associate each
    # date with the VIX of the day prior.
    # NOTE(review): this assumes `vix` is sorted by ascending date -- confirm.
    vix['vix'] = vix['vix'].shift()

    merged = daily_data.merge(vix, left_on='DATE', right_on='Date', how='left')
    # 'Date' duplicates the 'DATE' merge key.
    return merged.drop(columns=['Date'])

def agg_intraday_daily(intraday_data, daily_data):
    '''Aggregate intraday and daily data for a specific symbol.

    Joins each intraday row with its day's 'total_vol_m', 'overnight_gap',
    'daily_vol_pct' and 'vix', keeps the 09:31-15:59 window, and adds '%_vol',
    the row's 'size' as a fraction of that day's total volume.

    Parameters
    ----------
    intraday_data : DataFrame with 'date', 'symbol', 'datetime' and 'size'.
    daily_data : DataFrame with 'DATE', 'symbol', 'total_vol_m',
        'overnight_gap' and 'vix'.

    Returns
    -------
    DataFrame indexed by timestamp; rows with any missing value are dropped.
    '''
    # eliminate_half_days is defined outside this file; presumably it removes
    # shortened trading sessions from both frames -- TODO confirm.
    intraday_data, daily_data = eliminate_half_days(intraday_data, daily_data)
    # Percentile rank of each day's total volume within the sample.
    daily_data['daily_vol_pct'] = daily_data['total_vol_m'].rank(pct=True)

    daily_cols = ['DATE', 'symbol', 'total_vol_m', 'overnight_gap', 'daily_vol_pct', 'vix']
    df = intraday_data.merge(daily_data[daily_cols], how='left',
                             left_on=['date', 'symbol'], right_on=['DATE', 'symbol'])
    df.index = pd.DatetimeIndex(df['datetime'])
    df['time'] = df.index.time
    df = df.drop(columns=['DATE', 'datetime'])

    # between_time returns a slice of df. Copy it so that adding '%_vol' is a
    # write to an independent frame rather than a chained assignment
    # (SettingWithCopyWarning).
    df = df.between_time('09:31:00', '15:59:00').copy()
    df['%_vol'] = df['size'] / df['total_vol_m']

    return df.dropna()

def regress_volume(data):
    '''Fit one linear regression per minute of the day.

    For every distinct value of data['time'], the per-day '%_vol' at that
    minute is regressed on the day's 'overnight_gap', 'daily_vol_pct' and
    'vix'. Minutes missing on a given day count as 0.

    Returns a DataFrame indexed by time with the slopes in 'b1', 'b2', 'b3'
    (in the feature order above) and the intercept in 'b0'.
    '''
    minutes = data['time'].unique()

    # One feature row per calendar day (first observation of that day).
    features = data.groupby(data.index.date)[['overnight_gap', 'daily_vol_pct', 'vix']].first()
    # Days x minutes matrix of volume shares. Rows are matched to `features`
    # by position, so both are assumed to list the same days in the same order.
    targets = data.pivot(index = 'date', columns='time', values='%_vol').fillna(0)

    slopes = []
    intercepts = []
    for minute in minutes:
        model = LinearRegression()
        model.fit(features, targets[minute])
        slopes.append(model.coef_)
        intercepts.append(model.intercept_)

    coef = pd.DataFrame(slopes, columns = [ 'b1' , 'b2', 'b3'])
    coef['b0'] = intercepts
    coef.index = minutes

    return coef

def predict_intraday(coef, historical, estimated_daily, overnight_gap):
    '''Predicts intraday volume for given date.

    Parameters
    ----------
    coef : DataFrame with one row per minute and columns 'b0'..'b3'.
        It is NOT modified; the predictions are added to a copy.
    historical : DataFrame with 'total_vol_m' and 'vix' columns. The last row's
        'vix' is used as the VIX input for the prediction.
    estimated_daily : mapping-like; estimated_daily['Daily'] is the estimated
        total volume for the day.
    overnight_gap : scalar overnight gap for the day being predicted.

    Returns
    -------
    A copy of `coef` with three added columns:
    'est_%_vol' (raw predicted share of daily volume),
    'est_%_vol_smooth' (centred 5-row rolling mean of the raw share), and
    'est_vol' (predicted volume, rounded to a whole number).
    '''
    est_daily_vol = estimated_daily['Daily']
    # Percentile of the estimated daily volume within the historical sample (0-1).
    vol_pct = stats.percentileofscore(historical['total_vol_m'], est_daily_vol) / 100
    # Positional access: `historical` may carry a DatetimeIndex, where a bare
    # [-1] depends on the deprecated integer-as-position fallback.
    # NOTE(review): if 'vix' was already shifted by one day upstream, this is
    # the VIX close from two sessions before the predicted day -- confirm.
    vix = historical['vix'].iloc[-1]

    pred = coef.copy()
    pred['est_%_vol'] = (pred['b0'] + overnight_gap * pred['b1']
                         + vol_pct * pred['b2'] + vix * pred['b3'])
    # Window of 5 shifted back by 2 rows => centred on the current minute.
    pred['est_%_vol_smooth'] = pred['est_%_vol'].rolling(5).mean().shift(-2)

    smoothed_vol = (pred['est_%_vol_smooth'] * est_daily_vol).round()
    raw_vol = (pred['est_%_vol'] * est_daily_vol).round()
    # est_vol for the first 2 and last 2 minutes is not smoothed (the centred
    # window is incomplete there), so fall back to the raw estimate.
    pred['est_vol'] = smoothed_vol.fillna(raw_vol)

    return pred

In [365]:
# Parse the evaluation day from `test_date`, which always holds the ISO string.
# Parsing `date` itself made this cell fail on a re-run, because this line
# rebinds `date` to a datetime.date.
date = datetime.strptime(test_date, '%Y-%m-%d').date()
# clean_data is defined outside this file; it returns the daily frame, the
# intraday frame and the overnight gap for the requested ticker and date.
daily_data, intraday_data, overnight_gap = clean_data(intraday_file, daily_file, ticker, date)
daily_data = add_vix_data(daily_data, vix)
historical = agg_intraday_daily(intraday_data, daily_data)

In [366]:
# DailyModel is defined outside this file; predict_intraday reads its result
# via ['Daily'] as the estimated total volume for the day.
estimated_daily = DailyModel(daily_data)
# Per-minute regression coefficients fitted on the historical sample.
coef = regress_volume(historical)
# Per-minute volume forecast for the evaluation day.
estimated_intraday = predict_intraday(coef, historical, estimated_daily, overnight_gap)

In [367]:
# Drop rows containing NaN: the centred 5-row rolling mean in predict_intraday
# leaves 'est_%_vol_smooth' undefined for the first two and last two minutes,
# so those minutes are excluded from the evaluation.
estimated_intraday.dropna(inplace=True)

In [368]:
# Reload the raw intraday file; the realised volumes are taken from it below.
GOOG = pd.read_csv(intraday_file)

In [369]:
# Select the realised rows for root 'GOOG' / suffix 'L' on the test day.
# Take an explicit copy: columns are added to `google` afterwards, and writing
# into a filtered slice of GOOG triggers pandas' SettingWithCopyWarning.
google = GOOG[
    (GOOG['sym_root'] == 'GOOG')
    & (GOOG['sym_suffix'] == 'L')
    & (GOOG['date'] == str(test_date))
].copy()

In [370]:
# Build a full timestamp from the separate 'date' and 'time' columns.
time_of_day = pd.to_timedelta(google['time'])
trade_day = pd.to_datetime(google['date']) #format='%Y%m%d')
google['TIME_M'] = time_of_day
google['date'] = trade_day
google['datetime'] = trade_day + time_of_day
# Index by timestamp and keep 09:33-15:57, i.e. the minutes that still have a
# smoothed estimate after the first/last two minutes are dropped.
google = google.set_index('datetime').between_time('9:33:00', '15:57:00')

In [371]:
def find_metrics(est_data, real_data):
    '''Return the mean absolute percentage error of the volume forecast.

    Compares est_data['est_vol'] with real_data['size'] row by row. The two
    columns are matched by position (raw arrays), not by index, so both
    frames must cover the same minutes in the same order.
    '''
    actual = real_data['size'].values
    predicted = est_data['est_vol'].values
    # Absolute error expressed as a fraction of the realised volume.
    rel_err = np.abs(predicted - actual) / actual
    return np.sum(rel_err) / len(rel_err)

In [372]:
# Mean absolute percentage error of the forecast against the realised volumes.
find_metrics(estimated_intraday, google)

1.9823819102913185

In [373]:
# Show which day the evaluation was run for.
print(test_date)

2021-12-08


TODO: re-check the results for 2021-12-08 later.