In [62]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import baseline.main_model as baseline
from datetime import datetime
# Route DataFrame.plot / Series.plot through plotly, so the `.plot` calls further down
# return plotly figures that accept `update_layout(...)` and `show()`.
pd.options.plotting.backend = "plotly"

In [115]:
# Load the four NFLX result tables. The generator reads the files in the listed order and
# the tuple unpacking binds them to the same names the rest of the notebook uses.
new_errors, baseline_errors, predictions, error_improvement = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/volatility_results/errors/errors_NFLX.csv',
        'data/volatility_results/baseline_errors/baseline_errors_NFLX.csv',
        'data/volatility_results/predictions/predictions_NFLX.csv',
        'data/volatility_results/error_improvement/error_improvement_NFLX.csv',
    )
)

In [116]:
# Remove the 2021-11-26 column from every result table.
# NOTE(review): the reason this day is excluded is not stated in the notebook - confirm and
# record it here.
#
# The drop is written as a non-mutating reassignment with errors='ignore' so the cell is
# idempotent: the original in-place drop raised KeyError when the cell was run a second
# time, because the column was already gone.
new_errors = new_errors.drop(columns=['2021-11-26'], errors='ignore')
baseline_errors = baseline_errors.drop(columns=['2021-11-26'], errors='ignore')
predictions = predictions.drop(columns=['2021-11-26'], errors='ignore')
error_improvement = error_improvement.drop(columns=['2021-11-26'], errors='ignore')

In [117]:
# Symbol and trading day analysed in the rest of the notebook.
ticker = 'NFLX'
date = '2021-12-02'

# CSV inputs handed to the baseline model.
intraday_file = 'data/russell/Russell_NFLX.csv'
daily_file = 'data/russell/russell_daily.csv'

# Baseline model output for that symbol/day; a later cell plots prediction['intraday'].
prediction = baseline.main(intraday_file, daily_file, ticker, date)

In [118]:
# Column-wise means: one average per column (the columns are labelled by date, e.g.
# '2021-12-02').
# numeric_only=True keeps any non-numeric column (such as a textual 'time' column) out of
# the average explicitly. Older pandas dropped such columns silently, while pandas >= 2.0
# raises TypeError on them, so being explicit keeps the cell working on both.
improvement_avg = error_improvement.mean(numeric_only=True)
new_avg = new_errors.mean(numeric_only=True)
baseline_avg = baseline_errors.mean(numeric_only=True)


In [119]:
# Fraction of the per-column average improvements that are strictly positive.
positive_improvement = improvement_avg.loc[improvement_avg > 0]
len(positive_improvement) / len(improvement_avg)

0.5060240963855421

In [12]:
# Time axis for the plot. The original cell read it from `old_errors`, which is never
# assigned anywhere in this notebook, so the cell failed with NameError on a fresh kernel.
# NOTE(review): assumes the errors table loaded into `new_errors` carries the same 'time'
# column as the old-model table did - confirm against errors_NFLX.csv.
time = new_errors['time']

fig = go.Figure()

# Single line: the Price Volatility Model error values in the 2021-12-02 column.
fig.add_trace(go.Scatter(x=time, y=new_errors['2021-12-02'],
                    mode='lines',
                    name='Price Volatility Model'))
fig.update_layout(title={
        'text': "Price Volatility Model Errors for NFLX on 2021-12-02",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        xaxis_title="Time (in Minutes)",
        yaxis_title="Error Metric")
fig.show()

In [112]:
# x values come from new_avg's index while y comes from improvement_avg; the two line up
# only if both averaged tables share the same columns in the same order.
dates = new_avg.index

# One line trace: average error improvement per day.
fig = go.Figure(data=[
    go.Scatter(
        x=dates,
        y=improvement_avg,
        mode='lines',
        name='Error Improvement Over Baseline',
    )
])

# Centered title plus axis and legend labels.
fig.update_layout(
    title={
        'text': "Average PVM Error Improvement for NFLX By Day",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis_title="Date (2021-09-01 to 2021-12-31)",
    yaxis_title="Average Baseline Error - PVM Error",
    legend_title="Metric",
)

fig.show()

In [120]:
# Load the intraday file and run it through the baseline module's formatter.
intraday_data = baseline.format_intraday(pd.read_csv(intraday_file))

# Keep only the rows for the selected ticker. The explicit .copy() makes the result an
# independent frame, so the column assignment below does not write into a slice of the
# formatted frame (which triggers pandas' SettingWithCopyWarning).
intraday_data = intraday_data.loc[intraday_data['symbol'] == ticker].copy()

# Index by timestamp so the time-of-day and date filters can be applied.
intraday_data.index = pd.DatetimeIndex(intraday_data['datetime'])
intraday_data['time'] = intraday_data.index.time

# Restrict to the 09:45-15:59 window (between_time includes both endpoints by default).
intraday_data = intraday_data.between_time('09:45:00', '15:59:00')

In [121]:
# Calendar day under study, parsed from the `date` string set earlier.
dt = datetime.strptime(date, '%Y-%m-%d').date()

# 'size' values for the rows whose timestamp falls on that day, selected with a single
# .loc[rows, column] lookup.
on_target_day = intraday_data.index.date == dt
real_vol = intraday_data.loc[on_target_day, 'size']
real_vol

datetime
2021-12-02 09:45:00     12264
2021-12-02 09:46:00      8030
2021-12-02 09:47:00     11769
2021-12-02 09:48:00     17510
2021-12-02 09:49:00     31202
                        ...  
2021-12-02 15:55:00     20221
2021-12-02 15:56:00     18107
2021-12-02 15:57:00     62709
2021-12-02 15:58:00     81052
2021-12-02 15:59:00    180090
Name: size, Length: 375, dtype: int64

In [124]:

# Number of leading rows shown in the "morning" view.
MORNING_MINUTES = 137
# Start position of the plotted window inside prediction['intraday'].
# NOTE(review): presumably this offset aligns the baseline series with the 09:45 start of
# real_vol - confirm against baseline.main's output.
BASELINE_OFFSET = 14

# Time axis for the plot. The original cell read it from `old_errors`, which is never
# assigned anywhere in this notebook, so the cell failed with NameError on a fresh kernel.
# NOTE(review): assumes the errors table loaded into `new_errors` carries the same 'time'
# column as the old-model table did - confirm against errors_NFLX.csv.
time = new_errors['time'][:MORNING_MINUTES]

fig = go.Figure()

# Observed values for the day.
fig.add_trace(go.Scatter(x=time, y=real_vol[:MORNING_MINUTES],
                    mode='lines',
                    name='Actual', line=dict(color='grey')))
# Price Volatility Model predictions from the 2021-12-02 column.
fig.add_trace(go.Scatter(x=time, y=predictions['2021-12-02'][:MORNING_MINUTES],
                    mode='lines',
                    name='Price Volatility Model', line=dict(color='blue')))
# Baseline predictions: a window of the same length, starting at BASELINE_OFFSET.
fig.add_trace(go.Scatter(x=time,
                    y=prediction['intraday'][BASELINE_OFFSET:BASELINE_OFFSET + MORNING_MINUTES],
                    mode='lines',
                    name='Baseline Model', line=dict(color='red')))


fig.update_layout(title={
        'text': "Price Volatility Model and Baseline Prediction for NFLX on 2021-12-02 Morning",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        xaxis_title="Time (in Minutes)",
        yaxis_title="Volume (in Shares)")
fig.show()

In [33]:
# Overall average error per model (mean of the per-column means).
# NOTE(review): `old_avg` is not assigned anywhere in the visible notebook, so this cell
# raises NameError on a fresh kernel; the recorded output relied on leftover kernel state.
# Either restore the cell that loads the old-model errors or drop the "Old Model" part.
print(f"Old Model Average Error: {old_avg.mean()}, New Model Average Error: {new_avg.mean()}, Baseline Average Error: {baseline_avg.mean()}")

Old Model Average Error: 0.7272363767577259, New Model Average Error: 0.9468641533940266, Baseline Average Error: 0.9780078523798703


In [64]:

# NOTE(review): in notebook order `russell_1000` is first assigned in a later cell, so this
# line raises NameError under Restart & Run All. Move it below that cell or remove it.
russell_1000.plot()

In [88]:
# Average error improvement for every Russell 1000 sample ticker with a usable results file.
# `ticker_file` replaces the original handle name `input`, which shadowed the builtin.
with open("data/russell/Russell_1000.txt", 'r') as ticker_file:
    tickers = ticker_file.read().splitlines()

avg_improvement = {}   # ticker -> mean of its per-column mean improvements
skipped_tickers = {}   # ticker -> exception explaining why it was left out

# The loop variables are deliberately not called `ticker` / `error_improvement`: the
# original loop overwrote the NFLX-specific globals of those names used by earlier cells.
for symbol in tickers:
    try:
        # As in the original, a ticker is skipped when its file is missing or unreadable,
        # or when the table has no '2021-11-26' column to drop.
        symbol_improvement = pd.read_csv(
            f"data/volatility_results/error_improvement/error_improvement_{symbol}.csv"
        ).drop(columns=['2021-11-26'])
    except (OSError, KeyError, ValueError) as exc:
        # Best-effort like the original, but the reason is kept for inspection instead of
        # being swallowed by a bare `except: pass`.
        skipped_tickers[symbol] = exc
        continue

    # numeric_only=True keeps a non-numeric column from raising on pandas >= 2.0, where the
    # original bare except would have silently discarded every ticker.
    avg_improvement[symbol] = symbol_improvement.mean(numeric_only=True).mean()

In [92]:
# One row per ticker, with a single named column holding its average improvement.
# Passing `columns=` to from_dict (instead of assigning `.columns` afterwards) also works
# when avg_improvement is empty; the original raised a length-mismatch ValueError there.
russell_1000 = pd.DataFrame.from_dict(
    avg_improvement,
    orient='index',
    columns=['Error Improvement'],
)

In [93]:
# Bar chart of the per-ticker averages; the x categories are ordered by ascending bar total.
# update_layout returns the figure it was called on, so the call can be chained.
fig = russell_1000.plot.bar().update_layout(
    xaxis={'categoryorder':'total ascending'},
    title={
        'text': "Average PVM Error Improvement for Russell 1000 (2021-09-01 to 2021-12-31)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis_title="Tickers for Russell 1000 Sample",
    yaxis_title="Average Baseline Error - PVM Error",
    legend_title="Metric",
)
fig.show()

In [113]:
# Fraction of Russell 1000 sample tickers whose average improvement is strictly positive.
improved_tickers = russell_1000.loc[russell_1000['Error Improvement'] > 0]
len(improved_tickers) / len(russell_1000)

0.8541666666666666

In [74]:
# Average error improvement for every Russell 2000 sample ticker with a usable results file.
# `ticker_file` replaces the original handle name `input`, which shadowed the builtin.
with open("data/russell/Russell_2000.txt", 'r') as ticker_file:
    tickers = ticker_file.read().splitlines()

avg_improvement = {}   # ticker -> mean of its per-column mean improvements
skipped_tickers = {}   # ticker -> exception explaining why it was left out

# The loop variables are deliberately not called `ticker` / `error_improvement`: the
# original loop overwrote the NFLX-specific globals of those names used by earlier cells.
for symbol in tickers:
    try:
        # As in the original, a ticker is skipped when its file is missing or unreadable,
        # or when the table has no '2021-11-26' column to drop.
        symbol_improvement = pd.read_csv(
            f"data/volatility_results/error_improvement/error_improvement_{symbol}.csv"
        ).drop(columns=['2021-11-26'])
    except (OSError, KeyError, ValueError) as exc:
        # Best-effort like the original, but the reason is kept for inspection instead of
        # being swallowed by a bare `except: pass`.
        skipped_tickers[symbol] = exc
        continue

    # numeric_only=True keeps a non-numeric column from raising on pandas >= 2.0, where the
    # original bare except would have silently discarded every ticker.
    avg_improvement[symbol] = symbol_improvement.mean(numeric_only=True).mean()

# One row per ticker. The column is named so the chart legend reads 'Error Improvement'
# rather than the default label 0; `columns=` also works for an empty dict.
russell_2000 = pd.DataFrame.from_dict(
    avg_improvement,
    orient='index',
    columns=['Error Improvement'],
)

In [123]:
# Bar chart of the per-ticker averages; the x categories are ordered by ascending bar total.
# update_layout returns the figure it was called on, so the call can be chained.
fig = russell_2000.plot.bar().update_layout(
    xaxis={'categoryorder':'total ascending'},
    title={
        'text': "Average PVM Error Improvement for Russell 2000 (2021-09-01 to 2021-12-31)",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis_title="Tickers for Russell 2000 Sample",
    yaxis_title="Average Baseline Error - PVM Error",
    legend_title="Metric",
)
fig.show()