In [6]:
import torch
from util import MyBertModel
from neural_net import validation_dataloader, test_dat
from util import predict
import numpy as np
import pandas as pd
import time
import plotly.graph_objects as go
import plotly.express as px
from config import TARGET_COL_NAME

In [7]:
model = MyBertModel()

#Later to restore:
model.load_state_dict(torch.load("data/model"))
model.eval()

if torch.cuda.is_available():       
    device = torch.device("cuda")
    print("Using GPU.")
else:
    print("No GPU available, using the CPU instead.")
    device = torch.device("cpu")
model.to(device)


start = time.time()
y_pred_scaled = predict(model, validation_dataloader, device)


test_dat.loc[:, "Fcst"] = y_pred_scaled

end = time.time()
print(f"{start-end:.2f}s")


def get_metrics(y, y_hat):
    mae = np.abs(y_hat - y).mean()
    rw_mae =  (np.abs(y)).mean()
    TP = ((y_hat > 0)  & (y > 0)).mean()
    TN = ((y_hat < 0)  & (y < 0)).mean()
    return mae, rw_mae, TP, TN



Some weights of the model checkpoint at yiyanghkust/finbert-fls were not used when initializing BertModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using GPU.
-9.81s


# Regression Task Performance

In [40]:
test_labels = test_dat.loc[:, TARGET_COL_NAME].tolist()

print(f"Length of evaluation set: {len(y_pred_scaled)}")
print("Vanilla results:")
y_hat = y_pred_scaled
y = np.array(test_labels)
assert len(y_hat) == len(y)

mae, rw_mae, TP, TN = get_metrics(y_hat, y)
metrics_dict = dict(mae=[mae], mae_rw=[rw_mae], TP=[TP], TN=[TN])
metrics_df = pd.DataFrame.from_dict(metrics_dict)
print(metrics_df)


pred_margin_mask = np.abs(y_pred_scaled) >= 0.02

print(f"\nWith prediction margin mask:")
y_hat = y_pred_scaled[pred_margin_mask]
y = np.array(test_labels)[pred_margin_mask]
print(f"\nLength of prediction margin masked evaluation set: {len(y_hat)}")
mae, rw_mae, TP, TN = get_metrics(y_hat, y)
metrics_dict = dict(mae=[mae], mae_rw=[rw_mae], TP=[TP], TN=[TN])
metrics_df = pd.DataFrame.from_dict(metrics_dict)
print(metrics_df)



Length of evaluation set: 401
Vanilla results:
        mae    mae_rw        TP       TN
0  0.067704  0.014372  0.354115  0.17207

With prediction margin mask:

Length of prediction margin masked evaluation set: 105
        mae    mae_rw        TP        TN
0  0.075422  0.027408  0.419048  0.161905


In [41]:
##############
# Import stocks
stocks = pd.read_pickle("data/stocks.pkl").reset_index()
# TODO: Do same transformations as import in asset_data_preprocessor


# Analysis of single forecast: 

In [59]:
idx = 11
tmp = test_dat.loc[pred_margin_mask]
row = tmp.iloc[idx, :]
print(row)
# print(f"Fcst: {row.Fcst}")
# print(f"Target: {row.IntradayReturn}")

print(row.body[:750])
pr_time, ticker, fcst = row[["Date", "ID", "Fcst"]]
df = stocks.query("(Date >= @pr_time) & (ID == @ticker)").head(30)
fig = go.Figure(data=[go.Candlestick(x=df['Date'],
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'])])
fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()

Date                                                2023-05-09 00:00:00
NewsTimestamp                                 2023-05-09 06:30:00-04:00
ID                                                                 SEAS
body                  the company , a leading theme park and enterta...
CloseToCloseReturn                                             0.057239
Fcst                                                           0.020265
Name: 566, dtype: object
the company , a leading theme park and entertainment company, today reported its financial results for the first quarter of 2023. First Quarter 2023 Highlights * Attendance was 3.4 million guests, a decrease of approximately 25,000 guests from the first quarter of 2022. * Total revenue was a record $293.3 million, an increase of $22.7 million or 8.4% from the first quarter of 2022. * Net loss was $16.5 million, the second smallest net loss in the first quarter and an increase of $7.5 million from the first quarter of 2022. * Adjusted EBI

In [43]:
print(row)

Date                                                2023-05-09 00:00:00
NewsTimestamp                                 2023-05-09 08:00:00-04:00
ID                                                                 NKLA
body                  Reprioritizing and refocusing the company on N...
CloseToCloseReturn                                            -0.130369
Fcst                                                           0.021123
Name: 533, dtype: object


# Trading Performance

In [44]:
tmp = test_dat.loc[pred_margin_mask].dropna()

In [45]:
tmp.head()

Unnamed: 0,Date,NewsTimestamp,ID,body,CloseToCloseReturn,Fcst
1197,2023-04-27,2023-04-26 16:05:21-04:00,SPSC,Company delivers 89th consecutive quarter of t...,0.033088,0.028737
1927,2023-03-01,2023-02-28 19:24:48-04:00,EOSE,"the company , a leading provider of safe, scal...",-0.196347,0.029226
1900,2023-03-02,2023-03-01 17:02:00-04:00,ORGO,"the company , a leading regenerative medicine ...",0.192982,0.037298
1833,2023-03-03,2023-03-03 09:00:00-04:00,SPTN,Food solutions company SpartanNash today annou...,0.030203,0.031267
1403,2023-04-11,2023-04-11 08:00:00-04:00,ERNA,"the company , a life science company committed...",-0.035265,-0.028694


In [46]:
trades = np.sign(tmp["Fcst"])*tmp["CloseToCloseReturn"]

In [47]:
trades.mean()

0.019887575222824257

In [48]:
px.scatter(tmp, x=TARGET_COL_NAME, y="Fcst")