In [12]:
import torch
from util import MyBertModel
from neural_net import validation_dataloader, test_dat
from util import predict
import numpy as np
import pandas as pd
import time
import plotly.graph_objects as go

In [13]:
model = MyBertModel()

#Later to restore:
model.load_state_dict(torch.load("data/model"))
model.eval()

if torch.cuda.is_available():       
    device = torch.device("cuda")
    print("Using GPU.")
else:
    print("No GPU available, using the CPU instead.")
    device = torch.device("cpu")
model.to(device)


start = time.time()
y_pred_scaled = predict(model, validation_dataloader, device)


test_dat.loc[:, "Fcst"] = y_pred_scaled

end = time.time()
print(f"{start-end:.2f}s")


def get_metrics(y, y_hat):
    mae = np.abs(y_hat - y).mean()
    rw_mae =  (np.abs(y)).mean()
    TP = ((y_hat > 0)  & (y > 0)).mean()
    TN = ((y_hat < 0)  & (y < 0)).mean()
    return mae, rw_mae, TP, TN



Some weights of the model checkpoint at yiyanghkust/finbert-fls were not used when initializing BertModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using GPU.
-3.91s


# Regression Task Performance

In [14]:
test_labels = test_dat.IntradayReturn.tolist()

print(f"Length of evaluation set: {len(y_pred_scaled)}")
print("Vanilla results:")
y_hat = y_pred_scaled
y = np.array(test_labels)
assert len(y_hat) == len(y)

mae, rw_mae, TP, TN = get_metrics(y_hat, y)
metrics_dict = dict(mae=[mae], mae_rw=[rw_mae], TP=[TP], TN=[TN])
metrics_df = pd.DataFrame.from_dict(metrics_dict)
print(metrics_df)


pred_margin_mask = np.abs(y_pred_scaled) >= 0.02

print(f"\nWith prediction margin mask:")
y_hat = y_pred_scaled[pred_margin_mask]
y = np.array(test_labels)[pred_margin_mask]
print(f"\nLength of prediction margin masked evaluation set: {len(y_hat)}")
mae, rw_mae, TP, TN = get_metrics(y_hat, y)
metrics_dict = dict(mae=[mae], mae_rw=[rw_mae], TP=[TP], TN=[TN])
metrics_df = pd.DataFrame.from_dict(metrics_dict)
print(metrics_df)



Length of evaluation set: 163
Vanilla results:
        mae    mae_rw        TP        TN
0  0.036149  0.019821  0.190184  0.282209

With prediction margin mask:

Length of prediction margin masked evaluation set: 65
        mae    mae_rw        TP        TN
0  0.036051  0.035247  0.261538  0.353846


In [15]:
##############
# Import stocks
stocks = pd.read_csv("data/stocks.csv")
stocks.loc[:, "Date"] = pd.to_datetime(stocks.Date)
# TODO: Do same transformations as import in asset_data_preprocessor


# Analysis of single forecast: 

In [18]:
idx = -5
tmp = test_dat.loc[pred_margin_mask]
row = tmp.iloc[idx, :]
print(row.ID)
print(row.Fcst)
print(row.body[:750])
pr_time, ticker, fcst = row[["Date", "ID", "Fcst"]]
df = stocks.query("(Date >= @pr_time) & (ID == @ticker)").head(30)
fig = go.Figure(data=[go.Candlestick(x=df['Date'],
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'])])
fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()

TUP
-0.04875996336340904
Attorney Advertising -- Bronstein,
Gewirtz & Grossman, LLC notifies investors that a class action lawsuit has
been filed against the company ("Tupperware" or the
"Company") and certain of its officers, on behalf of all persons and
entities that purchased, or otherwise acquired Tupperware securities between
March 10, 2021 and a future date, inclusive (the "Class Period"). This class action seeks to recover damages against Defendants for alleged
violations of the federal securities laws. The complaint alleges that defendants throughout the Class Period made false
and/or misleading statements and/or failed to disclose, among other things,
that: (1) Tupperware did not disclose its serious issues with internal
controls; (2) Tupperware's financia


In [19]:
print(row)

Date                                             2023-05-15 00:00:00
NewsTimestamp                              2023-05-15 10:00:00-04:00
ID                                                               TUP
body               Attorney Advertising -- Bronstein,\nGewirtz & ...
IntradayReturn                                             -0.013699
NextDayReturn                                               0.042877
CloseToNextOpen                                             0.013889
Fcst                                                        -0.04876
Name: 719, dtype: object


# Trading Performance

In [25]:
tmp = test_dat.loc[pred_margin_mask].dropna()

In [27]:
tmp.head()

Unnamed: 0,Date,NewsTimestamp,ID,body,IntradayReturn,NextDayReturn,CloseToNextOpen,Fcst
28,2023-06-01,2023-06-01 16:45:00-04:00,WCC,The Board of Directors of Wesco\nInternational...,-0.002561,0.046024,0.025094,-0.043387
52,2023-06-01,2023-06-01 16:00:00-04:00,CYTK,"Cytokinetics,\nIncorporated today announced t...",0.003171,0.001815,0.015806,-0.099908
54,2023-06-01,2023-06-01 14:00:29-04:00,DZSI,_Investors can_ _contact_ _the law firm at no ...,-0.090476,-0.108247,0.015707,0.026276
73,2023-06-01,2023-06-01 08:30:00-04:00,LNTH,"the company\n(""Lantheus"") , a company committe...",0.02074,-0.008609,0.004041,-0.020228
76,2023-06-01,2023-06-01 08:00:00-04:00,COLL,"the company,\nInc. , a leading, diversified sp...",0.015909,-0.036526,0.004474,0.027758


In [28]:
trades = np.sign(tmp["Fcst"])*tmp["CloseToNextOpen"]

In [31]:
trades.mean()

-0.0035877356593284346