In [12]:
import torch
from util import MyBertModel
from neural_net import validation_dataloader, test_dat
from util import predict
import numpy as np
import pandas as pd
import time
import plotly.graph_objects as go
import plotly.express as px

In [2]:
model = MyBertModel()

#Later to restore:
model.load_state_dict(torch.load("data/model"))
model.eval()

if torch.cuda.is_available():       
    device = torch.device("cuda")
    print("Using GPU.")
else:
    print("No GPU available, using the CPU instead.")
    device = torch.device("cpu")
model.to(device)


start = time.time()
y_pred_scaled = predict(model, validation_dataloader, device)


test_dat.loc[:, "Fcst"] = y_pred_scaled

end = time.time()
print(f"{start-end:.2f}s")


def get_metrics(y, y_hat):
    mae = np.abs(y_hat - y).mean()
    rw_mae =  (np.abs(y)).mean()
    TP = ((y_hat > 0)  & (y > 0)).mean()
    TN = ((y_hat < 0)  & (y < 0)).mean()
    return mae, rw_mae, TP, TN



Some weights of the model checkpoint at yiyanghkust/finbert-fls were not used when initializing BertModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using GPU.
-11.45s


# Regression Task Performance

In [14]:
test_labels = test_dat.IntradayReturn.tolist()

print(f"Length of evaluation set: {len(y_pred_scaled)}")
print("Vanilla results:")
y_hat = y_pred_scaled
y = np.array(test_labels)
assert len(y_hat) == len(y)

mae, rw_mae, TP, TN = get_metrics(y_hat, y)
metrics_dict = dict(mae=[mae], mae_rw=[rw_mae], TP=[TP], TN=[TN])
metrics_df = pd.DataFrame.from_dict(metrics_dict)
print(metrics_df)


pred_margin_mask = np.abs(y_pred_scaled) >= 0.01

print(f"\nWith prediction margin mask:")
y_hat = y_pred_scaled[pred_margin_mask]
y = np.array(test_labels)[pred_margin_mask]
print(f"\nLength of prediction margin masked evaluation set: {len(y_hat)}")
mae, rw_mae, TP, TN = get_metrics(y_hat, y)
metrics_dict = dict(mae=[mae], mae_rw=[rw_mae], TP=[TP], TN=[TN])
metrics_df = pd.DataFrame.from_dict(metrics_dict)
print(metrics_df)



Length of evaluation set: 401
Vanilla results:
        mae    mae_rw        TP        TN
0  0.036809  0.006905  0.394015  0.109726

With prediction margin mask:

Length of prediction margin masked evaluation set: 99
       mae    mae_rw        TP        TN
0  0.03675  0.013505  0.414141  0.070707


In [27]:
##############
# Import stocks
stocks = pd.read_pickle("data/stocks.pkl").reset_index()
# TODO: Do same transformations as import in asset_data_preprocessor


# Analysis of single forecast: 

In [28]:
idx = -5
tmp = test_dat.loc[pred_margin_mask]
row = tmp.iloc[idx, :]
print(row.ID)
print(row.Fcst)
print(row.body[:750])
pr_time, ticker, fcst = row[["Date", "ID", "Fcst"]]
df = stocks.query("(Date >= @pr_time) & (ID == @ticker)").head(30)
fig = go.Figure(data=[go.Candlestick(x=df['Date'],
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'])])
fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()

UNF
0.018685750663280487
The Board of Directors
of the company  today declared regular quarterly cash
dividends of $0.310 per share (31.0 cents) on the Company's Common Stock and
$0.248 per share (24.8 cents) on the Company's Class B Common Stock. Both
dividends are payable a future date, to shareholders of record on June 8,
2023. Headquartered in Wilmington, Mass., the company ( _NYSE:_ _UNF_) is a
North American leader in the supply and servicing of uniform and workwear
programs, as well as the delivery of facility service programs. Together with
its subsidiaries, the company also provides first aid and safety products, and
manages specialized garment programs for the cleanroom and nuclear industries. the companymanufactures its own branded workwear, protective c


In [29]:
print(row)

Date                                             2023-04-04 00:00:00
NewsTimestamp                              2023-04-04 12:47:54-04:00
ID                                                               UNF
body               The Board of Directors\nof the company  today ...
IntradayReturn                                             -0.016864
NextDayReturn                                              -0.001963
CloseToNextOpen                                            -0.009195
Fcst                                                        0.018686
Name: 1427, dtype: object


# Trading Performance

In [30]:
tmp = test_dat.loc[pred_margin_mask].dropna()

In [31]:
tmp.head()

Unnamed: 0,Date,NewsTimestamp,ID,body,IntradayReturn,NextDayReturn,CloseToNextOpen,Fcst
1279,2023-04-25,2023-04-25 08:02:00-04:00,ARLO,"the company\n, a leading smart home security b...",-0.03858,0.022472,0.0,0.018512
695,2023-05-04,2023-05-04 16:28:00-04:00,CNS,"The Board of Directors of Cohen &\nSteers, Inc...",-0.031774,-0.003075,0.019174,0.012459
1943,2023-02-28,2023-02-28 17:15:00-04:00,AHH,Armada Hoffler\nannounced that its Board of Di...,-0.002336,0.004695,-0.002342,-0.013514
437,2023-05-10,2023-05-10 07:00:00-04:00,DIBS,"1stdibs.com, Inc. , a\nleading online marketpl...",-0.068966,-0.014742,0.004938,0.010673
275,2023-05-15,2023-05-15 07:05:00-04:00,FULC,"―_ _Appointed Alex C. Sapir CEO & President, e...",0.067449,0.049724,-0.005495,0.011637


In [32]:
trades = np.sign(tmp["Fcst"])*tmp["CloseToNextOpen"]

In [33]:
trades.mean()

0.00010389511016940422

In [34]:
px.scatter(tmp, x="IntradayReturn", y="Fcst")