In [None]:
%load_ext autoreload
%autoreload 2
import torch
from src.model.neural_network import BERTClassifier, predict
from src.model.data_loading import get_data_loader_from_dataset
from src.config import config, MODEL_CONFIG
import numpy as np
import pandas as pd
import time
import plotly.graph_objects as go
import plotly.express as px
from lightning import Trainer

In [None]:
# Settings
# NOTE(review): BATCH_SIZE is not referenced by any visible cell; the data
# module is built with a hard-coded batch_size=2. Confirm which value is meant.
BATCH_SIZE = 16
# Name of the target column, read from the project's model configuration.
target_col_name = MODEL_CONFIG.target_col_name

In [None]:
# Load the merged dataset and build the data module used for inference.
# Imported through the `src.` package prefix, like the other project imports in
# this notebook, so the same module is not loaded under two different names.
from src.model.data_loading import CustomDataModule


# Merged frame; later cells attach the forecasts to it and read the targets from it.
dataset = pd.read_parquet(config.data.merged)

# Release cached, currently unused GPU memory before the data module is created.
torch.cuda.empty_cache()
dm = CustomDataModule(
    news_data_path=config.data.learning_dataset,
    input_ids_path=config.data.benzinga.input_ids,
    masks_path=config.data.benzinga.masks,
    # NOTE(review): hard-coded; BATCH_SIZE from the settings cell is not used here — confirm.
    batch_size=2,
    target_col_name=target_col_name,
)

In [None]:
# Restore the trained classifier from a saved checkpoint.
# NOTE(review): the checkpoint path is a placeholder; point it at a real file
# (preferably via `config`) before running.
model = BERTClassifier.load_from_checkpoint("/path/to/checkpoint.ckpt")

# disable randomness, dropout, etc...
model.eval()

# Forward pass with gradient tracking switched off.
# NOTE(review): `data_loader` is not defined in any visible cell (only `dm` is),
# and the next cell reads `predictions`, not `y_hat`. This cell presumably
# depends on kernel state from removed or re-ordered cells — confirm and repair.
with torch.no_grad():
    y_hat = model(data_loader)

In [None]:
# Attach the forecasts to the dataset and report how long the assignment took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()

# NOTE(review): `predictions` is not defined in any visible cell. Later cells
# read a column named "Fcst" (capital F) from `test_dat`, whereas this writes
# "fcst" on `dataset` — confirm the intended frame and column name.
dataset.loc[:, "fcst"] = predictions

end = time.perf_counter()
print(f"{end-start:.2f}s")

# Regression Task Performance

In [None]:
# Threshold on |forecast| that defines the "prediction margin" subset below.
PRED_MARGIN = 0.02


def report_metrics(y_hat, y):
    """Compute the metrics for one forecast/target pair, print and return them.

    Args:
        y_hat: Forecast values.
        y: Target values, aligned element-wise with ``y_hat``.

    Returns:
        One-row DataFrame with the columns ``mae``, ``mae_rw``, ``TP`` and
        ``TN``, filled from the four values returned by ``get_metrics``.
    """
    # NOTE(review): `get_metrics` is neither imported nor defined in any
    # visible cell — confirm where it comes from.
    mae, rw_mae, TP, TN = get_metrics(y_hat, y)
    metrics_df = pd.DataFrame(
        {"mae": [mae], "mae_rw": [rw_mae], "TP": [TP], "TN": [TN]}
    )
    print(metrics_df)
    return metrics_df


test_labels = dataset.loc[:, MODEL_CONFIG.target_col_name].tolist()
# Build the target array once; it is used for both the full and the masked run.
y_true = np.array(test_labels)

# NOTE(review): `y_pred_scaled` is not defined in any visible cell.
print(f"Length of evaluation set: {len(y_pred_scaled)}")
print("Vanilla results:")
y_hat = y_pred_scaled
y = y_true
assert len(y_hat) == len(y), "forecasts and targets must have the same length"
metrics_df = report_metrics(y_hat, y)

# Keep only forecasts whose absolute value reaches the margin; the mask is
# reused by later cells to filter `test_dat`.
pred_margin_mask = np.abs(y_pred_scaled) >= PRED_MARGIN

print("\nWith prediction margin mask:")
y_hat = y_pred_scaled[pred_margin_mask]
y = y_true[pred_margin_mask]
print(f"\nLength of prediction margin masked evaluation set: {len(y_hat)}")
metrics_df = report_metrics(y_hat, y)



In [None]:
##############
# Import stocks
# NOTE(review): this path is hard-coded and relative to the working directory,
# while the other inputs in this notebook are read from `config`.
# pd.read_pickle can execute arbitrary code while loading, so only use it on a
# trusted file. reset_index() moves the stored index into regular columns
# (presumably the Date / ID columns queried later — confirm).
stocks = pd.read_pickle("data/stocks.pkl").reset_index()
# TODO: Do same transformations as import in asset_data_preprocessor


# Analysis of a single forecast

In [None]:
# Inspect one forecast from the margin-filtered rows: print the row and the
# start of its text body, then plot the price candles from its date onward.
idx = 11
# NOTE(review): `test_dat` is not defined in any visible cell — confirm its origin.
margin_rows = test_dat.loc[pred_margin_mask]
row = margin_rows.iloc[idx, :]
print(row)

# Only the first 750 characters of the body are printed to keep the output short.
print(row.body[:750])
pr_time, ticker, fcst = row[["Date", "ID", "Fcst"]]
# First 30 price rows for this ticker, starting at the forecast date.
price_window = stocks.query("(Date >= @pr_time) & (ID == @ticker)").head(30)
fig = go.Figure(
    data=[
        go.Candlestick(
            x=price_window["Date"],
            open=price_window["Open"],
            high=price_window["High"],
            low=price_window["Low"],
            close=price_window["Close"],
        )
    ]
)
# Title and axis labels let the figure stand alone; the forecast value is shown
# next to the price path it refers to.
fig.update_layout(
    title=f"{ticker} from {pr_time} (forecast: {fcst:.4f})",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_rangeslider_visible=False,
)
fig.show()

In [None]:
# Show the selected row again (the previous cell already prints it once).
print(row)

# Trading Performance

In [None]:
# Margin-filtered rows with every row containing a missing value removed.
# NOTE(review): `test_dat` is not defined in any visible cell — confirm its origin.
tmp = test_dat.loc[pred_margin_mask].dropna()

In [None]:
# Preview the first rows of the filtered frame.
tmp.head()

In [None]:
# Per-row trade result: the sign of the forecast multiplied element-wise by the
# close-to-close return (presumably +1 = long, -1 = short — confirm).
forecast_direction = np.sign(tmp["Fcst"])
trades = forecast_direction * tmp["CloseToCloseReturn"]

In [None]:
# Average of the per-row trade results computed above.
trades.mean()

In [None]:
# Forecast plotted against the target column for the margin-filtered rows;
# the title makes the figure readable on its own.
px.scatter(
    tmp,
    x=MODEL_CONFIG.target_col_name,
    y="Fcst",
    title="Forecast vs. target (margin-filtered rows)",
)