In [None]:
%load_ext autoreload
%autoreload 2
import torch
from src.model.neural_network import BERTRegressor, predict
from src.model.data_loading import get_data_loader_from_dataset
from src.config import config, MODEL_CONFIG
import numpy as np
import pandas as pd
import time
import plotly.graph_objects as go
import plotly.express as px
from transformers import BertTokenizerFast
import logging

In [None]:
# Settings
# Batch size for the full training data loader built in the next cell.
batch_size = 16

# Column names taken from the project model configuration.
input_col_name = MODEL_CONFIG.input_col_name
target_col_name = MODEL_CONFIG.target_col_name

# Hugging Face model identifier; used for the tokenizer here and for the
# regressor constructed further down.
bert_model_name = MODEL_CONFIG.transformer_hugface_id
tokenizer = BertTokenizerFast.from_pretrained(bert_model_name)

In [None]:
# Read the merged dataset from the parquet path given in the project config.
dataset = pd.read_parquet(config.data.merged)

# Release unused cached GPU memory before building the loader.
torch.cuda.empty_cache()

# Loader requested for the "training" split; shuffle=False is forwarded through
# data_loader_kwargs so batches follow the dataset order.
train_loader_options = {
    "dataset": dataset,
    "split": "training",
    "batch_size": batch_size,
    "label_col": target_col_name,
    "data_loader_kwargs": {"shuffle": False},
}
train_dataloader = get_data_loader_from_dataset(**train_loader_options)

In [None]:
# Choose the device before loading so the checkpoint can be mapped onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BERTRegressor(bert_model_name)
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# saved on a GPU still loads on a CPU-only machine.
state_dict = torch.load("data/model", map_location=device)
model.load_state_dict(state_dict)
# Put the weights on the same device that is handed to predict() in later cells.
# NOTE(review): this call used to be commented out — confirm predict() does not
# rely on the model staying on the CPU.
model.to(device)
model.eval()

if device.type == "cuda":
    print("Using GPU.")
else:
    print("No GPU available, using the CPU instead.")

In [None]:
# Test
# Smoke-test predict() on the first 20 rows flagged as "training", in batches of 10.
is_training_row = dataset["split"] == "training"
train_sample = dataset[is_training_row].head(20)

sample_data_loader = get_data_loader_from_dataset(
    dataset=train_sample,
    split="training",
    batch_size=10,
    label_col=target_col_name,
    data_loader_kwargs={"shuffle": False},
)
predict(model, sample_data_loader, device)

In [None]:
# Time one full forecasting pass over `train_dataloader`.
# perf_counter() is the clock intended for measuring elapsed intervals; unlike
# time.time() it is not affected by system clock adjustments.
start = time.perf_counter()

prediction = predict(model, train_dataloader, device)

# The forecasts are written to every row of `dataset`, so there must be exactly
# one per row. Checking here gives a clear message if the loader covered only
# part of the dataset (it was requested for the "training" split).
if len(prediction) != len(dataset):
    raise ValueError(
        f"predict() returned {len(prediction)} values for {len(dataset)} dataset rows; "
        "the forecasts cannot be aligned with the dataset."
    )
dataset.loc[:, "fcst"] = prediction

end = time.perf_counter()
print(f"{end-start:.2f}s")

# Regression Task Performance

In [None]:
def _metrics_table(y_hat, y):
    """Run get_metrics on forecasts `y_hat` and targets `y`; return its four values as a one-row DataFrame."""
    mae, rw_mae, TP, TN = get_metrics(y_hat, y)
    return pd.DataFrame.from_dict(dict(mae=[mae], mae_rw=[rw_mae], TP=[TP], TN=[TN]))


# NOTE(review): get_metrics is neither imported nor defined anywhere in this
# notebook — import it in the first cell before running this section.

# `y_pred_scaled` was never assigned, so this cell failed on a fresh kernel.
# It is taken from the "fcst" column written by the prediction cell above.
# NOTE(review): the name suggests a rescaling step may once have existed — confirm
# that the raw forecasts are what should be evaluated here.
y_pred_scaled = dataset["fcst"].to_numpy()
test_labels = dataset.loc[:, MODEL_CONFIG.target_col_name].tolist()

print(f"Length of evaluation set: {len(y_pred_scaled)}")
print("Vanilla results:")
y_hat = y_pred_scaled
y = np.array(test_labels)
assert len(y_hat) == len(y)

metrics_df = _metrics_table(y_hat, y)
print(metrics_df)


# Only forecasts whose absolute value reaches this margin are kept in the
# masked evaluation below.
PRED_MARGIN = 0.02
pred_margin_mask = np.abs(y_pred_scaled) >= PRED_MARGIN

print(f"\nWith prediction margin mask:")
y_hat = y_pred_scaled[pred_margin_mask]
y = np.array(test_labels)[pred_margin_mask]
print(f"\nLength of prediction margin masked evaluation set: {len(y_hat)}")
metrics_df = _metrics_table(y_hat, y)
print(metrics_df)



In [None]:
##############
# Import stocks
# NOTE(review): unpickling can execute arbitrary code — only load this file
# from a trusted source.
stocks_pickled = pd.read_pickle("data/stocks.pkl")
# Move the index into regular column(s) so it can be used in queries later.
stocks = stocks_pickled.reset_index()
# TODO: Do same transformations as import in asset_data_preprocessor


# Analysis of a single forecast

In [None]:
# `test_dat` was never assigned, so this cell (and the trading-performance cells
# that follow) failed on a fresh kernel. The margin mask has one entry per row
# of `dataset`, so the evaluation frame is built from `dataset`, exposing the
# "fcst" column under the "Fcst" name that the analysis cells read.
# NOTE(review): assumes `dataset` is the intended evaluation frame — confirm.
test_dat = dataset.rename(columns={"fcst": "Fcst"})

idx = 11  # position, among the margin-masked rows, of the forecast to inspect
tmp = test_dat.loc[pred_margin_mask]
row = tmp.iloc[idx, :]
print(row)

# First 750 characters of the row's `body` field.
print(row.body[:750])
pr_time, ticker, fcst = row[["Date", "ID", "Fcst"]]
# Up to 30 stock rows for the same ID, dated on or after the row's Date.
df = stocks.query("(Date >= @pr_time) & (ID == @ticker)").head(30)
candles = go.Candlestick(
    x=df['Date'],
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
)
fig = go.Figure(data=[candles])
fig.update_layout(
    title=f"{ticker}: prices from {pr_time}",
    xaxis_rangeslider_visible=False,
)
fig.show()

In [None]:
# Print the selected row again (the same `row` chosen in the previous cell).
print(row)

# Trading Performance

In [None]:
# Restrict to the margin-masked rows, then drop every row that has a missing value.
margin_rows = test_dat.loc[pred_margin_mask]
tmp = margin_rows.dropna()

In [None]:
# Preview the first five rows of the filtered frame.
tmp.head(n=5)

In [None]:
# Sign of each forecast (-1, 0 or +1) multiplied by the row's CloseToCloseReturn.
forecast_sign = np.sign(tmp["Fcst"])
trades = forecast_sign * tmp["CloseToCloseReturn"]

In [None]:
# Average of the signed returns computed in the previous cell.
trades.mean()

In [None]:
# Scatter plot of the filtered rows: target column on x, forecast on y.
forecast_vs_target = px.scatter(
    tmp,
    x=MODEL_CONFIG.target_col_name,
    y="Fcst",
)
forecast_vs_target