In [None]:
import os
import sys
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

from pathlib import Path
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import mean_squared_error

from data_loader import *

## Settings

In [None]:
# Number of trailing columns of X / y used for training and for testing
# (the data cell slices the last axis with these counts).
num_train, num_test = 60, 20
num_total = num_train + num_test

# window_size is handed to setup_cov_tensors and is also the sequence length
# the model is built for; kernel_size is the width of both Conv1d kernels.
window_size = 10
kernel_size = 5

# One TensorBoard run directory per execution, keyed by the start timestamp.
run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter(log_dir=f"runs/fully_connected/{run_stamp}")


## Data

In [None]:
# Tickers that are both model inputs and prediction targets. Their order fixes
# the row order of y, and therefore the column order of the model output
# (the plotting cell's stock_id indexes into this list).
target_tickers = ["AMZN", "TSLA", "AAPL", "NVDA", "AMGN", "AAL", "LUV", "JNJ", "UNH", "JEPI"]
# Tickers used as inputs only: their targets are never stacked into y.
context_tickers = ["QQQ", "SPY"]
input_tickers = target_tickers + context_tickers

# load_stock comes from data_loader; each call yields an (X, y) pair for one
# ticker. Tickers are loaded once, in the same order as before.
stock_data = {ticker: load_stock(ticker) for ticker in input_tickers}

# Stack inputs ticker-first, then merge the first two axes so that every
# (ticker, row) pair becomes one row of a 2-D array; the last axis is kept.
X = np.stack([stock_data[ticker][0] for ticker in input_tickers])
X = X.reshape((X.shape[0] * X.shape[1], X.shape[-1]))
y = np.stack([stock_data[ticker][1] for ticker in target_tickers])

# Training uses the num_train columns immediately before the final num_test
# columns. The test slice starts window_size columns before the final
# num_test columns (presumably so the first test window has a full history;
# this depends on setup_cov_tensors, TODO confirm).
train_cols = slice(-num_total, -num_test)
test_cols = slice(-num_test - window_size, None)

X_train, y_train = setup_cov_tensors(X[:, train_cols], y[:, train_cols], window_size)
X_test, y_test = setup_cov_tensors(X[:, test_cols], y[:, test_cols], window_size)


In [None]:
# Sanity check: show the tensor shapes produced by setup_cov_tensors.
for label, tensor in (("Input", X_train), ("Out", y_train)):
    print(label, tensor.shape)

## Model

In [None]:
# 1D convolutional predictor (the "FullyConnected" class name is kept so
# existing references keep working).
class FullyConnectedStockReturnPredictor(nn.Module):
    """Two unpadded Conv1d layers with tanh activations and a linear read-out.

    The input is expected as ``(batch, neteork_in_size, sequence_length)``,
    which is the layout ``nn.Conv1d`` consumes; the output is
    ``(batch, neteork_out_size)``.

    Args:
        neteork_in_size: number of input channels of the first convolution.
            (The misspelling is part of the existing signature and is kept
            so keyword callers do not break.)
        neteork_out_size: number of values produced per sample.
        kernel_size: kernel width shared by both convolutions.
        sequence_length: length of the last axis of the input.

    Raises:
        ValueError: if ``sequence_length`` is too short to survive both
            convolutions, i.e. the linear layer would get no input features.
    """

    # Output channels of conv1 and conv2 respectively.
    _HIDDEN_CHANNELS = (30, 10)

    def __init__(self, neteork_in_size, neteork_out_size, kernel_size, sequence_length):
        super().__init__()
        first_channels, second_channels = self._HIDDEN_CHANNELS
        num_conv_layers = 2

        # Each stride-1, unpadded convolution shortens the sequence by
        # kernel_size - 1 positions.
        conv_out_length = sequence_length - num_conv_layers * (kernel_size - 1)
        if conv_out_length < 1:
            raise ValueError(
                f"sequence_length={sequence_length} is too short for "
                f"{num_conv_layers} convolutions with kernel_size={kernel_size}; "
                f"need at least {num_conv_layers * (kernel_size - 1) + 1}"
            )

        # Layers are created in the same order as before so that a seeded
        # initialisation yields the same weights.
        self.conv1 = nn.Conv1d(in_channels=neteork_in_size, out_channels=first_channels, kernel_size=kernel_size)
        self.conv2 = nn.Conv1d(in_channels=first_channels, out_channels=second_channels, kernel_size=kernel_size)
        self.fc1 = nn.Linear(second_channels * conv_out_length, neteork_out_size)

    def forward(self, x):
        """Apply conv -> tanh -> conv -> tanh, flatten, then the linear layer."""
        x = torch.tanh(self.conv1(x))
        x = torch.tanh(self.conv2(x))
        # Collapse (channels, length) into one feature axis per sample.
        x = x.view(x.size(0), -1)
        return self.fc1(x)


## Train

In [None]:
# Seed torch's RNG before the model is built so the initial weights are the
# same on every Restart & Run All.
torch.manual_seed(0)

# Instantiate the model, define loss and optimizer.
# Input channels = number of rows of X, outputs = number of rows of y;
# window_size is the sequence length the conv stack is sized for.
model = FullyConnectedStockReturnPredictor(X.shape[0], y.shape[0], kernel_size, window_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)

def train(epoch):
    """Run one full-batch optimisation step on (X_train, y_train).

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``writer``. On every 500th step (counting from 1) the loss is printed and
    written to TensorBoard under ``train/loss``; other steps are silent.

    Args:
        epoch: zero-based index of the current step.
    """
    model.train()
    optimizer.zero_grad()

    predictions = model(X_train)
    loss = criterion(predictions, y_train)

    loss.backward()
    optimizer.step()

    # Only report on steps 500, 1000, 1500, ...
    if (epoch + 1) % 500 != 0:
        return

    print(f'Epoch {epoch + 1}, Loss: {loss.item():.4f}')
    writer.add_scalar('train/loss', loss.item(), epoch)


def test(X, y):
    """Evaluate the module-level ``model`` on ``X`` and print the MSE against ``y``.

    Args:
        X: input tensor passed straight to the model.
        y: target tensor; it is compared with the squeezed model output.

    Returns:
        A ``(test_outputs, y)`` tuple: the squeezed predictions and the
        targets exactly as they were passed in.
    """
    model.eval()

    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        test_outputs = model(X).squeeze()

    test_loss = mean_squared_error(y.numpy(), test_outputs.numpy())
    print(f'Test Loss (MSE): {test_loss:.4f}')

    return test_outputs, y

In [None]:
# Training loop: 4000 full-batch steps, with a held-out evaluation printed on
# every 100th step (steps 0, 100, 200, ...).
num_epochs = 4000
eval_every = 100

for epoch in range(num_epochs):
    train(epoch)
    if not epoch % eval_every:
        test(X_test, y_test)

# Close the TensorBoard writer once training is done.
writer.close()

## Plot

In [None]:
# Final evaluation on the held-out slice; test() returns (predictions, targets).
pred, obj = test(X=X_test, y=y_test)

In [None]:
# Output column to inspect. The model has one output per row of y, so this
# index follows the order in which the targets were stacked.
stock_id = 9

# Recompute the predictions in this cell. `test_outputs` previously existed
# only as a local inside test(), so reading it here raised NameError on a
# fresh kernel. Recomputing also keeps the cell re-runnable even though it
# rebinds `pred` / `obj` below.
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)

# test() passes predictions and y_test to mean_squared_error together, so
# they share one (sample, series) layout. The same column therefore has to
# be taken from both; indexing y_test by row mixed different series.
pred = np.array(test_outputs[:, stock_id])
#pred[np.abs(pred) < 0.01] = 0
obj = np.array(y_test[:, stock_id])

# exp: root-mean-square of the predictions.
# real: mean of pred * obj, scaled by that RMS.
exp = np.sqrt(np.dot(pred, pred) / len(pred))
real = (np.dot(pred, obj) / len(pred)) / exp

print("exp", exp, "real", real)
# Fraction of non-zero predictions whose sign agrees with the target.
print("Accuracy", np.sum((pred * obj) > 0)/np.sum(pred != 0))

# Cumulative sums with the samples ordered by ascending target value.
obj_sort = np.argsort(obj)
plt.plot(np.cumsum(pred[obj_sort]), label="pred")
plt.plot(np.cumsum(obj[obj_sort]), label="obj")

plt.legend()

In [None]:
# Overlay the selected series' predictions and targets in sample order.
for series, series_label in ((pred, "pred"), (obj, "obj")):
    plt.plot(series, label=series_label)

plt.legend()