In [34]:
import os
import sys
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pickle
import numpy as np
import pandas as pd

# Cesta ke skriptům
sys.path.append(os.path.abspath("../scripts"))
from utils import Normalizer, create_multifeature_lstm_dataset

In [35]:
WINDOW_SIZE = 20
BATCH_SIZE = 32
EPOCHS = 50
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
PATIENCE = 5

results = []  # pro ukládání výsledků

In [36]:
with open("../data/preprocessed_datasets.pkl", "rb") as f:
    vsechny_datasety = pickle.load(f)

print(f"Načteno {len(vsechny_datasety)} datasetů")

Načteno 18 datasetů


In [37]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.3):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        return self.fc(out)

In [38]:
for data in vsechny_datasety:
    ticker = data["ticker"]
    sektor = data["sector"]

    print(f"Trénuji model pro {ticker} ({sektor})")

    x_train = torch.tensor(data["x_train"], dtype=torch.float32)
    y_train = torch.tensor(data["y_train"], dtype=torch.float32)
    x_val = torch.tensor(data["x_val"], dtype=torch.float32)
    y_val = torch.tensor(data["y_val"], dtype=torch.float32)

    model = LSTMModel(input_size=x_train.shape[2])
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    train_loader = DataLoader(TensorDataset(x_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(TensorDataset(x_val, y_val), batch_size=BATCH_SIZE)

    best_val_loss = float("inf")
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0
        for xb, yb in train_loader:
            pred = model(xb).squeeze()
            loss = loss_fn(pred, yb.view(-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                pred = model(xb).squeeze()
                loss = loss_fn(pred, yb.view(-1))
                val_loss += loss.item()
        val_loss /= len(val_loader)

        print(f"Epoch {epoch+1}/{EPOCHS} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), f"../results/best_model_{ticker}.pth")
        else:
            patience_counter += 1

        if patience_counter >= PATIENCE:
            print(f"Early stopping pro {ticker}")
            break

    results.append({
        "ticker": ticker,
        "sector": sektor,
        "train_loss": round(train_loss, 5),
        "val_loss": round(best_val_loss, 5)
    })

Trénuji model pro NVDA (Technology)
Epoch 1/50 - Train Loss: 0.0037 - Val Loss: 0.0629
Epoch 2/50 - Train Loss: 0.0002 - Val Loss: 0.0081
Epoch 3/50 - Train Loss: 0.0001 - Val Loss: 0.0154
Epoch 4/50 - Train Loss: 0.0001 - Val Loss: 0.0196
Epoch 5/50 - Train Loss: 0.0001 - Val Loss: 0.0214
Epoch 6/50 - Train Loss: 0.0001 - Val Loss: 0.0163
Epoch 7/50 - Train Loss: 0.0001 - Val Loss: 0.0196
Early stopping pro NVDA
Trénuji model pro MSFT (Technology)
Epoch 1/50 - Train Loss: 0.0268 - Val Loss: 0.0723
Epoch 2/50 - Train Loss: 0.0013 - Val Loss: 0.0038
Epoch 3/50 - Train Loss: 0.0007 - Val Loss: 0.0084
Epoch 4/50 - Train Loss: 0.0006 - Val Loss: 0.0056
Epoch 5/50 - Train Loss: 0.0006 - Val Loss: 0.0044
Epoch 6/50 - Train Loss: 0.0006 - Val Loss: 0.0042
Epoch 7/50 - Train Loss: 0.0005 - Val Loss: 0.0039
Early stopping pro MSFT
Trénuji model pro AAPL (Technology)
Epoch 1/50 - Train Loss: 0.0485 - Val Loss: 0.0125
Epoch 2/50 - Train Loss: 0.0015 - Val Loss: 0.0014
Epoch 3/50 - Train Loss: 0.0

In [39]:
results_df = pd.DataFrame(results)
results_df.to_csv("../results/all_model_results.csv", index=False)
print("Výsledky uloženy do all_model_results.csv")
results_df.head()

Výsledky uloženy do all_model_results.csv


Unnamed: 0,ticker,sector,train_loss,val_loss
0,NVDA,Technology,0.00011,0.00807
1,MSFT,Technology,0.00054,0.00383
2,AAPL,Technology,0.00067,0.00144
3,META,Communication,0.00039,0.00181
4,DIS,Communication,0.00089,0.00029
