In [1]:
import os
import sys
from pathlib import Path


def find_project_root(start: Path) -> Path:
    """Return the project root for a session whose working directory is ``start``.

    ``start`` itself is returned when it already contains a ``src`` directory
    (the package the later cells import from), which is the case once this
    cell has already switched the working directory to the root. Otherwise
    the parent of ``start`` is returned, matching the original behaviour for
    a notebook that lives one level below the root.
    """
    start = Path(start).absolute()
    if (start / "src").is_dir():
        return start
    return start.parent


# Previously this was always "parent of the cwd"; combined with the chdir
# below, every re-run of the cell climbed one directory higher.
project_root = find_project_root(Path.cwd())

# Keep the project root first on sys.path without stacking a duplicate entry
# each time the cell is executed again.
if sys.path[:1] != [str(project_root)]:
    sys.path.insert(0, str(project_root))

# Relative paths used further down are resolved against the project root.
os.chdir(project_root)

# Load environment variables from a .env file via python-dotenv.
# load_dotenv() is called with no arguments, so the library's default .env
# lookup applies; this runs after the os.chdir(project_root) above.
from dotenv import load_dotenv
load_dotenv()

In [2]:
import torch
from torch.utils.data import DataLoader

from src.data.pipeline_with_news import get_datasets_with_news
from src.models.transformer_model_with_news import StockTransformerWithNews
from src.training.trainer_with_news import TrainerWithNews
from src.utils.config import load_config

# Load the project configuration, then force the news features on for this
# notebook regardless of what the loaded configuration says.
config = load_config()

data_cfg = config.data
data_cfg.use_news = True

print(f"News enabled: {data_cfg.use_news}")
print(f"Tickers: {data_cfg.tickers}")

In [3]:
# Build the train / validation / test datasets (with news embeddings) and the
# list of feature columns they were built from.
print("Loading datasets with news embeddings...")
datasets_and_features = get_datasets_with_news(
    config=config,
    use_news_cache=True,
    force_refresh_news=False,  # Set to True to refresh news cache
)
train_dataset, val_dataset, test_dataset, feature_columns = datasets_and_features

# Short size summary, one line per split.
print(" Datasets loaded!")
for split_name, split_dataset in (
    ("Train", train_dataset),
    ("Val", val_dataset),
    ("Test", test_dataset),
):
    print(f"  {split_name}: {len(split_dataset)} samples")
print(f"  Features: {len(feature_columns)}")

In [4]:
# The two loaders differ only in their dataset and in whether they shuffle,
# so the common options are spelled out once.
loader_options = dict(
    batch_size=config.training.batch_size,
    num_workers=0,
    pin_memory=False,
)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

# Draw a single training batch to inspect its layout; shapes are only
# printed when the batch unpacks into exactly (x, news_emb, y).
sample_batch = next(iter(train_loader))
print(f"Batch format: {len(sample_batch)} items")
if len(sample_batch) == 3:
    x, news_emb, y = sample_batch
    print(f"  x shape: {x.shape}")
    news_shape = None if news_emb is None else news_emb.shape
    print(f"  news_emb shape: {news_shape}")
    print(f"  y shape: {y.shape}")

In [5]:
# Create the news-aware transformer.
# The news-embedding width and the fusion mode are named once so that the
# summary printed below always reports what was actually passed to the
# constructor (previously both were repeated as hard-coded text in the prints).
NEWS_EMBEDDING_DIM = 768  # FinBERT embedding dimension
NEWS_FUSION_METHOD = "concat"  # or "add"

model = StockTransformerWithNews(
    input_dim=len(feature_columns),
    news_embedding_dim=NEWS_EMBEDDING_DIM,
    d_model=config.model.d_model,
    n_heads=config.model.n_heads,
    n_layers=config.model.n_layers,
    d_ff=config.model.d_ff,
    dropout=config.model.dropout,
    activation=config.model.activation,
    prediction_horizon=config.data.prediction_horizon,
    news_fusion_method=NEWS_FUSION_METHOD,
)

print(" Model created!")
print(f"  Input dim: {len(feature_columns)}")
print(f"  News embedding dim: {NEWS_EMBEDDING_DIM}")
print(f"  Fusion method: {NEWS_FUSION_METHOD}")

In [6]:
# Hand the model, the configuration and both data loaders to the trainer.
trainer_kwargs = {
    "model": model,
    "config": config,
    "train_loader": train_loader,
    "val_loader": val_loader,
}
trainer = TrainerWithNews(**trainer_kwargs)

print(" Trainer created!")

In [7]:
# Run training; the returned history is indexed by key below
# ('best_val_loss' here, 'train_losses' when the checkpoint is written).
history = trainer.train()

separator = "=" * 60
print(f"\n{separator}")
print("Обучението завърши!")  # Bulgarian: "Training finished!"
print(f"Best validation loss: {history['best_val_loss']:.6f}")
print(separator)

In [8]:
# Write the trained model to <PROJECT_ROOT>/<models_dir>/best_model_with_news.pt
# and report the resulting file's size and modification time.
import time

from src.utils import config as _cfg

BYTES_PER_MB = 1024 * 1024

checkpoint_name = "best_model_with_news.pt"
checkpoint_path = _cfg.PROJECT_ROOT / config.paths.models_dir / checkpoint_name
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)

print(f"\nЗапазване на модела в: {checkpoint_path}")  # "Saving the model to: ..."

# NOTE(review): the weights come from `model` as it stands after training,
# while 'score' is history['best_val_loss']; whether the two belong to the
# same epoch depends on TrainerWithNews - confirm.
# NOTE(review): 'config' is stored as the config object itself, so loading
# this file presumably needs full unpickling rather than a weights-only
# load - confirm against the code that reads the checkpoint.
checkpoint_payload = {
    'epoch': len(history['train_losses']) - 1,
    'model_state_dict': model.state_dict(),
    'score': history['best_val_loss'],
    'model_type': 'StockTransformerWithNews',
    'config': config,
}
torch.save(checkpoint_payload, checkpoint_path)

if checkpoint_path.exists():
    checkpoint_stat = checkpoint_path.stat()
    file_size = checkpoint_stat.st_size / BYTES_PER_MB  # MB
    mtime = time.ctime(checkpoint_stat.st_mtime)
    print(" Файлът е запазен успешно!")  # "File saved successfully!"
    print(f"  Размер: {file_size:.2f} MB")  # "Size"
    print(f"  Модифициран: {mtime}")  # "Modified"