In [1]:
import sys
from pathlib import Path

# Make the directory above the current working directory importable.
# The membership check keeps sys.path free of duplicates when this cell is re-run.
module_path = str(Path.cwd().parent)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [1]:
import os
import sys
import yaml
from datetime import datetime
import logging

# Add the project root (the directory above the current working directory)
# to the Python path so that project packages such as `scripts` can be imported.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Only append when missing: an unconditional append adds another duplicate
# entry to sys.path every time this cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)

# Import the training script
from scripts.train_model import train_model

In [2]:
# Load configuration
config_path = os.path.join(project_root, 'training', 'configs', 'config.yaml')
# Read the file as UTF-8 explicitly: without `encoding`, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII characters in the YAML.
with open(config_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# You can modify config parameters here if needed.
# These overrides replace whatever values the YAML file provides under 'training'.
config['training']['num_epochs'] = 20  # Example modification
config['training']['batch_size'] = 64  # Example modification

In [3]:
# Setup logging: write INFO-and-above records both to a timestamped file under
# <project_root>/logs and to the notebook output stream.
log_dir = os.path.join(project_root, 'logs')
os.makedirs(log_dir, exist_ok=True)

log_file = os.path.join(log_dir, f'training_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')

# logging.basicConfig() silently does nothing when the root logger already has
# handlers (e.g. after this cell has been run once in the same kernel), so the
# freshly computed log_file would never be attached. Detach and close any
# existing root handlers first so that the configuration below always applies.
root_logger = logging.getLogger()
for stale_handler in list(root_logger.handlers):
    root_logger.removeHandler(stale_handler)
    stale_handler.close()

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit encoding keeps the log file readable regardless of the OS locale.
        logging.FileHandler(log_file, encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

In [4]:
# Train the model
# The recorded run of this cell failed with "Data file not found: data/train.csv":
# the configuration uses relative paths (data/train.csv, logs, models/checkpoints, ...)
# while the notebook's working directory is one level below project_root.
# NOTE(review): presumably those paths are meant to be relative to the project
# root -- confirm that <project_root>/data/train.csv is the intended location.
# Run the training from project_root and always restore the previous directory.
original_cwd = os.getcwd()
try:
    os.chdir(project_root)
    logger.info("Starting model training...")
    results = train_model(config)  # No args needed when called programmatically
    logger.info("Training completed successfully!")

    # Access the results (keys as returned by train_model)
    history = results['history']
    test_metrics = results['test_metrics']
    model = results['model']
    trainer = results['trainer']

except Exception as e:
    # Log the failure, then re-raise so the cell still stops with the traceback.
    logger.error(f"Error during training: {str(e)}")
    raise
finally:
    # Restore the working directory whether training succeeded or failed.
    os.chdir(original_cwd)

2025-03-31 09:47:28,806 - INFO - Starting model training...
2025-03-31 09:47:28,821 - INFO - Logging setup complete. Log file: logs\training_20250331-094728.log


2025-03-31 09:47:28,821 - root - INFO - Logging setup complete. Log file: logs\training_20250331-094728.log






2025-03-31 09:47:28,827 - INFO - Starting Hybrid Music Recommender training at 2025-03-31 09:47:28


2025-03-31 09:47:28,827 - root - INFO - Starting Hybrid Music Recommender training at 2025-03-31 09:47:28










2025-03-31 09:47:28,843 - INFO - TRAINING CONFIGURATION


2025-03-31 09:47:28,843 - root - INFO - TRAINING CONFIGURATION






2025-03-31 09:47:28,853 - INFO - Model Architecture:


2025-03-31 09:47:28,853 - root - INFO - Model Architecture:


2025-03-31 09:47:28,854 - INFO -   embedding_dim: 32


2025-03-31 09:47:28,854 - root - INFO -   embedding_dim: 32


2025-03-31 09:47:28,862 - INFO -   hidden_dims: [128, 64, 32]


2025-03-31 09:47:28,862 - root - INFO -   hidden_dims: [128, 64, 32]


2025-03-31 09:47:28,864 - INFO -   prediction_dims: [64, 32]


2025-03-31 09:47:28,864 - root - INFO -   prediction_dims: [64, 32]


2025-03-31 09:47:28,870 - INFO -   dropout: 0.2


2025-03-31 09:47:28,870 - root - INFO -   dropout: 0.2


2025-03-31 09:47:28,873 - INFO -   final_layer_size: 16


2025-03-31 09:47:28,873 - root - INFO -   final_layer_size: 16


2025-03-31 09:47:28,876 - INFO -   user_tower:


2025-03-31 09:47:28,876 - root - INFO -   user_tower:


2025-03-31 09:47:28,876 - INFO -     hidden_layers: [128, 64, 32]


2025-03-31 09:47:28,876 - root - INFO -     hidden_layers: [128, 64, 32]


2025-03-31 09:47:28,884 - INFO -     dropout: 0.2


2025-03-31 09:47:28,884 - root - INFO -     dropout: 0.2


2025-03-31 09:47:28,891 - INFO -     activation: relu


2025-03-31 09:47:28,891 - root - INFO -     activation: relu


2025-03-31 09:47:28,894 - INFO -   item_tower:


2025-03-31 09:47:28,894 - root - INFO -   item_tower:


2025-03-31 09:47:28,897 - INFO -     hidden_layers: [128, 64, 32]


2025-03-31 09:47:28,897 - root - INFO -     hidden_layers: [128, 64, 32]


2025-03-31 09:47:28,904 - INFO -     dropout: 0.2


2025-03-31 09:47:28,904 - root - INFO -     dropout: 0.2


2025-03-31 09:47:28,905 - INFO -     activation: relu


2025-03-31 09:47:28,905 - root - INFO -     activation: relu


2025-03-31 09:47:28,909 - INFO - Training Parameters:


2025-03-31 09:47:28,909 - root - INFO - Training Parameters:


2025-03-31 09:47:28,914 - INFO -   learning_rate: 0.001


2025-03-31 09:47:28,914 - root - INFO -   learning_rate: 0.001


2025-03-31 09:47:28,917 - INFO -   weight_decay: 0.0001


2025-03-31 09:47:28,917 - root - INFO -   weight_decay: 0.0001


2025-03-31 09:47:28,922 - INFO -   num_epochs: 20


2025-03-31 09:47:28,922 - root - INFO -   num_epochs: 20


2025-03-31 09:47:28,928 - INFO -   patience: 5


2025-03-31 09:47:28,928 - root - INFO -   patience: 5


2025-03-31 09:47:28,932 - INFO -   batch_size: 64


2025-03-31 09:47:28,932 - root - INFO -   batch_size: 64


2025-03-31 09:47:28,936 - INFO -   num_workers: 4


2025-03-31 09:47:28,936 - root - INFO -   num_workers: 4


2025-03-31 09:47:28,940 - INFO -   checkpoint_dir: models/checkpoints


2025-03-31 09:47:28,940 - root - INFO -   checkpoint_dir: models/checkpoints


2025-03-31 09:47:28,944 - INFO -   log_dir: logs


2025-03-31 09:47:28,944 - root - INFO -   log_dir: logs


2025-03-31 09:47:28,947 - INFO -   optimizer: adam


2025-03-31 09:47:28,947 - root - INFO -   optimizer: adam


2025-03-31 09:47:28,954 - INFO -   loss_function: binary_cross_entropy


2025-03-31 09:47:28,954 - root - INFO -   loss_function: binary_cross_entropy


2025-03-31 09:47:28,959 - INFO -   class_weights: [0.3, 0.7]


2025-03-31 09:47:28,959 - root - INFO -   class_weights: [0.3, 0.7]


2025-03-31 09:47:28,960 - INFO -   lr_scheduler:


2025-03-31 09:47:28,960 - root - INFO -   lr_scheduler:


2025-03-31 09:47:28,967 - INFO -     use: True


2025-03-31 09:47:28,967 - root - INFO -     use: True


2025-03-31 09:47:28,967 - INFO -     factor: 0.5


2025-03-31 09:47:28,967 - root - INFO -     factor: 0.5


2025-03-31 09:47:28,973 - INFO -     patience: 3


2025-03-31 09:47:28,973 - root - INFO -     patience: 3


2025-03-31 09:47:28,977 - INFO -   early_stopping_patience: 5


2025-03-31 09:47:28,977 - root - INFO -   early_stopping_patience: 5


2025-03-31 09:47:28,985 - INFO - Evaluation Metrics:


2025-03-31 09:47:28,985 - root - INFO - Evaluation Metrics:


2025-03-31 09:47:28,987 - INFO -   metrics: ['accuracy', 'precision', 'recall', 'f1', 'auc', 'map', 'ndcg']


2025-03-31 09:47:28,987 - root - INFO -   metrics: ['accuracy', 'precision', 'recall', 'f1', 'auc', 'map', 'ndcg']


2025-03-31 09:47:28,992 - INFO -   k_values: [5, 10, 20, 50, 100]


2025-03-31 09:47:28,992 - root - INFO -   k_values: [5, 10, 20, 50, 100]


2025-03-31 09:47:28,996 - INFO -   save_predictions: True


2025-03-31 09:47:28,996 - root - INFO -   save_predictions: True


2025-03-31 09:47:29,002 - INFO -   save_metrics: True


2025-03-31 09:47:29,002 - root - INFO -   save_metrics: True


2025-03-31 09:47:29,008 - INFO -   cohort_analysis: True


2025-03-31 09:47:29,008 - root - INFO -   cohort_analysis: True


2025-03-31 09:47:29,008 - INFO -   metrics_dir: evaluation/metrics


2025-03-31 09:47:29,008 - root - INFO -   metrics_dir: evaluation/metrics


2025-03-31 09:47:29,013 - INFO -   predictions_dir: evaluation/predictions


2025-03-31 09:47:29,013 - root - INFO -   predictions_dir: evaluation/predictions






2025-03-31 09:47:29,024 - INFO - Using device: cpu


2025-03-31 09:47:29,024 - root - INFO - Using device: cpu


2025-03-31 09:47:29,028 - INFO - Loading and preprocessing data...


2025-03-31 09:47:29,028 - root - INFO - Loading and preprocessing data...


2025-03-31 09:47:29,032 - ERROR - Data file not found: data/train.csv


2025-03-31 09:47:29,032 - utils.data_utils - ERROR - Data file not found: data/train.csv


2025-03-31 09:47:29,032 - ERROR - Error loading data: Data file not found: data/train.csv


2025-03-31 09:47:29,032 - utils.data_utils - ERROR - Error loading data: Data file not found: data/train.csv


2025-03-31 09:47:29,040 - ERROR - Error loading/preprocessing data: Data file not found: data/train.csv
Traceback (most recent call last):
  File "c:\Users\mecha\Documents\lhydra-hybrid\scripts\train_model.py", line 106, in train_model
    interactions_df = load_data(data_path)
  File "c:\Users\mecha\Documents\lhydra-hybrid\utils\data_utils.py", line 46, in load_data
    raise FileNotFoundError(f"Data file not found: {data_path}")
FileNotFoundError: Data file not found: data/train.csv


2025-03-31 09:47:29,040 - root - ERROR - Error loading/preprocessing data: Data file not found: data/train.csv
Traceback (most recent call last):
  File "c:\Users\mecha\Documents\lhydra-hybrid\scripts\train_model.py", line 106, in train_model
    interactions_df = load_data(data_path)
  File "c:\Users\mecha\Documents\lhydra-hybrid\utils\data_utils.py", line 46, in load_data
    raise FileNotFoundError(f"Data file not found: {data_path}")
FileNotFoundError: Data file not found: data/train.csv


2025-03-31 09:47:29,048 - ERROR - Error during training: Data file not found: data/train.csv


2025-03-31 09:47:29,048 - __main__ - ERROR - Error during training: Data file not found: data/train.csv


FileNotFoundError: Data file not found: data/train.csv

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot training history
# Accept either a DataFrame or anything pd.DataFrame() can build a frame from
# (e.g. a dict of per-epoch lists) -- TODO confirm the type train_model returns.
# The frame is expected to provide 'epoch', 'train_loss' and 'val_loss' columns.
history_df = history if isinstance(history, pd.DataFrame) else pd.DataFrame(history)

# DataFrame.plot() opens its own figure unless it is handed an Axes. Drawing on
# an explicit Axes avoids leaving a blank 12x6 figure behind while the curves
# end up in a second, default-sized one.
fig, ax = plt.subplots(figsize=(12, 6))
history_df.plot(x='epoch', y=['train_loss', 'val_loss'], ax=ax)
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Bar chart of the test-set metrics, one bar per metric.
# Assumes 'test_metrics' is a dict -- it becomes a single-row frame that is
# then melted into long ('variable', 'value') form for seaborn.
metrics_df = pd.DataFrame([test_metrics])
metrics_long = metrics_df.melt()

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=metrics_long, x='variable', y='value', ax=ax)
# Tilt the metric names so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_title('Test Set Metrics')
fig.tight_layout()
plt.show()