# %%
import os
import pickle
import sys

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
sys.path.append('..')

from config import CONFIG
from src.models.baseline import RandomModel, PopularityModel
from src.models.matrix_factorization import MatrixFactorization
from src.models.ncf import NCF
from src.models.hybrid_nn import HybridNN

# %%
# Read the preprocessed interaction splits and the user/item feature tables.
print("Loading data...")
train_data, val_data, user_features, item_features = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/train.csv',
        '../data/processed/val.csv',
        '../data/processed/user_features.csv',
        '../data/processed/item_features.csv',
    )
)

# The encoders pickle also carries the user and item counts read below.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only point this at a file you generated yourself.
with open('../data/processed/encoders.pkl', 'rb') as encoders_file:
    encoders = pickle.load(encoders_file)

n_users, n_items = encoders['n_users'], encoders['n_items']
print(f"Users: {n_users}, Items: {n_items}")

# %%
# Prepare data for PyTorch
def _interaction_tensors(interactions):
    """Convert one interaction split into the tensors passed to the models.

    Args:
        interactions: DataFrame with 'user_id', 'item_id' and 'rating' columns.

    Returns:
        Tuple ``(users, items, ratings)``: the two id columns as int64
        tensors and the rating column as a float32 tensor.
    """
    # torch.tensor with an explicit dtype replaces the legacy typed
    # constructors (torch.LongTensor / torch.FloatTensor). It always copies
    # its input, so the tensors never alias the DataFrame's buffers.
    return (
        torch.tensor(interactions['user_id'].to_numpy(), dtype=torch.long),
        torch.tensor(interactions['item_id'].to_numpy(), dtype=torch.long),
        torch.tensor(interactions['rating'].to_numpy(), dtype=torch.float32),
    )


train_users, train_items, train_ratings = _interaction_tensors(train_data)
val_users, val_items, val_ratings = _interaction_tensors(val_data)

# %%
# 1. Random baseline: only instantiated here, no fit call is made.
print("\n" + "="*50, "1. RANDOM BASELINE", "="*50, sep="\n")
random_model = RandomModel(n_items)
print("✓ Random model ready (no training needed)")

# %%
# 2. Popularity baseline, fitted on the training interactions DataFrame.
print("\n" + "="*50, "2. POPULARITY BASELINE", "="*50, sep="\n")
popularity_model = PopularityModel(n_items)
popularity_model.fit(train_data)
print("✓ Popularity model trained")

# %%
# 3. Matrix factorization: fitted on the training tensors with the validation
#    tensors passed alongside; epochs and learning rate come from CONFIG.
print("\n" + "="*50, "3. MATRIX FACTORIZATION", "="*50, sep="\n")
mf_model = MatrixFactorization(n_users, n_items, CONFIG['embedding_dim'])
mf_model.fit(
    train_users, train_items, train_ratings,
    val_users, val_items, val_ratings,
    epochs=CONFIG['epochs'],
    lr=CONFIG['learning_rate'],
)
print("✓ Matrix Factorization trained")

# %%
# 4. Standard NCF: same fit call shape as matrix factorization, with the
#    hidden-layer sizes additionally taken from CONFIG.
print("\n" + "="*50, "4. NEURAL COLLABORATIVE FILTERING (NCF)", "="*50, sep="\n")
ncf_model = NCF(
    n_users,
    n_items,
    CONFIG['embedding_dim'],
    CONFIG['hidden_layers'],
)
ncf_model.fit(
    train_users, train_items, train_ratings,
    val_users, val_items, val_ratings,
    epochs=CONFIG['epochs'],
    lr=CONFIG['learning_rate'],
)
print("✓ NCF trained")

# %%
# 5. Train Hybrid Neural Network (Our Model)
print("\n" + "="*50)
print("5. HYBRID NEURAL NETWORK (PROPOSED)")
print("="*50)


def _feature_matrix(features, id_column):
    """Return the non-id columns of *features* as a float32 tensor.

    Rows are ordered by ``id_column`` before the column is dropped, so the
    row order no longer depends on the order of the CSV file. The sort is
    stable, so an already-sorted table is left unchanged.

    Args:
        features: DataFrame holding ``id_column`` plus numeric feature columns.
        id_column: Name of the id column to order by and then drop.

    Returns:
        2-D float32 tensor with one row per row of *features*.
    """
    # NOTE(review): presumably HybridNN looks feature rows up by encoded id,
    # which needs row i to describe id i — confirm against the model code.
    ordered = features.sort_values(id_column, kind='stable')
    # torch.tensor with an explicit dtype replaces the legacy
    # torch.FloatTensor constructor.
    return torch.tensor(
        ordered.drop(columns=id_column).to_numpy(), dtype=torch.float32
    )


# Prepare demographic and content features
user_feat_tensor = _feature_matrix(user_features, 'user_id')
item_feat_tensor = _feature_matrix(item_features, 'item_id')

# Feature dimensions are taken from the tensors so the model always matches
# the number of feature columns in the CSVs.
hybrid_model = HybridNN(
    n_users=n_users,
    n_items=n_items,
    user_feat_dim=user_feat_tensor.shape[1],
    item_feat_dim=item_feat_tensor.shape[1],
    embedding_dim=CONFIG['embedding_dim'],
    hidden_layers=CONFIG['hidden_layers'],
    dropout=CONFIG['dropout_rate']
)

hybrid_model.fit(
    train_users, train_items, train_ratings,
    user_feat_tensor, item_feat_tensor,
    val_users, val_items, val_ratings,
    epochs=CONFIG['epochs'], lr=CONFIG['learning_rate']
)
print("✓ Hybrid NN trained")

# %%
# Save all models
print("\n" + "="*50)
print("SAVING MODELS")
print("="*50)

models = {
    'random': random_model,
    'popularity': popularity_model,
    'mf': mf_model,
    'ncf': ncf_model,
    'hybrid_nn': hybrid_model
}

# Defined once so the write and the confirmation message cannot drift apart.
models_path = '../experiments/results/trained_models.pkl'

# Create the results directory up front: if it is missing, open() raises
# FileNotFoundError and every model trained above would be lost.
os.makedirs(os.path.dirname(models_path), exist_ok=True)

# NOTE(review): this pickles the model objects themselves, so loading the file
# requires the same model classes to be importable. Saving state_dicts with
# torch.save is the more portable option, but would change the file format
# for whatever reads trained_models.pkl.
with open(models_path, 'wb') as f:
    pickle.dump(models, f)

print(f"✓ All models saved to {models_path}")
print("\n✓ Training complete!")