# Model Training for Movie Recommendation System

This notebook covers:
1. Loading the processed data
2. Collaborative filtering models
3. Content-based filtering
4. Hybrid approaches
5. Model evaluation and comparison
6. Saving the trained models
7. Generating sample recommendations

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import sys
sys.path.append('../src')

# Custom modules
from models.collaborative_filtering import CollaborativeFiltering
from models.content_based import ContentBasedFiltering
from models.hybrid import HybridRecommender
from models.evaluation import ModelEvaluator

## 1. Load Processed Data

In [None]:
# Read the preprocessed CSV tables: the training/validation splits first,
# then the per-user and per-movie feature tables.
train_data = pd.read_csv('../data/processed/train_data.csv')
val_data = pd.read_csv('../data/processed/val_data.csv')

user_features = pd.read_csv('../data/processed/user_features.csv')
movie_features = pd.read_csv('../data/processed/movie_features.csv')

# The user-item matrix is stored as a pickle rather than a CSV.
# NOTE: unpickling can execute arbitrary code, so only load files that this
# project produced itself.
with open('../data/processed/user_item_matrix.pkl', 'rb') as matrix_file:
    user_item_matrix = pickle.load(matrix_file)

print("Data loaded successfully!")

## 2. Collaborative Filtering Models

In [None]:
# One CollaborativeFiltering instance holds all three variants trained below.
cf_model = CollaborativeFiltering()

# Matrix factorization is trained on the training split ...
cf_model.train_matrix_factorization(train_data)

# ... while the user-based and item-based variants are both trained on the
# pre-built user-item matrix.
cf_model.train_user_based(user_item_matrix)
cf_model.train_item_based(user_item_matrix)

print("Collaborative filtering models trained!")

## 3. Content-Based Filtering

In [None]:
# Build the content-based recommender and train it on the movie feature table
# (the only input this model receives here).
cb_model = ContentBasedFiltering()
cb_model.train(movie_features)

print("Content-based filtering model trained!")

## 4. Hybrid Model

In [None]:
# The hybrid recommender is constructed from the two models trained above.
hybrid_model = HybridRecommender(cf_model, cb_model)

# Its own training step receives the training split plus both feature tables.
hybrid_model.train(train_data, movie_features, user_features)

print("Hybrid model trained!")

## 5. Model Evaluation

In [None]:
# Evaluator that scores each trained model against the validation split.
evaluator = ModelEvaluator()

# Display name -> trained model, in the order they should be evaluated.
models = {
    'Collaborative Filtering': cf_model,
    'Content-Based': cb_model,
    'Hybrid': hybrid_model,
}

# Column labels, listed in the same order in which evaluate() results are
# unpacked below.
metric_labels = ('RMSE', 'MAE', 'Precision@10', 'Recall@10')

results = {}
for name, model in models.items():
    # The unpacking requires evaluate() to yield exactly four values.
    rmse, mae, precision, recall = evaluator.evaluate(model, val_data)
    results[name] = dict(zip(metric_labels, (rmse, mae, precision, recall)))

# Transpose so that each model is a row and each metric is a column.
results_df = pd.DataFrame(results).transpose()
print("Model Evaluation Results:")
print(results_df)

## 6. Save Trained Models

In [None]:
# Destination path -> model object, written in this order.
trained_artifacts = (
    ('../src/models/trained/cf_model.pkl', cf_model),
    ('../src/models/trained/cb_model.pkl', cb_model),
    ('../src/models/trained/hybrid_model.pkl', hybrid_model),
)

# Pickle each model to its own file; the `with` block closes the file even if
# serialization fails.
for artifact_path, artifact in trained_artifacts:
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

print("Models saved successfully!")

## 7. Generate Sample Recommendations

In [None]:
# Generate recommendations for a sample user.
# The requested count is defined once so that the call and the printed
# heading cannot drift apart when the number is changed.
sample_user_id = 1
n_recommendations = 10

recommendations = hybrid_model.recommend(
    sample_user_id,
    n_recommendations=n_recommendations,
)

print(f"Top {n_recommendations} recommendations for user {sample_user_id}:")
print(recommendations)