# Model Training
Train the Two-Tower and pairwise Deep Ranking models on investor–deal interaction data.

In [None]:
import pandas as pd
import numpy as np
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os

# Import our custom modules
from models import TwoTowerModel, PairwiseDeepRankingModel, FeatureEncoder
from datasets import (
    InvestorDealDataset, PairwiseRankingDataset, 
    collate_fn, pairwise_collate_fn
)

# Set seeds
# Seed every random number generator the notebook can reach so reruns are
# repeatable. Python's built-in `random` is included because it is
# independent of the NumPy and PyTorch generators and would otherwise start
# from an OS-provided seed on every run.
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

## Load Data

In [None]:
# Read the three input tables from the local `data/` directory.
# The two feature tables are plain CSV reads; only the interaction log needs
# its `timestamp` column parsed into datetimes.
investor_df = pd.read_csv('data/investor_features.csv')
deal_df = pd.read_csv('data/deal_features.csv')
interactions_df = pd.read_csv(
    'data/enhanced_interactions.csv',
    parse_dates=['timestamp'],
)

# Quick row-count sanity check for each table.
print(f"Loaded {len(interactions_df)} interactions")
print(f"Loaded {len(investor_df)} investors")
print(f"Loaded {len(deal_df)} deals")

## Prepare Feature Encoder

In [None]:
import pickle

# Fit the feature encoder on the investor and deal feature tables.
encoder = FeatureEncoder()
encoder.fit(investor_df, deal_df)

# Make sure the output directories exist before anything is written to them.
for output_dir in ('models', 'models/checkpoints'):
    os.makedirs(output_dir, exist_ok=True)

# Persist the fitted encoder so it can be reloaded later.
# NOTE: pickle files must only ever be loaded from a trusted location.
with open('models/feature_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(encoder, encoder_file)

print("Feature dimensions:", encoder.feature_dims)

## Train-Test Split

In [None]:
# Sort by timestamp and take most recent as test
TEST_SIZE = 100  # number of most-recent interactions held out

# Guard the boundary: with TEST_SIZE rows or fewer, `iloc[:-TEST_SIZE]` is
# empty and the "test" slice would silently be the entire dataset.
if len(interactions_df) <= TEST_SIZE:
    raise ValueError(
        f"Need more than {TEST_SIZE} interactions for a temporal split, "
        f"got {len(interactions_df)}"
    )

# NOTE(review): `sort_values` places missing timestamps last by default, so
# any such rows would land in the test slice - confirm the column has none.
interactions_df = interactions_df.sort_values('timestamp')
train_interactions = interactions_df.iloc[:-TEST_SIZE]  # everything older
test_interactions = interactions_df.iloc[-TEST_SIZE:]   # newest TEST_SIZE rows

print(f"Train: {len(train_interactions)}, Test: {len(test_interactions)}")

## Train Two-Tower Model

In [None]:
# Datasets for the Two-Tower model: each is given the interaction rows, both
# feature tables, the array of all deal ids, and negative_samples=4.
# NOTE(review): the validation dataset is built from `test_interactions`, so
# anything that monitors validation loss sees the held-out slice.
train_dataset = InvestorDealDataset(
    train_interactions,
    investor_df,
    deal_df,
    deal_df['dealId'].values,
    negative_samples=4,
)
val_dataset = InvestorDealDataset(
    test_interactions,
    investor_df,
    deal_df,
    deal_df['dealId'].values,
    negative_samples=4,
)

# Loader settings shared by both splits; worker processes are only used when
# CUDA is available.
n_workers = 4 if torch.cuda.is_available() else 0
loader_kwargs = dict(batch_size=128, collate_fn=collate_fn, num_workers=n_workers)

# Only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Build the Two-Tower model, sized from the row counts of the feature tables
# and the encoder's feature dimensions.
two_tower_model = TwoTowerModel(
    feature_dims=encoder.feature_dims,
    n_deals=len(deal_df),
    n_investors=len(investor_df),
)

# Both callbacks watch the same metric and treat lower as better.
monitor_cfg = dict(monitor='val_loss', mode='min')

# Keep only the single best checkpoint on disk.
checkpoint_callback = ModelCheckpoint(
    dirpath='models/checkpoints',
    filename='two-tower-{epoch:02d}-{val_loss:.3f}',
    save_top_k=1,
    **monitor_cfg,
)

# Stop once the monitored loss has not improved for 5 checks.
early_stop_callback = EarlyStopping(patience=5, **monitor_cfg)

In [None]:
# Fit the Two-Tower model on a single device: GPU when CUDA is available,
# CPU otherwise.
if torch.cuda.is_available():
    accelerator = 'gpu'
else:
    accelerator = 'cpu'

trainer = pl.Trainer(
    accelerator=accelerator,
    devices=1,
    max_epochs=20,
    enable_progress_bar=True,
    callbacks=[checkpoint_callback, early_stop_callback],
)
trainer.fit(two_tower_model, train_loader, val_loader)

## Plot Two-Tower Training Curves

In [None]:
# Draw the loss histories recorded on the model and save the figure.
# NOTE(review): the x-axis label presumes one entry per epoch in
# `train_losses` / `val_losses` - confirm against the model implementation.
fig, ax = plt.subplots(figsize=(10, 4))
for history, curve_label in (
    (two_tower_model.train_losses, 'Train Loss'),
    (two_tower_model.val_losses, 'Val Loss'),
):
    ax.plot(history, label=curve_label)

ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Two-Tower Model Loss')
ax.legend()
ax.grid(True)

# Save before showing so the file is written from the live figure.
fig.savefig('models/two_tower_loss.png')
plt.show()

## Train Deep Ranking Model with Pairwise Loss

In [None]:
# Pairwise ranking datasets: 5 pairs per positive for training, 3 for
# validation. Validation is again built from `test_interactions`.
train_pairwise = PairwiseRankingDataset(
    train_interactions,
    investor_df,
    deal_df,
    n_pairs_per_positive=5,
)
val_pairwise = PairwiseRankingDataset(
    test_interactions,
    investor_df,
    deal_df,
    n_pairs_per_positive=3,
)

# Loader settings shared by both splits; worker processes are only used when
# CUDA is available.
pairwise_workers = 4 if torch.cuda.is_available() else 0
pairwise_loader_kwargs = dict(
    batch_size=128,
    collate_fn=pairwise_collate_fn,
    num_workers=pairwise_workers,
)

# Only the training loader shuffles.
train_pairwise_loader = torch.utils.data.DataLoader(
    train_pairwise, shuffle=True, **pairwise_loader_kwargs
)
val_pairwise_loader = torch.utils.data.DataLoader(
    val_pairwise, shuffle=False, **pairwise_loader_kwargs
)

In [None]:
# Build the pairwise Deep Ranking model, sized from the row counts of the
# feature tables and the encoder's feature dimensions.
deep_ranking_model = PairwiseDeepRankingModel(
    feature_dims=encoder.feature_dims,
    n_deals=len(deal_df),
    n_investors=len(investor_df),
)

# Keep only the single best checkpoint, judged by the lowest `val_loss`.
dr_checkpoint_options = dict(
    dirpath='models/checkpoints',
    filename='deep-ranking-{epoch:02d}-{val_loss:.3f}',
    save_top_k=1,
    monitor='val_loss',
    mode='min',
)
dr_checkpoint_callback = ModelCheckpoint(**dr_checkpoint_options)

In [None]:
# Train Deep Ranking
# Use a dedicated EarlyStopping instance for this run. The callback keeps its
# best score and patience counter on the object, so sharing
# `early_stop_callback` with the Two-Tower run would judge this model against
# the other model's validation loss and could stop training after one epoch.
dr_early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min'
)

# Single device: GPU when CUDA is available, CPU otherwise.
dr_trainer = pl.Trainer(
    max_epochs=20,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1,
    callbacks=[dr_checkpoint_callback, dr_early_stop_callback],
    enable_progress_bar=True
)

dr_trainer.fit(deep_ranking_model, train_pairwise_loader, val_pairwise_loader)

## Save Training Summary

In [None]:
import json

# Gather what is needed to locate and rebuild the trained models: the best
# checkpoint path reported by each checkpoint callback, plus the sizes the
# models were constructed with.
two_tower_ckpt = checkpoint_callback.best_model_path
deep_ranking_ckpt = dr_checkpoint_callback.best_model_path

summary = {
    'two_tower_best_ckpt': two_tower_ckpt,
    'deep_ranking_best_ckpt': deep_ranking_ckpt,
    'n_investors': len(investor_df),
    'n_deals': len(deal_df),
    'feature_dims': encoder.feature_dims,
}

# Write the summary next to the other training artifacts.
with open('models/training_summary.json', 'w') as summary_file:
    json.dump(summary, summary_file, indent=2)

print("Training completed!")
print(f"Two-Tower best checkpoint: {summary['two_tower_best_ckpt']}")
print(f"Deep Ranking best checkpoint: {summary['deep_ranking_best_ckpt']}")