In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanSquaredError
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import os
import signal
import sys
import pickle
import json

In [5]:
class AnimeRecommendationSystem:
    """Embedding-based rating predictor with checkpointed, resumable training.

    The instance keeps three files in sync:

    * ``model_path`` - the final model, written once training finishes.
    * ``checkpoint_path`` - the in-progress model (best epoch so far, or the
      model as it was when training was interrupted).
    * ``training_state_path`` - JSON holding the epoch counter and metric
      history needed to resume an interrupted run.

    Creating an instance installs ``signal_handler`` as the process-wide SIGINT
    handler so that Ctrl+C (or a kernel interrupt) saves the model before
    exiting.
    """

    def __init__(self, model_path='anime_recommendation_model.h5',
                 checkpoint_path='model_checkpoint.h5',
                 training_state_path='training_state.json'):
        """Store the file locations and initialise an empty training state.

        Args:
            model_path: Where the final trained model is saved / loaded from.
            checkpoint_path: Where in-progress checkpoints are written.
            training_state_path: JSON file holding the resume information.
        """
        self.model_path = model_path
        self.checkpoint_path = checkpoint_path
        self.training_state_path = training_state_path
        self.model = None
        self.training_interrupted = False
        self.current_epoch = 0
        self.history = self._empty_history()

        # Set up signal handler for graceful shutdown.
        # signal.signal() raises ValueError when called outside the main
        # thread; in that case fall back to the KeyboardInterrupt handling in
        # train_model instead of failing to construct the object.
        try:
            signal.signal(signal.SIGINT, self.signal_handler)
        except ValueError as e:
            print(f"⚠️ Could not install Ctrl+C handler: {e}")

    @staticmethod
    def _empty_history():
        """Return a fresh, empty metric-history dict."""
        return {'loss': [], 'val_loss': [], 'mae': [], 'val_mae': []}

    def signal_handler(self, signum, frame):
        """Handle Ctrl+C gracefully by saving the model and training state.

        Marks the run as interrupted, writes the current model to
        ``checkpoint_path`` and the resume information to
        ``training_state_path`` (when a model exists), then terminates via
        ``sys.exit(0)``. Save errors are reported but do not prevent the exit.
        """
        print("\n\n🛑 Training interrupted! Saving model and training state...")
        self.training_interrupted = True

        if self.model is not None:
            try:
                # Save the current model
                self.model.save(self.checkpoint_path)
                print(f"✅ Model saved to {self.checkpoint_path}")

                # Save training state
                self.save_training_state()
                print(f"✅ Training state saved to {self.training_state_path}")

            except Exception as e:
                print(f"❌ Error saving model: {e}")

        print("👋 Exiting gracefully...")
        sys.exit(0)

    def save_training_state(self):
        """Write the epoch counter and metric history to ``training_state_path``.

        The state is always flagged ``interrupted: True`` because this is only
        written when a run stops before completion.
        """
        training_state = {
            'current_epoch': self.current_epoch,
            'history': self.history,
            'interrupted': True
        }

        with open(self.training_state_path, 'w') as f:
            json.dump(training_state, f, indent=2)

    def load_training_state(self):
        """Load a previously saved training state, if one exists.

        Returns:
            bool: True when a state file was read and it is flagged as an
            interrupted session; False when there is no file, the file cannot
            be read, or it is not flagged as interrupted.
        """
        if os.path.exists(self.training_state_path):
            try:
                with open(self.training_state_path, 'r') as f:
                    state = json.load(f)
                    self.current_epoch = state.get('current_epoch', 0)
                    self.history = state.get('history', self._empty_history())
                    was_interrupted = state.get('interrupted', False)

                if was_interrupted:
                    print(f"📁 Found interrupted training session at epoch {self.current_epoch}")
                    return True
            except Exception as e:
                print(f"⚠️ Error loading training state: {e}")

        return False

    def load_or_create_model(self, num_users, num_anime, embedding_dim=128):
        """Load an existing model, resume from a checkpoint, or build a new one.

        Preference order: the final model at ``model_path``; then, if an
        interrupted session was recorded, the checkpoint at
        ``checkpoint_path``; otherwise a freshly created model.

        Args:
            num_users: Number of distinct user indices (embedding input size).
            num_anime: Number of distinct anime indices (embedding input size).
            embedding_dim: Embedding width used only when a new model is built.

        Returns:
            tuple[bool, bool]: ``(loaded_from_disk, needs_training)``.
        """

        # Check for interrupted training first
        was_interrupted = self.load_training_state()

        # Try to load the main model
        if os.path.exists(self.model_path):
            try:
                self.model = load_model(self.model_path, custom_objects={'mse': MeanSquaredError()})
                print("✅ Main model loaded successfully!")
                return True, False  # loaded, not_newly_created
            except Exception as e:
                print(f"⚠️ Error loading main model: {e}")

        # Try to load checkpoint if interrupted training exists
        if was_interrupted and os.path.exists(self.checkpoint_path):
            try:
                self.model = load_model(self.checkpoint_path, custom_objects={'mse': MeanSquaredError()})
                print("✅ Checkpoint model loaded successfully! Ready to resume training.")
                return True, True  # loaded, needs_training
            except Exception as e:
                print(f"⚠️ Error loading checkpoint: {e}")

        # Create new model if no existing model found
        print("🔨 Creating new model...")
        # A state file may have been read above even though no usable
        # checkpoint exists; a brand-new model must start from epoch 0 with an
        # empty history rather than inherit the stale counters.
        self.current_epoch = 0
        self.history = self._empty_history()
        self.model = self.create_model(num_users, num_anime, embedding_dim)
        return False, True  # not_loaded, needs_training

    def create_model(self, num_users, num_anime, embedding_dim=128):
        """Build and compile a new two-input embedding network.

        User and anime indices are embedded separately, flattened,
        concatenated and passed through two ReLU dense layers (256 and 128
        units, each followed by 20% dropout) to a single linear output.
        The model is compiled with Adam, MSE loss and an MAE metric.

        Args:
            num_users: Size of the user embedding table.
            num_anime: Size of the anime embedding table.
            embedding_dim: Width of both embedding vectors.

        Returns:
            The compiled Keras model.
        """
        user_input = tf.keras.layers.Input(shape=(1,), name='user_input')
        anime_input = tf.keras.layers.Input(shape=(1,), name='anime_input')

        user_embedding = tf.keras.layers.Embedding(
            input_dim=num_users, output_dim=embedding_dim, name='user_embedding'
        )(user_input)
        anime_embedding = tf.keras.layers.Embedding(
            input_dim=num_anime, output_dim=embedding_dim, name='anime_embedding'
        )(anime_input)

        user_vec = tf.keras.layers.Flatten()(user_embedding)
        anime_vec = tf.keras.layers.Flatten()(anime_embedding)

        concat = tf.keras.layers.Concatenate()([user_vec, anime_vec])
        dense = tf.keras.layers.Dense(256, activation='relu')(concat)
        dense = tf.keras.layers.Dropout(0.2)(dense)  # Added dropout for regularization
        dense = tf.keras.layers.Dense(128, activation='relu')(dense)
        dense = tf.keras.layers.Dropout(0.2)(dense)
        output = tf.keras.layers.Dense(1, activation='linear')(dense)

        model = tf.keras.models.Model(inputs=[user_input, anime_input], outputs=output)
        model.compile(optimizer='adam', loss=MeanSquaredError(), metrics=['mae'])

        return model

    def train_model(self, X_train, y_train, X_test, y_test, epochs=50, batch_size=64):
        """Train the model, resuming from ``current_epoch`` when applicable.

        Args:
            X_train, X_test: 2-D arrays; column 0 is fed to the user input and
                column 1 to the anime input.
            y_train, y_test: Targets aligned with the rows of ``X_train`` /
                ``X_test``.
            epochs: Total epoch budget for the whole run, counting epochs
                already completed before an interruption.
            batch_size: Mini-batch size passed to ``fit``.

        Returns:
            dict: Metric history accumulated across all training sessions.

        Raises:
            ValueError: If no model has been loaded or created yet.
        """

        if self.model is None:
            raise ValueError("Model not initialized. Call load_or_create_model first.")

        # A previous interrupted call on the same (still-alive) instance leaves
        # this flag set; without the reset the callback would stop the new run
        # after one epoch and the final model would never be saved.
        self.training_interrupted = False

        # Calculate remaining epochs if resuming
        remaining_epochs = epochs - self.current_epoch
        if remaining_epochs <= 0:
            print("🎯 Training already completed!")
            return self.history

        print(f"🚀 Starting training from epoch {self.current_epoch + 1}/{epochs}")

        # Callbacks
        callbacks = [
            # Save best model during training
            ModelCheckpoint(
                filepath=self.checkpoint_path,
                monitor='val_loss',
                save_best_only=True,
                save_weights_only=False,
                verbose=1
            ),
            # Early stopping
            EarlyStopping(
                monitor='val_loss',
                patience=5,
                restore_best_weights=True,
                verbose=1
            ),
            # Custom callback to handle graceful interruption
            GracefulInterruptCallback(self)
        ]

        try:
            # Train the model.
            # Keras interprets `epochs` as the index of the FINAL epoch when
            # `initial_epoch` is given, so the total budget is passed here;
            # passing the remaining count would end a resumed run too early.
            history = self.model.fit(
                [X_train[:, 0], X_train[:, 1]],
                y_train,
                validation_data=([X_test[:, 0], X_test[:, 1]], y_test),
                epochs=epochs,
                batch_size=batch_size,
                callbacks=callbacks,
                verbose=1,
                initial_epoch=self.current_epoch
            )

            # Update training history
            for key, values in history.history.items():
                self.history.setdefault(key, []).extend(values)

            # Save the final model
            if not self.training_interrupted:
                self.model.save(self.model_path)
                print(f"✅ Training completed! Model saved to {self.model_path}")

                # Clean up checkpoint and training state
                self.cleanup_training_files()

        except KeyboardInterrupt:
            # Reached only when the SIGINT handler is not the one installed in
            # __init__; reuse the same save-and-exit path.
            print("\n🛑 Training interrupted by user!")
            self.signal_handler(signal.SIGINT, None)

        return self.history

    def cleanup_training_files(self):
        """Delete the checkpoint and training-state files if they exist.

        Failures are reported and otherwise ignored (best-effort cleanup).
        """
        try:
            if os.path.exists(self.checkpoint_path):
                os.remove(self.checkpoint_path)
                print(f"🧹 Cleaned up checkpoint: {self.checkpoint_path}")

            if os.path.exists(self.training_state_path):
                os.remove(self.training_state_path)
                print(f"🧹 Cleaned up training state: {self.training_state_path}")
        except Exception as e:
            print(f"⚠️ Error cleaning up files: {e}")

    def recommend_nn_based(self, user_id, user_id_mapping, anime_id_mapping, anime_df, top_n=10):
        """Recommend the ``top_n`` anime with the highest predicted rating for a user.

        Every anime index in ``anime_id_mapping`` is scored for the user.

        Args:
            user_id: Original (unmapped) user id.
            user_id_mapping: Dict from original user id to model index.
            anime_id_mapping: Dict from original anime id to model index.
            anime_df: DataFrame with at least ``anime_id``, ``name`` and
                ``genre`` columns.
            top_n: Number of recommendations to return.

        Returns:
            pandas.DataFrame: Columns ``name``, ``genre`` and
            ``predicted_rating``, sorted by predicted rating (highest first).
            An empty DataFrame when ``user_id`` is unknown.

        Raises:
            ValueError: If no model has been loaded or created yet.
        """
        if self.model is None:
            raise ValueError("Model not loaded. Call load_or_create_model first.")

        if user_id not in user_id_mapping:
            print(f"❌ User ID {user_id} not found in training data.")
            return pd.DataFrame()

        mapped_user_id = user_id_mapping[user_id]
        all_anime = np.array(list(anime_id_mapping.values()))

        # Pair the same user index with every anime index.
        user_array = np.full_like(all_anime, mapped_user_id)
        predicted_ratings = self.model.predict([user_array, all_anime], verbose=0)

        # Sort anime by predicted rating (descending) and keep the best top_n
        top_indices = np.argsort(predicted_ratings[:, 0])[::-1][:top_n]

        # Map model indices back to original anime IDs, keeping their scores
        reverse_anime_mapping = {v: k for k, v in anime_id_mapping.items()}
        rating_dict = {reverse_anime_mapping[all_anime[idx]]: predicted_ratings[idx, 0]
                       for idx in top_indices}

        # Attach predicted ratings to the matching catalogue rows
        recommendations = anime_df[anime_df['anime_id'].isin(list(rating_dict))].copy()
        recommendations['predicted_rating'] = recommendations['anime_id'].map(rating_dict)

        # Sort by predicted rating and return relevant columns
        recommendations = recommendations.sort_values('predicted_rating', ascending=False)
        return recommendations[['name', 'genre', 'predicted_rating']].head(top_n)


class GracefulInterruptCallback(tf.keras.callbacks.Callback):
    """Keras callback that reports epoch progress back to the recommender.

    After every epoch it records the number of completed epochs on the owning
    recommendation system and, if that system has been flagged as
    interrupted, asks Keras to stop training.
    """

    def __init__(self, recommendation_system):
        """Remember the recommendation system whose state is kept in sync."""
        super().__init__()
        self.rec_system = recommendation_system

    def on_epoch_end(self, epoch, logs=None):
        """Store the completed-epoch count and honour a pending stop request."""
        owner = self.rec_system

        # `epoch` is zero-based, so the count of finished epochs is epoch + 1.
        owner.current_epoch = epoch + 1

        if not owner.training_interrupted:
            return

        print(f"\n🛑 Stopping training at epoch {epoch + 1}")
        self.model.stop_training = True


# --- Evaluation Metrics (Fixed) ---
def precision_at_k(recommended, relevant, k=5):
    """Fraction of the first ``k`` recommended items that are relevant.

    The hit count is divided by ``k`` itself (not by the number of items
    actually recommended). Returns 0 when ``k`` is not positive.
    """
    hits = 0
    for candidate in recommended[:k]:
        if candidate in relevant:
            hits += 1

    if k > 0:
        return hits / k
    return 0

def recall_at_k(recommended, relevant, k=5):
    """Fraction of all relevant items that appear in the first ``k`` recommendations.

    Returns 0 when ``relevant`` is empty.
    """
    hits = 0
    for candidate in recommended[:k]:
        if candidate in relevant:
            hits += 1

    total_relevant = len(relevant)
    if total_relevant > 0:
        return hits / total_relevant
    return 0

def ndcg_at_k(recommended, relevant, k=5):
    """Normalised discounted cumulative gain over the first ``k`` recommendations.

    Relevance is binary: a relevant item at zero-based rank ``r`` contributes
    ``1 / log2(r + 2)``. The ideal DCG assumes ``min(len(relevant), k)``
    relevant items occupy the top ranks. Returns 0 when the ideal DCG is 0.
    """
    gains = [
        1 / np.log2(rank + 2)
        for rank, candidate in enumerate(recommended[:k])
        if candidate in relevant
    ]
    dcg = sum(gains)

    ideal_hits = min(len(relevant), k)
    idcg = sum([1 / np.log2(rank + 2) for rank in range(ideal_hits)])

    if idcg > 0:
        return dcg / idcg
    return 0


# --- Data Loading and Preprocessing ---
def load_and_prepare_data(anime_csv_path, rating_csv_path, test_size=0.2, random_state=42):
    """Load the anime and rating CSVs and build train/test arrays.

    Anime ids (from the anime file) and user ids (from the rating file) are
    mapped to contiguous zero-based indices. Ratings whose anime or user id
    has no index are dropped.

    Args:
        anime_csv_path: CSV with at least an ``anime_id`` column.
        rating_csv_path: CSV with ``user_id``, ``anime_id`` and ``rating``
            columns.
        test_size: Fraction of ratings held out for the test split.
        random_state: Seed for the train/test shuffle.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, num_users, num_anime,
        user_id_mapping, anime_id_mapping, anime_df)`` where the ``X`` arrays
        are integer ``[user_index, anime_index]`` pairs.
    """
    print("📁 Loading data...")

    # Load datasets
    anime_df = pd.read_csv(anime_csv_path)
    rating_df = pd.read_csv(rating_csv_path)

    print(f"✅ Loaded {len(anime_df)} anime and {len(rating_df)} ratings")

    # Create ID mappings
    anime_id_mapping = {anime_id: idx for idx, anime_id in enumerate(anime_df['anime_id'].unique())}
    user_id_mapping = {user_id: idx for idx, user_id in enumerate(rating_df['user_id'].unique())}

    print(f"📊 Found {len(user_id_mapping)} unique users and {len(anime_id_mapping)} unique anime")

    # Map IDs to indices; ids missing from a mapping become NaN
    initial_len = len(rating_df)
    ratings_mapped = rating_df.assign(
        anime_id_mapped=rating_df['anime_id'].map(anime_id_mapping),
        user_id_mapped=rating_df['user_id'].map(user_id_mapping),
    )

    # Clean data - remove rows with invalid mappings.
    # Indices come from enumerate(), so they are never negative; dropping the
    # NaN rows is the only filtering required.
    ratings_clean = ratings_mapped.dropna(subset=['anime_id_mapped', 'user_id_mapped'])

    print(f"🧹 Cleaned data: {initial_len} -> {len(ratings_clean)} ratings")

    # Prepare features and target.
    # .map() yields float columns whenever any id was unmapped; cast back to
    # integers so the arrays hold proper embedding indices.
    X = ratings_clean[['user_id_mapped', 'anime_id_mapped']].to_numpy(dtype='int64')
    y = ratings_clean['rating'].values

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    print(f"📈 Training set: {len(X_train)} samples")
    print(f"📊 Test set: {len(X_test)} samples")

    return (X_train, X_test, y_train, y_test,
            len(user_id_mapping), len(anime_id_mapping),
            user_id_mapping, anime_id_mapping, anime_df)

In [6]:
def main(anime_csv_path='/content/cleaned_anime.csv',
         rating_csv_path='/content/cleaned_rating.csv'):
    """Run the full pipeline: load data, train or load the model, print recommendations.

    Args:
        anime_csv_path: Location of the anime CSV. The default is the path
            this notebook was originally run with.
        rating_csv_path: Location of the ratings CSV (same note as above).

    Errors are reported on stdout instead of being raised: a missing file
    prints a hint about the paths, any other exception prints its traceback.
    """
    try:
        # Load and prepare data
        (X_train, X_test, y_train, y_test,
         num_users, num_anime,
         user_id_mapping, anime_id_mapping, anime_df) = load_and_prepare_data(
            anime_csv_path, rating_csv_path
        )

        # Initialize the recommendation system
        rec_system = AnimeRecommendationSystem()

        # Load or create model
        print("🔍 Loading or creating model...")
        _, needs_training = rec_system.load_or_create_model(
            num_users, num_anime, embedding_dim=50
        )

        # Train if needed
        if needs_training:
            print("🎯 Starting training... (Press Ctrl+C to save and stop)")
            rec_system.train_model(
                X_train, y_train, X_test, y_test,
                epochs=50, batch_size=64
            )
            print("✅ Training completed!")
        else:
            print("✅ Model ready for recommendations!")

        # Make recommendations for a sample user: the first key of the
        # mapping, taken without copying every key into a list.
        sample_user_id = next(iter(user_id_mapping))
        print(f"\n🎬 Getting recommendations for user {sample_user_id}:")

        recommendations = rec_system.recommend_nn_based(
            user_id=sample_user_id,
            user_id_mapping=user_id_mapping,
            anime_id_mapping=anime_id_mapping,
            anime_df=anime_df,
            top_n=10
        )

        print("\n📋 Top 10 Recommendations:")
        print(recommendations)

    except FileNotFoundError as e:
        print(f"❌ File not found: {e}")
        print("Please update the file paths in the main() function")
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()

In [7]:
# Run the end-to-end pipeline: load the data, train or load the model, and
# print sample recommendations for the first user in the rating data.
main()

📁 Loading data...
✅ Loaded 12294 anime and 4708211 ratings
📊 Found 45003 unique users and 12294 unique anime
🧹 Cleaned data: 4708211 -> 4708204 ratings
📈 Training set: 3766563 samples
📊 Test set: 941641 samples
🔍 Loading or creating model...
🔨 Creating new model...
🎯 Starting training... (Press Ctrl+C to save and stop)
🚀 Starting training from epoch 1/50
Epoch 1/50
[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.8654 - mae: 0.9824
Epoch 1: val_loss improved from inf to 1.22916, saving model to model_checkpoint.h5




[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 3ms/step - loss: 1.8653 - mae: 0.9824 - val_loss: 1.2292 - val_mae: 0.7921
Epoch 2/50
[1m58845/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 1.1905 - mae: 0.7824
Epoch 2: val_loss improved from 1.22916 to 1.15926, saving model to model_checkpoint.h5




[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 3ms/step - loss: 1.1905 - mae: 0.7824 - val_loss: 1.1593 - val_mae: 0.7596
Epoch 3/50
[1m58841/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 1.1342 - mae: 0.7566
Epoch 3: val_loss improved from 1.15926 to 1.14277, saving model to model_checkpoint.h5




[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 4ms/step - loss: 1.1342 - mae: 0.7566 - val_loss: 1.1428 - val_mae: 0.7578
Epoch 4/50
[1m58846/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 1.0979 - mae: 0.7421
Epoch 4: val_loss improved from 1.14277 to 1.13884, saving model to model_checkpoint.h5




[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 4ms/step - loss: 1.0979 - mae: 0.7421 - val_loss: 1.1388 - val_mae: 0.7608
Epoch 5/50
[1m58849/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 1.0617 - mae: 0.7276
Epoch 5: val_loss improved from 1.13884 to 1.12512, saving model to model_checkpoint.h5




[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 3ms/step - loss: 1.0617 - mae: 0.7276 - val_loss: 1.1251 - val_mae: 0.7535
Epoch 6/50
[1m58851/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 1.0304 - mae: 0.7152
Epoch 6: val_loss improved from 1.12512 to 1.11645, saving model to model_checkpoint.h5




[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 3ms/step - loss: 1.0304 - mae: 0.7152 - val_loss: 1.1165 - val_mae: 0.7505
Epoch 7/50
[1m58843/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 1.0036 - mae: 0.7047
Epoch 7: val_loss improved from 1.11645 to 1.10614, saving model to model_checkpoint.h5




[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 3ms/step - loss: 1.0036 - mae: 0.7047 - val_loss: 1.1061 - val_mae: 0.7408
Epoch 8/50
[1m58845/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 0.9755 - mae: 0.6937
Epoch 8: val_loss did not improve from 1.10614
[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 3ms/step - loss: 0.9755 - mae: 0.6937 - val_loss: 1.1074 - val_mae: 0.7420
Epoch 9/50
[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9539 - mae: 0.6851
Epoch 9: val_loss did not improve from 1.10614
[1m58853/58853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 4ms/step - loss: 0.9539 - mae: 0.6851 - val_loss: 1.1161 - val_mae: 0.7417
Epoch 10/50
[1m58848/58853[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - loss: 0.9320 - mae: 0.6766
Epoch 10: val_loss did not improv



✅ Training completed! Model saved to anime_recommendation_model.h5
🧹 Cleaned up checkpoint: model_checkpoint.h5
✅ Training completed!

🎬 Getting recommendations for user 1:

📋 Top 10 Recommendations:
                                           name  \
0                                Kimi no Na wa.   
255                           Fairy Tail (2014)   
804                            Sword Art Online   
686               Bishoujo Senshi Sailor Moon S   
884    Bishoujo Senshi Sailor Moon R: The Movie   
534   Bishoujo Senshi Sailor Moon: Sailor Stars   
1177         Bishoujo Senshi Sailor Moon SuperS   
876                               Ranma ½ Super   
967               Bishoujo Senshi Sailor Moon R   
1068                Ranma ½: Akumu! Shunmin Kou   

                                                  genre  predicted_rating  
0                  Drama, Romance, School, Supernatural          8.522452  
255   Action, Adventure, Comedy, Fantasy, Magic, Sho...          8.295575  
804       

In [8]:
import pickle

# Persist the id -> index mappings for later use outside this notebook.
# NOTE(review): in the cells shown, user_id_mapping and anime_id_mapping are
# only created as locals inside main(); this cell needs them at notebook
# scope - confirm they are defined before running (a fresh
# Restart & Run All would otherwise raise NameError here).
# `with` guarantees each file is flushed and closed once the dump finishes.
with open('user_id_mapping.pkl', 'wb') as mapping_file:
    pickle.dump(user_id_mapping, mapping_file)

with open('anime_id_mapping.pkl', 'wb') as mapping_file:
    pickle.dump(anime_id_mapping, mapping_file)