In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, Concatenate, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras import layers

In [2]:
# ✅ Register the custom Mean Squared Error (MSE) function
@tf.keras.utils.register_keras_serializable()
def mse(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load datasets
movies = pd.read_csv("/content/drive/MyDrive/movies_metadata.csv", low_memory=False)
ratings = pd.read_csv("/content/drive/MyDrive/ratings.csv")

In [5]:
# Preprocessing movies dataset
from ast import literal_eval
movies['genres'] = movies['genres'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
movies['genres'] = movies['genres'].apply(lambda x: ','.join(x))
movies['release_year'] = pd.to_datetime(movies['release_date'], errors='coerce').dt.year


In [6]:
# Merge movies and ratings
movies['movieId'] = pd.to_numeric(movies['id'], errors='coerce')
data = pd.merge(ratings, movies, on='movieId', how='inner')
data = data[['userId', 'movieId', 'rating', 'title', 'genres', 'release_date']]
data['release_year'] = pd.to_datetime(data['release_date'], errors='coerce').dt.year

In [7]:
# Create mappings for embedding layers
user_ids = data['userId'].unique()
movie_ids = data['movieId'].unique()
user_to_index = {user_id: i for i, user_id in enumerate(user_ids)}
movie_to_index = {movie_id: i for i, movie_id in enumerate(movie_ids)}
data['user_index'] = data['userId'].map(user_to_index)
data['movie_index'] = data['movieId'].map(movie_to_index)

In [8]:
# Prepare training data
X = data[['user_index', 'movie_index']].values
y = data['rating'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
def build_deepfm_model(num_users, num_movies, embedding_size=32):
    # Input layers
    user_input = Input(shape=(1,), name='user_input')
    movie_input = Input(shape=(1,), name='movie_input')

    # Embedding layers
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size, name='user_embedding')(user_input)
    movie_embedding = Embedding(input_dim=num_movies, output_dim=embedding_size, name='movie_embedding')(movie_input)

    # Flatten embeddings
    user_vec = Flatten()(user_embedding)
    movie_vec = Flatten()(movie_embedding)

    # Factorization Machine (FM) layer
    fm_interaction = layers.Multiply()([user_vec, movie_vec])  # Element-wise multiplication
    fm_interaction = layers.Lambda(
        lambda x: tf.reduce_sum(x, axis=1, keepdims=True),
        output_shape=(1,)
    )(fm_interaction)  # Summation with explicit output shape

    # Concatenate embeddings
    concatenated = Concatenate()([user_vec, movie_vec])

    # Dense layers for DNN
    x = Dense(128, activation='relu')(concatenated)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    dnn_output = Dense(1)(x)

    # Final output combining FM and DNN
    output = tf.keras.layers.Add()([fm_interaction, dnn_output])

    model = Model(inputs=[user_input, movie_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

In [10]:
# Build and train the model
num_users = len(user_ids)
num_movies = len(movie_ids)
model = build_deepfm_model(num_users, num_movies, embedding_size=16)

from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

history = model.fit([X_train[:, 0], X_train[:, 1]], y_train,
                    validation_data=([X_test[:, 0], X_test[:, 1]], y_test),
                    epochs=20, batch_size=256,
                    callbacks=[early_stopping])

Epoch 1/20
[1m35743/35743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 3ms/step - loss: 1.0141 - mae: 0.7683 - val_loss: 0.7457 - val_mae: 0.6588
Epoch 2/20
[1m35743/35743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 3ms/step - loss: 0.6983 - mae: 0.6366 - val_loss: 0.7024 - val_mae: 0.6405
Epoch 3/20
[1m35743/35743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 3ms/step - loss: 0.6176 - mae: 0.5921 - val_loss: 0.6917 - val_mae: 0.6336
Epoch 4/20
[1m35743/35743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 3ms/step - loss: 0.5591 - mae: 0.5581 - val_loss: 0.6983 - val_mae: 0.6378
Epoch 5/20
[1m35743/35743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 3ms/step - loss: 0.5236 - mae: 0.5364 - val_loss: 0.7028 - val_mae: 0.6375
Epoch 6/20
[1m35743/35743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 3ms/step - loss: 0.5024 - mae: 0.5225 - val_loss: 0.7101 - val_mae: 0.6409
Epoch 6: early stopping
Restoring model weights from

In [13]:
# ✅ Compile the model with custom loss (important)
model.compile(optimizer='adam', loss=mse, metrics=['mae'])

# ✅ Re-evaluate model performance
y_pred = model.predict([X_test[:, 0], X_test[:, 1]]).flatten()

mse_value = mean_squared_error(y_test, y_pred)
mae_value = mean_absolute_error(y_test, y_pred)

y_test_binary = (y_test > 3.5).astype(int)
y_pred_binary = (y_pred > 3.5).astype(int)

accuracy = accuracy_score(y_test_binary, y_pred_binary)
precision = precision_score(y_test_binary, y_pred_binary, zero_division=0)

print(f"Mean Squared Error (MSE): {mse_value}")
print(f"Mean Absolute Error (MAE): {mae_value}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")

# ✅ Save the trained model with registered mse
model.save("deepfm_model_fixed_registered.h5")
print("Model saved successfully as 'deepfm_model_fixed_registered.h5'.")

[1m71486/71486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 2ms/step




Mean Squared Error (MSE): 0.7223467303548055
Mean Absolute Error (MAE): 0.6475455631790543
Accuracy: 0.7233791236653715
Precision: 0.7060353214545102
Model saved successfully as 'deepfm_model_fixed_registered.h5'.


In [14]:
import pickle

# Save the user-to-index mapping
with open("user_to_index.pkl", "wb") as f:
    pickle.dump(user_to_index, f)
print("User-to-index mapping saved successfully as 'user_to_index.pkl'.")

# Save the movie-to-index mapping
with open("movie_to_index.pkl", "wb") as f:
    pickle.dump(movie_to_index, f)
print("Movie-to-index mapping saved successfully as 'movie_to_index.pkl'.")

# Save the movie IDs (needed for recommendations)
with open("movie_ids.pkl", "wb") as f:
    pickle.dump(movie_ids, f)
print("Movie IDs saved successfully as 'movie_ids.pkl'.")



User-to-index mapping saved successfully as 'user_to_index.pkl'.
Movie-to-index mapping saved successfully as 'movie_to_index.pkl'.
Movie IDs saved successfully as 'movie_ids.pkl'.


In [16]:
import pickle
from tensorflow.keras.models
import load_model

# Load the user-to-index mapping
with open("user_to_index.pkl", "rb") as f:
    user_to_index = pickle.load(f)
print("User-to-index mapping loaded successfully.")

# Load the movie-to-index mapping
with open("movie_to_index.pkl", "rb") as f:
    movie_to_index = pickle.load(f)
print("Movie-to-index mapping loaded successfully.")

# Load the movie IDs
with open("movie_ids.pkl", "rb") as f:
    movie_ids = pickle.load(f)
print("Movie IDs loaded successfully.")


User-to-index mapping loaded successfully.
Movie-to-index mapping loaded successfully.
Movie IDs loaded successfully.


In [17]:
# Chatbot for recommendations
def recommend_movies(user_id, top_n=5):
    user_index = user_to_index.get(user_id, None)
    if user_index is None:
        return "User not found."

    all_movie_indices = np.arange(num_movies)
    user_indices = np.full_like(all_movie_indices, user_index)

    predictions = model.predict([user_indices, all_movie_indices]).flatten()
    top_movie_indices = predictions.argsort()[-top_n:][::-1]

    top_movies = [movie_ids[i] for i in top_movie_indices]
    return movies[movies['movieId'].isin(top_movies)][['title', 'genres']]

In [18]:
# Example chatbot interaction
user_id = 1  # Replace with dynamic input
print("Recommended movies for user:")
print(recommend_movies(user_id))

Recommended movies for user:
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
                    title                           genres
10625  Cinderella Liberty                    Drama,Romance
11130          Miami Vice  Action,Adventure,Crime,Thriller
11662                 300             Action,Adventure,War
19929     Caesar Must Die                Drama,Documentary
36791   Woman of the Lake                            Drama


In [19]:
# Example chatbot interaction
user_id = 3  # Replace with dynamic input
print("Recommended movies for user:")
print(recommend_movies(user_id))

Recommended movies for user:
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
                          title                           genres
286          Once Were Warriors                            Drama
1651                  Afterglow             Drama,Romance,Comedy
4020   The Million Dollar Hotel                   Drama,Thriller
11130                Miami Vice  Action,Adventure,Crime,Thriller
27971        The Shuttered Room             Drama,Mystery,Horror


In [20]:
def recommend_for_new_user(top_n=5):
    popular_movies = data.groupby('movieId').agg({'rating': 'mean'}).sort_values('rating', ascending=False)
    top_movies = popular_movies.head(top_n).index
    return movies[movies['movieId'].isin(top_movies)][['title', 'genres']]

print("Cold Start Recommendations:")
print(recommend_for_new_user())

Cold Start Recommendations:
                                        title                    genres
13621                  The Man Behind The Gun                   Western
22518                                  Harvey                   Fantasy
31620                                  Brutal                     Crime
37655                    Palermo or Wolfsburg                          
43733  Monster High: Escape from Skull Shores  Animation,Family,Fantasy


In [21]:
# Recommend popular movies in a specific genre
def recommend_popular_movies_in_genre(genre, top_n=5):
    genre_movies = movies[movies['genres'].apply(lambda genres: genre in genres)]
    top_movies = genre_movies.sort_values(by='popularity', ascending=False).head(top_n)
    print(f"Top {top_n} popular movies in genre '{genre}':\n", top_movies[['title', 'popularity']])
    return top_movies[['title', 'popularity']]

In [22]:
recommend_popular_movies_in_genre("Drama")

Top 5 popular movies in genre 'Drama':
                    title popularity
34313  Chameli Ki Shaadi      9e-06
536         Blade Runner  96.272374
10517            Jarhead   9.997032
11252      The Last Kiss   9.996541
18029        Dream House   9.995617


Unnamed: 0,title,popularity
34313,Chameli Ki Shaadi,9e-06
536,Blade Runner,96.272374
10517,Jarhead,9.997032
11252,The Last Kiss,9.996541
18029,Dream House,9.995617


In [23]:
# Recommend movies based on release year
def recommend_movies_by_year(user_id, release_year, top_n=5):
    user_index = user_to_index.get(user_id, None)
    if user_index is None:
        return "User not found."

    filtered_movies = movies[movies['release_year'] == release_year]
    if filtered_movies.empty:
        return f"No movies found for the year {release_year}."

    movie_indices = filtered_movies['movieId'].map(movie_to_index).dropna().astype(int).values
    user_indices = np.full_like(movie_indices, user_index)

    predictions = model.predict([user_indices, movie_indices]).flatten()
    top_movie_indices = predictions.argsort()[-top_n:][::-1]

    top_movies = [movie_ids[i] for i in top_movie_indices if i < len(movie_ids)]
    return movies[movies['movieId'].isin(top_movies)][['title', 'genres']]

In [24]:
# Example chatbot interaction
user_id = 2

print("Recommended movies for user by year:")
print(recommend_movies_by_year(user_id, release_year=2000))

Recommended movies for user by year:
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
                               title                genres
495                   Mrs. Doubtfire   Comedy,Drama,Family
3144   Twin Peaks: Fire Walk with Me         Drama,Mystery
5740                     Talk to Her         Drama,Romance
8146          The Motorcycle Diaries                 Drama
33216                          Sissi  Comedy,Drama,Romance
