In [None]:
# Remove any existing TensorFlow build before the install cell below runs.
# -y answers pip's confirmation prompt, which would otherwise stall the cell;
# %pip (rather than !pip) targets the environment of the running kernel.
%pip uninstall -y tensorflow

In [None]:
# Install libraries if not already installed.
# %pip (rather than !pip) installs into the environment of the running kernel.
# TODO: pin package versions once a known-good combination is confirmed.
%pip install surprise scikit-surprise xgboost tensorflow keras nltk scikit-learn pandas numpy matplotlib seaborn

In [None]:
# Importing necessary libraries
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import xgboost as xgb

from surprise import Dataset, Reader, SVD, NMF, accuracy
from surprise.model_selection import cross_validate, train_test_split
from surprise.model_selection import train_test_split as surprise_train_test_split
# Keep the scikit-learn import below the Surprise ones: the bare name
# `train_test_split` must resolve to scikit-learn's splitter (used on DataFrames),
# while Surprise's splitter stays reachable as `surprise_train_test_split`.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, precision_recall_fscore_support, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from nltk.tokenize import word_tokenize

# Plot styling: apply seaborn's "darkgrid" style to subsequent figures
sns.set_style(style="darkgrid")

In [None]:
# Read the MovieLens ratings and movie tables from the working directory
df_ratings = pd.read_csv("ratings.csv")
df_movies = pd.read_csv("movies.csv")

# Join the movie columns onto every rating row via the shared movieId key
df = pd.merge(df_ratings, df_movies, on="movieId")

# Exploration output, in order: first rows, missing values per column, summary statistics
print(df.head(), df.isna().sum(), df.describe(), sep="\n")

In [None]:
# Convert ratings to binary (1 = liked, 0 = not liked)
LIKE_THRESHOLD = 3.5  # ratings at or above this value are labelled as liked
df["liked"] = np.where(df["rating"] >= LIKE_THRESHOLD, 1, 0)

# Split the merged DataFrame into training and testing rows.
# `train_test_split` here is scikit-learn's splitter.
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Define Surprise Reader. The scale is taken from the observed ratings so that
# Surprise clips predictions to the range that actually occurs in the data.
reader = Reader(rating_scale=(df_ratings["rating"].min(), df_ratings["rating"].max()))
data = Dataset.load_from_df(df_ratings[["userId", "movieId", "rating"]], reader)

# A Surprise Dataset must be split with Surprise's own splitter: scikit-learn's
# `train_test_split` cannot handle it and would not return the Trainset object
# that `algo.fit()` expects. Seeded so the split is reproducible.
trainset, testset = surprise_train_test_split(data, test_size=0.2, random_state=42)

In [1]:
# Train Matrix Factorization Models
# Singular Value Decomposition (SVD)

# Fixed seed so the fitted model, and therefore the reported scores,
# are reproducible from run to run.
svd = SVD(random_state=42)
svd.fit(trainset)
predictions_svd = svd.test(testset)

# Evaluate SVD on the held-out testset
rmse_svd = accuracy.rmse(predictions_svd)
mae_svd = accuracy.mae(predictions_svd)

In [None]:
# Non-Negative Matrix Factorization (NMF)

# Fixed seed so the fitted model, and therefore the reported scores,
# are reproducible from run to run.
nmf = NMF(random_state=42)
nmf.fit(trainset)
predictions_nmf = nmf.test(testset)

# Evaluate NMF on the held-out testset
rmse_nmf = accuracy.rmse(predictions_nmf)
mae_nmf = accuracy.mae(predictions_nmf)

In [None]:
# Train XGBoost Model

# Prepare data for XGBoost: the raw user and movie IDs are the only features,
# and the rating is the regression target.
features = df[["userId", "movieId"]]
labels = df["rating"]

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Train XGBoost model
xgb_model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100, learning_rate=0.1)
xgb_model.fit(X_train, y_train)

# Predictions
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate XGBoost and report the score, as the SVD/NMF cells do for theirs
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
print(f"XGBoost RMSE: {rmse_xgb:.4f}")

In [None]:
# Neural Collaborative Filtering (NCF - Deep Learning)

# Define embedding size
embedding_size = 50

# An Embedding layer only accepts indices in [0, input_dim). Raw IDs need not be
# contiguous, so sizing the tables by the number of distinct IDs while feeding
# the raw IDs can index past the end of a table. Map every ID to a dense
# 0..n-1 position instead and size each table by the number of distinct IDs.
user_ids = pd.Index(df["userId"].unique())
movie_ids = pd.Index(df["movieId"].unique())


def encode_ncf_inputs(frame):
    """Return ``[user_positions, movie_positions]`` for the rows of ``frame``.

    Each array has shape ``(len(frame), 1)`` to match the ``Input(shape=(1,))``
    layers. ``frame`` must only contain IDs present in ``df``; an unknown ID
    would be encoded as -1 by ``Index.get_indexer``.
    """
    return [
        user_ids.get_indexer(frame["userId"]).reshape(-1, 1),
        movie_ids.get_indexer(frame["movieId"]).reshape(-1, 1),
    ]


# User and Movie Input Layers (one integer position per example)
user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))

user_embedding = Embedding(input_dim=len(user_ids), output_dim=embedding_size)(user_input)
movie_embedding = Embedding(input_dim=len(movie_ids), output_dim=embedding_size)(movie_input)

user_vec = Flatten()(user_embedding)
movie_vec = Flatten()(movie_embedding)

# Concatenated embeddings feed an MLP that ends in a single sigmoid unit,
# i.e. the predicted probability of the binary "liked" label.
concat = Concatenate()([user_vec, movie_vec])
dense = Dense(128, activation="relu")(concat)
dense = Dense(64, activation="relu")(dense)
dense = Dense(32, activation="relu")(dense)
output = Dense(1, activation="sigmoid")(dense)

# Compile model
ncf_model = Model([user_input, movie_input], output)
ncf_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train model on the encoded training rows
ncf_model.fit(encode_ncf_inputs(train), train["liked"].to_numpy(), epochs=5, batch_size=256)

# Evaluate model: evaluate() returns [loss, accuracy], keep the accuracy
accuracy_ncf = ncf_model.evaluate(encode_ncf_inputs(test), test["liked"].to_numpy())[1]
print(f"NCF test accuracy: {accuracy_ncf:.4f}")

In [None]:
# Content-Based Filtering (TF-IDF)

# TF-IDF Vectorizer over the genre strings.
# NOTE(review): assumes MovieLens-style pipe-separated genres such as
# "Action|Sci-Fi" - confirm against movies.csv.
# Tokenising on "|" only keeps each genre as a single feature; the default word
# tokenizer would break "Sci-Fi" into "sci" and "fi". English stop-word removal
# is dropped because the tokens are genre labels, not natural-language text.
tfidf = TfidfVectorizer(token_pattern=r"[^|]+")
tfidf_matrix = tfidf.fit_transform(df_movies["genres"])

# Nearest Neighbors Model: brute-force cosine distance between genre vectors.
# Row i of tfidf_matrix corresponds to row i of df_movies.
nn_model = NearestNeighbors(metric="cosine", algorithm="brute")
nn_model.fit(tfidf_matrix)

# Function to recommend movies
def recommend_movies(movie_title, n=5):
    """Print the titles of up to ``n`` movies nearest to ``movie_title``.

    Neighbours are looked up with the module-level ``nn_model`` over
    ``tfidf_matrix``; the query movie itself is never printed.

    Args:
        movie_title: Exact value to look up in ``df_movies["title"]``. If several
            rows share the title, the first one is used.
        n: Maximum number of recommendations to print.

    Raises:
        IndexError: If no row of ``df_movies`` has the given title.
    """
    # Use the row *position* of the first match (not its index label) so it lines
    # up with both the rows of ``tfidf_matrix`` and ``DataFrame.iloc``.
    positions = np.flatnonzero((df_movies["title"] == movie_title).to_numpy())
    if positions.size == 0:
        raise IndexError(f"No movie titled {movie_title!r} in df_movies")
    idx = positions[0]

    # Ask for one extra neighbour because the query movie is normally part of its
    # own result list, but never for more rows than the matrix holds.
    k = min(n + 1, tfidf_matrix.shape[0])
    _, indices = nn_model.kneighbors(tfidf_matrix[idx], n_neighbors=k)

    # Remove the query by position instead of assuming it is ranked first: movies
    # with identical vectors tie at distance 0 and may be returned ahead of it.
    neighbours = [i for i in indices.flatten() if i != idx][:n]

    print("Recommended Movies:")
    for i in neighbours:
        print(df_movies.iloc[i]["title"])

# Demo: print five recommendations for one title
recommend_movies("Toy Story (1995)", n=5)