# **Importing the packages**

In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from tensorflow.keras.models import Model
from sklearn.preprocessing import MultiLabelBinarizer

# **Loading the Datasets**

In [21]:
ratings_data = pd.read_csv('C:/Users/srika/OneDrive/Desktop/Bharat internship/Movie Recommendations/ratings.csv')

print("Ratings Data:")
print(ratings_data)

movies_data = pd.read_csv('C:/Users/srika/OneDrive/Desktop/Bharat internship/Movie Recommendations/movies.csv')

print("\nMovies Data:")
print(movies_data)

Ratings Data:
    userId  movieId  rating
0        1        1      64
1        2        2      68
2        3        3      43
3        4        4      15
4        5        5      28
..     ...      ...     ...
72      73       73      54
73      74       74      89
74      75       75      79
75      76       76      40
76      77       77      87

[77 rows x 3 columns]

Movies Data:
    movieId                               title   genres
0         1          Zack and Miri Make a Porno  Romance
1         2                     Youth in Revolt   Comedy
2         3  You Will Meet a Tall Dark Stranger   Comedy
3         4                        When in Rome   Comedy
4         5               What Happens in Vegas   Comedy
..      ...                                 ...      ...
72       73                 Across the Universe  romance
73       74                       A Serious Man    Drama
74       75                  A Dangerous Method    Drama
75       76                          27 Dre

# **Merge Data**

In [22]:
data = pd.merge(ratings_data, movies_data, on='movieId')

# **One-Hot Encode Genres**

In [23]:
genres_list = [genres.split('|') for genres in data['genres']]
mlb = MultiLabelBinarizer()
genres_encoded = pd.DataFrame(mlb.fit_transform(genres_list), columns=mlb.classes_)
data = pd.concat([data, genres_encoded], axis=1)

# **Create Model**

In [24]:
genres_input = Input(shape=[len(mlb.classes_)], name='genres')

# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Define the model using only genre information
embedding_size = 30
dropout_rate = 0.2

genres_embedding = Embedding(input_dim=len(data['movieId'].unique()), output_dim=embedding_size, name='genres_embedding')(genres_input)
flattened_genres = Flatten()(genres_embedding)
dense_layer_1 = Dense(128, activation='relu')(flattened_genres)
dropout_layer = Dropout(dropout_rate)(dense_layer_1)
dense_layer_2 = Dense(64, activation='relu')(dropout_layer)
output_layer = Dense(1)(dense_layer_2)

model = Model(inputs=genres_input, outputs=output_layer)
model.compile(optimizer='adam', loss='mean_squared_error')

# **Train Model**

In [25]:
model.fit(train_data[mlb.classes_], train_data['rating'], epochs=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x16784ba9810>

# **Evaluate Model**

In [26]:
model.fit(train_data[mlb.classes_], train_data['rating'], epochs=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x167fff08410>

# **User Input for Genres**

In [27]:
user_genre = input("Enter a genre: ")

Enter a genre:  Comedy


# **Filter Movies**

In [28]:
filtered_movies = movies_data[movies_data['genres'].str.contains(user_genre, case=False)]

# **Make Recommendations**

In [29]:
if not filtered_movies.empty:
    # Get predictions for filtered movies
    filtered_movies_data = pd.merge(test_data, filtered_movies[['movieId']], on='movieId', how='inner')
    recommendations = model.predict(filtered_movies_data[mlb.classes_])

    # Combine movie indices and predictions
    recommendations_df = pd.DataFrame({'movieId': filtered_movies_data['movieId'], 'prediction': recommendations.squeeze()})

    # Display top N recommendations
    top_n_recommendations = recommendations_df.sort_values(by='prediction', ascending=False).head(5)
    top_n_movies = movies_data[movies_data['movieId'].isin(top_n_recommendations['movieId'])]
    print(f"\nTop 5 Movie Recommendations based on Genre '{user_genre}':\n")
    print(top_n_movies[['title', 'genres']])
else:
    print(f"No movies found for the genre '{user_genre}'.")


Top 5 Movie Recommendations based on Genre 'Comedy':

                         title  genres
4        What Happens in Vegas  Comedy
45                  Mamma Mia!  Comedy
50          Life as We Know It  Comedy
53                   Leap Year  Comedy
66  Ghosts of Girlfriends Past  Comedy
