In [97]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Display inline plots in Jupyter notebooks
%matplotlib inline


In [98]:

# Load data
movies_df = pd.read_csv('movies.csv')
ratings_df = pd.read_csv('ratings.csv')



In [99]:

# Extract the year from the title column and remove parentheses
movies_df['year'] = movies_df['title'].str.extract(r'\((\d{4})\)', expand=False)
movies_df['title'] = movies_df['title'].str.replace(r'\(\d{4}\)', '', regex=True).str.strip()


In [100]:

# Split genres into a list
movies_df['genres'] = movies_df['genres'].str.split('|')


In [101]:

# Copy dataframe for further processing
moviesWithGenres_df = movies_df.copy()


In [102]:

# One-hot encode genres
for genre in set(np.hstack(moviesWithGenres_df['genres'].values)):
    moviesWithGenres_df[genre] = moviesWithGenres_df['genres'].apply(lambda x: 1 if genre in x else 0)


In [103]:

# Fill missing values with 0
moviesWithGenres_df = moviesWithGenres_df.fillna(0)


In [104]:

# Drop the timestamp column from the ratings dataframe
ratings_df = ratings_df.drop('timestamp', axis=1)


In [105]:

# User input data
userInput = [
    {'title': 'Breakfast Club, The', 'rating': 5},
    {'title': 'Toy Story', 'rating': 3.5},
    {'title': 'Jumanji', 'rating': 2},
    {'title': 'Pulp Fiction', 'rating': 5},
    {'title': 'Akira', 'rating': 4.5}
]

In [106]:
inputMovies = pd.DataFrame(userInput)


In [107]:

# Merge input data with movie IDs
inputId = movies_df[movies_df['title'].isin(inputMovies['title'].tolist())]
inputMovies = pd.merge(inputId, inputMovies)


In [108]:

# Drop unnecessary columns
inputMovies = inputMovies.drop(['genres', 'year'], axis=1)


In [109]:

# Filter the movies based on the user's input
userMovies = moviesWithGenres_df[moviesWithGenres_df['movieId'].isin(inputMovies['movieId'].tolist())].reset_index(drop=True)


In [110]:

# Drop unnecessary columns
userGenreTable = userMovies.drop(['movieId', 'title', 'genres', 'year'], axis=1)


In [111]:



# Get the genre table for all movies
genreTable = moviesWithGenres_df.set_index('movieId').drop(['title', 'genres', 'year'], axis=1)
userProfile = userGenreTable.T.dot(inputMovies['rating'])

In [112]:

# Calculate weighted average for recommendation
recommendationTable_df = ((genreTable * userProfile).sum(axis=1)) / userProfile.sum()


In [113]:

# Sort the recommendations in descending order
recommendationTable_df = recommendationTable_df.sort_values(ascending=False)


In [114]:

# Show top 20 recommendations
recommended_movies = movies_df[movies_df['movieId'].isin(recommendationTable_df.head(20).index)]
recommended_movies


Unnamed: 0,movieId,title,genres,year
559,673,Space Jam,"[Adventure, Animation, Children, Comedy, Fanta...",1996
1390,1907,Mulan,"[Adventure, Animation, Children, Comedy, Drama...",1998
2250,2987,Who Framed Roger Rabbit?,"[Adventure, Animation, Children, Comedy, Crime...",1988
3460,4719,Osmosis Jones,"[Action, Animation, Comedy, Crime, Drama, Roma...",2001
4631,6902,Interstate 60,"[Adventure, Comedy, Drama, Fantasy, Mystery, S...",2002
5490,26340,"Twelve Tasks of Asterix, The (Les douze travau...","[Action, Adventure, Animation, Children, Comed...",1976
5819,32031,Robots,"[Adventure, Animation, Children, Comedy, Fanta...",2005
6448,51939,TMNT (Teenage Mutant Ninja Turtles),"[Action, Adventure, Animation, Children, Comed...",2007
6455,52287,Meet the Robinsons,"[Action, Adventure, Animation, Children, Comed...",2007
6462,52462,Aqua Teen Hunger Force Colon Movie Film for Th...,"[Action, Adventure, Animation, Comedy, Fantasy...",2007
