In [75]:
# Basic working recommender model (content-based: matches each user's preferred genres to movie genres; no collaborative filtering yet)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [None]:

# Sample user profiles, written one row per user and then pivoted into the
# column-oriented dict that the DataFrame constructor consumes.
_user_columns = ('id', 'happy_movie', 'sad_movie', 'neutral_movie', 'fav_movie')
_user_rows = [
    (1, ['Drama', 'Comedy'], ['Drama', 'Thriller'], ['Comedy'], 'The Godfather'),
    (2, ['Romance', 'Family'], ['Crime'], ['Action', 'Adventure'], "Schindler's List"),
    (3, ['Adventure'], ['Horror', 'Science Fiction'], ['Crime'], 'The Shawshank Redemption'),
]
user_data = {name: list(values) for name, values in zip(_user_columns, zip(*_user_rows))}
user = pd.DataFrame(user_data)


# Read the full movie catalogue from the mounted Drive path, then narrow it
# to the columns the rest of the notebook relies on.
MOVIE_COLUMNS = ['Movie_id', 'title', 'Genres', 'popularity', 'vote_average', 'vote_count']
file_path = '/content/drive/MyDrive/final project sample data/10000 Movies Data'
minfo = pd.read_csv(file_path)
movie = minfo[MOVIE_COLUMNS]



# Sample viewing history, written one tuple per (user, movie) event and then
# pivoted into the column-oriented dict that the DataFrame constructor consumes.
_watched_columns = ('userid', 'movieid', 'rating_given', 'watching_mood')
_watched_rows = [
    (1, 238, 8.0, 'Happy'),
    (2, 278, 5.5, 'Sad'),
    (3, 240, 9.5, 'Neutral'),
    (1, 19404, 7.0, 'Happy'),
    (2, 122, 6.5, 'Neutral'),
]
watched_data = {name: list(values) for name, values in zip(_watched_columns, zip(*_watched_rows))}
watched = pd.DataFrame(watched_data)

In [None]:
# Preview the trimmed movie catalogue (the rendered table elides the middle rows)
movie

Unnamed: 0,Movie_id,title,Genres,popularity,vote_average,vote_count
0,238,The Godfather,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",93.552,8.7,16814
1,278,The Shawshank Redemption,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",78.664,8.7,22542
2,240,The Godfather Part II,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",55.752,8.6,10187
3,19404,Dilwale Dulhania Le Jayenge,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",22.150,8.6,3927
4,424,Schindler's List,"[{'id': 18, 'name': 'Drama'}, {'id': 36, 'name...",53.542,8.6,13382
...,...,...,...,...,...,...
9975,384737,Precious Cargo,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",14.663,5.2,243
9976,282813,The Pyramid,"[{'id': 27, 'name': 'Horror'}]",17.892,5.2,861
9977,134375,Home Alone: The Holiday Heist,"[{'id': 35, 'name': 'Comedy'}, {'id': 10751, '...",23.978,5.2,335
9978,10477,Driven,"[{'id': 28, 'name': 'Action'}]",13.200,5.2,594


In [None]:
# Reduce each movie's Genres cell from a stringified list of
# {'id': ..., 'name': ...} dicts to a plain list of genre names.

import ast


def _extract_genre_names(raw_genres):
    """Parse one raw Genres string and return just the genre names, in order."""
    return [genre['name'] for genre in ast.literal_eval(raw_genres)]


# assign() builds a new frame, so the original `movie` frame is left untouched
movie_genres = movie.assign(Genres=movie['Genres'].map(_extract_genre_names))

# Show the first 10 rows as a sanity check
movie_genres.head(10)


Unnamed: 0,Movie_id,title,Genres,popularity,vote_average,vote_count
0,238,The Godfather,"[Drama, Crime]",93.552,8.7,16814
1,278,The Shawshank Redemption,"[Drama, Crime]",78.664,8.7,22542
2,240,The Godfather Part II,"[Drama, Crime]",55.752,8.6,10187
3,19404,Dilwale Dulhania Le Jayenge,"[Comedy, Drama, Romance]",22.15,8.6,3927
4,424,Schindler's List,"[Drama, History, War]",53.542,8.6,13382
5,129,Spirited Away,"[Animation, Family, Fantasy]",78.519,8.5,13476
6,667257,Impossible Things,"[Family, Drama]",10.676,8.5,271
7,389,12 Angry Men,[Drama],27.767,8.5,6754
8,372058,Your Name.,"[Romance, Animation, Drama]",120.614,8.5,9178
9,496243,Parasite,"[Comedy, Thriller, Drama]",61.872,8.5,14608


In [None]:
# Requires the `watched` and `movie_genres` frames built in earlier cells.

# Attach catalogue details to every watched entry by matching
# watched.movieid against movie_genres.Movie_id (pandas' default inner join).
watched_movies = watched.merge(movie_genres, left_on='movieid', right_on='Movie_id')



In [None]:
# Inspect the merged watch history (one row per watched entry, with movie details attached)
watched_movies

Unnamed: 0,userid,movieid,rating_given,watching_mood,Movie_id,title,Genres,popularity,vote_average,vote_count
0,1,238,8.0,Happy,238,The Godfather,"[Drama, Crime]",93.552,8.7,16814
1,2,278,5.5,Sad,278,The Shawshank Redemption,"[Drama, Crime]",78.664,8.7,22542
2,3,240,9.5,Neutral,240,The Godfather Part II,"[Drama, Crime]",55.752,8.6,10187
3,1,19404,7.0,Happy,19404,Dilwale Dulhania Le Jayenge,"[Comedy, Drama, Romance]",22.15,8.6,3927
4,2,122,6.5,Neutral,122,The Lord of the Rings: The Return of the King,"[Adventure, Fantasy, Action]",119.972,8.5,20423


In [None]:

from itertools import chain

# Assuming 'user' is your DataFrame containing user preferences.
# Sample data structure for 'user' DataFrame is provided in your initial description.

# Function to expand and tag mood to genres
def expand_user_genres(user_df, mood_column):
    """Return a long-format frame with one row per (user, genre) for one mood.

    Parameters
    ----------
    user_df : pandas.DataFrame
        Must contain an 'id' column and the list-valued column ``mood_column``.
    mood_column : str
        Name of the preference column, e.g. 'happy_movie'. The text before the
        first underscore, capitalised, becomes the mood label ('Happy').

    Returns
    -------
    pandas.DataFrame
        Columns 'id', 'genre' and 'mood'. The index is the exploded index of
        ``user_df`` (repeated for users with several genres); the input frame
        is not modified.
    """
    mood_label = mood_column.split('_')[0].capitalize()
    return (
        user_df[['id', mood_column]]
        .explode(mood_column)            # one row per genre in each user's list
        .assign(mood=mood_label)         # tag every row with the mood
        .rename(columns={mood_column: 'genre'})
    )

# Build one long-format frame per mood column of `user`
happy_genres, sad_genres, neutral_genres = (
    expand_user_genres(user, mood_col)
    for mood_col in ('happy_movie', 'sad_movie', 'neutral_movie')
)

# Stack the three moods into one frame; ignore_index=True gives it a fresh
# 0..n-1 index instead of the repeated per-user indices left by explode.
all_genres = pd.concat([happy_genres, sad_genres, neutral_genres], ignore_index=True)

all_genres.head()


Unnamed: 0,id,genre,mood
0,1,Drama,Happy
1,1,Comedy,Happy
2,2,Romance,Happy
3,2,Family,Happy
4,3,Adventure,Happy


In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# One-hot encode every movie's genre list (movie_genres['Genres'] holds lists
# of genre names). The fitted `mlb` stays at module level so the same encoder
# can be reused on other genre lists.
mlb = MultiLabelBinarizer()
movie_genres_encoded = mlb.fit_transform(movie_genres['Genres'])

# Labelled view of the same matrix: rows keyed by Movie_id, one column per genre
movie_genres_df = pd.DataFrame(
    data=movie_genres_encoded,
    index=movie_genres['Movie_id'],
    columns=mlb.classes_,
)



In [None]:
def recommend_movies_with_genres(user_id, current_mood, top_n=5):
    """Recommend movies whose genres best match a user's preferences for a mood.

    Reads the module-level ``all_genres`` (long-format user preferences),
    ``mlb`` (the genre encoder fitted on the movies), ``movie_genres_encoded``
    (encoded movie genre matrix) and ``movie_genres`` (movie details, in the
    same row order as the encoded matrix).

    Parameters
    ----------
    user_id
        Value matched against ``all_genres['id']``.
    current_mood : str
        Value matched against ``all_genres['mood']`` (e.g. 'Happy').
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list of str
        Up to ``top_n`` strings of the form ``"<title> (Genres: a, b)"``,
        most similar first. Returns an empty list when ``top_n`` is not
        positive or when the user has no genres recorded for this mood.
    """
    # A non-positive top_n must yield nothing. Without this guard `[-0:]`
    # selects the *entire* ranking and negative values slice from the front.
    if top_n <= 0:
        return []

    # Genres this user declared for the requested mood. dropna() discards the
    # NaN placeholder that explode() leaves behind for an empty genre list.
    is_match = (all_genres['id'] == user_id) & (all_genres['mood'] == current_mood)
    user_pref_genres = all_genres.loc[is_match, 'genre'].dropna().unique()

    # With no preferences the encoded vector would be all zeros, every
    # similarity would be 0, and the "top" movies would be arbitrary.
    if len(user_pref_genres) == 0:
        return []

    # Encode the preferences with the same encoder that was fitted on the movies
    user_pref_encoded = mlb.transform([user_pref_genres])

    # Similarity between the single preference vector and every movie row
    similarity = cosine_similarity(user_pref_encoded, movie_genres_encoded)

    # argsort is ascending: keep the last top_n row positions, best first.
    # These are positional row indices into movie_genres, not Movie_id values.
    top_positions = similarity.argsort()[0][-top_n:][::-1]

    recommended = movie_genres.iloc[top_positions][['title', 'Genres']]

    # Human-readable "Title (Genres: a, b)" lines
    return [
        f"{title} (Genres: {', '.join(genres)})"
        for title, genres in zip(recommended['title'], recommended['Genres'])
    ]

# Example: Recommend movies for user 1 in a "Happy" mood, including genres
recommended_movies = recommend_movies_with_genres(1, 'Happy', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)


Ingrid Goes West (Genres: Comedy, Drama)
Jenny's Wedding (Genres: Comedy, Drama)
Boy (Genres: Drama, Comedy)
Diner (Genres: Comedy, Drama)
The Toy (Genres: Comedy, Drama)


In [None]:
# Recommendations for user 1 in a "Sad" mood
recommended_movies = recommend_movies_with_genres(1, 'Sad', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)

Cold in July (Genres: Drama, Thriller)
The Nightingale (Genres: Drama, Thriller)
The Truth About Emanuel (Genres: Drama, Thriller)
New Order (Genres: Thriller, Drama)
Derailed (Genres: Drama, Thriller)


In [None]:
# Recommendations for user 1 in a "Neutral" mood
recommended_movies = recommend_movies_with_genres(1, 'Neutral', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)

The ComDads (Genres: Comedy)
Caddyshack (Genres: Comedy)
I Can Quit Whenever I Want (Genres: Comedy)
Blended (Genres: Comedy)
Billy Madison (Genres: Comedy)


In [None]:
# Recommendations for user 2 in a "Happy" mood
recommended_movies = recommend_movies_with_genres(2, 'Happy', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)


A Christmas Prince: The Royal Baby (Genres: Romance, Family)
The Holiday Calendar (Genres: Romance, Comedy, Family)
Keith (Genres: Drama, Family, Romance)
The Princess Diaries (Genres: Comedy, Family, Romance)
Miss Potter (Genres: Drama, Family, Romance)


In [None]:
# Recommendations for user 2 in a "Sad" mood
recommended_movies = recommend_movies_with_genres(2, 'Sad', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)

The Good Liar (Genres: Crime)
The Sicilian Clan (Genres: Crime)
Oh Mercy (Genres: Crime)
Extremely Wicked, Shockingly Evil and Vile (Genres: Crime)
The Beast (Genres: Crime)


In [None]:
# Recommendations for user 2 in a "Neutral" mood
recommended_movies = recommend_movies_with_genres(2, 'Neutral', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)

Indiana Jones and the Last Crusade (Genres: Adventure, Action)
12 Rounds 2: Reloaded (Genres: Action, Adventure)
Mission: Impossible - Fallout (Genres: Action, Adventure)
Cutthroat Island (Genres: Action, Adventure)
Raiders of the Lost Ark (Genres: Adventure, Action)


In [None]:
# Recommendations for user 3 in a "Happy" mood
recommended_movies = recommend_movies_with_genres(3, 'Happy', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)

The Flight of the Phoenix (Genres: Adventure)
Belle and Sebastian (Genres: Adventure)
The Water Man (Genres: Adventure)
Young Sherlock Holmes (Genres: Adventure)
Ben 10 Alien Swarm (Genres: Adventure)


In [None]:
# Recommendations for user 3 in a "Sad" mood
recommended_movies = recommend_movies_with_genres(3, 'Sad', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)

Dr. Jekyll and Mr. Hyde (Genres: Horror, Science Fiction)
Screamers (Genres: Horror, Science Fiction)
Chopping Mall (Genres: Horror, Science Fiction)
C.H.U.D. (Genres: Horror, Science Fiction)
The Curse of Frankenstein (Genres: Horror, Science Fiction)


In [None]:
# Recommendations for user 3 in a "Neutral" mood
recommended_movies = recommend_movies_with_genres(3, 'Neutral', 5)
# The loop variable is deliberately not called `movie`: that name holds the
# movie DataFrame, and overwriting it would break re-running earlier cells.
for recommendation in recommended_movies:
    print(recommendation)

The Good Liar (Genres: Crime)
The Sicilian Clan (Genres: Crime)
Oh Mercy (Genres: Crime)
Extremely Wicked, Shockingly Evil and Vile (Genres: Crime)
The Beast (Genres: Crime)


In [None]:
# Show the user preference table for comparison with the recommendations above
user

Unnamed: 0,id,happy_movie,sad_movie,neutral_movie,fav_movie
0,1,"[Drama, Comedy]","[Drama, Thriller]",[Comedy],The Godfather
1,2,"[Romance, Family]",[Crime],"[Action, Adventure]",Schindler's List
2,3,[Adventure],"[Horror, Science Fiction]",[Crime],The Shawshank Redemption


Relevant code ends here