In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer, MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Dropout, concatenate
import ast

# Load movie data
# NOTE(review): the recorded run of this cell failed with FileNotFoundError for
# this exact path — confirm the file exists at this location (and that the
# file name is complete) before re-running.
file_path = '/content/drive/MyDrive/final project sample data/10000 Movies Data'
minfo = pd.read_csv(file_path)

# Take an explicit copy of the selected columns: they are overwritten below,
# and assigning into a bare slice of ``minfo`` can trigger pandas'
# SettingWithCopyWarning.
movie = minfo[['Movie_id', 'title', 'Genres', 'popularity', 'vote_average', 'vote_count']].copy()

# Normalize movie features: rescale each numeric column with MinMaxScaler
numeric_features = ['popularity', 'vote_average', 'vote_count']
scaler = MinMaxScaler()
movie[numeric_features] = scaler.fit_transform(movie[numeric_features])

# Function to convert genres string to list
def extract_genres(genres_str):
    """Convert one raw ``Genres`` cell into a list of genre names.

    Parameters
    ----------
    genres_str : str, list, tuple, None or float
        Normally the string form of a Python list of dicts that each carry a
        ``'name'`` key, e.g. ``"[{'id': 18, 'name': 'Drama'}]"``.
        An already-parsed list/tuple is accepted as well, so applying this
        function twice to the same column is harmless.
        ``None``, NaN and blank strings are treated as "no genres".

    Returns
    -------
    list
        The genre names in their original order (empty for missing input).

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a non-blank string is not a
        valid Python literal.
    """
    if isinstance(genres_str, str):
        if not genres_str.strip():
            return []
        genres_list = ast.literal_eval(genres_str)
    elif isinstance(genres_str, (list, tuple)):
        # Already parsed (e.g. the cell was re-run on a converted column).
        genres_list = genres_str
    elif genres_str is None or (isinstance(genres_str, float) and genres_str != genres_str):
        # ``x != x`` is only true for NaN, which pandas uses for missing cells.
        return []
    else:
        # Unknown type: keep the original behaviour and let literal_eval decide.
        genres_list = ast.literal_eval(genres_str)

    # Dict entries contribute their 'name'; anything else is passed through unchanged.
    return [genre['name'] if isinstance(genre, dict) else genre for genre in genres_list]

# Parse every raw Genres cell into a list of genre names
movie['Genres'] = movie['Genres'].apply(extract_genres)

# Multi-hot encode the genre lists: one column per genre seen by the
# binarizer, one row per movie, indexed by the movie's Movie_id
mlb = MultiLabelBinarizer()
movie_genres = mlb.fit_transform(movie['Genres'])
movie_genres_df = pd.DataFrame(
    data=movie_genres,
    index=movie['Movie_id'],
    columns=mlb.classes_,
)

# Sample user profiles: genre preferences per mood plus a favourite title
user_data = dict(
    id=[1, 2, 3],
    happy_movie=[['Drama', 'Comedy'], ['Romance', 'Family'], ['Adventure']],
    sad_movie=[['Drama', 'Thriller'], ['Crime'], ['Horror', 'Science Fiction']],
    neutral_movie=[['Comedy'], ['Action', 'Adventure'], ['Crime']],
    fav_movie=['The Godfather', "Schindler's List", 'The Shawshank Redemption'],
)
user = pd.DataFrame(data=user_data)

# Sample watch history: one row per (user, movie) viewing, with the rating
# the user gave and the mood recorded for that viewing
watched_data = dict(
    userid=[1, 2, 3, 1, 2],
    movieid=[238, 278, 240, 19404, 122],
    rating_given=[8.0, 5.5, 9.5, 7.0, 6.5],
    watching_mood=['Happy', 'Sad', 'Neutral', 'Happy', 'Neutral'],
)
watched = pd.DataFrame(data=watched_data)

# Fit one label encoder per ID space (users, movies) on the reference tables
user_encoder = LabelEncoder().fit(user['id'])
movie_encoder = LabelEncoder().fit(movie['Movie_id'])

# Store the encoded IDs next to the raw ones
user['encoded_id'] = user_encoder.transform(user['id'])
movie['encoded_id'] = movie_encoder.transform(movie['Movie_id'])

# The watch history reuses the already-fitted encoders, so every userid /
# movieid in it must also appear in the corresponding reference table
watched['encoded_userid'] = user_encoder.transform(watched['userid'])
watched['encoded_movieid'] = movie_encoder.transform(watched['movieid'])

# Merge watched data with user and movie data for DNN inputs
merged_df = pd.merge(watched, user, left_on='encoded_userid', right_on='encoded_id', how='left')
merged_df = pd.merge(merged_df, movie, left_on='encoded_movieid', right_on='encoded_id', how='left')

# Ensure genres_input is prepared for all movies.
# Row i must describe the movie whose *encoded* id is i, because the array is
# later indexed with encoded movie ids. ``movie_encoder.classes_`` lists the
# raw Movie_ids in encoded-id order, so reindex the genre table by it instead
# of relying on the DataFrame's row order (which is only correct when
# Movie_id is already sorted and unique).
genres_by_movie = movie_genres_df[~movie_genres_df.index.duplicated(keep='first')]
genres_input = genres_by_movie.reindex(movie_encoder.classes_).to_numpy()

# Define the DNN model architecture
def build_model(num_users, num_movies, num_genres,
                embedding_dim=8, hidden_units=128, dropout_rate=0.5):
    """Build and compile the rating-prediction network.

    The model has three inputs, in this order:

    * ``user_input``   - one encoded user id per sample, shape ``(1,)``
    * ``movie_input``  - one encoded movie id per sample, shape ``(1,)``
    * ``genres_input`` - a genre feature vector of length ``num_genres``

    User and movie ids are embedded, flattened, concatenated with the genre
    vector, and passed through one ReLU hidden layer with dropout to a single
    linear output.

    Parameters
    ----------
    num_users : int
        Size of the user embedding table (ids must lie in ``[0, num_users)``).
    num_movies : int
        Size of the movie embedding table (ids must lie in ``[0, num_movies)``).
    num_genres : int
        Length of the genre feature vector.
    embedding_dim : int, default 8
        Width of both the user and the movie embedding.
    hidden_units : int, default 128
        Width of the hidden dense layer.
    dropout_rate : float, default 0.5
        Dropout rate applied after the hidden layer.

    Returns
    -------
    tensorflow.keras.Model
        Model compiled with the Adam optimizer and mean-squared-error loss.
    """
    user_input = Input(shape=(1,), name='user_input')
    user_embedding = Embedding(num_users, embedding_dim, name='user_embedding')(user_input)
    user_vec = Flatten(name='user_flatten')(user_embedding)

    movie_input = Input(shape=(1,), name='movie_input')
    movie_embedding = Embedding(num_movies, embedding_dim, name='movie_embedding')(movie_input)
    movie_vec = Flatten(name='movie_flatten')(movie_embedding)

    # Local name differs from the module-level ``genres_input`` array on
    # purpose; the layer name (what Keras sees) is unchanged.
    genres_in = Input(shape=(num_genres,), name='genres_input')
    concat = concatenate([user_vec, movie_vec, genres_in], axis=-1)

    dense = Dense(hidden_units, activation='relu')(concat)
    dropout = Dropout(dropout_rate)(dense)
    output = Dense(1, activation='linear')(dropout)

    model = Model(inputs=[user_input, movie_input, genres_in], outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Instantiate and train the model
num_users = user['encoded_id'].nunique()
num_movies = movie['encoded_id'].nunique()
num_genres = len(mlb.classes_)

model = build_model(num_users, num_movies, num_genres)

# Fit on the watch history so later predictions do not come from randomly
# initialised weights. Each training row is (encoded user id, encoded movie
# id, genre vector of that movie) -> rating_given.
TRAIN_EPOCHS = 20  # small default for the sample data; tune for real data
train_genres = (
    movie_genres_df[~movie_genres_df.index.duplicated(keep='first')]  # one row per Movie_id
    .reindex(watched['movieid'].to_numpy())
    .to_numpy(dtype='float32')
)
model.fit(
    [
        watched['encoded_userid'].to_numpy(),
        watched['encoded_movieid'].to_numpy(),
        train_genres,
    ],
    watched['rating_given'].to_numpy(dtype='float32'),
    epochs=TRAIN_EPOCHS,
    verbose=0,
)

# Example usage with a specific user and mood
user_id = 2  # Dynamically set based on input
current_mood = 'happy'  # Dynamically set based on input

# Function to retrieve preferred genres for the user and mood
def get_preferred_genres(user_id, current_mood, user_data):
    """Return the genres a user prefers in a given mood.

    Parameters
    ----------
    user_id : scalar
        Value matched against the ``'id'`` column of ``user_data``.
    current_mood : str
        One of ``'happy'``, ``'sad'`` or ``'neutral'``. Matching ignores case
        and surrounding whitespace, so ``'Happy'`` works too.
    user_data : pandas.DataFrame
        Must contain an ``'id'`` column and the columns ``'happy_movie'``,
        ``'sad_movie'`` and ``'neutral_movie'``.

    Returns
    -------
    object
        The cell stored in the mood column for the first row whose ``'id'``
        equals ``user_id``.

    Raises
    ------
    KeyError
        If ``current_mood`` is not a recognised mood.
    IndexError
        If no row of ``user_data`` has the requested ``user_id``.
    """
    mood_to_column = {
        'happy': 'happy_movie',
        'sad': 'sad_movie',
        'neutral': 'neutral_movie'
    }
    mood_key = str(current_mood).strip().lower()
    if mood_key not in mood_to_column:
        raise KeyError(
            f"Unknown mood {current_mood!r}; expected one of {sorted(mood_to_column)}"
        )

    matches = user_data.loc[user_data['id'] == user_id, mood_to_column[mood_key]]
    if matches.empty:
        raise IndexError(f"No user with id {user_id!r} in user_data")
    return matches.iloc[0]

# Look up the genres this user prefers in the current mood and report them
preferred_genres = get_preferred_genres(
    user_id=user_id,
    current_mood=current_mood,
    user_data=user,
)
print(f"User {user_id}'s preferred genres when {current_mood}: {preferred_genres}")

# Continue with filtering unwatched movies by preferred genres and predicting ratings
# ...


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/final project sample data/10000 Movies Data'

In [None]:


# Recommend unwatched movies that match the user's preferred genres.
#
# All filtering below happens in raw ``Movie_id`` space, because that is what
# ``movie_genres_df`` is indexed by. IDs are label-encoded only at the point
# where they are fed to the model, and decoded again afterwards. Mixing the
# two ID spaces (or using either as a row position) selects the wrong movies.

TOP_N = 5  # Adjust the number of recommendations as needed

# Encode user ID; collect the raw IDs of movies this user has already watched
encoded_user_id = user_encoder.transform([user_id])[0]
watched_movie_ids = watched[watched['userid'] == user_id]['movieid'].tolist()

# One genre row per raw Movie_id, so every label lookup returns exactly one row
genres_by_movie = movie_genres_df[~movie_genres_df.index.duplicated(keep='first')]

# Raw IDs of all movies the user has not watched yet
unwatched_movie_ids = np.setdiff1d(genres_by_movie.index.to_numpy(), watched_movie_ids)

# Filter unwatched movies by preferred genres: keep those tagged with at
# least one genre from ``preferred_genres`` (genres unknown to the binarizer
# are ignored by the column intersection)
matching_genre_columns = genres_by_movie.columns.intersection(preferred_genres)
unwatched_genres = genres_by_movie.loc[unwatched_movie_ids]
has_preferred_genre = (unwatched_genres[matching_genre_columns].sum(axis=1) > 0).to_numpy()
unwatched_and_preferred = unwatched_genres.index.to_numpy()[has_preferred_genre]  # raw Movie_ids

# Prepare model inputs: encoded ids for the embeddings, and genre vectors
# looked up by raw id so each row matches its movie
movie_ids_input = movie_encoder.transform(unwatched_and_preferred)
user_ids_input = np.full(len(movie_ids_input), encoded_user_id)
genres_input_filtered = genres_by_movie.loc[unwatched_and_preferred].to_numpy()

if len(movie_ids_input) == 0:
    # Nothing to score; avoid calling the model on empty inputs.
    recommended_movie_ids = unwatched_and_preferred
    recommended_movies_info = movie.iloc[0:0][['title', 'Genres']]
    print(f"No unwatched movies match user {user_id}'s preferred genres when {current_mood}.")
else:
    # Predict ratings
    predicted_ratings = model.predict([user_ids_input, movie_ids_input, genres_input_filtered])

    # Sort movies based on predicted ratings (highest first) and keep the top N
    top_indices = np.argsort(-predicted_ratings.flatten())[:TOP_N]
    top_movie_ids = movie_ids_input[top_indices]

    # Decode recommended movie IDs to original IDs
    recommended_movie_ids = movie_encoder.inverse_transform(top_movie_ids)

    # Fetch recommended movie titles and genres, preserving the ranking order
    # (a plain ``isin`` filter would return them in DataFrame order instead)
    recommended_movies_info = (
        movie.drop_duplicates(subset='Movie_id')
        .set_index('Movie_id')
        .loc[recommended_movie_ids, ['title', 'Genres']]
    )
    print(f"Top recommended movies for user when {current_mood}:")
    print(recommended_movies_info.to_string(index=False))
