In [13]:
import sqlite3
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import numpy as np
import random
from joblib import Parallel, delayed

In [14]:
# Load every rating joined with the title and genres of the rated movie.
# NOTE: the resulting frame has one row per *rating*, so a movie appears once
# for every user who rated it.
conn = sqlite3.connect('movie_ratings_db.sqlite')

query = """
    SELECT r.userId, r.movieId, r.rating, m.title, m.genres
    FROM ratings r
    JOIN movies m ON r.movieId = m.movieId
"""
try:
    df = pd.read_sql_query(query, conn)
finally:
    # Release the database handle even when the query fails; the data now
    # lives in `df`, so the connection is not needed to keep working with it.
    conn.close()

In [15]:
# The genres column is assumed to arrive as a pipe-delimited string
# ("Genre1|Genre2|Genre3"); turn each one into a list of genre names.
# Only values that are still strings are split, which makes this cell safe to
# re-run: applying `.str.split` to already-split lists would replace every
# value with NaN.
df['genres'] = df['genres'].apply(
    lambda value: value.split('|') if isinstance(value, str) else value
)


In [16]:
# Content-Based Filtering (using Genres)
# Human-readable, space-joined genre string kept as a column of `df`.
df['genres_str'] = df['genres'].apply(lambda x: ' '.join(x))

# Vectorise with exactly one token per genre. The default word tokenizer
# splits on punctuation and whitespace, so a hyphenated or multi-word genre
# name (e.g. something like "Sci-Fi") would be broken into several unrelated
# tokens. Joining with '|' and treating every run of non-pipe characters as a
# token keeps each genre intact. Stop-word removal is dropped because whole
# genre names are not English stop words.
genre_docs = df['genres'].apply(lambda x: '|'.join(x))
tfidf_vectorizer = TfidfVectorizer(token_pattern=r'[^|]+')
tfidf_matrix = tfidf_vectorizer.fit_transform(genre_docs)


In [17]:
# Nearest-neighbour index over the genre TF-IDF rows: cosine distance,
# 5 neighbours per query unless a call overrides it, search algorithm left
# for scikit-learn to pick.
nn = NearestNeighbors(metric='cosine', algorithm='auto', n_neighbors=5).fit(tfidf_matrix)
nn

In [18]:
# Collaborative Filtering (User-Item Interactions)
# Users x titles rating matrix; a missing rating is stored as 0.
user_movie_ratings = (
    df.pivot_table(values='rating', index='userId', columns='title')
      .fillna(0)
)
# Titles x users view of the same data, so each row describes one movie.
movie_user_ratings = user_movie_ratings.transpose()
# Pairwise cosine similarity between the movie rows (titles x titles).
movie_similarity = cosine_similarity(movie_user_ratings)

In [19]:
# Function to get similar items based on NearestNeighbors
def get_similar_items_nn(movie_index, top_n=5):
    """Return the titles whose genre profile is closest to the given row.

    ``df`` (and therefore ``tfidf_matrix``) holds one row per rating, so the
    nearest rows of a query are largely other ratings of the very same movie.
    A fixed 5-row lookup would mostly return the query title repeated; the
    search is therefore widened until ``top_n`` distinct titles other than the
    query movie have been collected, or every row has been examined.

    Parameters
    ----------
    movie_index : int
        Positional row number into ``df`` / ``tfidf_matrix``.
    top_n : int, default 5
        Maximum number of titles to return.

    Returns
    -------
    list
        Up to ``top_n`` unique titles ordered from nearest to farthest, never
        containing the query movie's own title.
    """
    if top_n <= 0:
        return []

    query_vector = tfidf_matrix[movie_index]
    query_title = df.iloc[movie_index]['title']
    n_rows = tfidf_matrix.shape[0]

    # Start just above top_n and grow geometrically; k may never exceed the
    # number of fitted rows.
    k = min(n_rows, top_n + 1)
    while True:
        _, indices = nn.kneighbors(query_vector, n_neighbors=k)
        neighbour_titles = df.iloc[indices[0]]['title'].tolist()
        # dict.fromkeys removes repeats while keeping nearest-first order.
        unique_titles = [
            title for title in dict.fromkeys(neighbour_titles)
            if title != query_title
        ]
        if len(unique_titles) >= top_n or k >= n_rows:
            return unique_titles[:top_n]
        k = min(n_rows, k * 4)

In [20]:
# Function to get similar items based on Collaborative Filtering
def get_similar_items_cf(movie_title, top_n=5):
    """Return the movies rated most similarly to ``movie_title``.

    Similarity is read straight from the row of ``movie_similarity`` that
    belongs to ``movie_title``. The earlier implementation multiplied the
    movie's rating vector with every similarity column via broadcasting, which
    reduces to ``sum(ratings) * column_sum`` and therefore produced the same
    ranking for every query movie.

    Parameters
    ----------
    movie_title : str
        Title to find neighbours for; must be a row label of
        ``movie_user_ratings``.
    top_n : int, default 5
        Maximum number of titles to return.

    Returns
    -------
    list
        Up to ``top_n`` titles ordered by descending cosine similarity,
        excluding ``movie_title`` itself.

    Raises
    ------
    KeyError
        If ``movie_title`` is not present in the rating matrix.
    """
    titles = movie_user_ratings.index
    # get_loc raises KeyError for unknown titles, as the column lookup did before.
    movie_position = titles.get_loc(movie_title)

    if top_n <= 0:
        return []

    scores = pd.Series(movie_similarity[movie_position], index=titles)
    # A movie is always maximally similar to itself; recommending it is useless.
    scores = scores.drop(labels=movie_title)

    return scores.nlargest(top_n).index.tolist()

In [21]:
# Function to get hybrid recommendations (combining CF and CB)
def get_hybrid_recommendations(selected_movies, top_n=5):
    """Blend collaborative-filtering and genre-based recommendations.

    For every selected movie one collaborative list
    (``get_similar_items_cf``) and one content-based list
    (``get_similar_items_nn``) is fetched. Content-based titles are kept only
    when they share at least one genre with the selected movies. The lists are
    then merged round-robin by rank, so the best hit of every list comes
    before any second-best hit. Duplicates and the selected movies themselves
    are removed and the first ``top_n`` titles are returned.

    The merge is deterministic; the previous ``list(set(...))[:top_n]`` picked
    an arbitrary subset and could recommend movies the user had just selected.

    Parameters
    ----------
    selected_movies : iterable of str
        Titles the user likes.
    top_n : int, default 5
        Maximum number of recommendations; also forwarded to the
        collaborative lookup.

    Returns
    -------
    list
        Up to ``top_n`` distinct titles, none of which is in
        ``selected_movies``.

    Raises
    ------
    KeyError, IndexError
        Propagated when a selected title is unknown to the rating matrix or
        does not occur in ``df``.
    """
    selected_movies = list(selected_movies)
    if not selected_movies or top_n <= 0:
        return []

    # Collaborative filtering: one ranked list per selected movie.
    cf_lists = Parallel(n_jobs=-1)(
        delayed(get_similar_items_cf)(movie_title, top_n=top_n)
        for movie_title in selected_movies
    )

    # Content-based: look up the first *positional* row of each title, since
    # get_similar_items_nn indexes tfidf_matrix by position.
    title_values = df['title'].to_numpy()
    cb_lists = Parallel(n_jobs=-1)(
        delayed(get_similar_items_nn)(int(np.flatnonzero(title_values == movie_title)[0]))
        for movie_title in selected_movies
    )

    # Collect genres once for every title involved instead of rescanning df
    # for each recommendation.
    titles_of_interest = set(selected_movies).union(*cb_lists)
    relevant = df.loc[df['title'].isin(titles_of_interest), ['title', 'genres']]
    genres_by_title = {
        title: set().union(*genre_lists)
        for title, genre_lists in relevant.groupby('title')['genres']
    }
    genre_filter = set().union(
        *(genres_by_title.get(title, set()) for title in selected_movies)
    )

    # Keep only content-based hits that share a genre with the selection.
    cb_lists = [
        [title for title in titles if genres_by_title.get(title, set()) & genre_filter]
        for titles in cb_lists
    ]

    # Round-robin merge by rank across all lists.
    ranked_lists = [list(titles) for titles in cf_lists] + cb_lists
    merged = []
    for rank in range(max((len(titles) for titles in ranked_lists), default=0)):
        merged.extend(titles[rank] for titles in ranked_lists if rank < len(titles))

    already_chosen = set(selected_movies)
    hybrid_recommendations = [
        title for title in dict.fromkeys(merged) if title not in already_chosen
    ]
    return hybrid_recommendations[:top_n]

In [22]:
# Function to allow the user to choose movies from a random list
def choose_movies(max_choices=5):
    """Interactively ask the user which randomly offered movies they like.

    Candidates are drawn from the *distinct* titles in ``df`` in random order.
    Because ``df`` has one row per rating, sampling its rows directly could
    offer the same movie several times, and required at least ten rows. Each
    title is offered at most once, so a selected or rejected movie never
    reappears. Prompting stops after ``max_choices`` movies were accepted or
    when no candidates are left.

    Parameters
    ----------
    max_choices : int, default 5
        Maximum number of movies to collect.

    Returns
    -------
    list
        The accepted titles in the order they were chosen (no duplicates).
    """
    candidates = df['title'].drop_duplicates().tolist()
    random.shuffle(candidates)

    print(f"Choose up to {max_choices} movies from the list:")
    selected_movies = []
    for title in candidates:
        if len(selected_movies) >= max_choices:
            break
        # Re-ask about the same movie until the answer is a clear yes or no.
        while True:
            print(f"{len(selected_movies) + 1}. {title}")
            choice = input("Enter 'yes' if you like this movie or 'no' if you don't: ").strip().lower()
            if choice == 'yes':
                selected_movies.append(title)
                break
            if choice == 'no':
                break
            print("Invalid choice. Please enter 'yes' or 'no'.")
    return selected_movies

In [23]:
# Demo: collect the user's picks interactively, then print the blended
# recommendations derived from them.
selected_movies = choose_movies()
hybrid_recommendations = get_hybrid_recommendations(selected_movies, top_n=5)
message = f'Hybrid recommendations based on user-selected movies:\n{hybrid_recommendations}'
print(message)

Choose up to 5 movies from the list:
1. Emma (1996)
2. Battleship Potemkin (1925)
3. Johnny Mnemonic (1995)
Invalid choice. Please enter 'yes' or 'no'.
3. Johnny Mnemonic (1995)
4. Louis C.K.: Live at the Beacon Theater (2011)
5. Acid House, The (1998)
