In [98]:
import os

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [99]:
# Path of the movies table. The MOVIES_CSV environment variable overrides the original
# machine-specific location, which is kept as the default so existing setups still work.
movies_csv_path = os.environ.get('MOVIES_CSV', r'C:\Users\Dell.com\Desktop\ml\project 1\movies.csv')
movies = pd.read_csv(movies_csv_path)
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [100]:
# Replace missing genre entries with the empty string so every value in the column is text.
genres_without_gaps = movies['genres'].fillna('')
movies['genres'] = genres_without_gaps

In [101]:
# Build the TF-IDF document-term matrix from the genre strings (English stop words removed).
genre_corpus = movies['genres']
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(genre_corpus)

In [102]:
# Reduce the TF-IDF matrix with truncated SVD. The rank is capped at the number of
# TF-IDF columns because the genre vocabulary may have fewer than 50 terms.
n_components = min(50, tfidf_matrix.shape[1])
# TruncatedSVD's default solver is randomized; a fixed random_state makes the reduced
# matrix identical on every "Restart & Run All".
svd = TruncatedSVD(n_components=n_components, random_state=42)
tfidf_matrix_svd = svd.fit_transform(tfidf_matrix)

In [103]:
# Load the precomputed similarity values.
# NOTE(review): no visible cell writes "cosine_sim_svd.npy"; a fresh kernel needs the file
# to exist already -- confirm where it is produced.
cosine_sim_svd = np.load("cosine_sim_svd.npy")

sample_size = 1000000
# Seeded generator so the sampled subset is the same on every run.
sample_rng = np.random.default_rng(42)
# Flatten before sampling: np.random.choice only accepts 1-D input and raises ValueError
# when handed a 2-D matrix. Sampling stays with replacement, as in np.random.choice's default.
subset_cosine_sim_svd = sample_rng.choice(cosine_sim_svd.ravel(), size=sample_size)
# NOTE(review): a random sample keeps no row/column alignment with the movies, so indexing
# this array by movie position (as get_recommendations does) yields an arbitrary value --
# confirm this is intended.
output_file = "subset_cosine_sim_svd.npy"
np.save(output_file, subset_cosine_sim_svd)

In [105]:
def get_recommendations(movie_title, tfidf_matrix, cosine_sim_svd, dataframe, top_n=5):
    """Print and return the ``top_n`` best-scoring titles for ``movie_title``.

    Each row's score is the dot product of its TF-IDF vector with the query row's
    vector, plus ``cosine_sim_svd[idx]``, where ``idx`` is the position of the query
    row. The query row itself is never recommended.

    Parameters
    ----------
    movie_title : str
        Value matched exactly against ``dataframe['title']``; the first match is used.
    tfidf_matrix : sparse matrix or 2-D ndarray
        Feature rows, aligned by position with the rows of ``dataframe``.
    cosine_sim_svd : array-like
        Extra score term, indexed by the query row's position. A 2-D array contributes
        its ``idx`` row element-wise. A 1-D array contributes a single scalar, which
        shifts every score equally and therefore cannot change the ranking.
    dataframe : pandas.DataFrame
        Must contain a ``'title'`` column.
    top_n : int, default 5
        Maximum number of titles to return; values below 1 return no titles.

    Returns
    -------
    list of str
        Recommended titles, best first. Empty when the title is not found.
    """
    # Use positional row numbers rather than index labels: the matrices are addressed by
    # position, so labels would select the wrong row (or fail) whenever the frame's index
    # is not the default 0..n-1 range.
    matches = np.flatnonzero((dataframe['title'] == movie_title).to_numpy())
    if matches.size == 0:
        print(f"Movie '{movie_title}' not found in the dataset.")
        return []

    idx = int(matches[0])
    # Slice instead of indexing so the query stays 2-D for dense input as well as sparse.
    sim_scores = linear_kernel(tfidf_matrix[idx:idx + 1], tfidf_matrix) + cosine_sim_svd[idx]

    # Copy the score row so the query's own entry can be masked out; otherwise a movie
    # could be recommended for itself.
    scores = np.array(sim_scores[0], dtype=float)
    scores[idx] = -np.inf

    # Clamp the request: at most every other row, and never negative. Taking the head of
    # the reversed ordering (rather than a [-top_n:] tail) keeps top_n=0 from selecting
    # every row.
    n_pick = max(0, min(int(top_n), scores.size - 1))
    movie_indices = np.argsort(scores)[::-1][:n_pick]

    recommended_movies = dataframe['title'].iloc[movie_indices].tolist()

    print(f"Top {top_n} recommended movies for '{movie_title}':")
    for rank, movie in enumerate(recommended_movies, start=1):
        print(f"{rank}. '{movie}'")

    return recommended_movies

# Example query: print the five best-scoring titles for one movie.
movie_title = 'Bad Boys (1995)'
recommendations = get_recommendations(
    movie_title=movie_title,
    tfidf_matrix=tfidf_matrix,
    cosine_sim_svd=subset_cosine_sim_svd,
    dataframe=movies,
    top_n=5,
)

Top 5 recommended movies for 'Bad Boys (1995)':
1. 'Wasabi (2001)'
2. 'Fifth Commandment, The (2008)'
3. 'Business, The (2005)'
4. 'Black Shampoo (1976)'
5. 'Money Train (1995)'
