In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the dataset
df = pd.read_csv('C:\\Users\\disch\\Desktop\\Projects\\ML_REC_SYSTEM\\ml-latest-small\\movies.csv')

# Create a TF-IDF vectorizer to convert genre text to numerical features
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['genres'])

# Calculate the cosine similarity between movies based on their genre
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Create a function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim):
    indices = df.index[df['title'] == title].tolist()
    
    if not indices:
        print(f"Movie with title '{title}' not found in the dataset.")
        return
    
    idx = indices[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Get the top 10 similar movies
    movie_indices = [i[0] for i in sim_scores]
    return df['title'].iloc[movie_indices]

# Test the recommendation system
movie_title = "Hunger Games"
recommendations = get_recommendations(movie_title)
print(f"Recommendations for {movie_title}:")
print(recommendations)

Movie with title 'Hunger Games' not found in the dataset.
Recommendations for Hunger Games:
None
