In [6]:
import os
import pickle
import pandas as pd
from scipy.sparse import load_npz
from sklearn.metrics.pairwise import cosine_similarity

# Paths
trained_dir = "D:/movie_recommendation_system/rcmndn_model/trained_models"
data_dir = "D:/movie_recommendation_system/rcmndn_model/data/processed"

# Load data
X_content = load_npz(os.path.join(trained_dir, "X_content_v2.npz"))
movies = pd.read_csv(os.path.join(data_dir, "movies_cleaned.csv"))

print(" Loaded features and movies dataset")
print(X_content.shape, movies.shape)


 Loaded features and movies dataset
(3537, 5025) (3537, 16)


In [7]:
# Compute cosine similarity
cosine_sim = cosine_similarity(X_content, X_content)

# Save similarity matrix
with open(os.path.join(trained_dir, "cosine_sim.pkl"), "wb") as f:
    pickle.dump(cosine_sim, f)

print(" Cosine similarity matrix computed and saved")


 Cosine similarity matrix computed and saved


In [8]:
def recommend_movies(title, top_n=10):
    # Check if movie exists
    if title not in movies['title'].values:
        print(" Movie not found in dataset")
        return []
    
    # Get index of the movie
    idx = movies[movies['title'] == title].index[0]
    
    # Get similarity scores
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort by similarity (skip the movie itself at index 0)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    
    # Get top movie indices
    movie_indices = [i[0] for i in sim_scores]
    
    # Return relevant columns
    return movies.iloc[movie_indices][['title', 'genres', 'vote_average']]


In [9]:
print("\n🎬 Recommendations for 'Toy Story (1995)':")
print(recommend_movies("Toy Story (1995)", top_n=5))



🎬 Recommendations for 'Toy Story (1995)':
                           title  \
2743          Toy Story 3 (2010)   
865           Toy Story 2 (1999)   
1286       Monsters, Inc. (2001)   
3167      Despicable Me 2 (2013)   
2325  Simpsons Movie, The (2007)   

                                                genres  vote_average  
2743  Adventure|Animation|Children|Comedy|Fantasy|IMAX           7.6  
865        Adventure|Animation|Children|Comedy|Fantasy           7.3  
1286       Adventure|Animation|Children|Comedy|Fantasy           7.5  
3167                    Animation|Children|Comedy|IMAX           7.0  
2325                                  Animation|Comedy           6.9  


In [10]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import os
import pickle

# Paths
ratings_path = "D:/movie_recommendation_system/rcmndn_model/data/processed/ratings_final.csv"
movies_path = "D:/movie_recommendation_system/rcmndn_model/data/processed/movies_cleaned.csv"
save_path = "D:/movie_recommendation_system/rcmndn_model/trained_models/item_similarity.pkl"

# 1. Load data
ratings = pd.read_csv(ratings_path)
movies = pd.read_csv(movies_path)

# 2. Create user–movie matrix
user_movie_matrix = ratings.pivot(index="userId", columns="movieId", values="rating").fillna(0)
print(f" User-Movie matrix created: {user_movie_matrix.shape}")

# 3. Compute cosine similarity between movies
item_similarity = cosine_similarity(user_movie_matrix.T)  # movies × movies
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=user_movie_matrix.columns,
    columns=user_movie_matrix.columns
)
print(f" Item-Item similarity matrix created: {item_similarity_df.shape}")

# 4. Save similarity matrix
os.makedirs(os.path.dirname(save_path), exist_ok=True)
with open(save_path, "wb") as f:
    pickle.dump(item_similarity_df, f)

print(f" Item similarity matrix saved at: {save_path}")

# 5. Function to recommend movies
def recommend_movies_cf(movie_id, top_n=5):
    if movie_id not in item_similarity_df.index:
        return pd.DataFrame({"Message": [f"Movie ID {movie_id} not found in dataset"]})

    # Get similarity scores
    sim_scores = item_similarity_df[movie_id].sort_values(ascending=False)

    # Drop the same movie
    sim_scores = sim_scores.drop(movie_id)

    # Top N
    top_movies = sim_scores.head(top_n).index
    return movies[movies["movieId"].isin(top_movies)][["movieId", "title", "genres", "vote_average"]]

# --- Quick test ---
test_movie_id = 1  # Toy Story (1995)
print(f"\n🎬 Collaborative Filtering Recommendations for '{movies[movies['movieId']==test_movie_id]['title'].values[0]}':")
print(recommend_movies_cf(test_movie_id, top_n=5))


 User-Movie matrix created: (610, 2187)
 Item-Item similarity matrix created: (2187, 2187)
 Item similarity matrix saved at: D:/movie_recommendation_system/rcmndn_model/trained_models/item_similarity.pkl

🎬 Collaborative Filtering Recommendations for 'Toy Story (1995)':
     movieId                                      title  \
80       260  Star Wars: Episode IV - A New Hope (1977)   
103      356                        Forrest Gump (1994)   
128      480                       Jurassic Park (1993)   
187      780       Independence Day (a.k.a. ID4) (1996)   
865     3114                         Toy Story 2 (1999)   

                                          genres  vote_average  
80                       Action|Adventure|Sci-Fi           8.1  
103                     Comedy|Drama|Romance|War           8.2  
128             Action|Adventure|Sci-Fi|Thriller           7.6  
187             Action|Adventure|Sci-Fi|Thriller           6.7  
865  Adventure|Animation|Children|Comedy|Fantasy 