In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# Load the prepared ratings table. The code below reads its 'userId',
# 'movieId', 'rating' and 'name' columns.
ratings_data = pd.read_csv('prepared_ratings.csv')

# Users as rows, movies as columns. pivot_table's default aggregation (mean)
# applies if a (user, movie) pair appears more than once; cells with no
# rating are filled with 0.
user_item_matrix = pd.pivot_table(
    ratings_data,
    values='rating',
    index='userId',
    columns='movieId',
).fillna(0)

sparse_matrix = csr_matrix(user_item_matrix)

# Transpose so each row is a movie: the result is movie-to-movie similarity.
item_similarity = cosine_similarity(sparse_matrix.T)

# Label both axes with movie IDs so similarities can be looked up by ID.
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)

# movieId -> name lookup used when reporting similar movies.
movie_id_to_name = (
    ratings_data[['movieId', 'name']]
    .drop_duplicates()
    .set_index('movieId')['name']
    .to_dict()
)

def get_similar_movies(movie_id, top_n=5):
    """Return the ``top_n`` movies most similar to ``movie_id``.

    Scores are read from the module-level ``item_similarity_df`` and titles
    from ``movie_id_to_name``.

    Args:
        movie_id: Label of the query movie in ``item_similarity_df``.
        top_n: Maximum number of neighbours to return. Values of zero or
            less yield an empty list.

    Returns:
        A list of ``(movie_id, name, score)`` tuples ordered by descending
        score, never including the query movie itself. ``name`` is
        ``"Unknown"`` when the ID has no entry in ``movie_id_to_name``.
        If ``movie_id`` is not in ``item_similarity_df``, a message string
        is returned instead (kept for backward compatibility), so callers
        should check the return type.
    """
    if movie_id not in item_similarity_df.index:
        return f"Movie ID {movie_id} not found in the dataset."

    # Remove the query movie by label instead of assuming it sorts first:
    # another movie can tie with it at similarity 1.0, and a movie whose
    # column is all zeros has self-similarity 0, so a positional skip could
    # drop a real neighbour and keep the query movie in its own results.
    other_scores = item_similarity_df[movie_id].drop(labels=movie_id)

    # A stable sort keeps tied scores in a deterministic order.
    ranked = other_scores.sort_values(ascending=False, kind="mergesort")

    # Clamp so a negative top_n cannot turn into a "drop from the end" slice.
    similar_movies = ranked.head(max(top_n, 0))

    # Map movie IDs to names and scores
    return [
        (m_id, movie_id_to_name.get(m_id, "Unknown"), score)
        for m_id, score in similar_movies.items()
    ]

def save_similar_movies_to_csv(movie_name, file_name, top_n=5):
    """Find movies similar to ``movie_name`` and write them to a CSV file.

    The title is matched exactly against the ``name`` column of the
    module-level ``ratings_data``. If several movie IDs share the title,
    the first one found is used.

    Args:
        movie_name: Exact movie title to look up.
        file_name: Path of the CSV file to write.
        top_n: Number of similar movies to request.

    Returns:
        None. On success a CSV with the columns ``Movie ID``,
        ``Movie Name`` and ``Similarity Score`` is written and a
        confirmation is printed. If the movie cannot be resolved, a message
        is printed and no file is written.
    """
    # Get the movieId for the given movie name
    matching_ids = ratings_data.loc[
        ratings_data['name'] == movie_name, 'movieId'
    ].unique()
    if len(matching_ids) == 0:
        print(f"Movie '{movie_name}' not found.")
        return
    movie_id = matching_ids[0]

    # Get similar movies
    similar_movies = get_similar_movies(movie_id, top_n=top_n)
    if isinstance(similar_movies, str):
        # get_similar_movies reports an ID missing from the similarity
        # matrix as a message string; passing that to pd.DataFrame would
        # raise, so report it and stop without writing a file.
        print(similar_movies)
        return

    # Prepare DataFrame for saving
    similar_movies_df = pd.DataFrame(
        similar_movies,
        columns=['Movie ID', 'Movie Name', 'Similarity Score'],
    )
    similar_movies_df.to_csv(file_name, index=False)
    print(f"Saved similar movies for '{movie_name}' to {file_name}")

# Export collaborative-filtering neighbours for each of the three target
# movies, one CSV per title.
for title, output_csv in (
    ("My Neighbor Totoro", "totoro_collaborative.csv"),
    ("Howl's Moving Castle", "howl_collaborative.csv"),
    ("Kiki's Delivery Service", "kiki_collaborative.csv"),
):
    save_similar_movies_to_csv(title, output_csv)





Saved similar movies for 'My Neighbor Totoro' to totoro_collaborative.csv
Saved similar movies for 'Howl's Moving Castle' to howl_collaborative.csv
Saved similar movies for 'Kiki's Delivery Service' to kiki_collaborative.csv
