In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Toy catalogue used by the content-based example: one (title, genre) pair per film.
movies = pd.DataFrame(
    [
        ("Inception", "Sci-Fi Thriller"),
        ("Interstellar", "Sci-Fi Space"),
        ("The Dark Knight", "Action Crime"),
        ("The Matrix", "Sci-Fi Action"),
        ("Avengers", "Action Fantasy"),
    ],
    columns=['title', 'genre'],
)

In [None]:
# Turn each movie's genre string into a TF-IDF vector, dropping English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genre'])

# Movie-to-movie cosine similarity. With a single argument the matrix is
# compared against itself, giving one row and one column per movie.
cosine_sim = cosine_similarity(tfidf_matrix)

In [None]:
# Show the TF-IDF weights as a dense array so the cell output is readable;
# rows follow the order of `movies['genre']`.
tfidf_matrix.toarray()

In [None]:
def recommend_movies(index, top_n=2):
    """
    Return the titles of the movies most similar to the movie at ``index``.

    Similarity is read from the precomputed ``cosine_sim`` matrix and titles
    from the ``movies`` frame; both are positional, so ``index`` is the row
    position of the query movie.

    NOTE: a later cell in this notebook defines another function with the
    same name, which replaces this one once that cell has been run.

    Parameters
    ----------
    index : int
        Row position of the query movie (negative values count from the end).
    top_n : int, default 2
        Maximum number of titles to return.

    Returns
    -------
    list
        Up to ``top_n`` titles, most similar first. The query movie itself is
        never included.

    Raises
    ------
    IndexError
        If ``index`` does not address a row of ``cosine_sim``.
    """
    n_movies = len(cosine_sim)
    if not -n_movies <= index < n_movies:
        raise IndexError(
            f"movie index {index} is out of range for {n_movies} movies"
        )
    # Normalise negative positions so the self-filter below compares like with like.
    index %= n_movies

    # Drop the query movie explicitly instead of assuming it sorts first:
    # another movie with an identical vector would tie at similarity 1.0.
    ranked = sorted(
        ((i, score) for i, score in enumerate(cosine_sim[index]) if i != index),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Look up each title by its own position `i` (positional access via .iloc).
    return [movies['title'].iloc[i] for i, _ in ranked[:top_n]]
# Row position 1 is "Interstellar" in the `movies` frame.
print("Recommendations for Interstellar: ",recommend_movies(1))

In [None]:
import numpy as np

In [None]:
# Toy 4x4 rating matrix; each row is compared against the others as one user
# in the similarity cell that follows.
ratings = np.array([
    [5, 4, 0, 0],
    [4, 0, 0, 2],
    [0, 0, 4, 4],
    [2, 2, 0, 5],
])


In [None]:
# User-to-user cosine similarity: entry (i, j) compares rating rows i and j.
from sklearn.metrics.pairwise import cosine_similarity
user_similarity = cosine_similarity(ratings)

# Heading and matrix on separate lines, emitted by a single print call.
print("User Similarity Matrix:", user_similarity, sep="\n")

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Read the ratings sheet from disk.
file_path = "movie_reviews_manuel - Sheet1.csv"  # update path if needed
df = pd.read_csv(file_path)

# Key the table by the "Title" column; the remaining columns are treated as users.
ratings = df.set_index("Title")

# Flip the table so each row holds one user's ratings across all movies.
ratings_T = ratings.transpose()

# Pairwise cosine similarity between user rows, wrapped in a frame that is
# labelled by user on both axes.
user_similarity = cosine_similarity(ratings_T)
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=ratings_T.index,
    columns=ratings_T.index,
)

def recommend_movies(user, num_recommendations=5):
    """
    Recommend top movies for a given user based on collaborative filtering.

    Every other user's ratings are weighted by that user's similarity to
    ``user`` (taken from ``user_similarity_df``) and averaged into a predicted
    rating per movie. Movies are then ranked by how far that prediction
    exceeds the rating ``user`` gave, largest difference first.

    Parameters
    ----------
    user : label
        Column of ``ratings`` identifying the user.
    num_recommendations : int, default 5
        Maximum number of movies to return.

    Returns
    -------
    pandas.Series
        Predicted-minus-actual rating per movie, sorted descending. Empty when
        the other users' similarities sum to zero (including the case of no
        other users), because no weighted average can be formed.

    Raises
    ------
    KeyError
        If ``user`` is not a column of ``ratings``.
    """
    if user not in ratings.columns:
        raise KeyError(f"unknown user {user!r}: not a column of `ratings`")

    user_ratings = ratings[user]
    # Similarity of every *other* user to `user`.
    similar_users = user_similarity_df[user].drop(user)

    sim_sums = similar_users.sum()
    # Without any similarity mass the division below would yield NaN/inf scores.
    if pd.isna(sim_sums) or sim_sums == 0:
        return pd.Series(dtype=float)

    # Similarity-weighted sum of the other users' ratings, one value per movie.
    # `axis=1` aligns the similarity labels with the user columns; `min_count=1`
    # keeps a movie as NaN when none of the other users has a rating for it.
    weighted_ratings = (
        ratings[similar_users.index]
        .mul(similar_users, axis=1)
        .sum(axis=1, min_count=1)
    ) / sim_sums

    improvement = weighted_ratings - user_ratings
    return improvement.sort_values(ascending=False).head(num_recommendations)

def top_movies(num=5):
    """
    Return the ``num`` highest-rated movies by mean rating across all users.

    The result is a Series indexed like ``ratings`` (one entry per movie),
    sorted from highest to lowest average.
    """
    ranked = ratings.mean(axis="columns").sort_values(ascending=False)
    return ranked.head(num)

# Example usage
# "aiswarya" is looked up as a column of `ratings`, so it must match a column
# header in the loaded CSV.
print("Top 5 recommendations for aiswarya:")
print(recommend_movies("aiswarya", 5))

# Overall ranking by mean rating across all users, independent of any one user.
print("\nTop 5 movies overall:")
print(top_movies(5))
