In [32]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from scipy.sparse import csr_matrix

In [68]:
anime_df = pd.read_csv("../dataset/anime.csv")
rating_df = pd.read_csv("../dataset/rating.csv")
anime_df = anime_df.drop("rating",axis=1)
df = pd.merge(rating_df, anime_df, on='anime_id', how = "inner")

In [69]:
#remove empty ratings
df = df[df['rating'] != -1]
print(len(df))

6337239


## Create user-item matrix

In [72]:
df = df.head(10000)
user_item_matrix = df.pivot_table(index='user_id', columns='anime_id', values='rating')
user_item_matrix = user_item_matrix.fillna(0)
print(user_item_matrix)
print(user_item_matrix.shape)


anime_id    20
user_id       
3          8.0
5          6.0
21         8.0
28         9.0
34         9.0
...        ...
33162      7.0
33165     10.0
33167      7.0
33172      7.0
33176      8.0

[10000 rows x 1 columns]
(10000, 1)


In [73]:
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)
print(user_similarity_df)

user_id  3      5      21     28     34     38     39     41     43     46     \
user_id                                                                         
3          1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
5          1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
21         1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
28         1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
34         1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
...        ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
33162      1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
33165      1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
33167      1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
33172      1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0    1.0   
33176      1.0    1.0    1.0

In [79]:
def get_top_n_recommendations(user_id, n=10):
    # Get the similarity scores for the user
    user_sim_scores = user_similarity_df[user_id]
    
    # Get the user's ratings
    user_ratings = user_item_matrix.loc[user_id]
    print(user_ratings)
    
    # Calculate the weighted sum of ratings for all items
    weighted_sum_ratings = np.dot(user_similarity, user_item_matrix)
    
    # Normalize by the sum of similarities to get the predicted ratings
    sum_similarities = user_similarity.sum(axis=1)
    predicted_ratings = weighted_sum_ratings / sum_similarities[:, np.newaxis]
    
    # Convert the predictions to a DataFrame
    predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)
   
    
    # Get the anime the user hasn't rated yet
    unrated_anime = user_ratings[user_ratings == 0].index
    print(unrated_anime)
    
    # Get the predicted ratings for these anime
    recommendations = predicted_ratings_df.loc[user_id, unrated_anime]
    
    # Sort the predictions by rating in descending order and return the top n
    top_n_recommendations = recommendations.sort_values(ascending=False).head(n)
    
    return top_n_recommendations

recommendations = get_top_n_recommendations(user_id=3, n=10)
print(recommendations)


anime_id
20    8.0
Name: 3, dtype: float64
Index([], dtype='int64', name='anime_id')
Series([], Name: 3, dtype: float64)


In [44]:
# Get anime names for the recommended anime_ids
recommended_anime = [(anime_df[anime_df['anime_id'] == anime_id]['name'].values[0], rating) 
                     for anime_id, rating in recommendations.items()]

# Print the recommended anime
for anime_name, rating in recommended_anime:
    print(f"{anime_name}: {rating:.2f}")
