In [37]:
import pandas as pd 
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import difflib
import string
from collections import Counter
from tqdm import tqdm
import pickle

In [38]:
df = pd.read_csv("/Users/justinvhuang/Desktop/CSE-6242-Group-Project/users-score-2023.csv")

In [20]:
# Open the pickle file in binary read mode and load the pickled object
with open("my_animelist.pkl", 'rb') as f:
    loaded_list = pickle.load(f)

In [21]:
len(loaded_list)

11565

In [22]:
df = df[df['anime_id'].isin(loaded_list)]

In [23]:
anime_count = df.groupby('user_id')['anime_id'].count()

user_ids_to_drop = anime_count[anime_count < 10].index

filtered_df = df[~df['user_id'].isin(user_ids_to_drop)]

In [24]:
user_item_matrix = filtered_df.pivot_table(index='user_id', columns='Anime Title', values='rating')

user_item_matrix = user_item_matrix.fillna(0)

user_ratings_mean = user_item_matrix.mean(axis=1)
user_item_matrix_centered = user_item_matrix.sub(user_ratings_mean, axis=0)

item_similarity = cosine_similarity(user_item_matrix_centered.T)

In [25]:
def jaccard_similarity(title1, title2):
    title1_tokens = set(title1.lower().translate(str.maketrans('', '', string.punctuation)).split())
    title2_tokens = set(title2.lower().translate(str.maketrans('', '', string.punctuation)).split())
    intersection = len(title1_tokens.intersection(title2_tokens))
    union = len(title1_tokens.union(title2_tokens))
    return intersection / union if union else 0

def item_collaborative_recommender(anime_title, user_item_matrix, item_similarity, top_n=10):
    sim_scores = item_similarity[user_item_matrix.columns.get_loc(anime_title)]
    
    top_indices = np.argsort(sim_scores)[::-1][1:2*top_n+1]  
    
    similar_titles = []
    for idx in top_indices:
        title = user_item_matrix.columns[idx]
        if jaccard_similarity(anime_title, title) < 0.5:  
            similar_titles.append(title)
        if len(similar_titles) >= top_n:
            break
    
    return similar_titles[:top_n]


In [35]:
recommendations = item_collaborative_recommender('Hajime no Ippo', user_item_matrix, item_similarity)
print(recommendations)

['Great Teacher Onizuka', 'Hunter x Hunter', 'Shijou Saikyou no Deshi Kenichi', 'One Piece', 'Eyeshield 21', 'Kenpuu Denki Berserk', 'Hunter x Hunter (2011)', 'Major S1', 'Fullmetal Alchemist: Brotherhood', 'Code Geass: Hangyaku no Lelouch']


In [64]:
# Create an empty dictionary to store recommendations
anime_recommendations_dict = {}

# Group the DataFrame by 'Anime Title' to avoid repeated computations
grouped_df = filtered_df.groupby('Anime Title')

# Loop through unique anime titles with tqdm for a progress bar
for anime_title, anime_group in tqdm(grouped_df, total=len(grouped_df), desc="Processing Anime Titles"):
    # Assuming you want to use the first anime_id for each anime_title (you may need to adjust this logic)
    anime_id = anime_group['anime_id'].iloc[0]
    
    # Generate recommendations for the current anime title
    recommendations = item_collaborative_recommender(anime_title, user_item_matrix, item_similarity)
    
    # Store the recommendations in the dictionary with anime_id as the key
    anime_recommendations_dict[anime_id] = recommendations

Processing Anime Titles: 100%|██████████| 9652/9652 [00:08<00:00, 1109.97it/s]


In [None]:
import pickle

# File path to save the dictionary
file_path = "anime_recommendations_item_knn_CF_10k.pkl"

# Save the dictionary using pickle
with open(file_path, 'wb') as f:
    pickle.dump(anime_recommendations_dict, f)

In [42]:
title2id = {x:y for x,y in zip(df['Anime Title'], df['anime_id'])}

In [60]:
def replace_anime_titles(user_dict, replacement_dict):
    for user_id, anime_list in user_dict.items():
        for i, anime_title in enumerate(anime_list):
            if anime_title in replacement_dict:
                user_dict[user_id][i] = replacement_dict[anime_title]
    return user_dict

In [61]:
cf_dict_recs = replace_anime_titles(anime_recommendations_dict, title2id)

In [69]:
import pickle

# File path to save the dictionary
file_path = "anime_recommendations_item_knn_CF_10k_num.pkl"

# Save the dictionary using pickle
with open(file_path, 'wb') as f:
    pickle.dump(cf_dict_recs, f)