In [10]:
import pandas as pd 
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import difflib
import string
from collections import Counter
from tqdm import tqdm


In [2]:
df = pd.read_csv("/Users/justinvhuang/Desktop/CSE-6242-Group-Project/users-score-2023.csv")

In [3]:
anime_count = df.groupby('user_id')['anime_id'].count()

user_ids_to_drop = anime_count[anime_count < 10].index

filtered_df = df[~df['user_id'].isin(user_ids_to_drop)]

In [4]:
user_item_matrix = filtered_df.pivot_table(index='user_id', columns='Anime Title', values='rating')

user_item_matrix = user_item_matrix.fillna(0)

user_ratings_mean = user_item_matrix.mean(axis=1)
user_item_matrix_centered = user_item_matrix.sub(user_ratings_mean, axis=0)

item_similarity = cosine_similarity(user_item_matrix_centered.T)



In [7]:
def jaccard_similarity(title1, title2):
    title1_tokens = set(title1.lower().translate(str.maketrans('', '', string.punctuation)).split())
    title2_tokens = set(title2.lower().translate(str.maketrans('', '', string.punctuation)).split())
    intersection = len(title1_tokens.intersection(title2_tokens))
    union = len(title1_tokens.union(title2_tokens))
    return intersection / union if union else 0

def item_collaborative_recommender(anime_title, user_item_matrix, item_similarity, top_n=10):
    sim_scores = item_similarity[user_item_matrix.columns.get_loc(anime_title)]
    
    top_indices = np.argsort(sim_scores)[::-1][1:2*top_n+1]  
    
    similar_titles = []
    for idx in top_indices:
        title = user_item_matrix.columns[idx]
        if jaccard_similarity(anime_title, title) < 0.5:  
            similar_titles.append(title)
        if len(similar_titles) >= top_n:
            break
    
    return similar_titles[:top_n]


In [8]:
recommendations = item_collaborative_recommender('Elfen Lied', user_item_matrix, item_similarity)
print(recommendations)

['Death Note', 'Code Geass: Hangyaku no Lelouch', 'Fullmetal Alchemist', 'Elfen Lied: Tooriame nite Arui wa, Shoujo wa Ikani Shite Sono Shinjou ni Itatta ka? - Regenschauer', 'Code Geass: Hangyaku no Lelouch R2', 'Higurashi no Naku Koro ni', 'Claymore', 'Hellsing', 'Clannad', 'Highschool of the Dead']


In [11]:
# Create an empty dictionary to store recommendations
anime_recommendations_dict = {}

# Group the DataFrame by 'Anime Title' to avoid repeated computations
grouped_df = filtered_df.groupby('Anime Title')

# Loop through unique anime titles with tqdm for a progress bar
for anime_title, anime_group in tqdm(grouped_df, total=len(grouped_df), desc="Processing Anime Titles"):
    # Assuming you want to use the first anime_id for each anime_title (you may need to adjust this logic)
    anime_id = anime_group['anime_id'].iloc[0]
    
    # Generate recommendations for the current anime title
    recommendations = item_collaborative_recommender(anime_title, user_item_matrix, item_similarity)
    
    # Store the recommendations in the dictionary with anime_id as the key
    anime_recommendations_dict[anime_id] = recommendations

Processing Anime Titles: 100%|██████████| 16608/16608 [00:20<00:00, 811.63it/s]


In [14]:
anime_recommendations_dict

{51478: ['Adobe Student and Teacher Edition',
  'A lot of life',
  'After Hours',
  'A Viva Non Non',
  '32',
  'ABC Tenkiyohou',
  '2005',
  "A Doodlin' Song",
  'Accept',
  '(OO)'],
 20707: ['2010',
  'Ai Uta: Since 2007',
  '3-Nen C-Gumi 14-Ban Kubozono Chiyoko no Nyuukaku',
  '"Star"t',
  '7-kakan.',
  '3-D Heaven',
  '365-nichi no Love Song',
  '2',
  '"Tokyo"',
  '112 Sabsections of Skyline'],
 7669: ['"Bungaku Shoujo" Memoire',
  '"Bungaku Shoujo" Movie',
  'Cencoroll',
  'Denpa-teki na Kanojo',
  'Amagami SS: Tachibana Miya-hen - Imouto',
  'Another: The Other - Inga',
  "Arata naru Sekai: World's/Start/Load/End",
  'Black★Rock Shooter (OVA)',
  'Boku wa Tomodachi ga Sukunai: Yaminabe wa Bishoujo ga Zannen na Nioi',
  'Angel Beats! Another Epilogue'],
 8481: ['"Bungaku Shoujo" Kyou no Oyatsu: Hatsukoi',
  'Denpa-teki na Kanojo',
  'Amagami SS: Tachibana Miya-hen - Imouto',
  'Amagami SS',
  "Arata naru Sekai: World's/Start/Load/End",
  'Amagami SS+ Plus',
  'Black★Rock Shooter 

In [15]:
import pickle

# File path to save the dictionary
file_path = "anime_recommendations.pkl"

# Save the dictionary using pickle
with open(file_path, 'wb') as f:
    pickle.dump(anime_recommendations_dict, f)
