In [1]:
import pandas as pd
import numpy as np 
from tqdm import tqdm
import pickle

In [2]:
df = pd.read_csv("/Users/justinvhuang/Desktop/CSE-6242-Group-Project/users-score-2023.csv")

In [3]:
df2 = pd.read_json("/Users/justinvhuang/Desktop/CSE-6242-Group-Project/fin_anime_dfv2.json")

In [4]:
anime_list = df2['anime_id'].tolist()

In [5]:
filtered_df = df[df['anime_id'].isin(anime_list)]

In [6]:
# Get unique anime and user IDs
anime_ids = filtered_df['anime_id'].unique()
user_ids = filtered_df['user_id'].unique()

# Create a dictionary to map anime IDs to indices
anime_id_to_index = {anime_id: index for index, anime_id in enumerate(anime_ids)}

In [7]:
class ThompsonSamplingPopularity:
    def __init__(self, num_anime):
        self.num_anime = num_anime
        self.alpha = np.ones(num_anime)  
        self.beta = np.ones(num_anime)   

    def update_parameters(self, user_ratings, user_interactions):
        self.alpha += user_ratings
        self.beta += user_interactions - user_ratings

    def recommend_top_popular_anime(self, anime_popularity, num_recommendations=50):
        sorted_anime_indices = np.argsort(anime_popularity)[::-1]  
        return sorted_anime_indices[:num_recommendations]

    def recommend_anime(self, user_ratings, user_interactions, anime_popularity):
        self.update_parameters(user_ratings, user_interactions)

        expected_theta = self.alpha / (self.alpha + self.beta)
        adjusted_theta = expected_theta * anime_popularity

        return self.recommend_top_popular_anime(adjusted_theta)

In [8]:
#Calculate popularity of each anime based on the total number of interactions
anime_interactions = filtered_df.groupby('anime_id')['user_id'].count().sort_values(ascending=False)
anime_popularity = anime_interactions.values / anime_interactions.values.sum()

# Initialize Thompson Sampling with popularity
thompson_sampling = ThompsonSamplingPopularity(len(anime_ids))

In [9]:
# Initialize an empty set to store unique recommended anime IDs
all_recommended_anime_ids = set()

# Iterate through user IDs with tqdm for progress tracking
for user_id in tqdm(user_ids, desc="Processing users"):
    user_data =filtered_df[filtered_df['user_id'] == user_id]
    user_ratings = np.zeros(len(anime_ids))
    user_interactions = np.zeros(len(anime_ids))

    for _, row in user_data.iterrows():
        anime_index = anime_id_to_index[row['anime_id']]
        user_ratings[anime_index] += row['rating']
        user_interactions[anime_index] += 1

    recommended_anime_indices = thompson_sampling.recommend_anime(user_ratings, user_interactions, anime_popularity)
    recommended_anime_ids = [anime_ids[index] for index in recommended_anime_indices]

    # Add recommended anime IDs to the set
    all_recommended_anime_ids.update(recommended_anime_ids)

# Convert the set to a list
all_recommended_anime_ids = list(all_recommended_anime_ids)

Processing users: 100%|██████████| 265292/265292 [46:59<00:00, 94.09it/s] 


In [41]:
pop_recs1 = all_recommended_anime_ids[0:15]
len(pop_recs1)

15

In [42]:
df3 = df2[df2['anime_id'].isin(pop_recs1)].drop_duplicates('Genres')
# Filter and drop rows containing 'Ecchi' and 'Hentai'
filtered_df = df3[~df3['Genres'].str.contains('Ecchi|Hentai')]

In [43]:
popular_dict_10 = filtered_df.sort_values("Favorites",ascending = False).head(15)

In [44]:
popular_dict_10

Unnamed: 0,anime_Rating,anime_Score,anime_Synopsis,plot,Producers,Licensors,Studios,Image URL,Episodes,Genres,...,Favorites,Aired,Members,Duration,text,anime_id,tokens,Name,image_y,imdb_name_basics_primaryName
4641,UNKNOWN,8.75,"Crime is timeless. By the year 2071, humanity ...","Cowboy Bebop (Japanese: カウボーイビバップ, Hepburn: Ka...",Bandai Visual,"Funimation, Bandai Entertainment",Sunrise,https://cdn.myanimelist.net/images/anime/4/196...,26.0,"Action, Award Winning, Sci-Fi",...,78525,"Apr 3, 1998 to Apr 24, 1999",1771505,24 min per ep,"Cowboy Bebop (Japanese: カウボーイビバップ, Hepburn: Ka...",1,"['cowboy', 'bebop', 'japanese', 'kaubo', 'ibib...",Cowboy Bebop,https://cdn.myanimelist.net/images/anime/4/196...,"Isshin Chiba, Kevin Seymour, Hajime Yatate, Un..."
4885,UNKNOWN,8.76,"In his father's absence, teenager Ippo Makunou...","Hajime no Ippo (はじめの一歩, lit. ""The First Step"")...",VAP,"Discotek Media, Geneon Entertainment USA",Madhouse,https://cdn.myanimelist.net/images/anime/4/863...,75.0,Sports,...,20143,"Oct 4, 2000 to Mar 27, 2002",546859,23 min per ep,"Hajime no Ippo (はじめの一歩, lit. ""The First Step"")...",263,"['hajime', 'ippo', 'hazimenoYi', 'Bu', 'light'...",Hajime no Ippo,https://cdn.myanimelist.net/images/anime/4/863...,"Ichirô Nagai, Kôhei Kiyasu, Paul St. Peter, Ke..."
6931,UNKNOWN,8.41,Hunters are specialized in a wide variety of f...,Hunter × Hunter (stylized as HUNTER×HUNTER and...,Fuji TV,VIZ Media,Nippon Animation,https://cdn.myanimelist.net/images/anime/1305/...,62.0,"Action, Adventure, Fantasy",...,10442,"Oct 16, 1999 to Mar 31, 2001",564771,23 min per ep,Hunter × Hunter (stylized as HUNTER×HUNTER and...,136,"['hunter', 'x', 'hunter', 'stylize', 'hunterxh...",Hunter x Hunter,https://cdn.myanimelist.net/images/anime/1305/...,"Junko Takeuchi, Annika Odegard, Carol-Anne Day..."
4877,UNKNOWN,8.0,"Yuuta Takemoto, a sophomore at an arts college...","Honey and Clover (Japanese: ハチミツとクローバー, Hepbur...","Dentsu, Genco, Fuji TV, Asmik Ace, Shueisha","VIZ Media, Discotek Media",J.C.Staff,https://cdn.myanimelist.net/images/anime/1301/...,24.0,"Comedy, Drama, Romance",...,4136,"Apr 15, 2005 to Sep 27, 2005",260166,23 min per ep,"Honey and Clover (Japanese: ハチミツとクローバー, Hepbur...",16,"['honey', 'clover', 'japanese', 'hachimitsutok...",Hachimitsu to Clover,https://cdn.myanimelist.net/images/anime/1301/...,"Sam Riegel, Tomokazu Sugita, Hiroshi Kamiya, C..."
6657,UNKNOWN,6.99,"Thought your life was bad? Sometimes, death is...",Gantz (stylized in all caps) is a Japanese man...,Fuji TV,"ADV Films, Funimation",Gonzo,https://cdn.myanimelist.net/images/anime/13/59...,13.0,"Action, Drama, Horror, Sci-Fi",...,2360,"Apr 13, 2004 to Jun 22, 2004",347630,22 min per ep,Gantz (stylized in all caps) is a Japanese man...,384,"['gantz', 'stylize', 'cap', 'japanese', 'manga...",Gantz,https://cdn.myanimelist.net/images/anime/13/59...,"Hiroshi Kamiya, John Gremillion, Chris Patton,..."
4951,UNKNOWN,8.08,While searching through his grandfather's atti...,"Hikaru no Go (ヒカルの碁, lit. Hikaru's Go) is a Ja...","TV Tokyo, Dentsu, Dream Force",VIZ Media,Pierrot,https://cdn.myanimelist.net/images/anime/12/78...,75.0,"Comedy, Drama, Supernatural",...,2341,"Oct 10, 2001 to Mar 26, 2003",133599,23 min per ep,"Hikaru no Go (ヒカルの碁, lit. Hikaru's Go) is a Ja...",135,"['hikaru', 'hikarunoqi', 'light', 'hikaru', 'j...",Hikaru no Go,https://cdn.myanimelist.net/images/anime/12/78...,"Tomoko Kawakami, Sam Vincent, Brad Swaile, Mat..."
6398,UNKNOWN,8.38,"Another day, another bounty—such is the life o...","Cowboy Bebop (Japanese: カウボーイビバップ, Hepburn: Ka...","Sunrise, Bandai Visual",Sony Pictures Entertainment,Bones,https://cdn.myanimelist.net/images/anime/1439/...,1.0,"Action, Sci-Fi",...,1448,"Sep 1, 2001",360978,1 hr 55 min,"Cowboy Bebop (Japanese: カウボーイビバップ, Hepburn: Ka...",5,"['cowboy', 'bebop', 'japanese', 'kaubo', 'ibib...",Cowboy Bebop: Tengoku no Tobira,https://cdn.myanimelist.net/images/anime/1439/...,"Melissa Fahn, Tensai Okamura, Nicholas Guest, ..."
4815,PG-13 - Teens 13 or older,7.58,Mido Ban and Amano Ginji are known as the Get ...,"GetBackers (Japanese: ゲットバッカーズ -奪還屋-, Hepburn:...","TBS, Kodansha, Rondo Robe, Rakuonsha","ADV Films, Sentai Filmworks",Studio Deen,https://cdn.myanimelist.net/images/anime/4/754...,49.0,"Action, Mystery, Supernatural",...,867,"Oct 5, 2002 to Sep 20, 2003",119875,24 min per ep,"GetBackers (Japanese: ゲットバッカーズ -奪還屋-, Hepburn:...",132,"['getbacker', 'japanese', 'getsutobatsuka', 'z...",GetBackers,https://cdn.myanimelist.net/images/anime/1056/...,"Kelly Dealyn, Darren Pleavin, Matt Hislope, Sh..."
6904,UNKNOWN,7.12,"It is the year 2046, Noboru Terao and Mikako N...","The classical Japanese language (文語 bungo, ""li...",UNKNOWN,"ADV Films, GKIDS",CoMix Wave Films,https://cdn.myanimelist.net/images/anime/1980/...,1.0,"Award Winning, Drama, Romance, Sci-Fi",...,528,"Feb 2, 2002",152141,24 min,"The classical Japanese language (文語 bungo, ""li...",256,"['classical', 'japanese', 'language', 'Wen', '...",Hoshi no Koe,https://cdn.myanimelist.net/images/anime/1980/...,"Yoshihiro Hagiwara, Tenmon, Mika Shinohara, Su..."
4741,UNKNOWN,7.06,Hitomi Kanzaki is in a very depressed mood. Sh...,The Vision of Escaflowne (Japanese: 天空のエスカフローネ...,Atelier Musa,"Funimation, Bandai Entertainment","Bones, Sunrise",https://cdn.myanimelist.net/images/anime/1539/...,1.0,"Adventure, Drama, Fantasy, Romance, Sci-Fi",...,199,"Jun 24, 2000",67624,1 hr 37 min,The Vision of Escaflowne (Japanese: 天空のエスカフローネ...,393,"['vision', 'escaflowne', 'japanese', 'Tian', '...",Escaflowne,https://cdn.myanimelist.net/images/anime/1539/...,"Kazuki Akane, Tomokazu Seki, Kazuki Sekine, Jô..."


In [45]:
popular_dict = df[df['anime_id'].isin(all_recommended_anime_ids)].drop_duplicates('anime_id')[['anime_id', 'Anime Title']].to_dict(orient='records')

In [46]:
popular_dict_10 = filtered_df[filtered_df['anime_id'].isin(all_recommended_anime_ids)].drop_duplicates('anime_id')[['anime_id', 'Name']].to_dict(orient='records')

In [18]:
file_path = "popular_dict.pkl"

# Open the file in binary write mode and save the list using pickle.dump()
with open(file_path, 'wb') as f:
    pickle.dump(popular_dict, f)

In [48]:
file_path = "popular_dict_10.pkl"

# Open the file in binary write mode and save the list using pickle.dump()
with open(file_path, 'wb') as f:
    pickle.dump(popular_dict_10, f)

In [47]:
popular_dict_10

[{'anime_id': 1, 'Name': 'Cowboy Bebop'},
 {'anime_id': 393, 'Name': 'Escaflowne'},
 {'anime_id': 132, 'Name': 'GetBackers'},
 {'anime_id': 16, 'Name': 'Hachimitsu to Clover'},
 {'anime_id': 263, 'Name': 'Hajime no Ippo'},
 {'anime_id': 135, 'Name': 'Hikaru no Go'},
 {'anime_id': 5, 'Name': 'Cowboy Bebop: Tengoku no Tobira'},
 {'anime_id': 384, 'Name': 'Gantz'},
 {'anime_id': 256, 'Name': 'Hoshi no Koe'},
 {'anime_id': 136, 'Name': 'Hunter x Hunter'}]