# Recommendations from MyAnimeList Dataset

Original Dataset: https://www.kaggle.com/datasets/svanoo/myanimelist-dataset

In [1]:
import pandas as pd

# Anime Info Dataset

In [2]:
animes_df = pd.read_csv('../data/myanimelist_info_original.tsv', sep='\t')
print(animes_df.columns)
print(animes_df.shape)
animes_df.head(3)

Index(['anime_id', 'anime_url', 'title', 'synopsis', 'main_pic', 'type',
       'source_type', 'num_episodes', 'status', 'start_date', 'end_date',
       'season', 'studios', 'genres', 'score', 'score_count', 'score_rank',
       'popularity_rank', 'members_count', 'favorites_count', 'watching_count',
       'completed_count', 'on_hold_count', 'dropped_count',
       'plan_to_watch_count', 'total_count', 'score_10_count',
       'score_09_count', 'score_08_count', 'score_07_count', 'score_06_count',
       'score_05_count', 'score_04_count', 'score_03_count', 'score_02_count',
       'score_01_count', 'clubs', 'pics'],
      dtype='object')
(13379, 38)


Unnamed: 0,anime_id,anime_url,title,synopsis,main_pic,type,source_type,num_episodes,status,start_date,...,score_08_count,score_07_count,score_06_count,score_05_count,score_04_count,score_03_count,score_02_count,score_01_count,clubs,pics
0,2366,https://myanimelist.net/anime/2366/Touma_Kishi...,Touma Kishinden Oni,Shuramaru is hated and feared by the villagers...,https://cdn.myanimelist.net/images/anime/9/829...,TV,Game,25.0,Finished Airing,1995-10-05 00:00:00,...,0,0,0,0,0,0,0,0,14045|10778|342,https://cdn.myanimelist.net/images/anime/9/544...
1,4940,https://myanimelist.net/anime/4940/Sabaku_no_K...,Sabaku no Kaizoku! Captain Kuppa,"Sometime in the future, the world was complete...",https://cdn.myanimelist.net/images/anime/9/736...,TV,Manga,26.0,Finished Airing,2001-08-13 00:00:00,...,0,0,0,0,0,0,0,0,8494,https://cdn.myanimelist.net/images/anime/9/736...
2,50285,https://myanimelist.net/anime/50285/On_Air_Dek...,On Air Dekinai!,"Set in 2014, the anime follows the adventures ...",https://cdn.myanimelist.net/images/anime/1021/...,TV,Manga,12.0,Currently Airing,2022-01-10 00:00:00,...,0,0,0,0,0,0,0,0,27907|8652,https://cdn.myanimelist.net/images/anime/1021/...


In [3]:
# Drop rows with missing values
animes_df = animes_df.dropna()

# Keep columns that are necessary
animes_df = animes_df[['anime_id', 'title', 'type', 'num_episodes', 'anime_url', 'main_pic', 'synopsis', 'score', 'score_rank', 'popularity_rank', 'favorites_count']]

# Sort by popularity rank
animes_df = animes_df.sort_values(by='popularity_rank')

print(animes_df.shape)
animes_df.head()

(3477, 11)


Unnamed: 0,anime_id,title,type,num_episodes,anime_url,main_pic,synopsis,score,score_rank,popularity_rank,favorites_count
3672,16498,Shingeki no Kyojin,TV,25.0,https://myanimelist.net/anime/16498/Shingeki_n...,https://cdn.myanimelist.net/images/anime/10/47...,"Centuries ago, mankind was slaughtered to near...",8.52,103.0,1,148196
1979,1535,Death Note,TV,37.0,https://myanimelist.net/anime/1535/Death_Note,https://cdn.myanimelist.net/images/anime/9/945...,"Brutal murders, petty thefts, and senseless vi...",8.61,65.0,2,151551
4334,5114,Fullmetal Alchemist: Brotherhood,TV,64.0,https://myanimelist.net/anime/5114/Fullmetal_A...,https://cdn.myanimelist.net/images/anime/1223/...,After a horrific alchemy experiment goes wrong...,9.03,1.0,3,197551
3922,30276,One Punch Man,TV,12.0,https://myanimelist.net/anime/30276/One_Punch_Man,https://cdn.myanimelist.net/images/anime/12/76...,The seemingly unimpressive Saitama has a rathe...,8.49,111.0,4,56545
4279,11757,Sword Art Online,TV,25.0,https://myanimelist.net/anime/11757/Sword_Art_...,https://cdn.myanimelist.net/images/anime/11/39...,"In the year 2022, virtual reality has progress...",7.19,2940.0,5,63171


# Anime Recommendations Dataset

In [4]:
anime_recs_df = pd.read_csv('../data/myanimelist_recs_original.tsv', sep='\t')
print(anime_recs_df.columns)
print(anime_recs_df.shape)
anime_recs_df.head(3)

Index(['animeA', 'animeB', 'recommendation', 'recommendation_url',
       'num_recommenders', 'related', 'relation_type'],
      dtype='object')
(214271, 7)


Unnamed: 0,animeA,animeB,recommendation,recommendation_url,num_recommenders,related,relation_type
0,20955,16371,0,,,1,Other
1,40761,10087,0,,,1,Other
2,31675,20583,0,,,1,Other


In [5]:
pd.set_option('display.max_colwidth', None)

# Drop rows with where recommendation url is missing
anime_recs_df = anime_recs_df.dropna(subset=['recommendation_url'])

# Keep columns that are necessary
anime_recs_df = anime_recs_df[['animeA', 'animeB', 'recommendation_url']]

print(anime_recs_df.shape)
anime_recs_df.head(3)

(86540, 3)


Unnamed: 0,animeA,animeB,recommendation_url
127731,7,6,https://myanimelist.net/recommendations/anime/6-7
127732,6,7,https://myanimelist.net/recommendations/anime/6-7
127733,1,20,https://myanimelist.net/recommendations/anime/1-20


In [6]:
# Create new csv file with cleaned data
animes_df.to_csv('../data/myanimelist_info_cleaned.csv', index=False)
anime_recs_df.to_csv('../data/myanimelist_recs_cleaned.csv', index=False)