## Merge

In [247]:
import pandas as pd

# Raw exports of the two APIs. The Jikan file also contains
# "pending approval" entries and some deleted anime.
mal = pd.read_csv('data/anime_mal.csv')
jikan = pd.read_csv('data/anime_jikan.csv')

# Outer join with an indicator column, so the value counts below show how many
# ids appear in both files versus in only one of them.
anime = mal.merge(jikan, how='outer', left_on='id', right_on='mal_id', indicator=True)

print(anime['_merge'].value_counts())

both          20741
right_only     3852
left_only         0
Name: _merge, dtype: int64


### Merging rules

Rule of thumb:
- L'API de MyAnimeList sempre té els resultats actualitzats; l'API de Jikan en pot tenir algun d'una mica endarrerit
- Els Strings són més macos en Jikan

Notes
- num_scoring_users també apareix quan no té nota
- end_date té més valors (movies acabades en 1 dia) i és més fàcil d'usar
- num_episodes tenia 0 falsos...
- start_season sembla que coincideix amb "Seasonal", l'altre és només "Premiered"

In [252]:
# Columns taken from the MyAnimeList export.
mal_cols = [
    'mean', 'num_scoring_users', 'start_date', 'end_date', 'num_list_users',
    'num_favorites', 'average_episode_duration', 'sfw',
    'start_season_year', 'start_season_season',
    'broadcast_day_of_the_week', 'broadcast_start_time',
    'created_at', 'updated_at', 'main_picture_medium',
]

# Columns taken from the Jikan export.
jikan_cols = [
    'mal_id', 'title', 'type', 'status', 'episodes', 'source', 'rating',
    'genres', 'themes', 'demographics', 'studios', 'producers', 'licensors',
    'synopsis', 'background', 'title_english', 'title_japanese',
    'title_synonyms', 'url', 'trailer_url',
]

# Per the original note: title and synopsis exist in mal too; the Jikan ones are selected here.

# Columns still in doubt ("dubte"); not referenced anywhere else in this cell.
dubte = ['start_season_year', 'start_season_season']

# Final column order of the published dataset.
order = [
    # 10 most important attributes, appearing first on kaggle
    'mal_id', 'title', 'type', 'mean', 'num_scoring_users',
    'status', 'episodes', 'start_date', 'end_date', 'source',

    # Other important attributes
    'num_list_users', 'num_favorites', 'average_episode_duration',
    'rating', 'sfw', 'start_season_year', 'start_season_season',
    'broadcast_day_of_the_week', 'broadcast_start_time',

    # Multivalued attributes
    'genres', 'themes', 'demographics', 'studios', 'producers', 'licensors',

    # Description, MyAnimeList edits
    'synopsis', 'background', 'created_at', 'updated_at',

    # Media data
    'main_picture_medium', 'url', 'trailer_url',

    # Other titles
    'title_english', 'title_japanese', 'title_synonyms',
]

# Source column name -> name used in the exported CSV.
column_renames = {
    'mean': 'score',
    'num_scoring_users': 'scored_by',
    'start_date': 'airing_from',
    'end_date': 'airing_to',
    'num_list_users': 'members',
    'num_favorites': 'favorites',
    'average_episode_duration': 'episode_duration',
    'start_season_year': 'start_year',
    'start_season_season': 'start_season',
    'broadcast_day_of_the_week': 'broadcast_day',
    'broadcast_start_time': 'broadcast_time',
    'created_at': 'mal_created_at',
    'updated_at': 'mal_updated_at',
    'main_picture_medium': 'picture_url',
    'url': 'mal_url',
}

# Default (inner) join: only ids present in both exports are kept. The mal key
# column 'id' duplicates 'mal_id' after the join, so it is dropped.
anime = (
    mal[['id'] + mal_cols]
    .merge(jikan[jikan_cols], left_on='id', right_on='mal_id')
    .drop(columns=['id'])
    .loc[:, order]
    .rename(columns=column_renames)
)

print(anime.shape)

anime.to_csv('data/anime.csv', index=False)

anime.head(1)

(20741, 35)


Unnamed: 0,mal_id,title,type,score,scored_by,status,episodes,airing_from,airing_to,source,members,favorites,episode_duration,rating,sfw,start_year,start_season,broadcast_day,broadcast_time,genres,themes,demographics,studios,producers,licensors,synopsis,background,mal_created_at,mal_updated_at,picture_url,mal_url,trailer_url,title_english,title_japanese,title_synonyms
0,5114,Fullmetal Alchemist: Brotherhood,TV,9.13,1865027,Finished Airing,64.0,2009-04-05,2010-07-04,Manga,2922030,204080,0 days 00:24:20,R - 17+ (violence & profanity),True,2009.0,spring,sunday,17:00:00,"['Action', 'Adventure', 'Drama', 'Fantasy']",['Military'],['Shounen'],['Bones'],"['Aniplex', 'Square Enix', 'Mainichi Broadcast...","['Funimation', 'Aniplex of America']",After a horrific alchemy experiment goes wrong...,,2008-08-21 03:35:22+00:00,2022-04-18 05:06:13+00:00,https://api-cdn.myanimelist.net/images/anime/1...,https://myanimelist.net/anime/5114/Fullmetal_A...,https://www.youtube.com/watch?v=--IcmZkvL0Q,Fullmetal Alchemist: Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,['Hagane no Renkinjutsushi: Fullmetal Alchemis...


## Load Data

In [263]:
import pandas as pd
import ast

# Reload the exported dataset and restore the column types that a CSV round-trip loses.
anime = pd.read_csv('data/anime.csv')

# These come back as floats (e.g. 64.0, 2009.0); the nullable 'Int64' dtype
# gives integers while still allowing missing values.
for col in ['episodes', 'start_year']:
    anime[col] = anime[col].astype('Int64')

# Airing dates: keep only the calendar date.
for col in ['airing_from', 'airing_to']:
    anime[col] = pd.to_datetime(anime[col]).dt.date

# Stored as strings such as "0 days 00:24:20".
anime['episode_duration'] = pd.to_timedelta(anime['episode_duration'])

# Time-only strings ("17:00:00"): pass the format explicitly. Without it pandas
# cannot infer a format for values lacking a date and falls back to slow
# per-element dateutil parsing (with a warning on recent pandas versions).
anime['broadcast_time'] = pd.to_datetime(anime['broadcast_time'], format='%H:%M:%S').dt.time

# List-valued columns are stored as Python list literals; parse them back into lists.
# NOTE(review): assumes every cell is a list literal string; a missing value
# (NaN) would make ast.literal_eval raise. Confirm against the data.
for col in ['genres', 'themes', 'demographics', 'studios', 'producers', 'licensors', 'title_synonyms']:
    anime[col] = anime[col].apply(ast.literal_eval)

# MyAnimeList creation/update timestamps (the strings carry a UTC offset).
for col in ['mal_created_at', 'mal_updated_at']:
    anime[col] = pd.to_datetime(anime[col])

anime.head(1)

Unnamed: 0,mal_id,title,type,score,scored_by,status,episodes,airing_from,airing_to,source,members,favorites,episode_duration,rating,sfw,start_year,start_season,broadcast_day,broadcast_time,genres,themes,demographics,studios,producers,licensors,synopsis,background,mal_created_at,mal_updated_at,picture_url,mal_url,trailer_url,title_english,title_japanese,title_synonyms
0,5114,Fullmetal Alchemist: Brotherhood,TV,9.13,1865027,Finished Airing,64,2009-04-05,2010-07-04,Manga,2922030,204080,0 days 00:24:20,R - 17+ (violence & profanity),True,2009,spring,sunday,17:00:00,"[Action, Adventure, Drama, Fantasy]",[Military],[Shounen],[Bones],"[Aniplex, Square Enix, Mainichi Broadcasting S...","[Funimation, Aniplex of America]",After a horrific alchemy experiment goes wrong...,,2008-08-21 03:35:22+00:00,2022-04-18 05:06:13+00:00,https://api-cdn.myanimelist.net/images/anime/1...,https://myanimelist.net/anime/5114/Fullmetal_A...,https://www.youtube.com/watch?v=--IcmZkvL0Q,Fullmetal Alchemist: Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,[Hagane no Renkinjutsushi: Fullmetal Alchemist...
