## Characters Cleaning

In [1]:
import pandas as pd
import numpy as np

# Read the raw character dump.
characters = pd.read_json('data/raw/characters.json')

# Keep one row per mal_id and report how many rows were discarded.
old_size = len(characters)
characters = characters.drop_duplicates(subset=['mal_id']).reset_index(drop=True)
print('Duplicates:', old_size - len(characters))

# Placeholder values that are converted to NaN below.
no_va = (
    'No voice actors have been added to this character. Help improve our database by searching for a voice actor, '
    'and adding this character to their roles .'
)
default_image = 'https://cdn.myanimelist.net/img/sp/icon/apple-touch-icon-256.png'

# Column layout of the cleaned table.
order = ['mal_id', 'name', 'name_kanji', 'nicknames', 'favorites', 'about', 'main_picture', 'url']

characters = (
    characters
    .assign(
        # 'N/A' and a lone space are treated as a missing kanji name.
        name_kanji=lambda df: df['name_kanji'].replace(['N/A', ' '], np.nan),
        # An empty text or the "no voice actors" boilerplate is treated as a missing description.
        about=lambda df: df['about'].replace(['', no_va], np.nan),
        # Reduce `images` to the jpg image_url (the original notes list .jpg, .webp and
        # t.webp as the available options); the site's default icon counts as missing.
        main_picture=lambda df: df['images'].str['jpg'].str['image_url'].replace(default_image, np.nan),
    )
    .drop(columns=['images'])
    .loc[:, order]
    # Most-favorited first; ties are broken by ascending mal_id.
    .sort_values(by=['favorites', 'mal_id'], ascending=[False, True])
    .reset_index(drop=True)
)

# Persist the cleaned table as CSV (without the index column).
characters.to_csv('data/characters.csv', index=False)

print(characters.shape)

characters.head(3)

Duplicates: 66
(146049, 8)


Unnamed: 0,mal_id,name,name_kanji,nicknames,favorites,about,main_picture,url
0,417,Lelouch Lamperouge,ルルーシュ・ランペルージ,"[Lelouch vi Britannia, Zero, Lulu, Black Princ...",156066,"Age: 17 (first season), 18 (second season)\nDa...",https://cdn.myanimelist.net/images/characters/...,https://myanimelist.net/character/417/Lelouch_...
1,45627,Levi,リヴァイ,[],130849,"Birthday: December 25\nHeight: 160 cm (5'3"")\n...",https://cdn.myanimelist.net/images/characters/...,https://myanimelist.net/character/45627/Levi
2,71,L Lawliet,エル ローライト,"[Ryuga Hideki, Ryuzaki, Eraldo Coil, Deneuve]",119353,"Birthday: October 31, 1979 (1982 in anime)\nZo...",https://cdn.myanimelist.net/images/characters/...,https://myanimelist.net/character/71/L_Lawliet


# Load Characters

In [2]:
import pandas as pd
import ast

# Load the cleaned table from data/characters.csv.
# DataFrame.to_csv writes missing values as empty fields by default, so only the
# empty string is parsed back as NaN. pandas' default NA tokens ('NA', 'N/A',
# 'None', 'null', 'nan', ...) are disabled; otherwise a text field whose whole
# value is one of those strings (e.g. a character literally named "None")
# would silently be turned into a missing value on the round-trip.
characters = pd.read_csv('data/characters.csv', keep_default_na=False, na_values=[''])

# The CSV holds each nicknames list as its Python repr (e.g. "['Zero', 'Lulu']" or "[]");
# literal_eval turns that text back into a real list without executing arbitrary code.
characters['nicknames'] = characters['nicknames'].apply(ast.literal_eval)

characters.head(3)

Unnamed: 0,mal_id,name,name_kanji,nicknames,favorites,about,main_picture,url
0,417,Lelouch Lamperouge,ルルーシュ・ランペルージ,"[Lelouch vi Britannia, Zero, Lulu, Black Princ...",156066,"Age: 17 (first season), 18 (second season)\nDa...",https://cdn.myanimelist.net/images/characters/...,https://myanimelist.net/character/417/Lelouch_...
1,45627,Levi,リヴァイ,[],130849,"Birthday: December 25\nHeight: 160 cm (5'3"")\n...",https://cdn.myanimelist.net/images/characters/...,https://myanimelist.net/character/45627/Levi
2,71,L Lawliet,エル ローライト,"[Ryuga Hideki, Ryuzaki, Eraldo Coil, Deneuve]",119353,"Birthday: October 31, 1979 (1982 in anime)\nZo...",https://cdn.myanimelist.net/images/characters/...,https://myanimelist.net/character/71/L_Lawliet
