# 캐릭터 추천

In [177]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [178]:
# Read the character table from its CSV file.
character_csv_path = 'drive/MyDrive/CUAI_Summer_2022_RecSys/data/fixed/character_full.csv'
characters_df = pd.read_csv(character_csv_path)
# Show the first rows to check which columns were loaded.
characters_df.head()

Unnamed: 0,ID,CharName,Gender,HairColor,Tags,Description,url,MAL_ID
0,0,L,Male,Black,"Analytical,Barefoot,Detectives,EyeBags,SweetTooth","Secretive, meticulous and cunning, L's desire ...",https://www.anime-planet.com/characters/l-deat...,1535.0
1,98,Kakashi HATAKE,Male,Grey,"Adults,Anti-GravityHair,Beastmasters,ExoticEye...",Kakashi is a jounin ninja from Konoha who is k...,https://www.anime-planet.com/characters/kakash...,34566.0
2,348,Light YAGAMI,Male,Brown,"Analytical,BigEgo,Charismatic,GodComplex,Manip...","Armed with the power of Ryuk's Death Note, Lig...",https://www.anime-planet.com/characters/light-...,1535.0
3,279,Naruto UZUMAKI,Male,Blonde,"Beastmasters,FaceMarkings,Headbands,Hot-Headed...",Naruto is a young ninja with a single dream: t...,https://www.anime-planet.com/characters/naruto...,35072.0
4,103,Edward ELRIC,Male,Blonde,"Alchemists,ArmBlades,ArtificialLimbs,Braids,Gl...",Known as a prodigy and one of the strongest al...,https://www.anime-planet.com/characters/edward...,121.0


In [179]:
# Peek at the raw Tags column: at this point each value is still a single
# comma-separated string (it is split into a list in a later cell).
characters_df['Tags'].head()

0    Analytical,Barefoot,Detectives,EyeBags,SweetTooth
1    Adults,Anti-GravityHair,Beastmasters,ExoticEye...
2    Analytical,BigEgo,Charismatic,GodComplex,Manip...
3    Beastmasters,FaceMarkings,Headbands,Hot-Headed...
4    Alchemists,ArmBlades,ArtificialLimbs,Braids,Gl...
Name: Tags, dtype: object

In [180]:
# Lower-case the single-valued categorical columns so that, for example,
# 'Male' and 'male' end up as the same token.
for column in ('Gender', 'HairColor'):
    characters_df[column] = characters_df[column].map(lambda value: value.lower())

# Lower-case the comma-separated tag string and split it into a list of tags.
characters_df['Tags'] = characters_df['Tags'].map(lambda tags: tags.lower().split(','))

In [181]:
# Derive the feature frame used for similarity: index by character name and
# discard the columns that are not used as features. Both calls return new
# objects, so characters_df itself is left unchanged.
characters = (
    characters_df
    .set_index('CharName')
    .drop(columns = ['ID', 'Description', 'url', 'MAL_ID'])
)
characters.head()

Unnamed: 0_level_0,Gender,HairColor,Tags
CharName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
L,male,black,"[analytical, barefoot, detectives, eyebags, sw..."
Kakashi HATAKE,male,grey,"[adults, anti-gravityhair, beastmasters, exoti..."
Light YAGAMI,male,brown,"[analytical, bigego, charismatic, godcomplex, ..."
Naruto UZUMAKI,male,blonde,"[beastmasters, facemarkings, headbands, hot-he..."
Edward ELRIC,male,blonde,"[alchemists, armblades, artificiallimbs, braid..."


In [182]:
# Build one whitespace-separated "bag of words" string per character.
#
# The strings are assembled outside the frame and assigned as a whole column.
# Writing into the rows yielded by iterrows() is not guaranteed to reach the
# DataFrame (pandas may hand back a copy of the row), so a row-by-row
# assignment can silently leave the column empty.
feature_columns = [col for col in characters.columns if col != 'bag_of_words']
characters['bag_of_words'] = [
    ' '.join(
        # 'Tags' holds a list of tag strings; every other feature is one string.
        ' '.join(value) if col == 'Tags' else value
        for col, value in zip(feature_columns, values)
    )
    for values in characters[feature_columns].itertuples(index = False, name = None)
]

# Keep only the combined text column; the separate feature columns have been
# folded into it.
characters.drop(columns = feature_columns, inplace = True)

In [183]:
# Check the result: only the bag_of_words column should remain.
characters.head()

Unnamed: 0_level_0,bag_of_words
CharName,Unnamed: 1_level_1
L,male black analytical barefoot detectives eyeb...
Kakashi HATAKE,male grey adults anti-gravityhair beastmasters...
Light YAGAMI,male brown analytical bigego charismatic godco...
Naruto UZUMAKI,male blonde beastmasters facemarkings headband...
Edward ELRIC,male blonde alchemists armblades artificiallim...


In [184]:
# (number of characters, number of columns)
characters.shape

(13888, 1)

In [185]:
# Turn every bag_of_words string into a row of token counts.
count = CountVectorizer()
bag_of_words = characters['bag_of_words']
count_matrix = count.fit_transform(bag_of_words)

# Lookup table from row position to character name (same order as count_matrix rows).
indices = pd.Series(characters.index)
indices.head()

0                 L
1    Kakashi HATAKE
2      Light YAGAMI
3    Naruto UZUMAKI
4      Edward ELRIC
Name: CharName, dtype: object

In [186]:
# Pairwise cosine similarity between all rows of count_matrix. With a single
# argument the rows are compared against themselves, giving a square matrix
# with one row and one column per character.
cosine_sim = cosine_similarity(count_matrix)
cosine_sim

array([[1.        , 0.10910895, 0.25197632, ..., 0.21821789, 0.        ,
        0.        ],
       [0.10910895, 1.        , 0.09622504, ..., 0.33333333, 0.14433757,
        0.10206207],
       [0.25197632, 0.09622504, 1.        , ..., 0.38490018, 0.16666667,
        0.        ],
       ...,
       [0.21821789, 0.33333333, 0.38490018, ..., 1.        , 0.28867513,
        0.        ],
       [0.        , 0.14433757, 0.16666667, ..., 0.28867513, 1.        ,
        0.1767767 ],
       [0.        , 0.10206207, 0.        , ..., 0.        , 0.1767767 ,
        1.        ]])

In [187]:
def recommendations(character_name, cosine_sim = cosine_sim, top_n = 10):
    """Return the names of the characters most similar to ``character_name``.

    Parameters
    ----------
    character_name : str
        Name to look up in the module-level ``indices`` series. If the name
        occurs more than once, the first occurrence is used.
    cosine_sim : 2-D array-like
        Similarity matrix whose row/column order matches ``indices``.
    top_n : int, default 10
        Maximum number of names to return.

    Returns
    -------
    list
        Up to ``top_n`` character names, ordered from most to least similar.
        The queried character itself is never included.

    Raises
    ------
    IndexError
        If ``character_name`` is not present in ``indices``.
    """
    # Row position of the queried character.
    matches = indices[indices == character_name].index
    if len(matches) == 0:
        raise IndexError(f"character not found: {character_name!r}")
    idx = matches[0]

    # Remove the query row explicitly instead of assuming it sorts first:
    # other characters with an identical bag of words also score 1.0, so the
    # query is not guaranteed to land in position 0 after sorting.
    scores = pd.Series(cosine_sim[idx]).drop(idx)

    # A stable sort keeps tied scores in their original row order, which makes
    # the result deterministic.
    ranked = scores.sort_values(ascending = False, kind = 'mergesort')
    top_indexes = ranked.iloc[:top_n].index

    # Translate row positions back to character names.
    return [indices.iloc[i] for i in top_indexes]

In [188]:
# Example: characters most similar to 'Light YAGAMI'.
recommendations('Light YAGAMI')

['Deishuu KAIKI',
 'Gakuhou ASANO',
 'Tsukasa SHISHIOU',
 'Seiya KANIE',
 'Johan LIEBERT',
 'Shen Qing Qiu',
 'Yu Wenzhou',
 'Akushima',
 'Fukusuke HIKYAKUYA',
 'Ichiya SUZAKU']

# 애니메이션 추천

추천된 애니메이션 캐릭터에 해당하는 애니메이션 추천

In [189]:
# Read the anime table from its CSV file.
anime_csv_path = 'drive/MyDrive/CUAI_Summer_2022_RecSys/data/fixed/anime_full.csv'
anime_df = pd.read_csv(anime_csv_path)
# Show the first rows to check which columns were loaded.
anime_df.head()

Unnamed: 0,MAL_ID,Name,Genres,synopsis,Type,Source,Year
0,1,Cowboy Bebop,"Action,Adventure,Comedy,Drama,Sci-Fi,Space","In the year 2071, humanity has colonized sever...",TV,Original,1990.0
1,5,Cowboy Bebop: Tengoku no Tobira,"Action,Drama,Mystery,Sci-Fi,Space","other day, another bounty—such is the life of ...",Movie,Original,2000.0
2,6,Trigun,"Action,Sci-Fi,Adventure,Comedy,Drama,Shounen","Vash the Stampede is the man with a $$60,000,0...",TV,Manga,1990.0
3,7,Witch Hunter Robin,"Action,Mystery,Police,Supernatural,Drama,Magic",ches are individuals with special powers like ...,TV,Original,2000.0
4,8,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",It is the dark century and the people are suff...,TV,Manga,2000.0


In [190]:
# Get the recommended characters for the query character.
recommended_character = recommendations('Light YAGAMI')

# Select the recommended characters into a separate frame. characters_df is
# deliberately left untouched so this cell can be re-run, or run for another
# character, without reloading the CSV first.
recommended_df = characters_df.loc[
    characters_df['CharName'].isin(recommended_character), ['CharName', 'MAL_ID']
]

# Attach the anime title by joining on MAL_ID. The inner join drops any
# character whose MAL_ID has no matching row in anime_df.
merge = pd.merge(recommended_df, anime_df[['MAL_ID', 'Name']], on = 'MAL_ID', how = 'inner')
merge

Unnamed: 0,CharName,MAL_ID,Name
0,Johan LIEBERT,19.0,Monster
1,Gakuhou ASANO,34389.0,Koro-sensei Quest!
2,Deishuu KAIKI,21855.0,Hanamonogatari
3,Seiya KANIE,22147.0,Amagi Brilliant Park
4,Tsukasa SHISHIOU,38691.0,Dr. Stone
5,Shen Qing Qiu,38990.0,Chuan Shu Zijiu Zhinan
6,Akushima,23333.0,DRAMAtical Murder
7,Ichiya SUZAKU,32360.0,Qualidea Code
8,Fukusuke HIKYAKUYA,20.0,Naruto
9,Yu Wenzhou,33926.0,Quanzhi Gaoshou


In [191]:
# Re-order the merged rows so they follow the similarity ranking in
# recommended_character (the merge does not preserve that order).
sorterIndex = {name: rank for rank, name in enumerate(recommended_character)}  # name -> rank
merge['sorter'] = merge['CharName'].map(sorterIndex)  # temporary sort-key column
merge.sort_values('sorter', inplace = True)
# Drop the helper column by keyword: passing the axis positionally
# (`drop('sorter', 1)`) is deprecated and rejected by pandas 2.x.
merge.drop(columns = 'sorter', inplace = True)
merge

  """


Unnamed: 0,CharName,MAL_ID,Name
2,Deishuu KAIKI,21855.0,Hanamonogatari
1,Gakuhou ASANO,34389.0,Koro-sensei Quest!
4,Tsukasa SHISHIOU,38691.0,Dr. Stone
3,Seiya KANIE,22147.0,Amagi Brilliant Park
0,Johan LIEBERT,19.0,Monster
5,Shen Qing Qiu,38990.0,Chuan Shu Zijiu Zhinan
9,Yu Wenzhou,33926.0,Quanzhi Gaoshou
6,Akushima,23333.0,DRAMAtical Murder
8,Fukusuke HIKYAKUYA,20.0,Naruto
7,Ichiya SUZAKU,32360.0,Qualidea Code


In [192]:
# Print the recommended anime titles, one per line, in ranking order.
for title in merge['Name'].tolist():
    print(title)

Hanamonogatari
Koro-sensei Quest!
Dr. Stone
Amagi Brilliant Park
Monster
Chuan Shu Zijiu Zhinan
Quanzhi Gaoshou
DRAMAtical Murder
Naruto
Qualidea Code
