# Anime character description 

- 애니메이션 캐릭터 dataset에서 description 추출
- Description 문자열 벡터화
- 최애캐와 다른 모든 캐릭터 description 사이 코사인 유사도 추출
- 유사도 기반 top 20 characters 추천

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Load the character dataset from CSV into a DataFrame.
# NOTE(review): the path looks like a mounted Google Drive folder — adjust it when running elsewhere.
characters_data = pd.read_csv('/content/drive/MyDrive/CUAI_summer/character_full.csv')

In [3]:
# Preview the first three rows to check the available columns.
characters_data.head(3)

Unnamed: 0,ID,CharName,Gender,HairColor,Tags,Description,url,MAL_ID
0,0,L,Male,Black,"Analytical,Barefoot,Detectives,EyeBags,SweetTooth","Secretive, meticulous and cunning, L's desire ...",https://www.anime-planet.com/characters/l-deat...,1535.0
1,98,Kakashi HATAKE,Male,Grey,"Adults,Anti-GravityHair,Beastmasters,ExoticEye...",Kakashi is a jounin ninja from Konoha who is k...,https://www.anime-planet.com/characters/kakash...,34566.0
2,348,Light YAGAMI,Male,Brown,"Analytical,BigEgo,Charismatic,GodComplex,Manip...","Armed with the power of Ryuk's Death Note, Lig...",https://www.anime-planet.com/characters/light-...,1535.0


In [4]:
# Preview the free-text column that the recommender is built on.
characters_data['Description'].head(3)

0    Secretive, meticulous and cunning, L's desire ...
1    Kakashi is a jounin ninja from Konoha who is k...
2    Armed with the power of Ryuk's Death Note, Lig...
Name: Description, dtype: object

In [5]:
# Replace missing descriptions (NaN) with empty strings so the vectorizer only sees text.
characters_data['Description'] = characters_data['Description'].fillna('')

# TF-IDF vectorizer that drops English stop words such as 'the' and 'a'.
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary and build the TF-IDF matrix, one row per character.
tfidf_matrix = tfidf.fit_transform(characters_data['Description'])

# Shown as the cell output: (number of rows, vocabulary size).
tfidf_matrix.shape

(13888, 9396)

In [6]:
from sklearn.metrics.pairwise import linear_kernel

# Pairwise dot products between all description vectors. TfidfVectorizer
# L2-normalizes rows by default, so the dot product equals cosine similarity.
# The result is a dense (n_characters x n_characters) array, so memory grows
# quadratically with the number of rows.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [7]:
# Lookup table: character name -> row label in characters_data.
indices = pd.Series(characters_data.index, index=characters_data['CharName'])

# Series.drop_duplicates() compares *values* (the row labels, which are already
# unique), so it never removed repeated names. De-duplicate on the index instead,
# keeping the first row of each name, so that indices[name] is always a scalar.
indices = indices[~indices.index.duplicated(keep='first')]

In [8]:
def get_recommendations(CharName, cosine_sim=cosine_sim):
  """Return the 20 characters whose descriptions are most similar to ``CharName``.

  Args:
    CharName: Character name to look up in the module-level ``indices`` table.
    cosine_sim: Square similarity matrix; row ``i`` holds the similarity of
      character ``i`` to every character. Defaults to the matrix computed
      from the character descriptions.

  Returns:
    A tuple ``(names, scores)``. ``names`` is a Series with the ``CharName``
    of the recommended characters, most similar first. ``scores`` is the
    matching list of ``(row_position, similarity)`` pairs.

  Raises:
    KeyError: If ``CharName`` is not present in ``indices``.
  """
  # Row of the requested character.
  # NOTE(review): assumes characters_data has the default 0..n-1 index so that
  # labels and positions coincide — confirm if the frame is ever re-indexed.
  idx = indices[CharName]

  # A name shared by several characters makes the lookup return a Series;
  # fall back to the first match so the row lookup below stays one-dimensional.
  if isinstance(idx, pd.Series):
    idx = idx.iloc[0]

  # Pair every row position with its similarity to the requested character.
  # The character itself is dropped explicitly: relying on it sorting first
  # fails when other rows tie with it (identical or empty descriptions).
  sim_scores = [(i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx]

  # Highest similarity first; keep the 20 best matches.
  sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[:20]

  # Row positions of the recommended characters.
  char_indices = [i for i, _ in sim_scores]

  # Names of the top 20 characters together with their scores.
  return characters_data['CharName'].iloc[char_indices], sim_scores

In [9]:
# Example: the 20 characters whose descriptions are closest to Light YAGAMI's.
get_recommendations('Light YAGAMI')

(3050       Sachiko YAGAMI
 599                Kizaru
 233         Ritsu TAINAKA
 5387       Kazunobu CHIBA
 2233          Johnny GILL
 1433        Heiji HATTORI
 243      Tsumugi KOTOBUKI
 445      Silvers RAYLEIGH
 1534               Sosuke
 10962            Cybersix
 2558                Dorry
 2058       Chitose HIBIYA
 117           Mio AKIYAMA
 2219                Genzo
 161          Azusa NAKANO
 803       Pesche GUATICHE
 7632            The Raven
 8446        Heizo HATTORI
 294        Kagami HIIRAGI
 3195         Yuusaku KUDO
 Name: CharName, dtype: object,
 [(3050, 0.2292060802179585),
  (599, 0.14666905552538628),
  (233, 0.11982614856625073),
  (5387, 0.11778749919925739),
  (2233, 0.10582935961816688),
  (1433, 0.0988748791891399),
  (243, 0.0979099322900219),
  (445, 0.0954233418060913),
  (1534, 0.08921436100748942),
  (10962, 0.0846446325776154),
  (2558, 0.07728473204253333),
  (2058, 0.07707414017698819),
  (117, 0.07670302981368075),
  (2219, 0.07627830515501613),
  (

# Anime Synopsis 

- Anime dataset에서 synopsis 추출
- Synopsis 문자열 벡터화
- 최애 Anime와 다른 모든 Anime synopsis 사이 코사인 유사도 추출
- 유사도 기반 top 20 Anime 추천

In [10]:
# Load the anime dataset from CSV into a DataFrame.
# NOTE(review): the path looks like a mounted Google Drive folder — adjust it when running elsewhere.
anime_data = pd.read_csv('/content/drive/MyDrive/CUAI_summer/anime_full.csv')

In [11]:
# Preview the first three rows to check the available columns.
anime_data.head(3)

Unnamed: 0,MAL_ID,Name,Genres,synopsis,Type,Source,Year
0,1,Cowboy Bebop,"Action,Adventure,Comedy,Drama,Sci-Fi,Space","In the year 2071, humanity has colonized sever...",TV,Original,1990.0
1,5,Cowboy Bebop: Tengoku no Tobira,"Action,Drama,Mystery,Sci-Fi,Space","other day, another bounty—such is the life of ...",Movie,Original,2000.0
2,6,Trigun,"Action,Sci-Fi,Adventure,Comedy,Drama,Shounen","Vash the Stampede is the man with a $$60,000,0...",TV,Manga,1990.0


In [12]:
# Preview the free-text column that the anime recommender is built on.
anime_data['synopsis'].head(3)

0    In the year 2071, humanity has colonized sever...
1    other day, another bounty—such is the life of ...
2    Vash the Stampede is the man with a $$60,000,0...
Name: synopsis, dtype: object

In [13]:
# Replace missing synopses (NaN) with empty strings so the vectorizer only sees text.
anime_data['synopsis'] = anime_data['synopsis'].fillna('')

# Fresh TF-IDF vectorizer that drops English stop words such as 'the' and 'a'.
# This rebinds the name `tfidf` used earlier for the character descriptions.
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary and build the TF-IDF matrix, one row per anime.
tfidf_matrix2 = tfidf.fit_transform(anime_data['synopsis'])

# Shown as the cell output: (number of rows, vocabulary size).
tfidf_matrix2.shape

(12181, 40927)

In [14]:
# Pairwise dot products between all synopsis vectors. TfidfVectorizer
# L2-normalizes rows by default, so the dot product equals cosine similarity.
# The result is a dense (n_anime x n_anime) array, so memory grows
# quadratically with the number of rows.
cosine_sim2 = linear_kernel(tfidf_matrix2, tfidf_matrix2)

In [15]:
# Lookup table: anime title -> row label in anime_data.
indices2 = pd.Series(anime_data.index, index=anime_data['Name'])

# Series.drop_duplicates() compares *values* (the row labels, which are already
# unique), so it never removed repeated titles. De-duplicate on the index instead,
# keeping the first row of each title, so that indices2[name] is always a scalar.
indices2 = indices2[~indices2.index.duplicated(keep='first')]

In [16]:
def get_recommendations_anime(Name, cosine_sim=cosine_sim2):
  """Return the 20 anime whose synopses are most similar to ``Name``.

  Args:
    Name: Anime title to look up in the module-level ``indices2`` table.
    cosine_sim: Square similarity matrix; row ``i`` holds the similarity of
      anime ``i`` to every anime. Defaults to the matrix computed from the
      anime synopses.

  Returns:
    A two-element list ``[names, scores]``. ``names`` is a Series with the
    ``Name`` of the recommended anime, most similar first. ``scores`` is the
    matching list of ``(row_position, similarity)`` pairs.

  Raises:
    KeyError: If ``Name`` is not present in ``indices2``.
  """
  # Row of the requested anime.
  # NOTE(review): assumes anime_data has the default 0..n-1 index so that
  # labels and positions coincide — confirm if the frame is ever re-indexed.
  idx = indices2[Name]

  # A title shared by several rows makes the lookup return a Series;
  # fall back to the first match so the row lookup below stays one-dimensional.
  if isinstance(idx, pd.Series):
    idx = idx.iloc[0]

  # Use the matrix passed by the caller; the body previously read the global
  # cosine_sim2 and silently ignored this argument. The anime itself is
  # dropped explicitly: relying on it sorting first fails when other rows tie
  # with it (identical or empty synopses).
  sim_scores = [(i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx]

  # Highest similarity first; keep the 20 best matches.
  sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[:20]

  # Row positions of the recommended anime.
  anime_indices = [i for i, _ in sim_scores]

  # Titles of the top 20 anime together with their scores.
  return [anime_data['Name'].iloc[anime_indices], sim_scores]

In [17]:
# Example: the 20 anime whose synopses are closest to that of Death Note.
get_recommendations_anime('Death Note')

[2106                          Death Note: Rewrite
 2343                                   Soul Eater
 668                          Shinigami no Ballad.
 1463                   YAT Anshin! Uchuu Ryokou 2
 2249                               Kite Liberator
 8717                           Dia Horizon (Kabu)
 11864      Nissan Note x The World of Golden Eggs
 494                               Yami no Matsuei
 651                  Bleach: Memories in the Rain
 1093                       Yami no Shihosha Judge
 5728                             Ai Tenchi Muyou!
 1800                                  Zombie-Loan
 9257              Pochitto Hatsumei: Pikachin-Kit
 6575     Persona 3 the Movie 4: Winter of Rebirth
 9787                                       Sketch
 3407         Onegai☆Teacher: Marie, Ai no Gekijou
 8068                 Owara Nai Chinkonka wo Utaou
 3575                   Bleach Movie 4: Jigoku-hen
 340                                         Gantz
 4064                       Da 

# Character Recommender
# Anime Recommender

In [18]:
# Character recommender demo: top 20 matches for Light YAGAMI.
get_recommendations('Light YAGAMI')

(3050       Sachiko YAGAMI
 599                Kizaru
 233         Ritsu TAINAKA
 5387       Kazunobu CHIBA
 2233          Johnny GILL
 1433        Heiji HATTORI
 243      Tsumugi KOTOBUKI
 445      Silvers RAYLEIGH
 1534               Sosuke
 10962            Cybersix
 2558                Dorry
 2058       Chitose HIBIYA
 117           Mio AKIYAMA
 2219                Genzo
 161          Azusa NAKANO
 803       Pesche GUATICHE
 7632            The Raven
 8446        Heizo HATTORI
 294        Kagami HIIRAGI
 3195         Yuusaku KUDO
 Name: CharName, dtype: object,
 [(3050, 0.2292060802179585),
  (599, 0.14666905552538628),
  (233, 0.11982614856625073),
  (5387, 0.11778749919925739),
  (2233, 0.10582935961816688),
  (1433, 0.0988748791891399),
  (243, 0.0979099322900219),
  (445, 0.0954233418060913),
  (1534, 0.08921436100748942),
  (10962, 0.0846446325776154),
  (2558, 0.07728473204253333),
  (2058, 0.07707414017698819),
  (117, 0.07670302981368075),
  (2219, 0.07627830515501613),
  (

In [19]:
# Anime recommender demo: top 20 matches for Death Note.
get_recommendations_anime('Death Note')

[2106                          Death Note: Rewrite
 2343                                   Soul Eater
 668                          Shinigami no Ballad.
 1463                   YAT Anshin! Uchuu Ryokou 2
 2249                               Kite Liberator
 8717                           Dia Horizon (Kabu)
 11864      Nissan Note x The World of Golden Eggs
 494                               Yami no Matsuei
 651                  Bleach: Memories in the Rain
 1093                       Yami no Shihosha Judge
 5728                             Ai Tenchi Muyou!
 1800                                  Zombie-Loan
 9257              Pochitto Hatsumei: Pikachin-Kit
 6575     Persona 3 the Movie 4: Winter of Rebirth
 9787                                       Sketch
 3407         Onegai☆Teacher: Marie, Ai no Gekijou
 8068                 Owara Nai Chinkonka wo Utaou
 3575                   Bleach Movie 4: Jigoku-hen
 340                                         Gantz
 4064                       Da 