Nama : Muhammad Aldi Surya Putra \
Asal : Universitas Pendidikan Indonesia

Anime Recommendation

# Mengambil dataset dari kaggle

## Menghubungkan colab dan kaggle

In [88]:
! pip install -q kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

## Download dan unzip dataset

In [89]:
! kaggle datasets download -d CooperUnion/anime-recommendations-database
! unzip anime-recommendations-database.zip -d anime

anime-recommendations-database.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  anime-recommendations-database.zip
replace anime/anime.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: anime/anime.csv         
replace anime/rating.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: anime/rating.csv        


In [90]:
# Import library yang dibutuhkan

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

# Data Exploratory

In [91]:
df = pd.read_csv('/content/anime/anime.csv')
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


## Mengecek informasi terkait dataset

In [92]:
print("*Jumlah kolom dan index*")
print(f"Index : {df.shape[0]}\n Kolom : {df.shape[1]}")
print("\n")
print("*Kolom pada dataframe*")
print(df.columns)
print("\n")
print("*Info pada kolom*")
print(df.info())
print("\n")
print("*Info dataframe*")
print(df.describe())

*Jumlah kolom dan index*
Index : 12294
 Kolom : 7


*Kolom pada dataframe*
Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')


*Info pada kolom*
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
None


*Info dataframe*
           anime_id        rating       members
count  12294.000000  12064.000000  1.229400e+04
mean   14058.221653      6.473902  1.807134e+04
std    11455.294701      1.026746  5.482068e+04
min        1.000000      1.670000  5.000000e+00
25%     3484.250000     

# Data Preparation

## Mengatasi missing value pada dataset

### Jumlah missing value pada tiap kolom dataset

In [93]:
print(df.isnull().sum())

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64


### Drop data yang missing

In [94]:
df.dropna(inplace=True)

In [95]:
print(df.isnull().sum())

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64


## Menghapus kolom - kolom yang tidak dibutuhkan

In [96]:
df.drop(['type', 'episodes', 'members'], axis=1, inplace=True)
df

Unnamed: 0,anime_id,name,genre,rating
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",9.25
3,9253,Steins;Gate,"Sci-Fi, Thriller",9.17
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",9.16
...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,4.15
12290,5543,Under World,Hentai,4.28
12291,5621,Violence Gekiga David no Hoshi,Hentai,4.88
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,4.98


## Sorting data berdasarkan 'anime_id'

In [97]:
df = df.sort_values('anime_id', ignore_index=True)

In [98]:
df

Unnamed: 0,anime_id,name,genre,rating
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",8.82
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space",8.40
2,6,Trigun,"Action, Comedy, Sci-Fi",8.32
3,7,Witch Hunter Robin,"Action, Drama, Magic, Mystery, Police, Superna...",7.36
4,8,Beet the Vandel Buster,"Adventure, Fantasy, Shounen, Supernatural",7.06
...,...,...,...,...
12012,34476,Platonic Chain: Ansatsu Jikkouchuu,"Sci-Fi, Slice of Life",1.67
12013,34490,Sushi Azarashi,Comedy,3.00
12014,34503,Kochinpa! Dainiki,Comedy,3.40
12015,34514,Pokemon Generations,"Action, Adventure, Fantasy, Game, Kids",7.21


# Model Development

## Melakukan perhitungan IDF pada data kolom 'genre'

In [99]:
vectorizer = TfidfVectorizer()
genre_name = vectorizer.fit(df['genre'])
genre_name.get_feature_names()

['action',
 'adventure',
 'ai',
 'arts',
 'cars',
 'comedy',
 'dementia',
 'demons',
 'drama',
 'ecchi',
 'fantasy',
 'fi',
 'game',
 'harem',
 'hentai',
 'historical',
 'horror',
 'josei',
 'kids',
 'life',
 'magic',
 'martial',
 'mecha',
 'military',
 'music',
 'mystery',
 'of',
 'parody',
 'police',
 'power',
 'psychological',
 'romance',
 'samurai',
 'school',
 'sci',
 'seinen',
 'shoujo',
 'shounen',
 'slice',
 'space',
 'sports',
 'super',
 'supernatural',
 'thriller',
 'vampire',
 'yaoi',
 'yuri']

## Fit dan Transform kolom 'genre' menjadi vector

In [100]:
vectors = vectorizer.fit_transform(df['genre'])

## Mengubah vektor menjadi matriks

In [101]:
vectors.todense()

matrix([[0.31832632, 0.34131325, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.30162321, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.4901204 , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.32821436, 0.35191532, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ]])

## Menghitung Cosine Similiarity pada matriks

In [102]:
cos_sim = cosine_similarity(vectors)

In [103]:
cos_sim.shape

(12017, 12017)

## Membuat Dataframe yang berisi Cosine Similiarity tiap anime

In [104]:
cos_sim_df = pd.DataFrame(vectors.todense(),
             columns=genre_name.get_feature_names(),
             index=df.name
).sample(len(genre_name.get_feature_names()), axis=1).sample(10, axis=0)

cos_sim_df

Unnamed: 0_level_0,parody,super,historical,supernatural,yuri,vampire,space,thriller,sci,mystery,harem,school,adventure,sports,slice,hentai,psychological,music,police,of,fi,martial,ecchi,drama,arts,samurai,ai,romance,seinen,demons,magic,horror,shounen,game,fantasy,shoujo,mecha,power,action,josei,dementia,cars,military,comedy,kids,yaoi,life
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1
Happy Seven: The TV Manga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.806221,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.591614,0.0,0.0,0.0
Tesagure! Bukatsumono Spin-off Purupurun Sharumu to Asobou,0.55339,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.41868,0.0,0.0,0.415719,0.0,0.0,0.0,0.0,0.415719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.415719
Toshokan Sensou: Kakumei no Tsubasa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.501863,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.396573,0.0,0.0,0.0,0.700786,0.315851,0.0,0.0,0.0
Itsuka Tenma no Kuro Usagi Special,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.895281,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.445502,0.0,0.0,0.0
Susie-chan to Marvy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.545869,0.837871,0.0,0.0
Trapp Ikka Monogatari Specials,0.0,0.0,0.554633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.546621,0.0,0.0,0.0,0.0,0.0,0.419147,0.0,0.0,0.0,0.466801,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Lupin III: Secret File,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.600078,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.664243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.445745,0.0,0.0,0.0
Cross Fight B-Daman,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Usagi-chan de Cue!!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.561009,0.0,0.0,0.0,0.0,0.44357,0.0,0.0,0.5364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35051,0.0,0.0,0.0,0.0,0.279164,0.0,0.0,0.0
Ranma ½,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.347718,0.0,0.0,0.0,0.0,0.347718,0.0,0.507304,0.0,0.0,0.507304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.282247,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.207116,0.0,0.0,0.347718


## Membuat fungsi untuk mendapatkan rekomendasi anime

In [105]:
def anime_recommendation(df, animeName, k = 15):
    index = pd.Series(df.index, index = df["name"])
    index = index[~index.index.duplicated(keep = "last")]
    anime_index = index[animeName]
    sim_score = pd.DataFrame(cos_sim[anime_index], columns = ["rating"])
    sim_anime = sim_score.sort_values(by = "rating", ascending = False).  \
            iloc[1:k].index
    return pd.DataFrame(df[["name", "genre", "rating"]].iloc[sim_anime])
    

# Testing Model

## Melihat hasil rekomendasi 

In [106]:
anime_recommendation(df, 'One Piece')

Unnamed: 0,name,genre,rating
10866,One Piece: Episode of Sabo - 3 Kyoudai no Kizu...,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",7.78
8028,One Piece: Episode of Merry - Mou Hitori no Na...,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",8.29
11,One Piece,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",8.58
430,One Piece Movie 1,"Action, Adventure, Comedy, Fantasy, Shounen, S...",7.25
994,One Piece: Umi no Heso no Daibouken-hen,"Action, Adventure, Comedy, Fantasy, Shounen, S...",7.44
5264,One Piece Recap,"Action, Adventure, Comedy, Fantasy, Shounen, S...",7.34
1128,One Piece: Oounabara ni Hirake! Dekkai Dekkai ...,"Action, Adventure, Comedy, Fantasy, Shounen, S...",7.43
7456,One Piece: Glorious Island,"Action, Adventure, Comedy, Fantasy, Shounen, S...",7.35
469,Dragon Ball Movie 1: Shen Long no Densetsu,"Action, Adventure, Comedy, Fantasy, Shounen, S...",7.05
11162,One Piece: Adventure of Nebulandia,"Action, Adventure, Comedy, Fantasy, Shounen, S...",7.5
