# **RS : CONTENT BASED FILTERING**

In [41]:
# Data

import pandas as pd
import numpy as np

movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

## **DATA EXPLORATION**

### **Movies**

In [42]:
# Sample Data
movies.sample(5)

Unnamed: 0,movieId,title,genres
4674,6982,Forbidden Games (Jeux interdits) (1952),Drama|War
2364,3134,Grand Illusion (La grande illusion) (1937),Drama|War
7221,73268,Daybreakers (2010),Action|Drama|Horror|Thriller
6790,60487,"It's the Great Pumpkin, Charlie Brown (1966)",Animation|Children|Comedy
4365,6383,"2 Fast 2 Furious (Fast and the Furious 2, The)...",Action|Crime|Thriller


In [43]:
# Miss Values

movies.isna().sum()

movieId    0
title      0
genres     0
dtype: int64

In [44]:
# Unique Movie

movies.nunique()

movieId    9742
title      9737
genres      951
dtype: int64

### **Rating**

In [45]:
# Samples

ratings.sample(5)

Unnamed: 0,userId,movieId,rating,timestamp
24367,526,4027,5.0,1502132876
7446,600,1206,5.0,1237712957
94602,334,58105,3.5,1267814078
31507,109,377,3.0,841107538
46918,254,48780,4.0,1180447443


In [46]:
# Missing Values

ratings.isna().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [47]:
# Unique

ratings.nunique()

userId         610
movieId       9724
rating          10
timestamp    85043
dtype: int64

## **DATA PREPARATION**

In [48]:
# New Dataset

film = movies.copy()
film['genres'] = film['genres'].str.split('|')
film.head()

# film['year'] = film['title'].apply(lambda x: x[-5:-1]) -> convert ke int

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]"
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]"
2,3,Grumpier Old Men (1995),"[Comedy, Romance]"
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]"
4,5,Father of the Bride Part II (1995),[Comedy]


In [49]:
# Explode

film = film.explode('genres', ignore_index=True)
film

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure
1,1,Toy Story (1995),Animation
2,1,Toy Story (1995),Children
3,1,Toy Story (1995),Comedy
4,1,Toy Story (1995),Fantasy
...,...,...,...
22079,193583,No Game No Life: Zero (2017),Fantasy
22080,193585,Flint (2017),Drama
22081,193587,Bungo Stray Dogs: Dead Apple (2018),Action
22082,193587,Bungo Stray Dogs: Dead Apple (2018),Animation


In [50]:
# Check Genres

film['genres'].value_counts()

genres
Drama                 4361
Comedy                3756
Thriller              1894
Action                1828
Romance               1596
Adventure             1263
Crime                 1199
Sci-Fi                 980
Horror                 978
Fantasy                779
Children               664
Animation              611
Mystery                573
Documentary            440
War                    382
Musical                334
Western                167
IMAX                   158
Film-Noir               87
(no genres listed)      34
Name: count, dtype: int64

In [51]:
# Drop No Genre

film = film[film['genres'] != '(no genres listed)']
film['genres'].value_counts()

genres
Drama          4361
Comedy         3756
Thriller       1894
Action         1828
Romance        1596
Adventure      1263
Crime          1199
Sci-Fi          980
Horror          978
Fantasy         779
Children        664
Animation       611
Mystery         573
Documentary     440
War             382
Musical         334
Western         167
IMAX            158
Film-Noir        87
Name: count, dtype: int64

In [52]:
# Crosstab Genre X Movie

xtab_film = pd.crosstab(
    film['title'], 
    film['genres']
)

xtab_film.head()

genres,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
'71 (2014),1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0
'Hellboy': The Seeds of Creation (2004),1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
'Round Midnight (1986),0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0
'Salem's Lot (2004),0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0
'Til There Was You (1997),0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [53]:
xtab_film.loc["'71 (2014)"].values

array([1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0])

In [54]:
# Measure Similarity

from sklearn.metrics import jaccard_score

print(jaccard_score (xtab_film.loc["'71 (2014)"].values,
      xtab_film.loc["'Hellboy': The Seeds of Creation (2004)"].values))

print(jaccard_score (xtab_film.loc["'71 (2014)"].values,
      xtab_film.loc["'Til There Was You (1997)"].values))

0.125
0.2


In [55]:
xtab_film.values

array([[1, 0, 0, ..., 1, 1, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0]])

In [56]:
# Create CB-RS

from scipy.spatial.distance import pdist, squareform

jaccard_distance = pdist(xtab_film.values, metric='jaccard')
jaccard_matrix = squareform(jaccard_distance)
jaccard_sim = 1 - jaccard_matrix
df_jaccard = pd.DataFrame(
    jaccard_sim, 
    index=xtab_film.index,
    columns=xtab_film.index
)

df_jaccard

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),1.000000,0.125000,0.200000,0.333333,0.200000,0.0,0.0,0.25,0.166667,0.000000,...,0.40,0.40,0.20,0.20,0.200000,0.400000,0.400000,0.400000,0.000000,0.000000
'Hellboy': The Seeds of Creation (2004),0.125000,1.000000,0.000000,0.000000,0.000000,0.0,0.2,0.00,0.142857,0.285714,...,0.00,0.00,0.00,0.00,0.000000,0.142857,0.142857,0.142857,0.166667,0.166667
'Round Midnight (1986),0.200000,0.000000,1.000000,0.200000,0.333333,0.0,0.0,0.50,0.250000,0.000000,...,0.25,0.25,0.00,0.00,0.333333,0.000000,0.000000,0.000000,0.000000,0.333333
'Salem's Lot (2004),0.333333,0.000000,0.200000,1.000000,0.200000,0.0,0.0,0.25,0.166667,0.000000,...,0.40,0.75,0.50,0.50,0.200000,0.166667,0.166667,0.166667,0.000000,0.000000
'Til There Was You (1997),0.200000,0.000000,0.333333,0.200000,1.000000,0.5,0.0,0.50,0.666667,0.000000,...,0.25,0.25,0.00,0.00,0.333333,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
eXistenZ (1999),0.400000,0.142857,0.000000,0.166667,0.000000,0.0,0.0,0.00,0.000000,0.166667,...,0.20,0.20,0.25,0.25,0.000000,1.000000,0.500000,0.500000,0.000000,0.000000
xXx (2002),0.400000,0.142857,0.000000,0.166667,0.000000,0.0,0.0,0.00,0.000000,0.000000,...,0.50,0.20,0.25,0.25,0.000000,0.500000,1.000000,1.000000,0.000000,0.000000
xXx: State of the Union (2005),0.400000,0.142857,0.000000,0.166667,0.000000,0.0,0.0,0.00,0.000000,0.000000,...,0.50,0.20,0.25,0.25,0.000000,0.500000,1.000000,1.000000,0.000000,0.000000
¡Three Amigos! (1986),0.000000,0.166667,0.000000,0.000000,0.000000,0.0,0.5,0.00,0.250000,0.200000,...,0.00,0.00,0.00,0.00,0.000000,0.000000,0.000000,0.000000,1.000000,0.333333


In [57]:
# Make Recomendation

mov = 'I Love You, Beth Cooper (2009)'
sim = df_jaccard.loc[mov].sort_values(ascending=False)
sim.head(10)

title
L.A. Story (1991)                         1.0
Midsummer Night's Sex Comedy, A (1982)    1.0
Three Men and a Little Lady (1990)        1.0
Carry on Cruising (1962)                  1.0
Three to Tango (1999)                     1.0
Threesome (1994)                          1.0
Moonstruck (1987)                         1.0
Monster-in-Law (2005)                     1.0
Monsoon Wedding (2001)                    1.0
Mo' Money (1992)                          1.0
Name: I Love You, Beth Cooper (2009), dtype: float64

In [58]:
film.sample(10)['title'].values

array(["Jesus' Son (1999)", 'Possession (2002)', 'This Means War (2012)',
       'Starter for 10 (2006)', 'Mack, The (1973)',
       'Trip to the Moon, A (Voyage dans la lune, Le) (1902)',
       'Dead Like Me: Life After Death (2009)',
       'Transformers: Revenge of the Fallen (2009)',
       'Next Stop Wonderland (1998)', 'Why Him? (2016)'], dtype=object)

## **USER PROFILING**

In [59]:
# List Film Kesukaan - Faza

film_kesukaan = ['Invisible Man, The (1933)', 'Follow Me, Boys! (1966)',
       'Grudge, The (2004)',
       'Kung Fu Panda: Secrets of the Furious Five (2008)',
       'Lost in Space (1998)', 'Here on Earth (2000)',
       'Incredibles, The (2004)',
       'Dragons: Gift of the Night Fury (2011)', 'Moonraker (1979)',
       'Legends of the Fall (1994)']

In [60]:
# Crosstab

xtab_film_kesukaan = xtab_film.reindex(film_kesukaan)
xtab_film_kesukaan

genres,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
"Invisible Man, The (1933)",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
"Follow Me, Boys! (1966)",0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0
"Grudge, The (2004)",0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0
Kung Fu Panda: Secrets of the Furious Five (2008),1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Lost in Space (1998),1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
Here on Earth (2000),0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
"Incredibles, The (2004)",1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Dragons: Gift of the Night Fury (2011),0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Moonraker (1979),1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0
Legends of the Fall (1994),0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1


In [61]:
# User Profile
user_prof = xtab_film_kesukaan.mean().sort_values(ascending=False)
user_prof

genres
Action         0.4
Comedy         0.4
Adventure      0.4
Animation      0.3
Drama          0.3
Sci-Fi         0.3
Children       0.2
Thriller       0.2
Horror         0.2
Romance        0.2
Mystery        0.1
War            0.1
Western        0.1
Musical        0.0
IMAX           0.0
Fantasy        0.0
Documentary    0.0
Crime          0.0
Film-Noir      0.0
dtype: float64

In [62]:
# Create User Personality

from sklearn.metrics.pairwise import cosine_similarity

## Drop Film Kesukaan
xtab_film_subset = xtab_film.drop(film_kesukaan)

## Calculate Similarity
sim_array = cosine_similarity(
    user_prof.values.reshape(1,-1),
    xtab_film_subset
)

sim_df = pd.DataFrame(
    sim_array.T,
    index = xtab_film_subset.index,
    columns=['Similarity Score']
)

sim_df.sort_values(by='Similarity Score', ascending=False).head(10)

Unnamed: 0_level_0,Similarity Score
title,Unnamed: 1_level_1
The Lego Movie (2014),0.842152
"Twelve Tasks of Asterix, The (Les douze travaux d'Astérix) (1976)",0.842152
TMNT (Teenage Mutant Ninja Turtles) (2007),0.842152
Asterix and Cleopatra (Astérix et Cléopâtre) (1968),0.830278
Bolt (2008),0.830278
Alpha and Omega 3: The Great Wolf Games (2014),0.830278
Zootopia (2016),0.830278
Madagascar: Escape 2 Africa (2008),0.800044
Kung Fu Panda 2 (2011),0.800044
Catch That Kid (2004),0.784152


# **TUGAS**
1. Gunakan Data ``IMDB Top 1000``
2. Coba lakukan eksplorasi dari datanya
3. Buat sebuah function recomendation system berdasarkan genre dan directornya
4. Buat user profile dengan mengambil 10 sample secara random

In [63]:
new = pd.read_csv('imdb_top_1000.csv')
new.head()

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


## **Eksplorasi Data**

In [72]:
new['Genre'].value_counts()

Genre
Drama                        85
Drama, Romance               37
Comedy, Drama                35
Comedy, Drama, Romance       31
Action, Crime, Drama         30
                             ..
Adventure, Thriller           1
Animation, Action, Sci-Fi     1
Action, Crime, Comedy         1
Animation, Crime, Mystery     1
Adventure, Comedy, War        1
Name: count, Length: 202, dtype: int64

In [73]:
new['Director'].value_counts()

Director
Alfred Hitchcock    14
Steven Spielberg    13
Hayao Miyazaki      11
Martin Scorsese     10
Akira Kurosawa      10
                    ..
Neill Blomkamp       1
Tomas Alfredson      1
Duncan Jones         1
Jacques Audiard      1
George Stevens       1
Name: count, Length: 548, dtype: int64

In [75]:
# Create New Dataset

pilem = new[['Series_Title','Director']]
pilem['Data'] = pilem['Director'] 
pilem = pilem[['Series_Title', 'Data']]
pilem

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pilem['Data'] = pilem['Director']


Unnamed: 0,Series_Title,Data
0,The Shawshank Redemption,Frank Darabont
1,The Godfather,Francis Ford Coppola
2,The Dark Knight,Christopher Nolan
3,The Godfather: Part II,Francis Ford Coppola
4,12 Angry Men,Sidney Lumet
...,...,...
995,Breakfast at Tiffany's,Blake Edwards
996,Giant,George Stevens
997,From Here to Eternity,Fred Zinnemann
998,Lifeboat,Alfred Hitchcock


## **CARA LAIN : RECOMENDATION SYSTEM - CONTENT BASED**

In [64]:
new.columns

Index(['Poster_Link', 'Series_Title', 'Released_Year', 'Certificate',
       'Runtime', 'Genre', 'IMDB_Rating', 'Overview', 'Meta_score', 'Director',
       'Star1', 'Star2', 'Star3', 'Star4', 'No_of_Votes', 'Gross'],
      dtype='object')

In [65]:
# Create New Dataset

pilem = new[['Series_Title','Director','Star1', 'Star2', 'Star3', 'Star4']]
pilem['Data'] = pilem['Director'] + ' ' + pilem['Star1'] + ' ' + pilem['Star2'] + ' ' +pilem['Star3'] + ' ' + pilem['Star4'] 
pilem = pilem[['Series_Title', 'Data']]
pilem

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pilem['Data'] = pilem['Director'] + ' ' + pilem['Star1'] + ' ' + pilem['Star2'] + ' ' +pilem['Star3'] + ' ' + pilem['Star4']


Unnamed: 0,Series_Title,Data
0,The Shawshank Redemption,Frank Darabont Tim Robbins Morgan Freeman Bob ...
1,The Godfather,Francis Ford Coppola Marlon Brando Al Pacino J...
2,The Dark Knight,Christopher Nolan Christian Bale Heath Ledger ...
3,The Godfather: Part II,Francis Ford Coppola Al Pacino Robert De Niro ...
4,12 Angry Men,Sidney Lumet Henry Fonda Lee J. Cobb Martin Ba...
...,...,...
995,Breakfast at Tiffany's,Blake Edwards Audrey Hepburn George Peppard Pa...
996,Giant,George Stevens Elizabeth Taylor Rock Hudson Ja...
997,From Here to Eternity,Fred Zinnemann Burt Lancaster Montgomery Clift...
998,Lifeboat,Alfred Hitchcock Tallulah Bankhead John Hodiak...


In [67]:
# Tf Idf Vectorizer

from sklearn.feature_extraction.text import TfidfVectorizer
vect = TfidfVectorizer() # Seberapa banyak aaron disebut di row/document terkait terhadap satu dataframe
dtm = vect.fit_transform(pilem['Data'])
dtm_df = pd.DataFrame(
    dtm.toarray(),
    columns=vect.get_feature_names_out()
)

dtm_df


Unnamed: 0,aamir,aaron,abalov,abbass,abbott,abdel,abdellatif,abdi,abdirahman,abel,...,émile,éric,ôkura,ôtomo,ôtsuka,ömer,özberk,özge,öztekin,ünel
0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.342933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
996,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
997,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
998,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [68]:
# Cosine Similarity

cos_sin = cosine_similarity(dtm_df)
pd.DataFrame(cos_sin)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,1.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.061009,0.000000
1,0.000000,1.000000,0.0,0.602929,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.038360,0.000000,0.0,0.000000,0.000000,0.000000,0.039994,0.0,0.000000,0.000000
2,0.000000,0.000000,1.0,0.000000,0.000000,0.0,0.000000,0.0,0.148477,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
3,0.000000,0.602929,0.0,1.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.070418
4,0.000000,0.000000,0.0,0.000000,1.000000,0.0,0.033733,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.031278,0.032386,0.000000,0.000000,0.0,0.031723,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.000000,0.048349,1.000000,0.048317,0.0,0.000000,0.000000
996,0.000000,0.039994,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.037463,0.101768,0.0,0.000000,0.050924,0.048317,1.000000,0.0,0.000000,0.098584
997,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,1.0,0.000000,0.000000
998,0.061009,0.000000,0.0,0.000000,0.031723,0.0,0.031107,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.028844,0.029865,0.000000,0.000000,0.0,1.000000,0.137971


In [69]:
    # Create Function To Get Recomendation

def recomendation (judul) :
    index_mov = pilem[pilem['Series_Title'] == judul].index.values[0]
    similar_film = list(enumerate(cos_sin[index_mov]))
    sorted_mov = sorted(similar_film, key=lambda x : x[1], reverse=True)
    fil = []
    for i in range (1,11) :
        fil.append(sorted_mov[i][0])
    return new[['Series_Title', 'IMDB_Rating']].iloc[fil]

In [70]:
recomendation('Apocalypse Now')

Unnamed: 0,Series_Title,IMDB_Rating
693,The Conversation,7.8
1,The Godfather,9.2
3,The Godfather: Part II,9.0
974,The Godfather: Part III,7.6
305,On the Waterfront,8.1
447,A Streetcar Named Desire,8.0
243,Catch Me If You Can,8.1
808,Le dîner de cons,7.7
280,Network,8.1
968,Falling Down,7.6
