# Movie Recommender: A Content-Based Approach

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Reading and cleaning data

### Movie Rating Dataset

In [2]:
movie_rating_df = pd.read_csv('https://storage.googleapis.com/dqlab-dataset/movie_rating_df.csv')
movie_rating_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes
0,tt0000001,short,Carmencita,Carmencita,0,1894.0,,1.0,"Documentary,Short",5.6,1608
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892.0,,5.0,"Animation,Short",6.0,197
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892.0,,4.0,"Animation,Comedy,Romance",6.5,1285
3,tt0000004,short,Un bon bock,Un bon bock,0,1892.0,,12.0,"Animation,Short",6.1,121
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893.0,,1.0,"Comedy,Short",6.1,2050
...,...,...,...,...,...,...,...,...,...,...,...
751609,tt9916538,movie,Kuambil Lagi Hatiku,Kuambil Lagi Hatiku,0,2019.0,,123.0,,8.4,5
751610,tt9916544,short,My Sweet Prince,My Sweet Prince,0,2019.0,,12.0,"Drama,Short",7.2,19
751611,tt9916576,tvEpisode,Destinee's Story,Destinee's Story,0,2019.0,,85.0,,6.0,9
751612,tt9916720,short,The Nun 2,The Nun 2,0,2019.0,,10.0,"Comedy,Horror,Mystery",5.6,49


In [3]:
print(movie_rating_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 751614 entries, 0 to 751613
Data columns (total 11 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   tconst          751614 non-null  object 
 1   titleType       751614 non-null  object 
 2   primaryTitle    751614 non-null  object 
 3   originalTitle   751614 non-null  object 
 4   isAdult         751614 non-null  int64  
 5   startYear       751614 non-null  float64
 6   endYear         16072 non-null   float64
 7   runtimeMinutes  751614 non-null  float64
 8   genres          486766 non-null  object 
 9   averageRating   751614 non-null  float64
 10  numVotes        751614 non-null  int64  
dtypes: float64(4), int64(2), object(5)
memory usage: 63.1+ MB
None


In [4]:
movie_rating_df['endYear'] = movie_rating_df['endYear'].fillna(movie_rating_df['startYear'])
movie_rating_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes
0,tt0000001,short,Carmencita,Carmencita,0,1894.0,1894.0,1.0,"Documentary,Short",5.6,1608
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892.0,1892.0,5.0,"Animation,Short",6.0,197
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892.0,1892.0,4.0,"Animation,Comedy,Romance",6.5,1285
3,tt0000004,short,Un bon bock,Un bon bock,0,1892.0,1892.0,12.0,"Animation,Short",6.1,121
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893.0,1893.0,1.0,"Comedy,Short",6.1,2050
...,...,...,...,...,...,...,...,...,...,...,...
751609,tt9916538,movie,Kuambil Lagi Hatiku,Kuambil Lagi Hatiku,0,2019.0,2019.0,123.0,,8.4,5
751610,tt9916544,short,My Sweet Prince,My Sweet Prince,0,2019.0,2019.0,12.0,"Drama,Short",7.2,19
751611,tt9916576,tvEpisode,Destinee's Story,Destinee's Story,0,2019.0,2019.0,85.0,,6.0,9
751612,tt9916720,short,The Nun 2,The Nun 2,0,2019.0,2019.0,10.0,"Comedy,Horror,Mystery",5.6,49


In [5]:
movie_rating_df['genres'] = movie_rating_df['genres'].fillna('Unknown')
movie_rating_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes
0,tt0000001,short,Carmencita,Carmencita,0,1894.0,1894.0,1.0,"Documentary,Short",5.6,1608
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892.0,1892.0,5.0,"Animation,Short",6.0,197
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892.0,1892.0,4.0,"Animation,Comedy,Romance",6.5,1285
3,tt0000004,short,Un bon bock,Un bon bock,0,1892.0,1892.0,12.0,"Animation,Short",6.1,121
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893.0,1893.0,1.0,"Comedy,Short",6.1,2050
...,...,...,...,...,...,...,...,...,...,...,...
751609,tt9916538,movie,Kuambil Lagi Hatiku,Kuambil Lagi Hatiku,0,2019.0,2019.0,123.0,Unknown,8.4,5
751610,tt9916544,short,My Sweet Prince,My Sweet Prince,0,2019.0,2019.0,12.0,"Drama,Short",7.2,19
751611,tt9916576,tvEpisode,Destinee's Story,Destinee's Story,0,2019.0,2019.0,85.0,Unknown,6.0,9
751612,tt9916720,short,The Nun 2,The Nun 2,0,2019.0,2019.0,10.0,"Comedy,Horror,Mystery",5.6,49


In [6]:
print(movie_rating_df.isnull().sum())

tconst            0
titleType         0
primaryTitle      0
originalTitle     0
isAdult           0
startYear         0
endYear           0
runtimeMinutes    0
genres            0
averageRating     0
numVotes          0
dtype: int64


### Director Writers Dataset

In [7]:
director_writers = pd.read_csv('https://storage.googleapis.com/dqlab-dataset/directors_writers.csv')
director_writers['director_name'] = director_writers['director_name'].apply(lambda row: row.split(','))
director_writers['writer_name'] = director_writers['writer_name'].apply(lambda row: row.split(','))
director_writers

Unnamed: 0,tconst,director_name,writer_name
0,tt0011414,[David Kirkland],"[John Emerson, Anita Loos]"
1,tt0011890,[Roy William Neill],"[Arthur F. Goodrich, Burns Mantle, Mary Murillo]"
2,tt0014341,"[Buster Keaton, John G. Blystone]","[Jean C. Havez, Clyde Bruckman, Joseph A. Mitc..."
3,tt0018054,[Cecil B. DeMille],[Jeanie Macpherson]
4,tt0024151,[James Cruze],"[Max Miller, Wells Root, Jack Jevne]"
...,...,...,...
981,tt9236688,[Kai Wessel],[Christian Jeltsch]
982,tt9278408,[Bahadir Ince],"[Levent Cantek, Ali Demirel, Baris Erdogan]"
983,tt9285882,[Rapman],[Rapman]
984,tt9310372,[Sujoy Ghosh],"[Sujoy Ghosh, Raj Vasant, Pratim D. Gupta, Sur..."


In [8]:
print(director_writers.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 986 entries, 0 to 985
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   tconst         986 non-null    object
 1   director_name  986 non-null    object
 2   writer_name    986 non-null    object
dtypes: object(3)
memory usage: 23.2+ KB
None


In [9]:
print(director_writers.isnull().sum())

tconst           0
director_name    0
writer_name      0
dtype: int64


### Actor Dataset

In [10]:
name_df = pd.read_csv('https://storage.googleapis.com/dqlab-dataset/actor_name.csv')
name_df = name_df[['nconst','primaryName','knownForTitles']]
name_df

Unnamed: 0,nconst,primaryName,knownForTitles
0,nm1774132,Nathan McLaughlin,"tt0417686,tt1713976,tt1891860,tt0454839"
1,nm10683464,Bridge Andrew,tt7718088
2,nm1021485,Brandon Fransvaag,tt0168790
3,nm6940929,Erwin van der Lely,tt4232168
4,nm5764974,Svetlana Shypitsyna,tt3014168
...,...,...,...
995,nm7596674,Paul Whitrow,"tt4118352,tt9104322,tt4447090,tt4892804"
996,nm5938546,Wendy Ponce,tt2125666
997,nm2101810,Ans Brugmans,tt0488280
998,nm5245804,Eliza Jenkins,tt1464058


In [11]:
print(name_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   nconst          1000 non-null   object
 1   primaryName     1000 non-null   object
 2   knownForTitles  1000 non-null   object
dtypes: object(3)
memory usage: 23.6+ KB
None


In [12]:
name_df['knownForTitles'].apply(lambda x: len(x.split(','))).unique()
name_df['knownForTitles'] = name_df['knownForTitles'].apply(lambda x: x.split(','))
name_df

Unnamed: 0,nconst,primaryName,knownForTitles
0,nm1774132,Nathan McLaughlin,"[tt0417686, tt1713976, tt1891860, tt0454839]"
1,nm10683464,Bridge Andrew,[tt7718088]
2,nm1021485,Brandon Fransvaag,[tt0168790]
3,nm6940929,Erwin van der Lely,[tt4232168]
4,nm5764974,Svetlana Shypitsyna,[tt3014168]
...,...,...,...
995,nm7596674,Paul Whitrow,"[tt4118352, tt9104322, tt4447090, tt4892804]"
996,nm5938546,Wendy Ponce,[tt2125666]
997,nm2101810,Ans Brugmans,[tt0488280]
998,nm5245804,Eliza Jenkins,[tt1464058]


In [13]:
print(name_df.isnull().sum())

nconst            0
primaryName       0
knownForTitles    0
dtype: int64


### Joining Datasets

In [14]:
def unnest_dataframe(df, column):
    df_uni = []
    idx = df.index.repeat(df[column].str.len())
    df1 = pd.DataFrame({column: np.concatenate(df[column].values)})
    df1.index = idx
    df_uni.append(df1)
    df_concat = pd.concat(df_uni, axis=1)
    unnested_df = df_concat.join(df.drop([column], axis=1), how='left')
    return unnested_df[df.columns.tolist()]

unnested_df = name_df.explode('knownForTitles')
unnested_df

Unnamed: 0,nconst,primaryName,knownForTitles
0,nm1774132,Nathan McLaughlin,tt0417686
0,nm1774132,Nathan McLaughlin,tt1713976
0,nm1774132,Nathan McLaughlin,tt1891860
0,nm1774132,Nathan McLaughlin,tt0454839
1,nm10683464,Bridge Andrew,tt7718088
...,...,...,...
998,nm5245804,Eliza Jenkins,tt1464058
999,nm0948460,Greg Yolen,tt0436869
999,nm0948460,Greg Yolen,tt0476663
999,nm0948460,Greg Yolen,tt0109723


In [15]:
unnested_drop = unnested_df.drop(['nconst'], axis=1)
df_grouped = unnested_drop.groupby('knownForTitles')['primaryName'].agg(list).reset_index()
df_grouped.columns = ['knownForTitles', 'cast_name']
df_grouped

Unnamed: 0,knownForTitles,cast_name
0,tt0008125,[Charles Harley]
1,tt0009706,[Charles Harley]
2,tt0010304,[Natalie Talmadge]
3,tt0011414,[Natalie Talmadge]
4,tt0011890,[Natalie Talmadge]
...,...,...
1893,tt9610496,[Stefano Baffetti]
1894,tt9714030,[Kevin Kain]
1895,tt9741820,[Caroline Plyler]
1896,tt9759814,[Ethan Francis]


In [16]:
base_df = pd.merge(df_grouped, movie_rating_df, left_on='knownForTitles', right_on='tconst', how='inner')
base_df = pd.merge(base_df, director_writers, left_on='tconst', right_on='tconst', how='left')
base_df

Unnamed: 0,knownForTitles,cast_name,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes,director_name,writer_name
0,tt0011414,[Natalie Talmadge],tt0011414,movie,The Love Expert,The Love Expert,0,1920.0,1920.0,60.0,"Comedy,Romance",4.9,136,[David Kirkland],"[John Emerson, Anita Loos]"
1,tt0011890,[Natalie Talmadge],tt0011890,movie,Yes or No,Yes or No,0,1920.0,1920.0,72.0,Unknown,6.3,7,[Roy William Neill],"[Arthur F. Goodrich, Burns Mantle, Mary Murillo]"
2,tt0014341,[Natalie Talmadge],tt0014341,movie,Our Hospitality,Our Hospitality,0,1923.0,1923.0,65.0,"Comedy,Romance,Thriller",7.8,9621,"[Buster Keaton, John G. Blystone]","[Jean C. Havez, Clyde Bruckman, Joseph A. Mitc..."
3,tt0018054,[Reeka Roberts],tt0018054,movie,The King of Kings,The King of Kings,0,1927.0,1927.0,155.0,"Biography,Drama,History",7.3,1826,[Cecil B. DeMille],[Jeanie Macpherson]
4,tt0024151,[James Hackett],tt0024151,movie,I Cover the Waterfront,I Cover the Waterfront,0,1933.0,1933.0,80.0,"Drama,Romance",6.3,455,[James Cruze],"[Max Miller, Wells Root, Jack Jevne]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1055,tt9246600,[Vanessa Hanson],tt9246600,tvSeries,UFC on ESPN,UFC on ESPN,0,2019.0,2019.0,180.0,Unknown,8.1,38,,
1056,tt9278408,[Utku Arslan],tt9278408,tvMiniSeries,Bozkir,Bozkir,0,2018.0,2019.0,50.0,"Crime,Drama,Mystery",8.2,1231,[Bahadir Ince],"[Levent Cantek, Ali Demirel, Baris Erdogan]"
1057,tt9285882,[Jonathon Deering],tt9285882,movie,Blue Story,Blue Story,0,2019.0,2019.0,91.0,"Crime,Drama",5.5,1411,[Rapman],[Rapman]
1058,tt9310372,[Sandini Dhar],tt9310372,tvSeries,Typewriter,Typewriter,0,2019.0,2019.0,48.0,"Horror,Thriller",6.5,2895,[Sujoy Ghosh],"[Sujoy Ghosh, Raj Vasant, Pratim D. Gupta, Sur..."


In [17]:
base_drop = base_df.drop(['knownForTitles'], axis=1)
print(base_drop.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1060 entries, 0 to 1059
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   cast_name       1060 non-null   object 
 1   tconst          1060 non-null   object 
 2   titleType       1060 non-null   object 
 3   primaryTitle    1060 non-null   object 
 4   originalTitle   1060 non-null   object 
 5   isAdult         1060 non-null   int64  
 6   startYear       1060 non-null   float64
 7   endYear         1060 non-null   float64
 8   runtimeMinutes  1060 non-null   float64
 9   genres          1060 non-null   object 
 10  averageRating   1060 non-null   float64
 11  numVotes        1060 non-null   int64  
 12  director_name   986 non-null    object 
 13  writer_name     986 non-null    object 
dtypes: float64(4), int64(2), object(8)
memory usage: 116.1+ KB
None


In [18]:
base_drop[['director_name','writer_name']] = base_drop[['director_name','writer_name']].fillna('unknown')
base_drop['genres'] = base_drop['genres'].apply(lambda x: x.split(','))

In [19]:
print(base_drop.isnull().sum())

cast_name         0
tconst            0
titleType         0
primaryTitle      0
originalTitle     0
isAdult           0
startYear         0
endYear           0
runtimeMinutes    0
genres            0
averageRating     0
numVotes          0
director_name     0
writer_name       0
dtype: int64


In [20]:
base_drop = base_drop[['primaryTitle','titleType','startYear','runtimeMinutes','genres','averageRating','numVotes','cast_name','director_name','writer_name']]
base_drop.columns = ['title','type','start','duration','genres','rating','votes','cast_name','director_name','writer_name']
base_drop

Unnamed: 0,title,type,start,duration,genres,rating,votes,cast_name,director_name,writer_name
0,The Love Expert,movie,1920.0,60.0,"[Comedy, Romance]",4.9,136,[Natalie Talmadge],[David Kirkland],"[John Emerson, Anita Loos]"
1,Yes or No,movie,1920.0,72.0,[Unknown],6.3,7,[Natalie Talmadge],[Roy William Neill],"[Arthur F. Goodrich, Burns Mantle, Mary Murillo]"
2,Our Hospitality,movie,1923.0,65.0,"[Comedy, Romance, Thriller]",7.8,9621,[Natalie Talmadge],"[Buster Keaton, John G. Blystone]","[Jean C. Havez, Clyde Bruckman, Joseph A. Mitc..."
3,The King of Kings,movie,1927.0,155.0,"[Biography, Drama, History]",7.3,1826,[Reeka Roberts],[Cecil B. DeMille],[Jeanie Macpherson]
4,I Cover the Waterfront,movie,1933.0,80.0,"[Drama, Romance]",6.3,455,[James Hackett],[James Cruze],"[Max Miller, Wells Root, Jack Jevne]"
...,...,...,...,...,...,...,...,...,...,...
1055,UFC on ESPN,tvSeries,2019.0,180.0,[Unknown],8.1,38,[Vanessa Hanson],unknown,unknown
1056,Bozkir,tvMiniSeries,2018.0,50.0,"[Crime, Drama, Mystery]",8.2,1231,[Utku Arslan],[Bahadir Ince],"[Levent Cantek, Ali Demirel, Baris Erdogan]"
1057,Blue Story,movie,2019.0,91.0,"[Crime, Drama]",5.5,1411,[Jonathon Deering],[Rapman],[Rapman]
1058,Typewriter,tvSeries,2019.0,48.0,"[Horror, Thriller]",6.5,2895,[Sandini Dhar],[Sujoy Ghosh],"[Sujoy Ghosh, Raj Vasant, Pratim D. Gupta, Sur..."


## Feature Engineering

In [21]:
feature_df = base_drop[['title','cast_name','genres','director_name','writer_name', 'rating', 'votes']]
feature_df

Unnamed: 0,title,cast_name,genres,director_name,writer_name,rating,votes
0,The Love Expert,[Natalie Talmadge],"[Comedy, Romance]",[David Kirkland],"[John Emerson, Anita Loos]",4.9,136
1,Yes or No,[Natalie Talmadge],[Unknown],[Roy William Neill],"[Arthur F. Goodrich, Burns Mantle, Mary Murillo]",6.3,7
2,Our Hospitality,[Natalie Talmadge],"[Comedy, Romance, Thriller]","[Buster Keaton, John G. Blystone]","[Jean C. Havez, Clyde Bruckman, Joseph A. Mitc...",7.8,9621
3,The King of Kings,[Reeka Roberts],"[Biography, Drama, History]",[Cecil B. DeMille],[Jeanie Macpherson],7.3,1826
4,I Cover the Waterfront,[James Hackett],"[Drama, Romance]",[James Cruze],"[Max Miller, Wells Root, Jack Jevne]",6.3,455
...,...,...,...,...,...,...,...
1055,UFC on ESPN,[Vanessa Hanson],[Unknown],unknown,unknown,8.1,38
1056,Bozkir,[Utku Arslan],"[Crime, Drama, Mystery]",[Bahadir Ince],"[Levent Cantek, Ali Demirel, Baris Erdogan]",8.2,1231
1057,Blue Story,[Jonathon Deering],"[Crime, Drama]",[Rapman],[Rapman],5.5,1411
1058,Typewriter,[Sandini Dhar],"[Horror, Thriller]",[Sujoy Ghosh],"[Sujoy Ghosh, Raj Vasant, Pratim D. Gupta, Sur...",6.5,2895


In [22]:
def sanitize(x):
    try:
        if isinstance(x, list):
            return [i.replace(' ', '').lower() for i in x]
        else:
            return [x.replace(' ', '').lower()]
    except Exception as e:
        print(f"Error saat membersihkan data: {e}, data: {x}")
        return []
feature_df['soup'] = feature_df.apply(lambda x: ' '.join(sanitize(x['cast_name'])) + ' ' + ' '.join(sanitize(x['genres'])) + ' ' + ' '.join(sanitize(x['director_name'])) + ' ' + ' '.join(sanitize(x['writer_name'])), axis=1)
feature_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feature_df['soup'] = feature_df.apply(lambda x: ' '.join(sanitize(x['cast_name'])) + ' ' + ' '.join(sanitize(x['genres'])) + ' ' + ' '.join(sanitize(x['director_name'])) + ' ' + ' '.join(sanitize(x['writer_name'])), axis=1)


Unnamed: 0,title,cast_name,genres,director_name,writer_name,rating,votes,soup
0,The Love Expert,[Natalie Talmadge],"[Comedy, Romance]",[David Kirkland],"[John Emerson, Anita Loos]",4.9,136,natalietalmadge comedy romance davidkirkland j...
1,Yes or No,[Natalie Talmadge],[Unknown],[Roy William Neill],"[Arthur F. Goodrich, Burns Mantle, Mary Murillo]",6.3,7,natalietalmadge unknown roywilliamneill arthur...
2,Our Hospitality,[Natalie Talmadge],"[Comedy, Romance, Thriller]","[Buster Keaton, John G. Blystone]","[Jean C. Havez, Clyde Bruckman, Joseph A. Mitc...",7.8,9621,natalietalmadge comedy romance thriller buster...
3,The King of Kings,[Reeka Roberts],"[Biography, Drama, History]",[Cecil B. DeMille],[Jeanie Macpherson],7.3,1826,reekaroberts biography drama history cecilb.de...
4,I Cover the Waterfront,[James Hackett],"[Drama, Romance]",[James Cruze],"[Max Miller, Wells Root, Jack Jevne]",6.3,455,jameshackett drama romance jamescruze maxmille...
...,...,...,...,...,...,...,...,...
1055,UFC on ESPN,[Vanessa Hanson],[Unknown],unknown,unknown,8.1,38,vanessahanson unknown unknown unknown
1056,Bozkir,[Utku Arslan],"[Crime, Drama, Mystery]",[Bahadir Ince],"[Levent Cantek, Ali Demirel, Baris Erdogan]",8.2,1231,utkuarslan crime drama mystery bahadirince lev...
1057,Blue Story,[Jonathon Deering],"[Crime, Drama]",[Rapman],[Rapman],5.5,1411,jonathondeering crime drama rapman rapman
1058,Typewriter,[Sandini Dhar],"[Horror, Thriller]",[Sujoy Ghosh],"[Sujoy Ghosh, Raj Vasant, Pratim D. Gupta, Sur...",6.5,2895,sandinidhar horror thriller sujoyghosh sujoygh...


## Modeling and Evaluation

In [23]:
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(feature_df['soup'])

print(count)
print(count_matrix.shape)

CountVectorizer(stop_words='english')
(1060, 10026)


In [24]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)
print(cosine_sim)

[[1.         0.15430335 0.35355339 ... 0.         0.         0.13608276]
 [0.15430335 1.         0.10910895 ... 0.         0.         0.        ]
 [0.35355339 0.10910895 1.         ... 0.         0.08703883 0.09622504]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.         0.         0.08703883 ... 0.         1.         0.10050378]
 [0.13608276 0.         0.09622504 ... 0.         0.10050378 1.        ]]


## Recommendation Function

In [25]:
indices = pd.Series(feature_df.index, index=feature_df['title']).drop_duplicates()

def content_recommender(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return base_df.iloc[movie_indices]

In [26]:
movie_title = input("Masukkan judul film: ")
recommendations = content_recommender(movie_title)
print("\nRekomendasi film yang mirip dengan", movie_title + ":")
recommendations

Masukkan judul film: Typewriter

Rekomendasi film yang mirip dengan Typewriter:


Unnamed: 0,knownForTitles,cast_name,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes,director_name,writer_name
1030,tt8130968,[Sandini Dhar],tt8130968,movie,Badla,Badla,0,2019.0,2019.0,118.0,"Crime,Drama,Mystery",7.9,18452,[Sujoy Ghosh],"[Oriol Paulo, Sujoy Ghosh, Raj Vasant]"
246,tt0124102,[Lois Hicks],tt0124102,movie,Strangeland,Strangeland,0,1998.0,1998.0,85.0,"Horror,Thriller",5.2,6417,[John Pieplow],[Dee Snider]
833,tt2791026,[Santiago López Gómez],tt2791026,movie,City of Dead Men,City of Dead Men,0,2014.0,2014.0,87.0,"Horror,Thriller",3.8,462,[Kirk Sullivan],[Andrew Poston]
171,tt0101981,[Wai Chi Wong],tt0101981,movie,Gui gan bu,Gui gan bu,0,1991.0,1991.0,93.0,"Horror,Thriller",5.5,56,[Yeung-Wah Kam],[Pik Wah Lee]
202,tt0109723,[Greg Yolen],tt0109723,movie,Embrace of the Vampire,Embrace of the Vampire,0,1995.0,1995.0,92.0,"Horror,Thriller",4.4,5714,[Anne Goursaud],"[Halle Eaton, Nicole Coady, Rick Bitzelberger]"
665,tt1523584,[Daniel Boyle],tt1523584,video,Deadly Weekend,Deadly Weekend,0,2014.0,2014.0,82.0,"Horror,Thriller",2.8,562,[Jason Sutton],[Jason Sutton]
942,tt5165620,[Dan Frishwasser],tt5165620,movie,Another Soul,Another Soul,0,2018.0,2018.0,78.0,"Horror,Thriller",2.1,222,[Paul Chau],[Paul Chau]
971,tt6063742,[Rebecca Hickey],tt6063742,tvSeries,Urban Legends,Urban Legends,0,2016.0,2016.0,5.0,"Horror,Thriller",8.8,30,[Luke Mordue],[Luke Mordue]
518,tt0808315,[Michael Steinitz],tt0808315,movie,Dead in 3 Days,In 3 Tagen bist du tot,0,2006.0,2006.0,97.0,"Horror,Thriller",5.5,2352,[Andreas Prochaska],"[Thomas Baum, Andreas Prochaska]"
875,tt3483644,[Krystal Ellison],tt3483644,movie,Slasher,Slasher,0,2014.0,2014.0,93.0,"Horror,Mystery,Thriller",7.3,22,[Jonathan Rowan],[Jonathan Rowan]
