In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel

# Data 불러오기

In [3]:
data = pd.read_csv('./dataset/merged_data.csv')
data.head()

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
0,Animation Comedy Family,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,English,Toy Story,7.7,5415,7.545529,TomHanks TimAllen DonRickles JimVarney Wallace...,JohnLasseter,jealousy toy boy friendship friends rivalry bo...,1,3.888157
1,Adventure Fantasy Family,8844,en,When siblings Judy and Peter discover an encha...,17.015539,English Français,Jumanji,6.9,2413,6.704601,RobinWilliams JonathanHyde KirstenDunst Bradle...,JoeJohnston,board game disappearance based on children's b...,2,3.236953
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.31056,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann,robbery detective bank obsession chase shootin...,6,3.841764
3,Adventure Action Thriller,710,en,James Bond must unmask the mysterious head of ...,14.686036,English Pусский Español,GoldenEye,6.6,1194,6.338267,PierceBrosnan SeanBean IzabellaScorupco FamkeJ...,MartinCampbell,cuba falsely accused secret identity computer ...,10,3.431841
4,Drama Crime,524,en,The life of the gambling paradise – Las Vegas ...,10.137389,English,Casino,7.8,1343,7.267135,RobertDeNiro SharonStone JoePesci JamesWoods D...,MartinScorsese,poker drug abuse 1970s overdose illegal prosti...,16,3.795511


In [4]:
data.shape

(2291, 15)

In [5]:
data.isnull().sum()

genres                0
id                    0
original_language     0
overview              0
popularity            0
spoken_languages      1
title                 0
vote_average          0
vote_count            0
score                 0
cast                  1
crew                  0
keywords             25
movieId               0
rating                0
dtype: int64

In [6]:
# Replace missing values (the null check above reports them in spoken_languages,
# cast and keywords) with empty strings so every text column holds a string
# in every row before it is passed to a vectorizer.
data = data.fillna(value='')

# CountVectorizer

## exercise

In [7]:
temp_words = ['The fool doth think he is wise,',
             'but the wise man knows himself to be a fool.']

In [8]:
count_vector = CountVectorizer(ngram_range=(1, 1))

In [9]:
cv = count_vector.fit(temp_words)
cv.vocabulary_

{'the': 9,
 'fool': 3,
 'doth': 2,
 'think': 10,
 'he': 4,
 'is': 6,
 'wise': 12,
 'but': 1,
 'man': 8,
 'knows': 7,
 'himself': 5,
 'to': 11,
 'be': 0}

In [10]:
result = count_vector.transform(temp_words)
result

<2x13 sparse matrix of type '<class 'numpy.int64'>'
	with 16 stored elements in Compressed Sparse Row format>

In [11]:
result.toarray()

array([[0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1],
       [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1]], dtype=int64)

In [12]:
count_vector = CountVectorizer(ngram_range=(1, 2))

In [13]:
cv = count_vector.fit(temp_words)
cv.vocabulary_

{'the': 18,
 'fool': 6,
 'doth': 4,
 'think': 21,
 'he': 8,
 'is': 12,
 'wise': 25,
 'the fool': 19,
 'fool doth': 7,
 'doth think': 5,
 'think he': 22,
 'he is': 9,
 'is wise': 13,
 'but': 2,
 'man': 16,
 'knows': 14,
 'himself': 10,
 'to': 23,
 'be': 0,
 'but the': 3,
 'the wise': 20,
 'wise man': 26,
 'man knows': 17,
 'knows himself': 15,
 'himself to': 11,
 'to be': 24,
 'be fool': 1}

In [14]:
result = count_vector.transform(temp_words)
result

<2x27 sparse matrix of type '<class 'numpy.int64'>'
	with 30 stored elements in Compressed Sparse Row format>

In [15]:
result.toarray()

array([[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,
        1, 0, 0, 1, 0],
       [1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0,
        0, 1, 1, 1, 1]], dtype=int64)

In [16]:
result[0, 0]

0

In [17]:
result[0, 6]

1

In [18]:
result.shape

(2, 27)

In [19]:
temp_words = 'The fool doth think he is wise.'

In [20]:
count_vector = CountVectorizer(ngram_range=(1, 2))

In [21]:
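# cv = count_vector.fit(temp_words)   # raises ValueError: fit() expects an iterable of documents (e.g. a list of strings), not a single string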

## genre

In [22]:
count_vector = CountVectorizer(ngram_range=(1, 3))

In [23]:
cv_genres = count_vector.fit_transform(data['genres'])

In [24]:
cv_genres.shape

(2291, 819)

# Cosine similarity

이렇게 하면 단어를 벡터화 시켜서 저장할 수 있습니다. 

이제 각 영화의 유사도를 측정을 하면됩니다. 유사도를 측정하면 장르가 비슷한 영화가 추천되겠죠?  
이 유사도 측정은 코사인 유사도(cosine similarity)를 사용합니다.

자! 그리고 함수를 하나 만들겁니다. 이 함수의 기능은 아래와 같습니다.  
1. 코사인 유사도를 이용해 장르가 비슷한 영화를 추천
2. score를 이용해서 score가 높은 것을 기반으로 최종 추천


## exercise

In [25]:
b = np.array([[1, 0], [0, 1]])
c = np.array([[0, 2], [1, 0]])
d = np.array([[1, 1], [2, 1]])

In [26]:
cosine_similarity(b, b)

array([[1., 0.],
       [0., 1.]])

In [27]:
cosine_similarity(b, c)

array([[0., 1.],
       [1., 0.]])

In [28]:
cosine_similarity(b, d)

array([[0.70710678, 0.89442719],
       [0.70710678, 0.4472136 ]])

In [29]:
cosine_similarity(d, d)

array([[1.       , 0.9486833],
       [0.9486833, 1.       ]])

In [30]:
b = np.array([[1, 0], [0, 1], [2, 3], [1, 5]])
bb = cosine_similarity(b, b)
bb

array([[1.        , 0.        , 0.5547002 , 0.19611614],
       [0.        , 1.        , 0.83205029, 0.98058068],
       [0.5547002 , 0.83205029, 1.        , 0.9246781 ],
       [0.19611614, 0.98058068, 0.9246781 , 1.        ]])

In [31]:
# argsort yields the indices that would put `a` in ascending order,
# so a[s] is the sorted array.
a = np.array([1.5, 0.2, 4.2, 2.5])
s = np.argsort(a)

In [32]:
print(s)
print(a[s])

[1 0 3 2]
[0.2 1.5 2.5 4.2]


In [33]:
bb[:, ::2]

array([[1.        , 0.5547002 ],
       [0.        , 0.83205029],
       [0.5547002 , 1.        ],
       [0.19611614, 0.9246781 ]])

In [34]:
bb[:, ::-1]

array([[0.19611614, 0.5547002 , 0.        , 1.        ],
       [0.98058068, 0.83205029, 1.        , 0.        ],
       [0.9246781 , 1.        , 0.83205029, 0.5547002 ],
       [1.        , 0.9246781 , 0.98058068, 0.19611614]])

In [35]:
bb.argsort()

array([[1, 3, 2, 0],
       [0, 2, 3, 1],
       [0, 1, 3, 2],
       [0, 2, 1, 3]], dtype=int64)

In [36]:
bb.argsort()[:, ::-1]

array([[0, 2, 3, 1],
       [1, 3, 2, 0],
       [2, 3, 1, 0],
       [3, 1, 2, 0]], dtype=int64)

In [37]:
bb[0, 0], bb[0, 2], bb[0, 3], bb[0, 1]

(1.0, 0.5547001962252291, 0.19611613513818404, 0.0)

In [38]:
bb[1, 1], bb[1, 3], bb[1, 2], bb[1, 0]

(1.0, 0.9805806756909202, 0.8320502943378437, 0.0)

## genre

In [39]:
# Precompute the similarity ranking once: row i lists the row positions of all
# movies ordered from most to least cosine-similar to movie i (genre n-gram counts).
genre_cs = np.argsort(cosine_similarity(cv_genres, cv_genres), axis=-1)[:, ::-1]

In [40]:
genre_cs

array([[   0,  850, 2263, ..., 1012, 1013, 2290],
       [1318,  775,    1, ..., 1114, 1116,  848],
       [   2,  576, 1541, ..., 1008, 1005,    0],
       ...,
       [2288, 2050, 2130, ..., 1008, 1005,    0],
       [2289, 1278, 1039, ..., 1630, 1631,    0],
       [2290, 1451, 2239, ...,  859,  861,    0]], dtype=int64)

In [41]:
genre_cs.shape

(2291, 2291)

In [42]:
cosine_similarity(cv_genres, cv_genres)

array([[1.        , 0.16666667, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.16666667, 1.        , 0.        , ..., 0.        , 0.13608276,
        0.11785113],
       [0.        , 0.        , 1.        , ..., 0.28867513, 0.22222222,
        0.09622504],
       ...,
       [0.        , 0.        , 0.28867513, ..., 1.        , 0.19245009,
        0.08333333],
       [0.        , 0.13608276, 0.22222222, ..., 0.19245009, 1.        ,
        0.19245009],
       [0.        , 0.11785113, 0.09622504, ..., 0.08333333, 0.19245009,
        1.        ]])

In [43]:
cosine_similarity(cv_genres, cv_genres).argsort()

array([[2290, 1013, 1012, ..., 2263,  850,    0],
       [ 848, 1116, 1114, ...,    1,  775, 1318],
       [   0, 1005, 1008, ..., 1541,  576,    2],
       ...,
       [   0, 1005, 1008, ..., 2130, 2050, 2288],
       [   0, 1631, 1630, ..., 1039, 1278, 2289],
       [   0,  861,  859, ..., 2239, 1451, 2290]], dtype=int64)

In [44]:
def get_recommend_movie_list(df, genre_cs, movie_title, top=30, n_recommend=10, sort_by='score'):
    """Recommend movies similar to ``movie_title``, ranked by a quality column.

    Parameters
    ----------
    df : pandas.DataFrame
        Movie table. Must contain a ``title`` column and the ``sort_by`` column.
        Its row order must match the rows of ``genre_cs``.
    genre_cs : numpy.ndarray
        2-D integer array; row ``i`` holds row positions of ``df`` ordered from
        most to least similar to the movie at row position ``i``.
    movie_title : str
        Title to look up in ``df['title']`` (exact match).
    top : int, default 30
        Number of most-similar candidates taken per matching row.
    n_recommend : int, default 10
        Maximum number of rows returned.
    sort_by : str, default 'score'
        Column used to rank the candidates, highest first.

    Returns
    -------
    pandas.DataFrame
        Up to ``n_recommend`` rows of ``df``, excluding the queried movie(s),
        sorted by ``sort_by`` in descending order. Empty if no title matches.
    """
    # Work with row *positions*, not index labels: both genre_cs and df.iloc
    # are positional, so labels are only correct for a default RangeIndex.
    target_positions = np.flatnonzero((df['title'] == movie_title).to_numpy())

    # Pool the `top` most similar candidates of every matching row; pd.unique
    # removes duplicates while keeping the order of first appearance.
    candidates = pd.unique(genre_cs[target_positions, :top].reshape(-1))

    # Drop the queried movie(s) themselves. np.isin also works when the title
    # matches several rows, where an element-wise `!=` would not broadcast.
    candidates = candidates[~np.isin(candidates, target_positions)]

    # Rank the remaining candidates by the chosen column and keep the best ones.
    return df.iloc[candidates].sort_values(sort_by, ascending=False)[:n_recommend]

In [45]:
data[data['title'] == 'The Dark Knight Rises']

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1541,Action Crime Drama Thriller,49026,en,Following the death of District Attorney Harve...,20.58258,English,The Dark Knight Rises,7.6,9263,7.511302,ChristianBale MichaelCaine GaryOldman AnneHath...,ChristopherNolan,dc comics crime fighter terrorist secret ident...,91529,3.973056


In [46]:
genre_cs[1541, :30]

array([   2,  576, 1541,  988, 1105, 1227, 1822,   63,  611, 2192, 1619,
       1911, 1435, 1651,  238,  451, 2249, 1099,  140,  794,  919, 1985,
       1836,  902, 1032,  108, 1382,  789,  644,  322], dtype=int64)

In [47]:
cv_cs = get_recommend_movie_list(data, genre_cs, movie_title='The Dark Knight Rises')
cv_cs

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
576,Action Crime Drama Thriller,111,en,After getting a green card in exchange for ass...,11.299673,English Español,Scarface,8.0,3017,7.700463,AlPacino StevenBauer MichellePfeiffer MaryEliz...,BrianDePalma,miami corruption capitalism cuba prohibition b...,4262,3.936454
1099,Crime Drama Thriller,6977,en,"Llewelyn Moss stumbles upon dead bodies, $2 mi...",15.565484,English Español,No Country for Old Men,7.7,3083,7.443104,TommyLeeJones JavierBardem JoshBrolin WoodyHar...,JoelCoen EthanCoen,texas drug traffic hitman united states–mexico...,55820,3.997974
902,Action Crime Drama,272,en,"Driven by tragedy, billionaire Bruce Wayne ded...",28.505341,English اردو 普通话,Batman Begins,7.5,7511,7.397206,ChristianBale MichaelCaine LiamNeeson KatieHol...,ChristopherNolan,himalaya martial arts dc comics crime fighter ...,33794,3.944856
1911,Crime Drama Thriller,242582,en,"When Lou Bloom, desperate for work, muscles in...",21.277073,English,Nightcrawler,7.6,3475,7.379969,JakeGyllenhaal ReneRusso RizAhmed BillPaxton K...,DanGilroy,journalism underground tv station sociopath ho...,115569,3.917608
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.31056,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann,robbery detective bank obsession chase shootin...,6,3.841764
63,Action Crime Drama Thriller,6075,en,"A Puerto-Rican ex-con, just released from pris...",8.698509,English Español,Carlito's Way,7.7,805,6.970782,AlPacino SeanPenn PenelopeAnnMiller JohnLeguiz...,BrianDePalma,prison 1970s puerto rican release from prison ...,431,3.698139
611,Action Crime Drama Thriller,2034,en,On his first day on the job as a narcotics off...,13.664622,English Pусский Español 한국어/조선말,Training Day,7.3,1665,6.952262,DenzelWashington EthanHawke ScottGlenn TomBere...,AntoineFuqua,police brutality war on drugs drug traffic dru...,4776,3.705817
238,Crime Drama Thriller,9366,en,An FBI undercover agent infilitrates the mob a...,12.201954,日本語 English Italiano,Donnie Brasco,7.4,1175,6.91939,JohnnyDepp AlPacino MichaelMadsen BrunoKirby J...,MikeNewell,undercover colombia mafia mobster dirty cop in...,1466,3.801
789,Crime Drama Thriller,553,da,A barren soundstage is stylishly utilized to c...,9.808595,English,Dogville,7.6,619,6.783189,NicoleKidman HarrietAndersson LaurenBacall Jea...,LarsvonTrier,rape bondage refugee mountain village life bla...,7371,3.850281
322,Crime Drama Thriller,242,en,In the midst of trying to legitimize his busin...,17.185349,English Italiano Deutsch Latin,The Godfather: Part III,7.1,1589,6.782105,AlPacino DianeKeaton AndyGarcía TaliaShire Sof...,FrancisFordCoppola,italy christianity new york assassination ital...,2023,3.445889


이렇게 하면 The Dark Knight Rises와 비슷한 영화가 content based filtering 방법으로 추천이 됩니다.

The Dark Knight Rises 영화의 장르는 Action, Crime, Drama, Thriller입니다. 추천된 영화들 역시 이와 비슷한 장르 구성을 보여주고 있음을 알 수 있습니다.


# TfidfVectorizer

## exercise

In [48]:
temp_words = ['The fool doth think he is wise,',
             'but the wise man knows himself to be a fool.']

In [49]:
tfidf = TfidfVectorizer()

In [50]:
tfidf.fit(temp_words).vocabulary_

{'the': 9,
 'fool': 3,
 'doth': 2,
 'think': 10,
 'he': 4,
 'is': 6,
 'wise': 12,
 'but': 1,
 'man': 8,
 'knows': 7,
 'himself': 5,
 'to': 11,
 'be': 0}

In [51]:
result = tfidf.fit_transform(temp_words)

In [52]:
result.toarray()

array([[0.        , 0.        , 0.42567716, 0.30287281, 0.42567716,
        0.        , 0.42567716, 0.        , 0.        , 0.30287281,
        0.42567716, 0.        , 0.30287281],
       [0.36469323, 0.36469323, 0.        , 0.25948224, 0.        ,
        0.36469323, 0.        , 0.36469323, 0.36469323, 0.25948224,
        0.        , 0.36469323, 0.25948224]])

In [53]:
tfidf = TfidfVectorizer(stop_words='english')

In [54]:
tfidf.fit(temp_words).vocabulary_

{'fool': 1, 'doth': 0, 'think': 4, 'wise': 5, 'man': 3, 'knows': 2}

In [55]:
result = tfidf.fit_transform(temp_words)

In [56]:
result.toarray()

array([[0.57615236, 0.40993715, 0.        , 0.        , 0.57615236,
        0.40993715],
       [0.        , 0.40993715, 0.57615236, 0.57615236, 0.        ,
        0.40993715]])

## genre

In [57]:
tfidf_genres = tfidf.fit_transform(data['genres'])

In [58]:
tfidf_genres

<2291x19 sparse matrix of type '<class 'numpy.float64'>'
	with 6305 stored elements in Compressed Sparse Row format>

In [59]:
tfidf_genres.shape

(2291, 19)

In [60]:
# Precompute the similarity ranking once: row i lists the row positions of all
# movies ordered from most to least cosine-similar to movie i (genre TF-IDF).
genre_cs = np.argsort(cosine_similarity(tfidf_genres, tfidf_genres), axis=-1)[:, ::-1]

In [61]:
genre_cs

array([[   0, 2263,  520, ..., 1160, 2005, 2290],
       [1145, 1255, 2026, ..., 1187, 1189,  922],
       [1227, 1822, 1657, ..., 1008, 1005,    0],
       ...,
       [2288, 2162, 2050, ..., 1008, 1005,    0],
       [1953,  807,  910, ...,  791, 1694,    0],
       [2290, 1431,  414, ..., 1863,  947,    0]], dtype=int64)

In [62]:
genre_cs.shape

(2291, 2291)

In [63]:
tf_cs = get_recommend_movie_list(data, genre_cs, movie_title='The Dark Knight Rises')
tf_cs

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1135,Drama Action Crime Thriller,155,en,Batman raises the stakes in his war on crime. ...,123.167259,English 普通话,The Dark Knight,8.3,12269,8.208376,ChristianBale MichaelCaine HeathLedger AaronEc...,ChristopherNolan,dc comics crime fighter secret identity scarec...,58559,4.182071
576,Action Crime Drama Thriller,111,en,After getting a green card in exchange for ass...,11.299673,English Español,Scarface,8.0,3017,7.700463,AlPacino StevenBauer MichellePfeiffer MaryEliz...,BrianDePalma,miami corruption capitalism cuba prohibition b...,4262,3.936454
793,Action Crime Thriller,393,en,The Bride unwaveringly continues on her roarin...,21.533072,English 普通话 Español 广州话 / 廣州話,Kill Bill: Vol. 2,7.7,4061,7.498999,UmaThurman DavidCarradine DarylHannah MichaelM...,QuentinTarantino,brother brother relationship swordplay katana ...,7438,3.83155
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.31056,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann,robbery detective bank obsession chase shootin...,6,3.841764
1485,Drama Action Thriller Crime,64690,en,A Hollywood stunt performer who moonlights as ...,15.388875,English,Drive,7.4,3832,7.218729,RyanGosling CareyMulligan BryanCranston Albert...,NicolasWindingRefn,stuntman blood splatter independent film polic...,88129,3.81472
63,Action Crime Drama Thriller,6075,en,"A Puerto-Rican ex-con, just released from pris...",8.698509,English Español,Carlito's Way,7.7,805,6.970782,AlPacino SeanPenn PenelopeAnnMiller JohnLeguiz...,BrianDePalma,prison 1970s puerto rican release from prison ...,431,3.698139
611,Action Crime Drama Thriller,2034,en,On his first day on the job as a narcotics off...,13.664622,English Pусский Español 한국어/조선말,Training Day,7.3,1665,6.952262,DenzelWashington EthanHawke ScottGlenn TomBere...,AntoineFuqua,police brutality war on drugs drug traffic dru...,4776,3.705817
796,Action Drama Thriller Crime,9509,en,Jaded ex-CIA operative John Creasy reluctantly...,9.635691,English Español,Man on Fire,7.3,1583,6.938125,DenzelWashington DakotaFanning MarcAnthony Rad...,TonyScott,mexico cia kidnapping diary bible bodyguard st...,7445,3.806181
195,Drama Action Thriller Crime,832,de,"In this classic German thriller, Hans Beckert,...",12.752421,Deutsch,M,8.0,465,6.850165,PeterLorre EllenWidmann IngeLandgut OttoWernic...,FritzLang,germany berlin underworld child murderer detec...,1260,4.163554
1227,Action Crime Drama Thriller,13804,en,"When a crime brings them back to L.A., fugitiv...",1.498902,English,Fast & Furious,6.5,2426,6.366188,VinDiesel PaulWalker MichelleRodriguez Jordana...,JustinLin,gambling brother sister relationship ex-lover ...,67923,3.216334


In [64]:
sorted(cv_cs['id']) == sorted(tf_cs['id'])

False

# linear_kernel

## exercise

In [65]:
b = np.array([[1, 0], [0, 1]])
c = np.array([[0, 2], [1, 0]])
d = np.array([[1, 1], [2, 1]])

In [66]:
linear_kernel(b, b)

array([[1., 0.],
       [0., 1.]])

In [67]:
linear_kernel(b, c)

array([[0., 1.],
       [2., 0.]])

In [68]:
linear_kernel(b, d)

array([[1., 2.],
       [1., 1.]])

## genre

In [69]:
# Same ranking as the cosine-similarity version, computed with a plain dot
# product: TfidfVectorizer L2-normalises each row by default (norm='l2'),
# so the linear kernel of TF-IDF rows equals their cosine similarity.
genre_cs = np.argsort(linear_kernel(tfidf_genres, tfidf_genres), axis=-1)[:, ::-1]

In [70]:
genre_cs

array([[   0, 2263,  520, ..., 1160, 2005, 2290],
       [1145, 1255, 2026, ..., 1187, 1189,  922],
       [1227, 1822, 1657, ..., 1008, 1005,    0],
       ...,
       [2288, 2162, 2050, ..., 1008, 1005,    0],
       [1953,  807,  910, ...,  791, 1694,    0],
       [2290, 1431,  414, ..., 1863,  947,    0]], dtype=int64)

In [71]:
genre_cs.shape

(2291, 2291)

In [72]:
tf_lk = get_recommend_movie_list(data, genre_cs, movie_title='The Dark Knight Rises')
tf_lk

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1135,Drama Action Crime Thriller,155,en,Batman raises the stakes in his war on crime. ...,123.167259,English 普通话,The Dark Knight,8.3,12269,8.208376,ChristianBale MichaelCaine HeathLedger AaronEc...,ChristopherNolan,dc comics crime fighter secret identity scarec...,58559,4.182071
576,Action Crime Drama Thriller,111,en,After getting a green card in exchange for ass...,11.299673,English Español,Scarface,8.0,3017,7.700463,AlPacino StevenBauer MichellePfeiffer MaryEliz...,BrianDePalma,miami corruption capitalism cuba prohibition b...,4262,3.936454
793,Action Crime Thriller,393,en,The Bride unwaveringly continues on her roarin...,21.533072,English 普通话 Español 广州话 / 廣州話,Kill Bill: Vol. 2,7.7,4061,7.498999,UmaThurman DavidCarradine DarylHannah MichaelM...,QuentinTarantino,brother brother relationship swordplay katana ...,7438,3.83155
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.31056,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann,robbery detective bank obsession chase shootin...,6,3.841764
1485,Drama Action Thriller Crime,64690,en,A Hollywood stunt performer who moonlights as ...,15.388875,English,Drive,7.4,3832,7.218729,RyanGosling CareyMulligan BryanCranston Albert...,NicolasWindingRefn,stuntman blood splatter independent film polic...,88129,3.81472
63,Action Crime Drama Thriller,6075,en,"A Puerto-Rican ex-con, just released from pris...",8.698509,English Español,Carlito's Way,7.7,805,6.970782,AlPacino SeanPenn PenelopeAnnMiller JohnLeguiz...,BrianDePalma,prison 1970s puerto rican release from prison ...,431,3.698139
611,Action Crime Drama Thriller,2034,en,On his first day on the job as a narcotics off...,13.664622,English Pусский Español 한국어/조선말,Training Day,7.3,1665,6.952262,DenzelWashington EthanHawke ScottGlenn TomBere...,AntoineFuqua,police brutality war on drugs drug traffic dru...,4776,3.705817
796,Action Drama Thriller Crime,9509,en,Jaded ex-CIA operative John Creasy reluctantly...,9.635691,English Español,Man on Fire,7.3,1583,6.938125,DenzelWashington DakotaFanning MarcAnthony Rad...,TonyScott,mexico cia kidnapping diary bible bodyguard st...,7445,3.806181
195,Drama Action Thriller Crime,832,de,"In this classic German thriller, Hans Beckert,...",12.752421,Deutsch,M,8.0,465,6.850165,PeterLorre EllenWidmann IngeLandgut OttoWernic...,FritzLang,germany berlin underworld child murderer detec...,1260,4.163554
1227,Action Crime Drama Thriller,13804,en,"When a crime brings them back to L.A., fugitiv...",1.498902,English,Fast & Furious,6.5,2426,6.366188,VinDiesel PaulWalker MichelleRodriguez Jordana...,JustinLin,gambling brother sister relationship ex-lover ...,67923,3.216334


In [73]:
sorted(tf_cs['id']) == sorted(tf_lk['id'])

True

In [74]:
data[data['title'] == 'The Avengers']

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1504,ScienceFiction Action Adventure,24428,en,When an unexpected enemy emerges and threatens...,89.887648,English,The Avengers,7.4,12000,7.337808,RobertDowneyJr. ChrisEvans MarkRuffalo ChrisHe...,JossWhedon,new york shield marvel comic superhero based o...,89745,3.816396


In [75]:
get_recommend_movie_list(data, genre_cs, movie_title='The Avengers')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
159,Adventure Action ScienceFiction,1891,en,"The epic saga continues as Luke Skywalker, in ...",19.470959,English,The Empire Strikes Back,8.2,5998,8.025793,MarkHamill HarrisonFord CarrieFisher BillyDeeW...,IrvinKershner,rebel android asteroid space battle snow storm...,1196,4.142536
1870,Action ScienceFiction Adventure,118340,en,"Light years from Earth, 26 years after being a...",53.291601,English,Guardians of the Galaxy,7.9,10014,7.805216,ChrisPratt ZoeSaldana DaveBautista VinDiesel B...,JamesGunn,marvel comic spaceship space outer space orpha...,112852,3.927768
171,Adventure Action ScienceFiction,1892,en,As Rebel leaders map their strategy for an all...,14.586087,English,Return of the Jedi,7.9,4763,7.709447,MarkHamill HarrisonFord CarrieFisher BillyDeeW...,RichardMarquand,rebel brother sister relationship emperor spac...,1210,3.989612
1781,Adventure Action ScienceFiction,101299,en,Katniss Everdeen has returned home safe after ...,25.309139,English,The Hunger Games: Catching Fire,7.4,6656,7.290931,JenniferLawrence JoshHutcherson LiamHemsworth ...,FrancisLawrence,competition based on novel mentor secret facto...,106487,3.544752
2237,Action Adventure ScienceFiction,330459,en,A rogue band of resistance fighters unite for ...,36.567575,English,Rogue One: A Star Wars Story,7.4,5111,7.260541,FelicityJones DiegoLuna BenMendelsohn DonnieYe...,GarethEdwards,rebel space battle space travel war prequel sp...,166528,3.877362
1235,ScienceFiction Action Adventure,13475,en,The fate of the galaxy rests in the hands of b...,15.362632,English,Star Trek,7.4,4574,7.245587,ChrisPine ZacharyQuinto LeonardNimoy EricBana ...,J.J.Abrams,spacecraft teleportation space mission parachu...,68358,3.872353
1700,Action Adventure ScienceFiction,54138,en,When the crew of the Enterprise is called back...,15.78129,English,Star Trek Into Darkness,7.4,4479,7.242601,ChrisPine ZacharyQuinto ZoeSaldana KarlUrban S...,J.J.Abrams,spacecraft friendship sequel futuristic space ...,102445,3.765821
1472,Action ScienceFiction Adventure,49538,en,Before Charles Xavier and Erik Lensherr took t...,0.738633,Deutsch English Español Français Pусский,X-Men: First Class,7.1,5252,6.986897,JamesMcAvoy MichaelFassbender JenniferLawrence...,MatthewVaughn,cia mutant mine marvel comic superhero based o...,87232,3.774063
897,ScienceFiction Adventure Action,1895,en,"Years after the onset of the Clone Wars, the n...",13.165421,English,Star Wars: Episode III - Revenge of the Sith,7.1,4200,6.961221,EwanMcGregor NataliePortman HaydenChristensen ...,GeorgeLucas,showdown death star vision cult figure hatred ...,33493,3.414713
432,Action Adventure ScienceFiction,861,en,Construction worker Douglas Quaid discovers a ...,12.954444,English,Total Recall,7.1,1745,6.804864,ArnoldSchwarzenegger SharonStone RachelTicotin...,PaulVerhoeven,oxygen falsely accused resistance mars double ...,2916,3.59154


## overview

In [76]:
tfidf_overview = tfidf.fit_transform(data['overview'])

In [77]:
tfidf_overview.shape

(2291, 13570)

In [78]:
# Precompute the similarity ranking once: row i lists the row positions of all
# movies ordered from most to least cosine-similar to movie i (overview TF-IDF).
overview_cs = np.argsort(cosine_similarity(tfidf_overview, tfidf_overview), axis=-1)[:, ::-1]

In [79]:
overview_cs

array([[   0, 1358,  460, ..., 1484, 1485, 1145],
       [   1, 2024, 1636, ..., 1091, 1092,  954],
       [   2, 1388,  930, ..., 1443, 1444,    0],
       ...,
       [2288,  866, 1230, ..., 1433, 1434,    0],
       [2289, 1248, 1478, ..., 1451, 1452,    0],
       [2290, 1412,  250, ..., 1094, 1095,    0]], dtype=int64)

In [80]:
overview_cs.shape

(2291, 2291)

In [81]:
data[data['title'] == 'The Dark Knight Rises']['overview']

1541    Following the death of District Attorney Harve...
Name: overview, dtype: object

In [82]:
get_recommend_movie_list(data, overview_cs, movie_title='The Dark Knight Rises')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1135,Drama Action Crime Thriller,155,en,Batman raises the stakes in his war on crime. ...,123.167259,English 普通话,The Dark Knight,8.3,12269,8.208376,ChristianBale MichaelCaine HeathLedger AaronEc...,ChristopherNolan,dc comics crime fighter secret identity scarec...,58559,4.182071
902,Action Crime Drama,272,en,"Driven by tragedy, billionaire Bruce Wayne ded...",28.505341,English اردو 普通话,Batman Begins,7.5,7511,7.397206,ChristianBale MichaelCaine LiamNeeson KatieHol...,ChristopherNolan,himalaya martial arts dc comics crime fighter ...,33794,3.944856
1856,Fantasy Action Adventure Animation Comedy Family,82702,en,The thrilling second chapter of the epic How T...,12.256689,English,How to Train Your Dragon 2,7.6,3163,7.360884,JayBaruchel GerardButler KristenWiig JonahHill...,DeanDeBlois,father son relationship wife husband relations...,112175,3.740234
1896,War Action,190859,en,U.S. Navy SEAL Chris Kyle takes his sole missi...,19.228561,English,American Sniper,7.4,4600,7.246384,BradleyCooper SiennaMiller KyleGallner ColeKon...,ClintEastwood,sniper biography iraq navy seal u.s. soldier,114662,3.474305
316,Drama ScienceFiction,19,de,In a futuristic city sharply divided between t...,14.487867,No Language,Metropolis,8.0,666,7.060271,BrigitteHelm AlfredAbel GustavFröhlich RudolfK...,FritzLang,man vs machine underground world inventor metr...,2010,3.974882
1282,Drama Crime Thriller,22803,en,A frustrated man decides to take justice into ...,16.639047,English,Law Abiding Citizen,7.2,1522,6.849028,JamieFoxx GerardButler ColmMeaney BruceMcGill ...,F.GaryGray,tattoo secret passage baseball bat deal explos...,71838,3.674976
2252,Action Animation Comedy Family Fantasy,324849,en,In the irreverent spirit of fun that made “The...,17.070748,English,The Lego Batman Movie,7.2,1473,6.84001,WillArnett ZachGalifianakis MichaelCera Rosari...,ChrisMcKay,superhero based on comic based on toy spin off...,167746,3.574866
86,Fantasy Action,268,en,The Dark Knight of Gotham City begins his war ...,19.10673,English Français,Batman,7.0,2145,6.767467,JackNicholson MichaelKeaton KimBasinger Michae...,TimBurton,double life dc comics dual identity chemical c...,592,3.384822
1200,Drama Romance,8055,en,"The story of Michael Berg, a German lawyer who...",7.931732,English Deutsch ελληνικά Latin,The Reader,7.2,835,6.659022,KateWinslet RalphFiennes DavidKross JeanetteHa...,StephenDaldry,germany war crimes trial female prisoner teena...,64622,3.781915
484,Drama Thriller History,820,en,New Orleans District Attorney Jim Garrison dis...,13.813146,English Español,JFK,7.5,513,6.637591,KevinCostner TommyLeeJones GaryOldman KevinBac...,OliverStone,assassination cia homophobia new orleans vietn...,3386,3.651879


In [83]:
data[data['title'] == 'The Avengers']['overview']

1504    When an unexpected enemy emerges and threatens...
Name: overview, dtype: object

In [84]:
get_recommend_movie_list(data, overview_cs, movie_title='The Avengers')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1944,Crime Comedy Action Adventure,207703,en,The story of a super-secret spy organization t...,28.224212,English,Kingsman: The Secret Service,7.6,6069,7.467738,ColinFirth SamuelL.Jackson MarkStrong TaronEge...,MatthewVaughn,spy great britain secret organization secret a...,119145,3.80529
1958,Action Adventure ScienceFiction,99861,en,When Tony Stark tries to jumpstart a dormant p...,37.37942,English,Avengers: Age of Ultron,7.3,6908,7.200585,RobertDowneyJr. ChrisHemsworth MarkRuffalo Chr...,JossWhedon,marvel comic sequel superhero based on comic v...,122892,3.59498
1707,Thriller Crime,75656,en,An FBI agent and an Interpol detective track a...,17.852022,Français English,Now You See Me,7.3,5635,7.179733,JesseEisenberg MarkRuffalo WoodyHarrelson Méla...,LouisLeterrier,paris bank secret fbi vault magic new orleans ...,102903,3.664363
395,Comedy Crime,100,en,A card sharp and his unwillingly-enlisted frie...,4.60786,English,"Lock, Stock and Two Smoking Barrels",7.5,1671,7.112018,JasonFlemyng DexterFletcher NickMoran JasonSta...,GuyRitchie,ambush alcohol shotgun tea joint machismo cock...,2542,4.01081
2108,Comedy Drama,318846,en,The men who made millions from a global econom...,13.143061,English,The Big Short,7.3,2679,7.065531,ChristianBale SteveCarell RyanGosling BradPitt...,AdamMcKay,bank fraud biography wall street finances base...,148626,4.005573
1387,Drama,37799,en,"On a fall night in 2003, Harvard undergrad and...",16.972995,English,The Social Network,7.1,3492,6.936194,JesseEisenberg AndrewGarfield JustinTimberlake...,DavidFincher,hacker hacking creator frat party social netwo...,80463,3.77374
1852,Action Adventure Thriller,177677,en,Ethan and team take on their most impossible m...,17.836882,Deutsch English svenska,Mission: Impossible - Rogue Nation,7.1,3274,6.926564,TomCruise RebeccaFerguson SimonPegg JeremyRenn...,ChristopherMcQuarrie,london england spy austria villain sequel miss...,111781,3.60946
2016,Action Comedy ScienceFiction Fantasy,251516,en,"During an unfortunate series of events, a frie...",7.741143,English Deutsch svenska,Kung Fury,7.6,762,6.880852,DavidSandberg JormaTaccone LeopoldNilsson Andr...,DavidSandberg,video game martial arts kung fu hacker nazis a...,134170,3.601395
2018,Action Comedy Crime,238713,en,A desk-bound CIA analyst volunteers to go unde...,13.257467,Deutsch English Français Italiano,Spy,6.9,2590,6.716038,MelissaMcCarthy JudeLaw RoseByrne JasonStatham...,PaulFeig,spy cia undercover arms dealer disaster female...,134368,3.567259
234,Crime Horror Mystery,4232,en,A killer known as Ghostface begins killing off...,13.326443,English,Scream,7.0,1520,6.69309,DavidArquette NeveCampbell CourteneyCox Matthe...,WesCraven,halloween gore serial killer slasher tabloid n...,1407,3.269929


## keywords

In [85]:
tfidf_keywords = tfidf.fit_transform(data['keywords'])

In [86]:
tfidf_keywords.shape

(2291, 5649)

In [87]:
# Precompute the keyword-based ranking once: row i holds the indices of all
# movies ordered from most to least cosine-similar to movie i.
# (What is stored is the index ranking, not the similarity values themselves.)
keywords_cs = np.argsort(cosine_similarity(tfidf_keywords, tfidf_keywords), axis=1)[:, ::-1]

In [88]:
# Inspect the ranking matrix: each row lists movie indices in descending
# order of keyword similarity to that row's movie.
keywords_cs

array([[   0,  295, 1358, ..., 2107, 1343, 2290],
       [   1, 1281, 1675, ..., 1272, 1273, 1011],
       [   2, 2080, 2273, ..., 1429, 1432,    0],
       ...,
       [2288,  581, 1424, ..., 1495, 1496,    0],
       [2289, 1069,  893, ..., 1536, 1537,    0],
       [2290, 1813, 1849, ..., 1385, 1387,    0]], dtype=int64)

In [89]:
# The ranking is square: one row and one column per movie.
keywords_cs.shape

(2291, 2291)

In [90]:
# Show the keywords of the query movie so the recommendations below can be
# judged against them (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Dark Knight Rises', 'keywords']

1541    dc comics crime fighter terrorist secret ident...
Name: keywords, dtype: object

In [91]:
# Recommendations for 'The Dark Knight Rises' driven by the keyword ranking.
# get_recommend_movie_list is defined earlier in the notebook; how it selects
# and orders the returned rows is not visible from this cell.
get_recommend_movie_list(data, keywords_cs, movie_title='The Dark Knight Rises')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1135,Drama Action Crime Thriller,155,en,Batman raises the stakes in his war on crime. ...,123.167259,English 普通话,The Dark Knight,8.3,12269,8.208376,ChristianBale MichaelCaine HeathLedger AaronEc...,ChristopherNolan,dc comics crime fighter secret identity scarec...,58559,4.182071
902,Action Crime Drama,272,en,"Driven by tragedy, billionaire Bruce Wayne ded...",28.505341,English اردو 普通话,Batman Begins,7.5,7511,7.397206,ChristianBale MichaelCaine LiamNeeson KatieHol...,ChristopherNolan,himalaya martial arts dc comics crime fighter ...,33794,3.944856
354,Fantasy Drama Romance,162,en,A small suburban town receives a visit from a ...,17.612244,English,Edward Scissorhands,7.5,3731,7.303913,JohnnyDepp WinonaRyder DianneWiest AlanArkin A...,TimBurton,underdog love at first sight hairdresser small...,2291,3.722712
845,Action Adventure Animation Family,9806,en,Bob Parr has given up his superhero days to lo...,22.220214,Français English,The Incredibles,7.4,5290,7.264902,CraigT.Nelson HollyHunter SamuelL.Jackson Jaso...,BradBird,secret identity secret hero island wretch supe...,8961,3.857842
2091,Action Adventure Fantasy,297762,en,An Amazon princess comes to the world of Man t...,294.337037,Deutsch English,Wonder Woman,7.2,5025,7.074244,GalGadot ChrisPine RobinWright DannyHuston Dav...,PattyJenkins,dc comics hero greek mythology island world wa...,143355,3.747344
1339,Action Crime,23483,en,Dave Lizewski is an unnoticed high school stud...,17.26045,English,Kick-Ass,7.1,4747,6.975873,AaronTaylor-Johnson ChloëGraceMoretz Christoph...,MatthewVaughn,crime fighter secret identity comic book super...,76251,3.650602
195,Drama Action Thriller Crime,832,de,"In this classic German thriller, Hans Beckert,...",12.752421,Deutsch,M,8.0,465,6.850165,PeterLorre EllenWidmann IngeLandgut OttoWernic...,FritzLang,germany berlin underworld child murderer detec...,1260,4.163554
1160,Action Mystery ScienceFiction,13183,en,In a gritty and alternate 1985 the glory days ...,16.551503,English,Watchmen,7.0,2892,6.819693,MalinÅkerman BillyCrudup MatthewGoode JackieEa...,ZackSnyder,dc comics secret identity mass murder retireme...,60684,3.712461
86,Fantasy Action,268,en,The Dark Knight of Gotham City begins his war ...,19.10673,English Français,Batman,7.0,2145,6.767467,JackNicholson MichaelKeaton KimBasinger Michae...,TimBurton,double life dc comics dual identity chemical c...,592,3.384822
2150,Action Drama Comedy Thriller ScienceFiction,364433,it,Enzo Ceccotti comes into contact with a radioa...,10.649494,Italiano,They Call Me Jeeg Robot,7.5,575,6.690583,ClaudioSantamaria IleniaPastorelli LucaMarinel...,GabrieleMainetti,superhero,155078,3.732143


In [92]:
# Show the keywords of the second query movie for comparison with the
# recommendations below (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Avengers', 'keywords']

1504    new york shield marvel comic superhero based o...
Name: keywords, dtype: object

In [93]:
# Recommendations for 'The Avengers' driven by the keyword ranking.
# get_recommend_movie_list is defined earlier in the notebook.
get_recommend_movie_list(data, keywords_cs, movie_title='The Avengers')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1870,Action ScienceFiction Adventure,118340,en,"Light years from Earth, 26 years after being a...",53.291601,English,Guardians of the Galaxy,7.9,10014,7.805216,ChrisPratt ZoeSaldana DaveBautista VinDiesel B...,JamesGunn,marvel comic spaceship space outer space orpha...,112852,3.927768
1944,Crime Comedy Action Adventure,207703,en,The story of a super-secret spy organization t...,28.224212,English,Kingsman: The Secret Service,7.6,6069,7.467738,ColinFirth SamuelL.Jackson MarkStrong TaronEge...,MatthewVaughn,spy great britain secret organization secret a...,119145,3.80529
1824,Action Adventure ScienceFiction,100402,en,After the cataclysmic events in New York with ...,18.717704,English,Captain America: The Winter Soldier,7.6,5881,7.4638,ChrisEvans SamuelL.Jackson ScarlettJohansson R...,AnthonyRusso JoeRusso,washington d.c. future shield marvel comic sup...,110102,3.720278
1963,Action Adventure Comedy ScienceFiction,283995,en,The Guardians must fight to keep their newfoun...,185.330992,English,Guardians of the Galaxy Vol. 2,7.6,4858,7.437471,ChrisPratt ZoeSaldana DaveBautista VinDiesel B...,JamesGunn,sequel superhero based on comic misfit space o...,122918,3.890255
1841,Action Adventure Fantasy ScienceFiction,127585,en,The ultimate X-Men ensemble fights a war for t...,26.058586,English,X-Men: Days of Future Past,7.5,6155,7.376051,HughJackman JamesMcAvoy MichaelFassbender Jenn...,BryanSinger,1970s mutant time travel marvel comic based on...,111362,3.775989
1962,Action Adventure Comedy,293660,en,Deadpool tells the origin story of former Spec...,187.860492,English,Deadpool,7.4,11444,7.334896,RyanReynolds MorenaBaccarin EdSkrein T.J.Mille...,TimMiller,anti hero mercenary marvel comic superhero bas...,122904,3.859518
1141,Action ScienceFiction Adventure,1726,en,"After being held captive in an Afghan cave, bi...",22.073099,English فارسی اردو العربية,Iron Man,7.4,8951,7.317602,RobertDowneyJr. TerrenceHoward JeffBridges Sha...,JonFavreau,middle east arms dealer malibu marvel comic su...,59315,3.85521
1958,Action Adventure ScienceFiction,99861,en,When Tony Stark tries to jumpstart a dormant p...,37.37942,English,Avengers: Age of Ultron,7.3,6908,7.200585,RobertDowneyJr. ChrisHemsworth MarkRuffalo Chr...,JossWhedon,marvel comic sequel superhero based on comic v...,122892,3.59498
1964,Adventure Action ScienceFiction,271110,en,"Following the events of Age of Ultron, the col...",145.882135,Română English Deutsch Pусский,Captain America: Civil War,7.1,7462,7.018554,ChrisEvans RobertDowneyJr. ScarlettJohansson S...,AnthonyRusso JoeRusso,civil war war marvel comic sequel superhero ba...,122920,3.724678
1965,Action Adventure Fantasy ScienceFiction,284052,en,"After his career is destroyed, a brilliant but...",43.847654,English,Doctor Strange,7.1,5880,6.998147,BenedictCumberbatch ChiwetelEjiofor RachelMcAd...,ScottDerrickson,magic marvel comic superhero based on comic so...,122922,3.742311


## cast

In [94]:
# Refit the shared `tfidf` vectoriser on the 'cast' column (this replaces
# whatever it learned from the previous column) and keep the resulting matrix.
# Cast names appear to be pre-joined into single tokens (e.g. 'TomHanks').
tfidf_cast = tfidf.fit_transform(data['cast'])

In [95]:
# Sanity check: the row count should match the number of movies in `data`;
# the column count is presumably the size of the learned cast vocabulary.
tfidf_cast.shape

(2291, 42484)

In [96]:
# Precompute the cast-based ranking once: row i holds the indices of all
# movies ordered from most to least cosine-similar to movie i.
# (What is stored is the index ranking, not the similarity values themselves.)
cast_cs = np.argsort(cosine_similarity(tfidf_cast, tfidf_cast), axis=1)[:, ::-1]

In [97]:
# Inspect the ranking matrix: each row lists movie indices in descending
# order of cast similarity to that row's movie.
cast_cs

array([[   0,  460, 1358, ..., 1525, 1526, 1145],
       [   1, 1233,  240, ..., 1527, 1528,    0],
       [   2, 2058,  634, ..., 1514, 1516,    0],
       ...,
       [2288, 1365, 1837, ..., 1511, 1512,    0],
       [2289, 1478, 1859, ..., 1453, 1454,    0],
       [2290, 1618, 1554, ..., 1508, 1509,    0]], dtype=int64)

In [98]:
# The ranking is square: one row and one column per movie.
cast_cs.shape

(2291, 2291)

In [99]:
# Show the cast of the query movie so the recommendations below can be
# judged against it (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Dark Knight Rises', 'cast']

1541    ChristianBale MichaelCaine GaryOldman AnneHath...
Name: cast, dtype: object

In [100]:
# Recommendations for 'The Dark Knight Rises' driven by the cast ranking.
# get_recommend_movie_list is defined earlier in the notebook.
get_recommend_movie_list(data, cast_cs, movie_title='The Dark Knight Rises')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1135,Drama Action Crime Thriller,155,en,Batman raises the stakes in his war on crime. ...,123.167259,English 普通话,The Dark Knight,8.3,12269,8.208376,ChristianBale MichaelCaine HeathLedger AaronEc...,ChristopherNolan,dc comics crime fighter secret identity scarec...,58559,4.182071
1365,Action Thriller ScienceFiction Mystery Adventure,27205,en,"Cobb, a skilled thief who commits corporate es...",29.108149,English,Inception,8.1,14075,8.025763,LeonardoDiCaprio JosephGordon-Levitt EllenPage...,ChristopherNolan,loss of lover dream kidnapping sleep subconsci...,79132,4.161756
1816,Adventure Drama ScienceFiction,157336,en,Interstellar chronicles the adventures of a gr...,32.213481,English,Interstellar,8.1,11187,8.007314,MatthewMcConaughey JessicaChastain AnneHathawa...,ChristopherNolan,saving the world artificial intelligence fathe...,109487,4.086824
571,Mystery Thriller,77,en,Suffering short-term memory loss after a head ...,15.450789,English,Memento,8.1,4168,7.865949,GuyPearce Carrie-AnneMoss JoePantoliano MarkBo...,ChristopherNolan,individual insulin tattoo waitress amnesia mot...,4226,4.157078
1014,Drama Mystery Thriller,1124,en,A mysterious story of two magicians whose inte...,16.94556,English,The Prestige,8.0,4510,7.790918,HughJackman ChristianBale MichaelCaine Scarlet...,ChristopherNolan,competition secret obsession magic dying and d...,48780,4.078295
1787,Crime Drama Comedy,106646,en,A New York stockbroker refuses to cooperate in...,16.382422,Français English,The Wolf of Wall Street,7.9,6768,7.762496,LeonardoDiCaprio JonahHill MargotRobbie KyleCh...,MartinScorsese,corruption sex sexuality bank humor biography ...,106782,3.858644
902,Action Crime Drama,272,en,"Driven by tragedy, billionaire Bruce Wayne ded...",28.505341,English اردو 普通话,Batman Begins,7.5,7511,7.397206,ChristianBale MichaelCaine LiamNeeson KatieHol...,ChristopherNolan,himalaya martial arts dc comics crime fighter ...,33794,3.944856
1504,ScienceFiction Action Adventure,24428,en,When an unexpected enemy emerges and threatens...,89.887648,English,The Avengers,7.4,12000,7.337808,RobertDowneyJr. ChrisEvans MarkRuffalo ChrisHe...,JossWhedon,new york shield marvel comic superhero based o...,89745,3.816396
2288,Action Drama History Thriller War,374720,en,The miraculous evacuation of Allied soldiers f...,30.938854,English Français Deutsch,Dunkirk,7.5,2712,7.2404,FionnWhitehead TomGlynn-Carney JackLowden Harr...,ChristopherNolan,france beach world war ii evacuation german pi...,174055,4.113475
2233,Adventure Animation Family,277834,en,"In Ancient Polynesia, when a terrible curse in...",9.143457,English,Moana,7.3,3471,7.113085,Auli'iCravalho DwayneJohnson RachelHouse Temue...,RonClements JohnMusker,ocean sailboat mythology island musical animat...,166461,3.847451


In [101]:
# Show the cast of the second query movie for comparison with the
# recommendations below (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Avengers', 'cast']

1504    RobertDowneyJr. ChrisEvans MarkRuffalo ChrisHe...
Name: cast, dtype: object

In [102]:
# Recommendations for 'The Avengers' driven by the cast ranking.
# get_recommend_movie_list is defined earlier in the notebook.
get_recommend_movie_list(data, cast_cs, movie_title='The Avengers')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
173,Horror Action Thriller ScienceFiction,348,en,"During its return to the earth, commercial spa...",23.37742,English Español,Alien,7.9,4564,7.70186,TomSkerritt SigourneyWeaver VeronicaCartwright...,RidleyScott,android countdown space marine space suit behe...,1214,4.04735
1541,Action Crime Drama Thriller,49026,en,Following the death of District Attorney Harve...,20.58258,English,The Dark Knight Rises,7.6,9263,7.511302,ChristianBale MichaelCaine GaryOldman AnneHath...,ChristopherNolan,dc comics crime fighter terrorist secret ident...,91529,3.973056
1824,Action Adventure ScienceFiction,100402,en,After the cataclysmic events in New York with ...,18.717704,English,Captain America: The Winter Soldier,7.6,5881,7.4638,ChrisEvans SamuelL.Jackson ScarlettJohansson R...,AnthonyRusso JoeRusso,washington d.c. future shield marvel comic sup...,110102,3.720278
1141,Action ScienceFiction Adventure,1726,en,"After being held captive in an Afghan cave, bi...",22.073099,English فارسی اردو العربية,Iron Man,7.4,8951,7.317602,RobertDowneyJr. TerrenceHoward JeffBridges Sha...,JonFavreau,middle east arms dealer malibu marvel comic su...,59315,3.85521
1958,Action Adventure ScienceFiction,99861,en,When Tony Stark tries to jumpstart a dormant p...,37.37942,English,Avengers: Age of Ultron,7.3,6908,7.200585,RobertDowneyJr. ChrisHemsworth MarkRuffalo Chr...,JossWhedon,marvel comic sequel superhero based on comic v...,122892,3.59498
1964,Adventure Action ScienceFiction,271110,en,"Following the events of Age of Ultron, the col...",145.882135,Română English Deutsch Pусский,Captain America: Civil War,7.1,7462,7.018554,ChrisEvans RobertDowneyJr. ScarlettJohansson S...,AnthonyRusso JoeRusso,civil war war marvel comic sequel superhero ba...,122920,3.724678
1040,Crime Drama Mystery Thriller,1949,en,The true story of the investigation of 'The Zo...,19.083823,English,Zodiac,7.3,2080,7.009665,JakeGyllenhaal RobertDowneyJr. MarkRuffalo Ant...,DavidFincher,california san francisco killing journalist ne...,51540,3.701229
2274,Drama,400928,en,"Frank, a single man raising his child prodigy ...",16.025911,English,Gifted,7.7,549,6.780873,ChrisEvans MckennaGrace LindsayDuncan JennySla...,MarcWebb,cat intellectually gifted child prodigy mathem...,170697,4.0
1697,Action Adventure ScienceFiction,68721,en,When Tony Stark's world is torn apart by a for...,23.721243,English,Iron Man 3,6.8,8951,6.745349,RobertDowneyJr. GwynethPaltrow DonCheadle GuyP...,ShaneBlack,terrorist war on terror tennessee malibu marve...,102125,3.557847
1776,Action Adventure Fantasy,76338,en,Thor fights to restore order across the cosmos...,34.905447,English,Thor: The Dark World,6.8,4873,6.703354,ChrisHemsworth NataliePortman TomHiddleston An...,AlanTaylor,marvel comic superhero based on comic hostile ...,106072,3.378876


## crew

In [103]:
# Refit the shared `tfidf` vectoriser on the 'crew' column (this replaces
# whatever it learned from the previous column) and keep the resulting matrix.
# Most rows hold a single director token, so these vectors are very sparse.
tfidf_crew = tfidf.fit_transform(data['crew'])

In [104]:
# Sanity check: the row count should match the number of movies in `data`;
# the column count is presumably the size of the learned crew vocabulary.
tfidf_crew.shape

(2291, 1116)

In [105]:
# Precompute the crew-based ranking once: row i holds the indices of all
# movies ordered from most to least cosine-similar to movie i.
# cosine_similarity is used here, as in the keywords and cast sections,
# instead of linear_kernel: the two only agree when the TF-IDF rows are
# L2-normalised, which depends on how `tfidf` was configured earlier.
# NOTE: argsort gives no guaranteed order among tied scores, and crew vectors
# tie often (same director -> equal score, different director -> 0), so the
# movie itself is not necessarily in column 0 of its own row.
crew_cs = cosine_similarity(tfidf_crew, tfidf_crew).argsort()[:, ::-1]

In [106]:
# Inspect the ranking matrix: each row lists movie indices in descending
# order of crew similarity score to that row's movie. Ties are frequent for
# crew, so a row does not necessarily start with its own index.
crew_cs

array([[   0,  364,  460, ..., 1535, 1536, 1145],
       [ 325, 1315, 1486, ..., 1535, 1536,    0],
       [ 450, 1000,    2, ..., 1534, 1535,    0],
       ...,
       [ 571, 1365, 1014, ..., 1533, 1534,    0],
       [1069, 1695,  729, ..., 1533, 1534,    0],
       [2290,  760,  766, ..., 1535, 1536,    0]], dtype=int64)

In [107]:
# The ranking is square: one row and one column per movie.
crew_cs.shape

(2291, 2291)

In [108]:
# Show the crew (director) of the query movie so the recommendations below
# can be judged against it (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Dark Knight Rises', 'crew']

1541    ChristopherNolan
Name: crew, dtype: object

In [109]:
# Recommendations for 'The Dark Knight Rises' driven by the crew ranking.
# get_recommend_movie_list is defined earlier in the notebook.
get_recommend_movie_list(data, crew_cs, movie_title='The Dark Knight Rises')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
1135,Drama Action Crime Thriller,155,en,Batman raises the stakes in his war on crime. ...,123.167259,English 普通话,The Dark Knight,8.3,12269,8.208376,ChristianBale MichaelCaine HeathLedger AaronEc...,ChristopherNolan,dc comics crime fighter secret identity scarec...,58559,4.182071
1365,Action Thriller ScienceFiction Mystery Adventure,27205,en,"Cobb, a skilled thief who commits corporate es...",29.108149,English,Inception,8.1,14075,8.025763,LeonardoDiCaprio JosephGordon-Levitt EllenPage...,ChristopherNolan,loss of lover dream kidnapping sleep subconsci...,79132,4.161756
1816,Adventure Drama ScienceFiction,157336,en,Interstellar chronicles the adventures of a gr...,32.213481,English,Interstellar,8.1,11187,8.007314,MatthewMcConaughey JessicaChastain AnneHathawa...,ChristopherNolan,saving the world artificial intelligence fathe...,109487,4.086824
770,Adventure Fantasy Action,122,en,Aragorn is revealed as the heir to the ancient...,29.324358,English,The Lord of the Rings: The Return of the King,8.1,8226,7.975623,ElijahWood IanMcKellen ViggoMortensen LivTyler...,PeterJackson,elves orcs middle-earth (tolkien) based on nov...,7153,4.109478
571,Mystery Thriller,77,en,Suffering short-term memory loss after a head ...,15.450789,English,Memento,8.1,4168,7.865949,GuyPearce Carrie-AnneMoss JoePantoliano MarkBo...,ChristopherNolan,individual insulin tattoo waitress amnesia mot...,4226,4.157078
1014,Drama Mystery Thriller,1124,en,A mysterious story of two magicians whose inte...,16.94556,English,The Prestige,8.0,4510,7.790918,HughJackman ChristianBale MichaelCaine Scarlet...,ChristopherNolan,competition secret obsession magic dying and d...,48780,4.078295
902,Action Crime Drama,272,en,"Driven by tragedy, billionaire Bruce Wayne ded...",28.505341,English اردو 普通话,Batman Begins,7.5,7511,7.397206,ChristianBale MichaelCaine LiamNeeson KatieHol...,ChristopherNolan,himalaya martial arts dc comics crime fighter ...,33794,3.944856
769,Adventure Fantasy Drama,587,en,Throughout his life Edward Bloom has always be...,14.739983,English,Big Fish,7.6,2064,7.255684,EwanMcGregor AlbertFinney BillyCrudup JessicaL...,TimBurton,circus father son relationship witch fish fish...,7147,3.808669
2288,Action Drama History Thriller War,374720,en,The miraculous evacuation of Allied soldiers f...,30.938854,English Français Deutsch,Dunkirk,7.5,2712,7.2404,FionnWhitehead TomGlynn-Carney JackLowden Harr...,ChristopherNolan,france beach world war ii evacuation german pi...,174055,4.113475
768,Drama Action War History,616,en,Nathan Algren is an American hired to instruct...,19.582325,English 日本語,The Last Samurai,7.3,1946,6.993319,TomCruise KenWatanabe WilliamAtherton ChadLind...,EdwardZwick,japan war crimes sense of guilt swordplay gene...,7143,3.673578


In [110]:
# Show the crew (director) of the second query movie for comparison with the
# recommendations below (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Avengers', 'crew']

1504    JossWhedon
Name: crew, dtype: object

In [111]:
# Recommendations for 'The Avengers' driven by the crew ranking.
# get_recommend_movie_list is defined earlier in the notebook.
get_recommend_movie_list(data, crew_cs, movie_title='The Avengers')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
769,Adventure Fantasy Drama,587,en,Throughout his life Edward Bloom has always be...,14.739983,English,Big Fish,7.6,2064,7.255684,EwanMcGregor AlbertFinney BillyCrudup JessicaL...,TimBurton,circus father son relationship witch fish fish...,7147,3.808669
1958,Action Adventure ScienceFiction,99861,en,When Tony Stark tries to jumpstart a dormant p...,37.37942,English,Avengers: Age of Ultron,7.3,6908,7.200585,RobertDowneyJr. ChrisHemsworth MarkRuffalo Chr...,JossWhedon,marvel comic sequel superhero based on comic v...,122892,3.59498
777,ScienceFiction Thriller,1954,en,A young man struggles to access sublimated chi...,12.994939,English,The Butterfly Effect,7.3,2119,7.0141,AshtonKutcher MeloraWalters AmySmart EldenHens...,EricBress J.MackyeGruber,amnesia chaos theory blackout time travel flas...,7254,3.707217
768,Drama Action War History,616,en,Nathan Algren is an American hired to instruct...,19.582325,English 日本語,The Last Samurai,7.3,1946,6.993319,TomCruise KenWatanabe WilliamAtherton ChadLind...,EdwardZwick,japan war crimes sense of guilt swordplay gene...,7143,3.673578
767,Adventure Animation Fantasy,81,ja,"After a global war, the seaside kingdom known ...",11.267366,日本語,Nausicaä of the Valley of the Wind,7.7,808,6.972543,SumiShimamoto IchirōNagai GorōNaya YōjiMatsuda...,HayaoMiyazaki,saving the world human vs nature fungus spores...,7099,4.069051
910,ScienceFiction Action Adventure Thriller,16320,en,When the renegade crew of Serenity agrees to h...,11.919995,English 普通话,Serenity,7.4,1287,6.950667,NathanFillion GinaTorres AlanTudyk MorenaBacca...,JossWhedon,martial arts telepathy dystopia spaceship fugi...,34405,3.988671
765,Drama Thriller Adventure,3176,ja,"In the future, the Japanese government capture...",12.701098,日本語,Battle Royale,7.3,992,6.788148,TakeshiKitano TsuyakoKinoshita EriIshikawa Say...,KinjiFukasaku,island television survivor soldier battle deat...,7022,3.794012
756,Comedy Romance Drama,508,en,Follows seemingly unrelated people as their li...,10.586815,English Français Português,Love Actually,7.0,1917,6.744916,KeiraKnightley HeikeMakatsch EmmaThompson Laur...,RichardCurtis,london england male nudity female nudity love ...,6942,3.749628
759,Drama Crime Thriller,470,en,This is the story of three gentle persons: Pau...,6.372278,English,21 Grams,7.2,943,6.701451,SeanPenn NaomiWatts BeniciodelToro DannyHuston...,AlejandroGonzálezIñárritu,life and death transplantation suicide attempt...,6953,3.69259
782,Comedy Drama Romance,338,de,An affectionate and refreshing East/West-Germa...,10.605595,Deutsch,"Good bye, Lenin!",7.4,602,6.65357,DanielBrühl KatrinSass ChulpanKhamatova MariaS...,WolfgangBecker,bureaucracy berlin wall police state coma loss...,7323,3.887241


## all

In [112]:
# Build one combined text field per movie from every textual feature so a
# single TF-IDF model can compare movies on all of them at once.
# str.cat(..., na_rep='') treats a missing value as an empty string; plain `+`
# concatenation would turn the whole row into NaN if any one field is missing.
# NOTE: this adds the 'all' column to `data` in place, so it also appears in
# every DataFrame displayed after this cell.
ALL_TEXT_COLUMNS = ['genres', 'overview', 'keywords', 'cast', 'crew']
data['all'] = data[ALL_TEXT_COLUMNS[0]].str.cat(
    data[ALL_TEXT_COLUMNS[1:]], sep=' ', na_rep=''
)

In [113]:
# Refit the shared `tfidf` vectoriser on the combined 'all' text (this replaces
# whatever it learned from the previous column) and keep the resulting matrix.
tfidf_all = tfidf.fit_transform(data['all'])

In [114]:
# Sanity check: the row count should match the number of movies in `data`;
# the column count is presumably the vocabulary size of the combined text.
tfidf_all.shape

(2291, 58299)

In [115]:
# Precompute the combined-text ranking once: row i holds the indices of all
# movies ordered from most to least cosine-similar to movie i.
# cosine_similarity is used here, as in the keywords and cast sections,
# instead of linear_kernel: the two only agree when the TF-IDF rows are
# L2-normalised, which depends on how `tfidf` was configured earlier.
all_cs = cosine_similarity(tfidf_all, tfidf_all).argsort()[:, ::-1]

In [116]:
# Inspect the ranking matrix: each row lists movie indices in descending
# order of combined-text similarity score to that row's movie.
all_cs

array([[   0,  460, 1358, ..., 2051, 2050, 2290],
       [   1, 1910,  943, ...,  907, 2202, 1028],
       [   2,  930, 1707, ...,  949,  952,    0],
       ...,
       [2288,  183,  323, ...,  641, 1843,    0],
       [2289, 1248, 1478, ..., 1374,  512,    0],
       [2290, 1412,   27, ...,  453, 1173,    0]], dtype=int64)

In [117]:
# The ranking is square: one row and one column per movie.
all_cs.shape

(2291, 2291)

In [118]:
# Show the combined text of the query movie so the recommendations below can
# be judged against it (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Dark Knight Rises', 'all']

1541    Action Crime Drama Thriller Following the deat...
Name: all, dtype: object

In [119]:
# Recommendations for 'The Dark Knight Rises' driven by the combined-text
# ranking. get_recommend_movie_list is defined earlier in the notebook.
get_recommend_movie_list(data, all_cs, movie_title='The Dark Knight Rises')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating,all
1135,Drama Action Crime Thriller,155,en,Batman raises the stakes in his war on crime. ...,123.167259,English 普通话,The Dark Knight,8.3,12269,8.208376,ChristianBale MichaelCaine HeathLedger AaronEc...,ChristopherNolan,dc comics crime fighter secret identity scarec...,58559,4.182071,Drama Action Crime Thriller Batman raises the ...
1365,Action Thriller ScienceFiction Mystery Adventure,27205,en,"Cobb, a skilled thief who commits corporate es...",29.108149,English,Inception,8.1,14075,8.025763,LeonardoDiCaprio JosephGordon-Levitt EllenPage...,ChristopherNolan,loss of lover dream kidnapping sleep subconsci...,79132,4.161756,Action Thriller ScienceFiction Mystery Adventu...
1816,Adventure Drama ScienceFiction,157336,en,Interstellar chronicles the adventures of a gr...,32.213481,English,Interstellar,8.1,11187,8.007314,MatthewMcConaughey JessicaChastain AnneHathawa...,ChristopherNolan,saving the world artificial intelligence fathe...,109487,4.086824,Adventure Drama ScienceFiction Interstellar ch...
1014,Drama Mystery Thriller,1124,en,A mysterious story of two magicians whose inte...,16.94556,English,The Prestige,8.0,4510,7.790918,HughJackman ChristianBale MichaelCaine Scarlet...,ChristopherNolan,competition secret obsession magic dying and d...,48780,4.078295,Drama Mystery Thriller A mysterious story of t...
1787,Crime Drama Comedy,106646,en,A New York stockbroker refuses to cooperate in...,16.382422,Français English,The Wolf of Wall Street,7.9,6768,7.762496,LeonardoDiCaprio JonahHill MargotRobbie KyleCh...,MartinScorsese,corruption sex sexuality bank humor biography ...,106782,3.858644,Crime Drama Comedy A New York stockbroker refu...
902,Action Crime Drama,272,en,"Driven by tragedy, billionaire Bruce Wayne ded...",28.505341,English اردو 普通话,Batman Begins,7.5,7511,7.397206,ChristianBale MichaelCaine LiamNeeson KatieHol...,ChristopherNolan,himalaya martial arts dc comics crime fighter ...,33794,3.944856,"Action Crime Drama Driven by tragedy, billiona..."
1247,Drama Thriller War,12162,en,Forced to play a dangerous game of cat-and-mou...,9.403724,English العربية Türkçe,The Hurt Locker,7.2,1881,6.903455,JeremyRenner AnthonyMackie BrianGeraghty GuyPe...,KathrynBigelow,sniper explosive loyalty u.s. army iraq car bo...,69481,3.741221,Drama Thriller War Forced to play a dangerous ...
195,Drama Action Thriller Crime,832,de,"In this classic German thriller, Hans Beckert,...",12.752421,Deutsch,M,8.0,465,6.850165,PeterLorre EllenWidmann IngeLandgut OttoWernic...,FritzLang,germany berlin underworld child murderer detec...,1260,4.163554,Drama Action Thriller Crime In this classic Ge...
1282,Drama Crime Thriller,22803,en,A frustrated man decides to take justice into ...,16.639047,English,Law Abiding Citizen,7.2,1522,6.849028,JamieFoxx GerardButler ColmMeaney BruceMcGill ...,F.GaryGray,tattoo secret passage baseball bat deal explos...,71838,3.674976,Drama Crime Thriller A frustrated man decides ...
2252,Action Animation Comedy Family Fantasy,324849,en,In the irreverent spirit of fun that made “The...,17.070748,English,The Lego Batman Movie,7.2,1473,6.84001,WillArnett ZachGalifianakis MichaelCera Rosari...,ChrisMcKay,superhero based on comic based on toy spin off...,167746,3.574866,Action Animation Comedy Family Fantasy In the ...


In [120]:
# Show the combined text of the second query movie for comparison with the
# recommendations below (single .loc lookup instead of chained indexing).
data.loc[data['title'] == 'The Avengers', 'all']

1504    ScienceFiction Action Adventure When an unexpe...
Name: all, dtype: object

In [121]:
# Recommendations for 'The Avengers' driven by the combined-text ranking.
# get_recommend_movie_list is defined earlier in the notebook.
get_recommend_movie_list(data, all_cs, movie_title='The Avengers')

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating,all
1870,Action ScienceFiction Adventure,118340,en,"Light years from Earth, 26 years after being a...",53.291601,English,Guardians of the Galaxy,7.9,10014,7.805216,ChrisPratt ZoeSaldana DaveBautista VinDiesel B...,JamesGunn,marvel comic spaceship space outer space orpha...,112852,3.927768,Action ScienceFiction Adventure Light years fr...
1944,Crime Comedy Action Adventure,207703,en,The story of a super-secret spy organization t...,28.224212,English,Kingsman: The Secret Service,7.6,6069,7.467738,ColinFirth SamuelL.Jackson MarkStrong TaronEge...,MatthewVaughn,spy great britain secret organization secret a...,119145,3.80529,Crime Comedy Action Adventure The story of a s...
1824,Action Adventure ScienceFiction,100402,en,After the cataclysmic events in New York with ...,18.717704,English,Captain America: The Winter Soldier,7.6,5881,7.4638,ChrisEvans SamuelL.Jackson ScarlettJohansson R...,AnthonyRusso JoeRusso,washington d.c. future shield marvel comic sup...,110102,3.720278,Action Adventure ScienceFiction After the cata...
1962,Action Adventure Comedy,293660,en,Deadpool tells the origin story of former Spec...,187.860492,English,Deadpool,7.4,11444,7.334896,RyanReynolds MorenaBaccarin EdSkrein T.J.Mille...,TimMiller,anti hero mercenary marvel comic superhero bas...,122904,3.859518,Action Adventure Comedy Deadpool tells the ori...
1141,Action ScienceFiction Adventure,1726,en,"After being held captive in an Afghan cave, bi...",22.073099,English فارسی اردو العربية,Iron Man,7.4,8951,7.317602,RobertDowneyJr. TerrenceHoward JeffBridges Sha...,JonFavreau,middle east arms dealer malibu marvel comic su...,59315,3.85521,Action ScienceFiction Adventure After being he...
1958,Action Adventure ScienceFiction,99861,en,When Tony Stark tries to jumpstart a dormant p...,37.37942,English,Avengers: Age of Ultron,7.3,6908,7.200585,RobertDowneyJr. ChrisHemsworth MarkRuffalo Chr...,JossWhedon,marvel comic sequel superhero based on comic v...,122892,3.59498,Action Adventure ScienceFiction When Tony Star...
1964,Adventure Action ScienceFiction,271110,en,"Following the events of Age of Ultron, the col...",145.882135,Română English Deutsch Pусский,Captain America: Civil War,7.1,7462,7.018554,ChrisEvans RobertDowneyJr. ScarlettJohansson S...,AnthonyRusso JoeRusso,civil war war marvel comic sequel superhero ba...,122920,3.724678,Adventure Action ScienceFiction Following the ...
1965,Action Adventure Fantasy ScienceFiction,284052,en,"After his career is destroyed, a brilliant but...",43.847654,English,Doctor Strange,7.1,5880,6.998147,BenedictCumberbatch ChiwetelEjiofor RachelMcAd...,ScottDerrickson,magic marvel comic superhero based on comic so...,122922,3.742311,Action Adventure Fantasy ScienceFiction After ...
1472,Action ScienceFiction Adventure,49538,en,Before Charles Xavier and Erik Lensherr took t...,0.738633,Deutsch English Español Français Pусский,X-Men: First Class,7.1,5252,6.986897,JamesMcAvoy MichaelFassbender JenniferLawrence...,MatthewVaughn,cia mutant mine marvel comic superhero based o...,87232,3.774063,Action ScienceFiction Adventure Before Charles...
1960,ScienceFiction Action Adventure,102899,en,Armed with the astonishing ability to shrink i...,26.882388,English,Ant-Man,7.0,6029,6.90721,PaulRudd MichaelDouglas EvangelineLilly CoreyS...,PeytonReed,marvel comic superhero based on comic aftercre...,122900,3.600346,ScienceFiction Action Adventure Armed with the...
