# 영화 추천 시스템 프로젝트

#### 라이브러리

In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import urllib.request
import io, os
from google.colab import files


from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from scipy.sparse.linalg import svds

import warnings; warnings.simplefilter('ignore')

# 데이터 정리

## 데이터 불러오기

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/final/movies_metadata.csv')
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.0155,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.85949,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.38752,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


## 데이터 정리

In [None]:
df_copy = df.copy()

In [None]:
# 결측치 확인

df_copy.isnull().sum()

adult                        0
belongs_to_collection    40972
budget                       0
genres                       0
homepage                 37684
id                           0
imdb_id                     17
original_language           11
original_title               0
overview                   954
popularity                   5
poster_path                386
production_companies         3
production_countries         3
release_date                87
revenue                      6
runtime                    263
spoken_languages             6
status                      87
tagline                  25054
title                        6
video                        6
vote_average                 6
vote_count                   6
dtype: int64

## 장르 정리

In [None]:
# String -> List & Dictionary 변환
df_copy['genres'] = df_copy['genres'].apply(literal_eval)

# name에 해당하는 값 추출
df_copy['genres'] = df_copy['genres'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])

In [None]:
df_copy['genres']

0         [Animation, Comedy, Family]
1        [Adventure, Fantasy, Family]
2                   [Romance, Comedy]
3            [Comedy, Drama, Romance]
4                            [Comedy]
                     ...             
45461                 [Drama, Family]
45462                         [Drama]
45463       [Action, Drama, Thriller]
45464                              []
45465                              []
Name: genres, Length: 45466, dtype: object

## 연도 분리

In [None]:
# 개봉일에서 연도만 추출

df_copy['year'] = pd.to_datetime(df_copy['release_date'], errors='coerce').apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)

## 필요한 데이터만 추출

In [None]:
df_copy.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'year'],
      dtype='object')

In [None]:
# md.drop(['adult', 'belongs_to_collection', 'budget', 'homepage', 'id',
#          'imdb_id', 'original_language', 'original_title', 'overview', 'production_companies', 'production_countries',
#          'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'video'], axis=1, inplace=True)

In [None]:
necessary = ['id', 'title', 'year', 'vote_count', 'vote_average', 'popularity', 'genres', 'poster_path']
md = df_copy[necessary]
md.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path
0,862,Toy Story,1995,5415.0,7.7,21.9469,"[Animation, Comedy, Family]",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg
1,8844,Jumanji,1995,2413.0,6.9,17.0155,"[Adventure, Fantasy, Family]",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"[Romance, Comedy]",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
3,31357,Waiting to Exhale,1995,34.0,6.1,3.85949,"[Comedy, Drama, Romance]",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.38752,[Comedy],/e64sOI48hQXyru7naBFyssKFxVd.jpg


## 결측치 제거

## 투표수 상위 데이터 추출

In [None]:
# 투표수 30개 이상

data = md[md['vote_count'] >= 30]
data

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path
0,862,Toy Story,1995,5415.0,7.7,21.9469,"[Animation, Comedy, Family]",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg
1,8844,Jumanji,1995,2413.0,6.9,17.0155,"[Adventure, Fantasy, Family]",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"[Romance, Comedy]",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
3,31357,Waiting to Exhale,1995,34.0,6.1,3.85949,"[Comedy, Drama, Romance]",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.38752,[Comedy],/e64sOI48hQXyru7naBFyssKFxVd.jpg
...,...,...,...,...,...,...,...,...
45353,3104,Frankenstein Created Woman,1967,33.0,5.9,2.302582,"[Horror, Science Fiction]",/s95WohDSxZ0WyCQn8fzS92ZN8Xv.jpg
45373,426272,Take Me,2017,38.0,6.0,9.222308,"[Comedy, Crime]",/70kL9vXjbCAYd3wNXYScCBGlkJC.jpg
45380,432789,The Incredible Jessica James,2017,37.0,6.2,5.667067,"[Romance, Comedy]",/r7tDHGsFzHY0YBCaaNctvAxZhpc.jpg
45437,455661,In a Heartbeat,2017,146.0,8.3,20.82178,"[Family, Animation, Romance, Comedy]",/wJUJROdLOtOzMixkjkx1aaZGSLl.jpg


In [None]:
data.isnull().sum()

id              0
title           0
year            0
vote_count      0
vote_average    0
popularity      0
genres          0
poster_path     0
dtype: int64

# 중간저장

In [None]:
# 정리된 파일 중간저장

data.to_csv('drive/My Drive/Colab Notebooks/final/mid.csv')

In [None]:
# 중간저장 불러오기

mid = pd.read_csv('drive/My Drive/Colab Notebooks/final/mid.csv', encoding='utf-8')
mid.head()

Unnamed: 0.1,Unnamed: 0,title,year,vote_count,vote_average,popularity,genres,poster_path
0,0,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg
1,1,Jumanji,1995,2413.0,6.9,17.015539,"['Adventure', 'Fantasy', 'Family']",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
2,2,Grumpier Old Men,1995,92.0,6.5,11.7129,"['Romance', 'Comedy']",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
3,3,Waiting to Exhale,1995,34.0,6.1,3.859495,"['Comedy', 'Drama', 'Romance']",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
4,4,Father of the Bride Part II,1995,173.0,5.7,8.387519,['Comedy'],/e64sOI48hQXyru7naBFyssKFxVd.jpg


In [None]:
# 중간저장을 불러온 경우

data = mid.drop(['Unnamed: 0'], axis=1).copy()
data.head()

Unnamed: 0,title,year,vote_count,vote_average,genres
0,Toy Story,1995,5415.0,7.7,"['Animation', 'Comedy', 'Family']"
1,Jumanji,1995,2413.0,6.9,"['Adventure', 'Fantasy', 'Family']"
2,Grumpier Old Men,1995,92.0,6.5,"['Romance', 'Comedy']"
3,Waiting to Exhale,1995,34.0,6.1,"['Comedy', 'Drama', 'Romance']"
4,Father of the Bride Part II,1995,173.0,5.7,['Comedy']


# 크롤링

## 크롤링 코드

In [None]:
# 크롤링 데이터 입력할 column 추가

data['director'] = ''
data['actor'] = ''
data['poster'] = ''
data['link'] = ''
data['crawling_title'] = ''

data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.9469,"[Animation, Comedy, Family]",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,,,,,
1,8844,Jumanji,1995,2413.0,6.9,17.0155,"[Adventure, Fantasy, Family]",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,,,,,
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"[Romance, Comedy]",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,,,,,
3,31357,Waiting to Exhale,1995,34.0,6.1,3.85949,"[Comedy, Drama, Romance]",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,,,,,
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.38752,[Comedy],/e64sOI48hQXyru7naBFyssKFxVd.jpg,,,,,


In [None]:
# 네이버 API

# client_ID = "inputYourID"
# client_secret = "InputYourSecret"

#헤더 변경
header_parms = {"X-Naver-Client-Id":client_ID, "X-Naver-Client-Secret":client_secret}

In [None]:
# 크롤링 반복문

for i, keyword in enumerate(data['title'].tolist()):

    try:
        # 네이버 api로 영화 데이터 수집
        url = "https://openapi.naver.com/v1/search/movie.json?query={}".format(keyword)
        request = requests.get(url, headers = header_parms)
        mdata = request.json()
        
        director = mdata['items'][0]['director'].split('|')[0]
        actor = mdata['items'][0]['actor'].split('|')[:-1]
        poster = mdata['items'][0]['image']
        link = mdata['items'][0]['link']
        title = mdata['items'][0]['title']
        
        movieinfo_list = [director, actor, poster, link, title]
        data.iloc[i, 8:13] = movieinfo_list

        print("이번 키워드 :", keyword, "| 진행내역 :", i+1, f"/ {data.shape[0]}")

        
    except:
        pass
        print('이번 키워드 :', keyword, 'fail')

In [None]:
data

## 크롤링 결과 저장

In [None]:
# crawling 파일 중간저장

# data.to_csv('drive/My Drive/Colab Notebooks/final/crawling.csv', encoding='utf-8-sig')

# 콘텐츠 기반 추천

## 장르기반

### 데이터 정리

In [None]:
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/final/crawling.csv')
data.drop(['Unnamed: 0'], axis=1, inplace=True)
data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,"['Adventure', 'Fantasy', 'Family']",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크 캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"['Romance', 'Comedy']",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드 도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,"['Comedy', 'Drama', 'Romance']",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트 휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,['Comedy'],/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스 샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2


In [None]:
movie_data = data.copy()
movie_data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,"['Adventure', 'Fantasy', 'Family']",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크 캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"['Romance', 'Comedy']",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드 도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,"['Comedy', 'Drama', 'Romance']",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트 휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,['Comedy'],/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스 샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2


In [None]:
movie_data['genres'] = movie_data['genres'].apply(lambda x : eval(x))
movie_data['genres'] = movie_data['genres'].apply(lambda x : " ".join(x))
movie_data['genres']

0                Animation Comedy Family
1               Adventure Fantasy Family
2                         Romance Comedy
3                   Comedy Drama Romance
4                                 Comedy
                      ...               
12416             Horror Science Fiction
12417                       Comedy Crime
12418                     Romance Comedy
12419    Family Animation Romance Comedy
12420                             Comedy
Name: genres, Length: 12421, dtype: object

### TF-IDF 벡터화

In [None]:
tfidf_vector = TfidfVectorizer()
#tfidf_vector = TfidfVectorizer(ngram_range=(1,2))
tfidf_matrix = tfidf_vector.fit_transform(movie_data['genres']).toarray()
#tfidf_matrix = tfidf_vector.fit_transform(movie_data['genres']).toarray()
tfidf_matrix_feature = tfidf_vector.get_feature_names()

In [None]:
tfidf_matrix.shape

(12421, 22)

In [None]:
tfidf_matrix = pd.DataFrame(tfidf_matrix, columns=tfidf_matrix_feature, index = movie_data.title)
print(tfidf_matrix.shape)
tfidf_matrix.head()

(12421, 22)


Unnamed: 0_level_0,action,adventure,animation,comedy,crime,documentary,drama,family,fantasy,fiction,foreign,history,horror,movie,music,mystery,romance,science,thriller,tv,war,western
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Toy Story,0.0,0.0,0.682821,0.381155,0.0,0.0,0.0,0.623279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Jumanji,0.0,0.529179,0.0,0.0,0.0,0.0,0.0,0.591205,0.608644,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Grumpier Old Men,0.0,0.0,0.0,0.589473,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.807788,0.0,0.0,0.0,0.0,0.0
Waiting to Exhale,0.0,0.0,0.0,0.5268,0.0,0.0,0.448708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.721902,0.0,0.0,0.0,0.0,0.0
Father of the Bride Part II,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 유사도 구하기

In [None]:
cosine_sim = cosine_similarity(tfidf_matrix)

In [None]:
cosine_sim

array([[1.        , 0.36848542, 0.22468061, ..., 0.22468061, 0.8863745 ,
        0.38115484],
       [0.36848542, 1.        , 0.        , ..., 0.        , 0.32661608,
        0.        ],
       [0.22468061, 0.        , 1.        , ..., 1.        , 0.57313185,
        0.58947332],
       ...,
       [0.22468061, 0.        , 1.        , ..., 1.        , 0.57313185,
        0.58947332],
       [0.8863745 , 0.32661608, 0.57313185, ..., 0.57313185, 1.        ,
        0.33784593],
       [0.38115484, 0.        , 0.58947332, ..., 0.58947332, 0.33784593,
        1.        ]])

In [None]:
cosine_sim_df = pd.DataFrame(cosine_sim, index = movie_data.title, columns = movie_data.title)
print(cosine_sim_df.shape)
cosine_sim_df.head()

(12421, 12421)


title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II,Heat,Sabrina,Tom and Huck,Sudden Death,GoldenEye,The American President,Dracula: Dead and Loving It,Balto,Nixon,Cutthroat Island,Casino,Sense and Sensibility,Four Rooms,Ace Ventura: When Nature Calls,Money Train,Get Shorty,Copycat,Assassins,Powder,Leaving Las Vegas,Othello,Now and Then,Persuasion,The City of Lost Children,Dangerous Minds,Twelve Monkeys,Babe,Dead Man Walking,It Takes Two,Clueless,Richard III,Dead Presidents,Restoration,Mortal Kombat,To Die For,...,The Dark Tower,Annie,Inconceivable,Security,Overdrive,Snow White: The Fairest of Them All,Tour de Pharmacy,Dirty Dancing,Banana,The Emoji Movie,Bedeviled,The Saint,Mom or Dad?,Questa notte è ancora nostra,Wind River,Olga,The Nile Hilton Incident,Shot Caller,Brice 3,Wish Upon,Girls Trip,Detroit,Feed,Ducoboo,Sahara,The Double Lover,First Kill,Minions: Orientation Day,Descendants 2,Force Majeure,"Good Guys Go to Heaven, Bad Guys Go to Pattaya",The Olive Tree,With Open Arms,The Visitors: Bastille Day,Titanic 2,Frankenstein Created Woman,Take Me,The Incredible Jessica James,In a Heartbeat,Cadet Kelly
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
Toy Story,1.0,0.368485,0.224681,0.200792,0.381155,0.0,0.224681,0.383473,0.0,0.0,0.200792,0.219186,0.791558,0.0,0.0,0.0,0.0,0.216443,0.166453,0.177265,0.182061,0.0,0.0,0.0,0.0,0.0,0.667635,0.0,0.0,0.0,0.0,0.520672,0.0,0.594321,0.200792,0.0,0.0,0.0,0.0,0.157703,...,0.0,0.4856,0.0,0.0,0.0,0.215627,0.096221,0.0,1.0,1.0,0.0,0.0,0.381155,0.381155,0.0,0.0,0.0,0.0,0.381155,0.0,0.381155,0.0,0.0,0.381155,0.873291,0.0,0.0,1.0,0.27115,0.290164,0.381155,0.290164,0.381155,0.381155,0.0,0.0,0.216443,0.224681,0.886375,0.381155
Jumanji,0.368485,1.0,0.0,0.0,0.0,0.0,0.0,0.655159,0.348257,0.348257,0.0,0.0,0.61466,0.0,0.404608,0.0,0.0,0.0,0.338251,0.0,0.0,0.0,0.291761,0.326457,0.0,0.0,0.460911,0.0,0.569346,0.0,0.0,0.740424,0.0,0.410298,0.0,0.0,0.0,0.0,0.490918,0.423944,...,0.245989,0.33524,0.0,0.0,0.0,0.58517,0.0,0.0,0.368485,0.368485,0.0,0.322568,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.413512,0.0,0.0,0.0,0.0,0.57961,0.0,0.0,0.368485,0.337167,0.0,0.0,0.0,0.0,0.0,0.348257,0.0,0.0,0.0,0.326616,0.0
Grumpier Old Men,0.224681,0.0,1.0,0.893678,0.589473,0.0,1.0,0.0,0.0,0.0,0.893678,0.338982,0.0,0.0,0.0,0.0,0.68606,0.334738,0.257428,0.274148,0.281566,0.0,0.0,0.0,0.68606,0.0,0.281036,0.68606,0.0,0.0,0.0,0.219173,0.0,0.719973,0.893678,0.0,0.0,0.68606,0.0,0.243896,...,0.0,0.20441,0.0,0.0,0.0,0.0,0.14881,0.236342,0.224681,0.224681,0.0,0.0,0.589473,0.589473,0.0,0.0,0.0,0.0,0.589473,0.0,0.589473,0.0,0.0,0.589473,0.196212,0.0,0.0,0.224681,0.114139,0.448752,0.589473,0.448752,0.589473,0.589473,0.0,0.0,0.334738,1.0,0.573132,0.589473
Waiting to Exhale,0.200792,0.0,0.893678,1.0,0.5268,0.161065,0.893678,0.143799,0.0,0.0,1.0,0.30294,0.0,0.170315,0.0,0.227326,0.84999,0.299148,0.230057,0.245,0.251629,0.269755,0.0,0.12177,0.84999,0.448708,0.43337,0.84999,0.0,0.227326,0.0,0.337974,0.448708,0.643424,1.0,0.162992,0.129881,0.84999,0.0,0.376097,...,0.0,0.182677,0.269755,0.0,0.0,0.0,0.132988,0.292814,0.200792,0.200792,0.0,0.0,0.5268,0.5268,0.0,0.094785,0.269755,0.188557,0.5268,0.0,0.5268,0.131723,0.448708,0.5268,0.17535,0.0,0.0,0.200792,0.102003,0.691995,0.5268,0.691995,0.5268,0.5268,0.0,0.0,0.299148,0.893678,0.512195,0.5268
Father of the Bride Part II,0.381155,0.0,0.589473,0.5268,1.0,0.0,0.589473,0.0,0.0,0.0,0.5268,0.575058,0.0,0.0,0.0,0.0,0.0,0.56786,0.436708,0.465073,0.477656,0.0,0.0,0.0,0.0,0.0,0.476758,0.0,0.0,0.0,0.0,0.371812,0.0,0.424405,0.5268,0.0,0.0,0.0,0.0,0.413752,...,0.0,0.346767,0.0,0.0,0.0,0.0,0.252445,0.0,0.381155,0.381155,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.332859,0.0,0.0,0.381155,0.193628,0.761277,1.0,0.761277,1.0,1.0,0.0,0.0,0.56786,0.589473,0.337846,1.0


### 장르기반 추천함수

In [None]:
def genre_recommendations(target_title, matrix, items, k=10):
    recom_idx = matrix.loc[:, target_title].values.reshape(1, -1).argsort()[:, ::-1].flatten()[1:k+1]
    recom_title = items.iloc[recom_idx, :].title.values
    recom_genre = items.iloc[recom_idx, :].genres.values

    d = {

        'recom_title' : recom_title,
        'recom_genre' : recom_genre
    }
    
    return pd.DataFrame(d)

In [None]:
genre_recommendations('The Dark Knight Rises', cosine_sim_df, movie_data)

Unnamed: 0,recom_title,recom_genre
0,The Dark Knight Rises,Action Crime Drama Thriller
1,Contraband,Thriller Action Drama Crime
2,Colt 45,Drama Thriller Action Crime
3,Twelve,Thriller Drama Action Crime
4,The Fast and the Furious: Tokyo Drift,Action Crime Drama Thriller
5,Get the Gringo,Action Thriller Crime Drama
6,Crime Story,Thriller Action Crime Drama
7,Payback,Drama Action Thriller Crime
8,Gone in 60 Seconds,Action Crime Drama Thriller
9,French Connection II,Action Crime Drama Thriller


## 감독기반

In [None]:
movie_data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,Animation Comedy Family,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,Adventure Fantasy Family,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크 캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,Romance Comedy,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드 도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,Comedy Drama Romance,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트 휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,Comedy,/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스 샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2


In [None]:
# director 없는 데이터 제거

movie_data.dropna(subset = ['director'], inplace=True)
movie_data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,Animation Comedy Family,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,Adventure Fantasy Family,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크 캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,Romance Comedy,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드 도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,Comedy Drama Romance,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트 휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,Comedy,/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스 샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2


In [None]:
# 감독의 이름에서 공백 삭제

movie_data['director'] = movie_data['director'].astype('str').apply(lambda x: str.lower(x.replace(" ", "")))

### TF-IDF 벡터화

In [None]:
tfidf_vector = TfidfVectorizer()
#tfidf_vector = TfidfVectorizer(ngram_range=(1,2))
tfidf_matrix = tfidf_vector.fit_transform(movie_data['director']).toarray()
#tfidf_matrix = tfidf_vector.fit_transform(movie_data['genres']).toarray()
tfidf_matrix_feature = tfidf_vector.get_feature_names()

In [None]:
tfidf_matrix.shape

(11165, 6797)

In [None]:
tfidf_matrix = pd.DataFrame(tfidf_matrix, columns=tfidf_matrix_feature, index = movie_data.title)
print(tfidf_matrix.shape)
tfidf_matrix.head()

(11165, 6797)


Unnamed: 0_level_0,bj맥도넬,b스톤,j블레이크슨,one9,pj페티트,ss라자몰리,tj오그레이디페이튼,tt더아티스트,가레스캐리빅,가렛에드워즈,가렛에반스,가르시아,가린호반니시안,가버추보,가부키사와코,가브리엘라다소베뉴,가브리엘라마쉬,가브리엘라코우퍼스웨이트,가브리엘레인지,가브리엘립스테인,가브리엘마이네티,가브리엘무치노,가브리엘보머,가브리엘보몽,가브리엘살바토레,가브리엘엑셀,가브리엘이글레시아스,가브리엘파브로,가스제닝스,가스파노에,가스파안티요,가슨카닌,가오숑지에,가오톈,가와세나오미,가와지리요시아키,가우리신드,가이리치,가이퍼랜드,가이픽든,...,후안안토니오바요나,후안카를로스마네글리아,후안카를로스팔콘,후안카를로스프레스나딜로,후안퍼안드레스,후안펠리페글리세일스,후안프라우스토,후안호세캄파넬라,후이신로우,후지나미야스,후지모리아키나,후지와라켄이치,후지이미치히토,후지타토시야,후쿠다유이치,훌리오메뎀,훌리오에르난데스코르동,훼이라이,휴고드포콤프레,휴고스미스,휴고프라세토,휴윌슨,휴존슨,휴즈,휴튼,휴허드슨,흑자,흘렙파푸,히긴스,히다카마사미츠,히데오조조,히라노토시키,히라카와유이치로,히로아키,히로키류이치,히오나히네어,히쳉,히콕스,힌드메뎁,힐데반미에그헴
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
Toy Story,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Jumanji,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Grumpier Old Men,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Waiting to Exhale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Father of the Bride Part II,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 유사도 구하기

In [None]:
cosine_sim = cosine_similarity(tfidf_matrix)

In [None]:
cosine_sim.shape

(11165, 11165)

In [None]:
cosine_sim_df = pd.DataFrame(cosine_sim, index = movie_data.title, columns = movie_data.title)
print(cosine_sim_df.shape)
cosine_sim_df.head()

(11165, 11165)


title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II,Heat,Sabrina,Tom and Huck,Sudden Death,GoldenEye,The American President,Dracula: Dead and Loving It,Balto,Nixon,Cutthroat Island,Casino,Sense and Sensibility,Four Rooms,Ace Ventura: When Nature Calls,Money Train,Get Shorty,Copycat,Assassins,Powder,Leaving Las Vegas,Othello,Now and Then,Persuasion,The City of Lost Children,Dangerous Minds,Twelve Monkeys,Babe,Dead Man Walking,It Takes Two,Clueless,Richard III,Dead Presidents,Restoration,Mortal Kombat,To Die For,...,2:22,A Bag of Marbles,Sweet Dreams,Batman Beyond: The Movie,What the Health,The Dark Tower,Annie,Inconceivable,Security,Overdrive,Snow White: The Fairest of Them All,Dirty Dancing,Banana,The Emoji Movie,Bedeviled,The Saint,Mom or Dad?,Wind River,Olga,The Nile Hilton Incident,Shot Caller,Brice 3,Wish Upon,Girls Trip,Detroit,Feed,Sahara,The Double Lover,First Kill,Descendants 2,Force Majeure,The Olive Tree,With Open Arms,The Visitors: Bastille Day,Titanic 2,Frankenstein Created Woman,Take Me,The Incredible Jessica James,In a Heartbeat,Cadet Kelly
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
Toy Story,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Jumanji,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Grumpier Old Men,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Waiting to Exhale,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Father of the Bride Part II,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 감독기반 추천함수

In [None]:
def director_recommendations(target_title, matrix, items, k=5):
    recom_idx = matrix.loc[:, target_title].values.reshape(1, -1).argsort()[:, ::-1].flatten()[1:k+1]
    recom_title = items.iloc[recom_idx, :].title.values
    recom_director = items.iloc[recom_idx, :].director.values

    d = {

        'recom_title' : recom_title,
        'recom_director' : recom_director
    }
    
    return pd.DataFrame(d)

In [None]:
director_recommendations('The Dark Knight Rises', cosine_sim_df, movie_data)

Unnamed: 0,recom_title,recom_director
0,Batman Begins,크리스토퍼놀란
1,The Prestige,크리스토퍼놀란
2,Doodlebug,크리스토퍼놀란
3,Cadet Kelly,래리쇼우
4,The Pick-up Artist,스테펜웨이너트


## 키워드 기반

In [None]:
movie_data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,Animation Comedy Family,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,Adventure Fantasy Family,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,Romance Comedy,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,Comedy Drama Romance,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,Comedy,/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2


In [None]:
df_key = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/final/keywords.csv')
df_key.head()

Unnamed: 0,id,keywords
0,862,"[{'id': 931, 'name': 'jealousy'}, {'id': 4290,..."
1,8844,"[{'id': 10090, 'name': 'board game'}, {'id': 1..."
2,15602,"[{'id': 1495, 'name': 'fishing'}, {'id': 12392..."
3,31357,"[{'id': 818, 'name': 'based on novel'}, {'id':..."
4,11862,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'n..."


In [None]:
# id으로 기준으로 합치기

movie_data = movie_data.merge(df_key, on='id')

In [None]:
movie_data['keywords'] = movie_data['keywords'].apply(literal_eval)
movie_data['keywords'] = movie_data['keywords'].apply(lambda x : [d['name'] for d in x]).apply(lambda x : " ".join(x))

### TF-IDF 벡터화

In [None]:
tfidf_vector = TfidfVectorizer()
#tfidf_vector = TfidfVectorizer(ngram_range=(1,2))
tfidf_matrix = tfidf_vector.fit_transform(movie_data['keywords']).toarray()
#tfidf_matrix = tfidf_vector.fit_transform(movie_data['genres']).toarray()
tfidf_matrix_feature = tfidf_vector.get_feature_names()

In [None]:
tfidf_matrix.shape

(11308, 9599)

In [None]:
tfidf_matrix = pd.DataFrame(tfidf_matrix, columns=tfidf_matrix_feature, index = movie_data.keywords)
print(tfidf_matrix.shape)
tfidf_matrix.head()

(11308, 9599)


Unnamed: 0_level_0,10,10th,11,1500s,15th,16th,17th,18th,1905,1910s,1917,1920s,1930s,1940s,1950s,1960s,1970s,1980s,1990s,1992,1995,19th,2000,2001,2002,2079,20th,21st,25th,2nd,3d,51,60s,66,68,70s,95,aachen,aaron,abandoned,...,yurt,zagreb,zaire,zar,zaragoza,zealand,zealot,zebra,zeit,zeppelin,zero,zeus,zip,zither,zodiac,zombie,zombification,zone,zoo,zookeeper,zoom,zoophilia,zorro,zulu,zumaia,zurich,øverste,γη,битва,воин,любовь,卧底肥妈,绝地奶霸,超级妈妈,감시자들,변호인,소원,연애,오싹한,하울링
keywords,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
jealousy toy boy friendship friends rivalry boy next door new toy toy comes to life,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
board game disappearance based on children's book new home recluse giant insect,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fishing best friend duringcreditsstinger old men,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
based on novel interracial relationship single mother divorce chick flick,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
baby midlife crisis confidence aging daughter mother daughter relationship pregnancy contraception gynecologist,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 유사도 구하기

In [None]:
cosine_sim = cosine_similarity(tfidf_matrix)

In [None]:
cosine_sim.shape

(11308, 11308)

In [None]:
cosine_sim_df = pd.DataFrame(cosine_sim, index = movie_data.title, columns = movie_data.title)
print(cosine_sim_df.shape)
cosine_sim_df.head()

(11308, 11308)


title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II,Heat,Sabrina,Tom and Huck,Sudden Death,GoldenEye,The American President,Dracula: Dead and Loving It,Balto,Nixon,Cutthroat Island,Casino,Sense and Sensibility,Four Rooms,Ace Ventura: When Nature Calls,Money Train,Get Shorty,Copycat,Assassins,Powder,Leaving Las Vegas,Othello,Now and Then,Persuasion,The City of Lost Children,Dangerous Minds,Twelve Monkeys,Babe,Dead Man Walking,It Takes Two,Clueless,Richard III,Dead Presidents,Restoration,Mortal Kombat,To Die For,...,One Hundred Steps,2:22,A Bag of Marbles,Sweet Dreams,Batman Beyond: The Movie,What the Health,The Dark Tower,Annie,Inconceivable,Security,Overdrive,Snow White: The Fairest of Them All,Dirty Dancing,Banana,The Emoji Movie,Bedeviled,The Saint,Mom or Dad?,Wind River,Olga,The Nile Hilton Incident,Shot Caller,Brice 3,Wish Upon,Girls Trip,Detroit,Feed,Sahara,The Double Lover,First Kill,Descendants 2,The Olive Tree,With Open Arms,The Visitors: Bastille Day,Titanic 2,Frankenstein Created Woman,Take Me,The Incredible Jessica James,In a Heartbeat,Cadet Kelly
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
Toy Story,1.0,0.026736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029375,0.0,0.0,0.0,0.0,0.0,0.023373,0.021969,0.0,0.049345,0.0,0.0,0.03795,0.07273,0.0,0.147185,0.032888,0.0,0.037588,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054366,0.033974,0.02698,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Jumanji,0.026736,1.0,0.0,0.059444,0.0,0.0,0.0,0.0,0.0,0.0,0.04085,0.0,0.0,0.0,0.0,0.0,0.038432,0.030551,0.0,0.068622,0.044773,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.079209,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.104524,0.03752,...,0.0,0.0,0.0,0.0,0.0,0.028279,0.11644,0.093171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Grumpier Old Men,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.131119,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Waiting to Exhale,0.0,0.059444,0.0,1.0,0.077943,0.0,0.031984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.073012,0.0,0.043788,0.028572,0.08506,0.0,0.0,0.016167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051043,0.0,...,0.0,0.0,0.0,0.0,0.0,0.031118,0.221213,0.102523,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Father of the Bride Part II,0.0,0.0,0.0,0.077943,1.0,0.0,0.026187,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035852,0.023394,0.0,0.0,0.0,0.013236,0.0,0.0,0.142267,0.0,0.109383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 키워드 기반 추천 함수

In [None]:
def keywords_recommendations(target_title, matrix, items, k=10):
    recom_idx = matrix.loc[:, target_title].values.reshape(1, -1).argsort()[:, ::-1].flatten()[1:k+1]
    recom_title = items.iloc[recom_idx, :].title.values
    recom_keywords = items.iloc[recom_idx, :].keywords.values
 
    d = {

        'recom_title' : recom_title,
        'recom_keywords' : recom_keywords
    }
    return pd.DataFrame(d)

In [None]:
keywords_recommendations('Toy Story', cosine_sim_df, movie_data)

Unnamed: 0,recom_title,recom_keywords
0,Toy Story That Time Forgot,toy short toy story
1,Small Soldiers,defense industry toy shop technical toy soldie...
2,Barbie and the Three Musketeers,based on toy
3,Toy Story 3,hostage college toy barbie animation escape da...
4,Dolls,toy gore storm doll toy maker
5,Child's Play,gun birthday voodoo toy stalker murder blood s...
6,Child's Play 2,faithlessness puppet killer toys toy comes to ...
7,Toys,brother brother relationship loss of brother l...
8,The Indian in the Cupboard,cupboard games puppet parallel world toy comes...
9,The Transformers: The Movie,toy transformation based on toy transformers r...


## 배우 기반

In [None]:
movie_data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title,keywords
0,862,Toy Story,1995,5415.0,7.7,21.946943,Animation Comedy Family,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4,jealousy toy boy friendship friends rivalry bo...
1,8844,Jumanji,1995,2413.0,6.9,17.015539,Adventure Fantasy Family,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨,board game disappearance based on children's b...
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,Romance Comedy,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨,fishing best friend duringcreditsstinger old men
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,Comedy Drama Romance,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며,based on novel interracial relationship single...
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,Comedy,/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2,baby midlife crisis confidence aging daughter ...


In [None]:
movie_data['actor'] = movie_data['actor'].apply(lambda x : eval(x))
movie_data['actor'] = movie_data['actor'].apply(lambda x : " ".join(x))
movie_data['actor'].head()

0     톰 행크스 애니 파츠 토니 헤일 팀 알렌
1    카렌 길런 드웨인 존슨 케빈 하트 잭 블랙
2           월터 매튜 잭 레먼 앤 마그렛
3     휘트니 휴스턴 안젤라 바셋 로레타 드바인
4         스티브 마틴 다이안 키튼 마틴 숏
Name: actor, dtype: object

#### TF-IDF 벡터화

In [None]:
tfidf_vector = TfidfVectorizer()
#tfidf_vector = TfidfVectorizer(ngram_range=(1,2))
tfidf_matrix = tfidf_vector.fit_transform(movie_data['actor']).toarray()
#tfidf_matrix = tfidf_vector.fit_transform(movie_data['genres']).toarray()
tfidf_matrix_feature = tfidf_vector.get_feature_names()

In [None]:
tfidf_matrix.shape

(11308, 11832)

In [None]:
tfidf_matrix = pd.DataFrame(tfidf_matrix, columns=tfidf_matrix_feature, index = movie_data.actor)
print(tfidf_matrix.shape)
tfidf_matrix.head()

(11308, 11832)


Unnamed: 0_level_0,2세,3000,357,3세,50,aj,baker,cch,cj,dj,dragon,jb,jj,john,jr,ll,pj,rj,rm,st,가가,가가린,가나단,가너,가네시,가뇽,가데니아,가데부아,가돈,가돗,가드너,가디스,가랜트,가레이,가렐,가렛,가로팔로,가로포로,가르보,가르시아,...,히뷰,히비쿠,히비키,히사시,히사코,히스,히시미,히암,히어로님코,히에우,히요리,히치콕,히카리,히키,히킨,히토미,히튼,히틀러,히팅거,힉스,힌넨,힌들,힌리히스,힌쇼,힌우드,힌즈,힐근브링크,힐드,힐드레스,힐랜드,힐러리,힐리,힐리고스,힐리아드,힐마,힐미르,힐즈,힐튼,힐티,힝글
actor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
톰 행크스 애니 파츠 토니 헤일 팀 알렌,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
카렌 길런 드웨인 존슨 케빈 하트 잭 블랙,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
월터 매튜 잭 레먼 앤 마그렛,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
휘트니 휴스턴 안젤라 바셋 로레타 드바인,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
스티브 마틴 다이안 키튼 마틴 숏,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 유사도 구하기

In [None]:
cosine_sim = cosine_similarity(tfidf_matrix)

In [None]:
cosine_sim_df = pd.DataFrame(cosine_sim, index = movie_data.title, columns = movie_data.title)
print(cosine_sim_df.shape)
cosine_sim_df.head()

(11308, 11308)


title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II,Heat,Sabrina,Tom and Huck,Sudden Death,GoldenEye,The American President,Dracula: Dead and Loving It,Balto,Nixon,Cutthroat Island,Casino,Sense and Sensibility,Four Rooms,Ace Ventura: When Nature Calls,Money Train,Get Shorty,Copycat,Assassins,Powder,Leaving Las Vegas,Othello,Now and Then,Persuasion,The City of Lost Children,Dangerous Minds,Twelve Monkeys,Babe,Dead Man Walking,It Takes Two,Clueless,Richard III,Dead Presidents,Restoration,Mortal Kombat,To Die For,...,One Hundred Steps,2:22,A Bag of Marbles,Sweet Dreams,Batman Beyond: The Movie,What the Health,The Dark Tower,Annie,Inconceivable,Security,Overdrive,Snow White: The Fairest of Them All,Dirty Dancing,Banana,The Emoji Movie,Bedeviled,The Saint,Mom or Dad?,Wind River,Olga,The Nile Hilton Incident,Shot Caller,Brice 3,Wish Upon,Girls Trip,Detroit,Feed,Sahara,The Double Lover,First Kill,Descendants 2,The Olive Tree,With Open Arms,The Visitors: Bastille Day,Titanic 2,Frankenstein Created Woman,Take Me,The Incredible Jessica James,In a Heartbeat,Cadet Kelly
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
Toy Story,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Jumanji,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115431,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.104951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Grumpier Old Men,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125512,0.0,0.12422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.141324,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Waiting to Exhale,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Father of the Bride Part II,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 배우 기반 추천함수

In [None]:
def actor_recommendations(target_title, matrix, items, k=10):
    recom_idx = matrix.loc[:, target_title].values.reshape(1, -1).argsort()[:, ::-1].flatten()[1:k+1]
    recom_title = items.iloc[recom_idx, :].title.values
    recom_actor = items.iloc[recom_idx, :].actor.values

    d = {

        'recom_title' : recom_title,
        'recom_actor' : recom_actor
    }
    
    return pd.DataFrame(d)

In [None]:
actor_recommendations('The Dark Knight Rises', cosine_sim_df, movie_data)

Unnamed: 0,recom_title,recom_actor
0,"Guns, Girls and Gambling",게리 올드만 크리스찬 슬레이터
1,Immortal Beloved,게리 올드만
2,Hesher,나탈리 포트만 조셉 고든 레빗
3,Vice,크리스찬 베일
4,Lean On Me,모건 프리먼
5,Kiss the Girls,모건 프리먼
6,(500) Days of Summer,조셉 고든 레빗 주이 디샤넬
7,Snowden,조셉 고든 레빗 쉐일린 우들리
8,The Prestige,휴 잭맨 크리스찬 베일 마이클 케인 스칼릿 조핸슨
9,Going in Style,모건 프리먼 조이 킹 마이클 케인 알란 아킨


# 협업 필터링

## 데이터 정리

### 영화 데이터

In [None]:
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/final/crawling.csv')
data.drop(['Unnamed: 0'], axis=1, inplace=True)
data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,"['Adventure', 'Fantasy', 'Family']",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크 캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"['Romance', 'Comedy']",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드 도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,"['Comedy', 'Drama', 'Romance']",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트 휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,['Comedy'],/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스 샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2


In [None]:
movie_data = data.copy()
movie_data.head()

Unnamed: 0,id,title,year,vote_count,vote_average,popularity,genres,poster_path,director,actor,poster,link,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",https://ssl.pstatic.net/imgmovie/mdi/mit110/10...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,"['Adventure', 'Fantasy', 'Family']",/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,제이크 캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",https://ssl.pstatic.net/imgmovie/mdi/mit110/18...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"['Romance', 'Comedy']",/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,하워드 도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,"['Comedy', 'Drama', 'Romance']",/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,포레스트 휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,['Comedy'],/e64sOI48hQXyru7naBFyssKFxVd.jpg,찰스 샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",https://ssl.pstatic.net/imgmovie/mdi/mit110/01...,https://movie.naver.com/movie/bi/mi/basic.nhn?...,신부의 아버지 2


In [None]:
movie_data.rename(columns = {'id': 'movieId'}, inplace = True)
movie_data.drop(['poster_path', 'poster', 'link'], axis=1, inplace=True)
movie_data.head()

Unnamed: 0,movieId,title,year,vote_count,vote_average,popularity,genres,director,actor,crawling_title
0,862,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",토이 스토리 4
1,8844,Jumanji,1995,2413.0,6.9,17.015539,"['Adventure', 'Fantasy', 'Family']",제이크 캐스단,"['카렌 길런', '드웨인 존슨', '케빈 하트', '잭 블랙']",쥬만지: 넥스트 레벨
2,15602,Grumpier Old Men,1995,92.0,6.5,11.7129,"['Romance', 'Comedy']",하워드 도이치,"['월터 매튜', '잭 레먼', '앤 마그렛']",그럼피어 올드 맨
3,31357,Waiting to Exhale,1995,34.0,6.1,3.859495,"['Comedy', 'Drama', 'Romance']",포레스트 휘태커,"['휘트니 휴스턴', '안젤라 바셋', '로레타 드바인']",사랑을 기다리며
4,11862,Father of the Bride Part II,1995,173.0,5.7,8.387519,['Comedy'],찰스 샤이어,"['스티브 마틴', '다이안 키튼', '마틴 숏']",신부의 아버지 2


In [None]:
movie_data.isnull().sum()

movieId              0
title                0
year                 0
vote_count           0
vote_average         0
popularity           0
genres               0
director          1256
actor             1123
crawling_title    1123
dtype: int64

In [None]:
movie_data.dtypes

movieId             int64
title              object
year               object
vote_count        float64
vote_average      float64
popularity        float64
genres             object
director           object
actor              object
crawling_title     object
dtype: object

In [None]:
# 숫자 칼럼 type 변경

movie_data.movieId = pd.to_numeric(movie_data.movieId, errors='coerce')
movie_data.popularity = pd.to_numeric(movie_data.popularity, errors='coerce')

In [None]:
movie_data.dtypes

movieId             int64
title              object
year               object
vote_count        float64
vote_average      float64
popularity        float64
genres             object
director           object
actor              object
crawling_title     object
dtype: object

### 평가 데이터

In [None]:
rating_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/final/rating_mid.csv')
rating_data.head()

Unnamed: 0.1,Unnamed: 0,movieId,rating,userId
0,184624,862,3.0,1923
1,200490,862,5.0,2103
2,524001,862,1.0,5380
3,610887,862,4.0,6177
4,643892,862,4.0,6525


In [None]:
rating_data.drop('Unnamed: 0', axis = 1, inplace=True)
rating_data.head()

Unnamed: 0,movieId,rating,userId
0,862,3.0,1923
1,862,5.0,2103
2,862,1.0,5380
3,862,4.0,6177
4,862,4.0,6525


In [None]:
rating_data.dtypes

movieId      int64
rating     float64
userId       int64
dtype: object

### movieId로 정렬

In [None]:
user_movie_ratings = pd.merge(rating_data, movie_data, on = 'movieId')
user_movie_ratings.head()

Unnamed: 0,movieId,rating,userId,title,year,vote_count,vote_average,popularity,genres,director,actor,crawling_title
0,862,3.0,1923,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",토이 스토리 4
1,862,5.0,2103,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",토이 스토리 4
2,862,1.0,5380,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",토이 스토리 4
3,862,4.0,6177,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",토이 스토리 4
4,862,4.0,6525,Toy Story,1995,5415.0,7.7,21.946943,"['Animation', 'Comedy', 'Family']",조시 쿨리,"['톰 행크스', '애니 파츠', '토니 헤일', '팀 알렌']",토이 스토리 4


### 피벗 테이블

In [None]:
movie_user_rating = user_movie_ratings.pivot_table('rating', index = 'title', columns='userId')
user_movie_rating = user_movie_ratings.pivot_table('rating', index = 'userId', columns='title')

In [None]:
# 영화별 이용자 평가

movie_user_rating.head()

userId,1,2,3,4,5,6,8,11,12,15,16,20,21,22,23,24,27,28,29,30,31,32,33,34,35,36,37,40,41,43,46,47,48,49,50,52,53,54,55,56,...,270842,270844,270846,270847,270848,270849,270850,270851,270852,270854,270855,270857,270859,270860,270861,270863,270866,270867,270868,270869,270870,270871,270872,270874,270875,270877,270878,270879,270881,270882,270883,270884,270885,270887,270891,270892,270893,270894,270895,270896
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
"20,000 Leagues Under the Sea",,,,,,,,,,,,,,,,,,,,,,,,2.5,,,,,,,2.0,,,4.0,,,,,,,...,,,,,,,,,,,,,,3.0,,,,,,,,2.5,,,,,,,,,,,,,,,,,,
2001: A Space Odyssey,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,4.0,,,,,,,,,3.0,,,,...,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,5.0,,,,,
A Christmas Carol,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
A Close Shave,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,
A Fish Called Wanda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
# 이용자별 영화 평가

user_movie_rating.head()

title,"20,000 Leagues Under the Sea",2001: A Space Odyssey,A Christmas Carol,A Close Shave,A Fish Called Wanda,A Streetcar Named Desire,A Time to Kill,Ace Ventura: Pet Detective,Addams Family Values,Aladdin,All About Eve,An American in Paris,Apollo 13,Around the World in Eighty Days,Barbarella,Basic Instinct,Basquiat,Batman,Batman Forever,Before Sunrise,Belle de Jour,Beverly Hills Cop III,Blade Runner,Blink,Blown Away,Body Snatchers,Bogus,Bonnie and Clyde,Braveheart,Breakfast at Tiffany's,Bringing Up Baby,Carlito's Way,Casablanca,Casino,Casper,Cat on a Hot Tin Roof,Charade,Children of the Corn IV: The Gathering,Citizen Kane,Clerks,...,The People vs. Larry Flynt,The Philadelphia Story,The Piano,The Remains of the Day,The River Wild,The Shadow,The Shawshank Redemption,The Silence of the Lambs,The Specialist,The Thin Man,The Tin Drum,The Truth About Cats & Dogs,The Umbrellas of Cherbourg,The Usual Suspects,The Wild Bunch,The Wizard of Oz,The World of Apu,The Wrong Trousers,Things to Do in Denver When You're Dead,Three Colors: Blue,Three Colors: Red,Three Colors: White,Tie Me Up! Tie Me Down!,Timecop,To Be or Not to Be,To Catch a Thief,To Die For,Top Gun,Top Hat,Toy Story,Trainspotting,True Romance,Twelve Monkeys,Twister,Under Siege 2: Dark Territory,Vertigo,Weekend at Bernie's,What's Eating Gilbert Grape,While You Were Sleeping,Willy Wonka & the Chocolate Factory
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## 필터링

### 영화별 유사도

In [None]:
# 결측치 0으로 치환

movie_user_rating.fillna(0, inplace = True)
movie_user_rating.head()

userId,1,2,3,4,5,6,8,11,12,15,16,20,21,22,23,24,27,28,29,30,31,32,33,34,35,36,37,40,41,43,46,47,48,49,50,52,53,54,55,56,...,270842,270844,270846,270847,270848,270849,270850,270851,270852,270854,270855,270857,270859,270860,270861,270863,270866,270867,270868,270869,270870,270871,270872,270874,270875,270877,270878,270879,270881,270882,270883,270884,270885,270887,270891,270892,270893,270894,270895,270896
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
"20,000 Leagues Under the Sea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001: A Space Odyssey,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
A Christmas Carol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Close Shave,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
A Fish Called Wanda,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 영화별 평점 유사도

In [None]:
# 영화별 평점 유사도 측정 ->코사인 유사도

item_based_collabor = cosine_similarity(movie_user_rating)
item_based_collabor

array([[1.        , 0.162587  , 0.01875008, ..., 0.229943  , 0.07615058,
        0.25240447],
       [0.162587  , 1.        , 0.02606502, ..., 0.08851892, 0.09805459,
        0.25063639],
       [0.01875008, 0.02606502, 1.        , ..., 0.02793086, 0.04198976,
        0.0297984 ],
       ...,
       [0.229943  , 0.08851892, 0.02793086, ..., 1.        , 0.14549759,
        0.0880902 ],
       [0.07615058, 0.09805459, 0.04198976, ..., 0.14549759, 1.        ,
        0.0808977 ],
       [0.25240447, 0.25063639, 0.0297984 , ..., 0.0880902 , 0.0808977 ,
        1.        ]])

In [None]:
print(movie_user_rating.shape)
print(item_based_collabor.shape)

(255, 198321)
(255, 255)


#### 영화 간 평점 유사도

In [None]:
# 영화 간 평점 유사도

item_based_collabor = pd.DataFrame(data = item_based_collabor, index = movie_user_rating.index, columns = movie_user_rating.index)
item_based_collabor.head()

title,"20,000 Leagues Under the Sea",2001: A Space Odyssey,A Christmas Carol,A Close Shave,A Fish Called Wanda,A Streetcar Named Desire,A Time to Kill,Ace Ventura: Pet Detective,Addams Family Values,Aladdin,All About Eve,An American in Paris,Apollo 13,Around the World in Eighty Days,Barbarella,Basic Instinct,Basquiat,Batman,Batman Forever,Before Sunrise,Belle de Jour,Beverly Hills Cop III,Blade Runner,Blink,Blown Away,Body Snatchers,Bogus,Bonnie and Clyde,Braveheart,Breakfast at Tiffany's,Bringing Up Baby,Carlito's Way,Casablanca,Casino,Casper,Cat on a Hot Tin Roof,Charade,Children of the Corn IV: The Gathering,Citizen Kane,Clerks,...,The People vs. Larry Flynt,The Philadelphia Story,The Piano,The Remains of the Day,The River Wild,The Shadow,The Shawshank Redemption,The Silence of the Lambs,The Specialist,The Thin Man,The Tin Drum,The Truth About Cats & Dogs,The Umbrellas of Cherbourg,The Usual Suspects,The Wild Bunch,The Wizard of Oz,The World of Apu,The Wrong Trousers,Things to Do in Denver When You're Dead,Three Colors: Blue,Three Colors: Red,Three Colors: White,Tie Me Up! Tie Me Down!,Timecop,To Be or Not to Be,To Catch a Thief,To Die For,Top Gun,Top Hat,Toy Story,Trainspotting,True Romance,Twelve Monkeys,Twister,Under Siege 2: Dark Territory,Vertigo,Weekend at Bernie's,What's Eating Gilbert Grape,While You Were Sleeping,Willy Wonka & the Chocolate Factory
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
"20,000 Leagues Under the Sea",1.0,0.162587,0.01875,0.169667,0.012977,0.019534,0.212435,0.02476,0.011077,0.006181,0.02985,0.050182,0.014153,0.011438,0.0173,0.012817,0.021655,0.061308,0.068473,0.170825,0.011325,0.082775,0.056808,0.059108,0.044518,0.015359,0.033448,0.135609,0.027904,0.131471,0.048713,0.014637,0.120404,0.144614,0.037804,0.139868,0.059483,0.030574,0.227755,0.029654,...,0.001676,0.029255,0.011531,0.102321,0.012596,0.033249,0.077234,0.117353,0.027642,0.048158,0.019353,0.060822,0.01608,0.010787,0.013008,0.019399,0.014663,0.104651,0.006156,0.021311,0.331765,0.001726,0.087812,0.058063,0.271689,0.134837,0.052184,0.005287,0.017342,0.017243,0.060094,0.142429,0.087656,0.030486,0.04834,0.190894,0.034805,0.229943,0.076151,0.252404
2001: A Space Odyssey,0.162587,1.0,0.026065,0.130854,0.015595,0.025494,0.122949,0.025231,0.020709,0.009881,0.036851,0.039647,0.031501,0.011441,0.023841,0.019412,0.045149,0.053225,0.081016,0.126653,0.013767,0.091139,0.114406,0.020136,0.034859,0.022702,0.012055,0.171912,0.04519,0.126132,0.091549,0.005279,0.160755,0.219983,0.015128,0.25334,0.046669,0.024118,0.118348,0.032201,...,0.0048,0.044167,0.035125,0.096,0.026339,0.025621,0.090907,0.110386,0.013226,0.047544,0.03151,0.064211,0.011745,0.010444,0.012827,0.053307,0.037403,0.180086,0.022428,0.03461,0.260835,0.007467,0.111511,0.037155,0.127234,0.188962,0.063939,0.012406,0.001616,0.051444,0.102887,0.098817,0.086887,0.072646,0.095438,0.088991,0.030648,0.088519,0.098055,0.250636
A Christmas Carol,0.01875,0.026065,1.0,0.032959,0.017999,0.053668,0.029052,0.083968,0.033217,0.018317,0.051037,0.030034,0.015241,0.063647,0.167746,0.028137,0.045436,0.016064,0.036607,0.021145,0.026073,0.026602,0.032432,0.092376,0.016777,0.040913,0.027703,0.030508,0.052851,0.037333,0.077081,0.024483,0.027052,0.024307,0.058113,0.028554,0.046206,0.104251,0.017542,0.01346,...,0.017974,0.008248,0.010503,0.044652,0.084139,0.070905,0.022209,0.011752,0.04554,0.063932,0.069697,0.03581,0.038864,0.017416,0.029543,0.015514,0.03086,0.029223,0.0,0.009345,0.023627,0.0,0.059617,0.05319,0.023428,0.022516,0.028676,0.0,0.071842,0.022985,0.03352,0.023314,0.01268,0.023272,0.048307,0.024094,0.135635,0.027931,0.04199,0.029798
A Close Shave,0.169667,0.130854,0.032959,1.0,0.015523,0.056854,0.142982,0.04697,0.037319,0.026747,0.056348,0.061164,0.061095,0.039683,0.028368,0.023443,0.080384,0.106793,0.07687,0.112946,0.035258,0.12324,0.095904,0.03469,0.097541,0.04375,0.05214,0.173588,0.049069,0.189395,0.096095,0.016745,0.163178,0.146688,0.048327,0.155622,0.073127,0.034202,0.140885,0.044207,...,0.001722,0.03157,0.021817,0.143831,0.033061,0.025029,0.154851,0.124441,0.038534,0.085522,0.059667,0.037872,0.012414,0.019921,0.027122,0.033733,0.067607,0.153781,0.012889,0.034525,0.159965,0.006445,0.157721,0.040135,0.216423,0.143766,0.053015,0.020949,0.012953,0.05971,0.125426,0.191237,0.08678,0.059501,0.058267,0.23112,0.053209,0.114424,0.13788,0.198992
A Fish Called Wanda,0.012977,0.015595,0.017999,0.015523,1.0,0.030864,0.005244,0.008804,0.009248,0.059156,0.040554,0.014894,0.040139,0.0,0.026405,0.050937,0.009083,0.036884,0.011887,0.033018,0.050797,0.019101,0.029227,0.00846,0.048867,0.0,0.0,0.012497,0.068762,0.018776,0.008652,0.0,0.034046,0.013458,0.0,0.019481,0.009691,0.006311,0.023098,0.031941,...,0.03492,0.071376,0.074199,0.00967,0.0,0.015357,0.054543,0.056239,0.017897,0.014793,0.044404,0.018884,0.048048,0.088468,0.052565,0.08631,0.018178,0.02207,0.125036,0.050063,0.010991,0.021651,0.011077,0.0,0.018487,0.028925,0.013141,0.108257,0.0,0.035724,0.042738,0.016317,0.02119,0.079855,0.012771,0.02141,0.011412,0.002884,0.005239,0.01963


#### 유사도 추천 함수

In [None]:
def get_item_based_collabor(title):
    list_item =  item_based_collabor[title].sort_values(ascending=False)[1:11].index
    
    item_recc = pd.Series()
    
    for item in list_item:
        item_recc = pd.concat([item_recc, movie_data[movie_data['title'] == item]])
    
    return item_recc[['title', 'movieId']]

In [None]:
get_item_based_collabor('Toy Story')

Unnamed: 0,title,movieId
518,The Wizard of Oz,630.0
593,Die Hard,562.0
381,Dances with Wolves,581.0
77,Braveheart,197.0
516,Roman Holiday,804.0
62,Things to Do in Denver When You're Dead,400.0
549,It's a Wonderful Life,1585.0
330,The Piano,713.0
312,M. Butterfly,1413.0
415,The World of Apu,896.0


### 이용자별 유사도

In [None]:
# 결측치 0으로 치환

user_movie_rating.fillna(0, inplace = True)
user_movie_rating.head()

title,"20,000 Leagues Under the Sea",2001: A Space Odyssey,A Christmas Carol,A Close Shave,A Fish Called Wanda,A Streetcar Named Desire,A Time to Kill,Ace Ventura: Pet Detective,Addams Family Values,Aladdin,All About Eve,An American in Paris,Apollo 13,Around the World in Eighty Days,Barbarella,Basic Instinct,Basquiat,Batman,Batman Forever,Before Sunrise,Belle de Jour,Beverly Hills Cop III,Blade Runner,Blink,Blown Away,Body Snatchers,Bogus,Bonnie and Clyde,Braveheart,Breakfast at Tiffany's,Bringing Up Baby,Carlito's Way,Casablanca,Casino,Casper,Cat on a Hot Tin Roof,Charade,Children of the Corn IV: The Gathering,Citizen Kane,Clerks,...,The People vs. Larry Flynt,The Philadelphia Story,The Piano,The Remains of the Day,The River Wild,The Shadow,The Shawshank Redemption,The Silence of the Lambs,The Specialist,The Thin Man,The Tin Drum,The Truth About Cats & Dogs,The Umbrellas of Cherbourg,The Usual Suspects,The Wild Bunch,The Wizard of Oz,The World of Apu,The Wrong Trousers,Things to Do in Denver When You're Dead,Three Colors: Blue,Three Colors: Red,Three Colors: White,Tie Me Up! Tie Me Down!,Timecop,To Be or Not to Be,To Catch a Thief,To Die For,Top Gun,Top Hat,Toy Story,Trainspotting,True Romance,Twelve Monkeys,Twister,Under Siege 2: Dark Territory,Vertigo,Weekend at Bernie's,What's Eating Gilbert Grape,While You Were Sleeping,Willy Wonka & the Chocolate Factory
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 영화별 평점 유사도

In [None]:
# 영화별 평점 유사도 측정 ->코사인 유사도

user_based_collabor = cosine_similarity(user_movie_rating)
user_based_collabor

In [None]:
print(user_movie_rating.shape)
print(user_based_collabor.shape)

#### 이용자 간 평점 유사도

In [None]:
# 이용자 간 평점 유사도

user_based_collabor = pd.DataFrame(data = user_based_collabor, index = user_movie_rating.index, columns = user_movie_rating.index)
user_based_collabor.head()

NameError: ignored

#### 유사도 추천 함수

In [None]:
def get_user_based_collabor(userId):

    similar_user_ids = list(user_based_collabor[userId].sort_values(ascending=False)[1:6].keys())

    top_movie_titles = pd.Series()

    for user_id in similar_user_ids:
        top_movie_per_user = movie_user_rating[user_id].sort_values(ascending=False)[:10]
        top_movie_titles = pd.concat([top_movie_titles, top_movie_per_user])
        
    return top_movie_titles.sort_values(ascending=False)[:10]

In [None]:
get_user_based_collabor(161)

The Million Dollar Hotel              5.0
Monsieur Ibrahim                      5.0
Stand by Me                           5.0
Solaris                               5.0
Terminator 3: Rise of the Machines    5.0
Dancer in the Dark                    5.0
Men in Black II                       5.0
Judgment Night                        5.0
Scarface                              5.0
Monsieur Ibrahim                      5.0
dtype: float64

### 신규유저

In [None]:
rating_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/final/rating_mid.csv')
rating_data.drop('Unnamed: 0', axis = 1, inplace=True)
rating_data.head()

In [None]:
new_user_rating = [[611, 79132, 1]]
# new_user_rating = [[611, 318, 5.0], [611, 2324, 5.0], [611, 109487, 1.0], [611, 79132, 1.0]]


In [None]:
for i in new_user_rating:
    rating_data.loc[len(rating_data)] = i

In [None]:
rating_data['userId'] = rating_data['userId'].astype('int64')
rating_data['movieId'] = rating_data['movieId'].astype('int64')
rating_data['rating'] = rating_data['rating'].astype('float')

rating_data.tail()

In [None]:
rating_data.dtypes

In [None]:
ratings_movies = pd.merge(rating_data, movie_data, on='movieId')
user_rating_pivot = ratings_movies.pivot_table('rating', index = 'userId', columns = 'movieId').fillna(0)

In [None]:
user_based_collabor = cosine_similarity(user_rating_pivot)
user_based_collabor = pd.DataFrame(data = user_based_collabor, index = user_rating_pivot.index, columns = user_rating_pivot.index)
user_based_collabor.head()

In [None]:
def get_user_like(userId):
    user_sim_index = user_based_collabor.loc[userId].sort_values(ascending=False).index[1]
    sim_user_rec = user_rating_pivot.loc[user_sim_index].sort_values(ascending=False)[:7].index
    user_movie_index = user_rating_pivot.loc[userId][user_rating_pivot.loc[userId] != 0.0].index
    sim_user_rec = [i for i in list(sim_user_rec) if i not in list(user_movie_index)]
    return movie_data.loc[sim_user_rec]['title']

In [None]:
get_user_like(611)

## 개인별 추천

### 데이터 준비

In [None]:
# userId와 movieId 간 피벗테이블
user_movie_id_ratings = rating_data.pivot_table('rating', index='userId', columns='movieId').fillna(0)
user_movie_id_ratings

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
668,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
669,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
670,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# matrix로 변환

matrix = user_movie_id_ratings.values
matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [5., 0., 0., ..., 0., 0., 0.]])

In [None]:
# 이용자 평균 평점

user_rating_mean = np.mean(matrix, axis=1)
user_rating_mean

array([0.00562541, 0.02923009, 0.02007501, 0.09783808, 0.04312817,
       0.01582837, 0.03364218, 0.04947055, 0.01864108, 0.01875138,
       0.01709685, 0.01853077, 0.02189499, 0.00650783, 0.49161703,
       0.01318112, 0.14990073, 0.01819987, 0.16490183, 0.03557247,
       0.06265167, 0.07947276, 0.29092213, 0.00849327, 0.00893448,
       0.06579528, 0.0097066 , 0.02360468, 0.00694904, 0.41986543,
       0.03171189, 0.01941319, 0.04985661, 0.08118244, 0.00529451,
       0.04147364, 0.01433929, 0.05255901, 0.02592102, 0.02139863,
       0.08487756, 0.03099493, 0.03639974, 0.00926539, 0.00838297,
       0.02128833, 0.01621443, 0.19887492, 0.03783366, 0.01665564,
       0.01356717, 0.02768586, 0.01411869, 0.01842047, 0.01522171,
       0.2081403 , 0.09121994, 0.02746525, 0.02652769, 0.02536951,
       0.05994926, 0.02360468, 0.04020516, 0.00926539, 0.01235385,
       0.02139863, 0.04268696, 0.04919479, 0.03695125, 0.03849548,
       0.01080962, 0.06833223, 0.59921685, 0.02217075, 0.05244

In [None]:
# 영화별 이용자 평점 - 이용자 평균 평점

matrix_user_mean = matrix - user_rating_mean.reshape(-1, 1)
matrix_user_mean

array([[-0.00562541, -0.00562541, -0.00562541, ..., -0.00562541,
        -0.00562541, -0.00562541],
       [-0.02923009, -0.02923009, -0.02923009, ..., -0.02923009,
        -0.02923009, -0.02923009],
       [-0.02007501, -0.02007501, -0.02007501, ..., -0.02007501,
        -0.02007501, -0.02007501],
       ...,
       [-0.01367748, -0.01367748, -0.01367748, ..., -0.01367748,
        -0.01367748, -0.01367748],
       [ 3.98698434, -0.01301566, -0.01301566, ..., -0.01301566,
        -0.01301566, -0.01301566],
       [ 4.95030885, -0.04969115, -0.04969115, ..., -0.04969115,
        -0.04969115, -0.04969115]])

In [None]:
user_mean = pd.DataFrame(matrix_user_mean, columns = user_movie_id_ratings.columns)
user_mean

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
0,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,...,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625
1,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,3.970770,...,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230,-0.029230
2,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,...,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075
3,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,3.902162,...,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838
4,-0.043128,-0.043128,3.956872,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,...,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
666,-0.027355,-0.027355,-0.027355,-0.027355,-0.027355,3.972645,-0.027355,-0.027355,-0.027355,-0.027355,...,-0.027355,-0.027355,-0.027355,-0.027355,-0.027355,-0.027355,-0.027355,-0.027355,-0.027355,-0.027355
667,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,...,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273,-0.008273
668,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,...,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677,-0.013677
669,3.986984,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,...,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016,-0.013016


### SVD

#### U, sigma 행렬, Vt 전치행렬

In [None]:
# U, sigma 행렬, Vt 전치행렬

U, sigma, Vt = svds(user_mean, k=12)

In [None]:
U.shape

(671, 12)

In [None]:
sigma.shape

(12,)

In [None]:
Vt.shape

(12, 9066)

In [None]:
# sigma 행렬 대치행렬 변환

sigma = np.diag(sigma)
sigma.shape

(12, 12)

#### U, sigma, Vt 내적수행

In [None]:
# U, sigma, Vt 내적수행으로 원본행렬 복원 + 이용자 평균 평점

svd_user_predition_ratings = np.dot(np.dot(U, sigma), Vt) + user_rating_mean.reshape(-1, 1)

In [None]:
# 원본 행렬의 column 적용

svd_predition = pd.DataFrame(svd_user_predition_ratings, columns = user_movie_id_ratings.columns)
svd_predition

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
0,-0.079686,0.021779,-0.013837,-0.005870,-0.028877,0.032371,0.000715,-0.004428,-0.005219,0.038195,...,-0.004324,-0.004352,0.010478,-0.004256,-0.003944,-0.005674,0.018157,-0.005575,-0.005297,-0.003766
1,1.428452,1.608841,0.529476,0.168278,0.520809,1.107473,0.529719,0.089376,0.296270,1.970031,...,0.013227,-0.002275,0.020680,-0.005245,-0.007644,-0.021019,0.031243,-0.000957,-0.000753,0.026901
2,0.977246,0.396971,0.000299,0.027444,0.021287,0.141458,-0.057134,0.031633,-0.012538,0.383576,...,0.002761,0.004907,-0.014190,-0.000251,-0.006007,-0.003189,-0.026916,0.014637,0.013287,-0.005741
3,1.870844,1.169993,0.252202,0.094831,-0.181713,-0.511953,-0.027820,-0.143080,0.013247,1.461694,...,0.026412,-0.027245,0.054681,0.018450,0.034544,-0.035740,0.088889,-0.019365,-0.017113,0.066559
4,1.182777,0.924903,0.075998,0.061505,0.602680,-0.159825,0.339925,0.081534,-0.079666,0.535018,...,-0.029124,-0.029357,0.009064,-0.029092,-0.030890,-0.057453,0.026344,-0.024027,-0.024614,-0.032752
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
666,1.577140,1.002294,0.699893,0.157323,0.631404,1.447100,0.658630,0.027763,0.347034,1.393400,...,-0.000740,0.005554,-0.022873,0.002715,0.005142,0.009042,-0.033241,-0.003929,-0.003212,0.000658
667,0.405596,0.011198,0.022474,0.025968,-0.015645,0.222755,-0.074666,0.000733,0.001368,0.074340,...,-0.004024,0.005189,-0.008046,0.005195,0.004814,-0.007049,-0.015048,0.005298,0.005562,-0.014007
668,0.360704,-0.000016,0.086261,0.012227,0.090441,0.053472,0.028401,-0.010500,0.008004,-0.021811,...,-0.002954,-0.000573,-0.012094,0.003460,0.005672,-0.002675,-0.017659,-0.001340,-0.001189,-0.004946
669,1.054516,0.265079,0.223782,0.054706,0.189281,0.584825,0.129335,0.017404,0.088330,0.435521,...,-0.002090,0.004452,-0.039603,0.003322,0.002830,0.001331,-0.061556,0.005344,0.004928,-0.008484


### 개인별 영화 추천 함수

In [None]:
def recommand_movies(userId):
        
    # userId가 index화 하면서 1씩 감소해 있으므로
    user_row_number = userId - 1
    # svd_predition에서 이용자 찾아서 영화 평점 높은순으로 정렬
    sorted_pred = svd_predition.iloc[user_row_number].sort_values(ascending=False)
    # rating_data에서 userId 데이터 추출
    user_data = rating_data[rating_data.userId == userId]
    # movie_data와 합치고 평점 순으로 정렬
    already_rated = user_data.merge(movie_data, on = 'movieId').sort_values('rating', ascending=False)
    
    # 사용자가 본 영화 제외
    predictions = movie_data[~movie_data.isin(already_rated['movieId'])]
    # sorted_pred와 함침
    predictions = predictions.merge(pd.DataFrame(sorted_pred).reset_index(), on = 'movieId')
    # 컬럼이름바꾸고 정렬
    predictions = predictions.rename(columns = {user_row_number : 'Predictions'}).sort_values('Predictions', ascending=False)
    
    return already_rated, predictions

In [None]:
already_rated, predictions = recommand_movies(161)

In [None]:
already_rated.shape

(60, 12)

In [None]:
predictions['movieId', 'title', 'Predictions']

Unnamed: 0,movieId,title,year,vote_count,vote_average,popularity,genres,director,actor,crawling_title,Predictions
854,590,The Hours,2002,461.0,7.0,14.686487,['Drama'],알렉산드라 브롭첸코,[],플라워스 라운더 더 아워스,4.600576
1658,457,Sissi,1955,109.0,7.3,11.02505,"['Comedy', 'Drama', 'Romance']",마티 올리케이넨,[],게릴라,4.551762
774,150,48 Hrs.,1982,364.0,6.5,15.297121,"['Thriller', 'Action', 'Comedy', 'Crime', 'Dra...",월터 힐,"['에디 머피', '닉 놀테']",<b>48</b>시간 2,4.534732
891,296,Terminator 3: Rise of the Machines,2003,2177.0,5.9,20.818907,"['Action', 'Thriller', 'Science Fiction']",조나단 모스토우,['아놀드 슈왈제네거'],터미네이터 <b>3</b> - 라이즈 오브 더 머신,4.527702
345,380,Rain Man,1988,1762.0,7.6,11.267467,['Drama'],미구엘 무뇨즈,[],비를 본 적 없는 남자,4.302787
