In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

import warnings
warnings.filterwarnings('ignore')

In [2]:
import joblib
from ast import literal_eval

# Data 전처리 & merge

## metadata

In [3]:
data1 = pd.read_csv('./dataset/movies_metadata.csv')

In [4]:
columns_use = joblib.load("./dataset/metadata_columns_use.pkl")
columns_tbd = joblib.load("./dataset/metadata_columns_tbd.pkl")

In [5]:
data1 = data1[columns_use]

In [6]:
data1.shape

(45466, 9)

### genres

In [7]:
data1['genres'].isnull().sum()

0

In [8]:
data1['genres'] = data1['genres'].apply(literal_eval)

In [9]:
data1['genres'][0]

[{'id': 16, 'name': 'Animation'},
 {'id': 35, 'name': 'Comedy'},
 {'id': 10751, 'name': 'Family'}]

In [10]:
type(data1['genres'][0])

list

In [11]:
data1['genres'] = data1['genres'].apply(lambda x : [d['name'] for d in x]).apply(lambda x : [d.replace(' ', '') for d in x]).apply(lambda x : " ".join(x))

In [12]:
data1['genres'][0]

'Animation Comedy Family'

### id

In [13]:
data1['id'].isnull().sum()

0

In [14]:
data1[data1.id.str.contains('[^0-9]')]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count
19730,CarouselProductions VisionViewEntertainment Te...,1997-08-20,104.0,Released,,,,,
29503,Aniplex GoHands BROSTATV MardockScrambleProduc...,2012-09-29,68.0,Released,,,,,
35587,OdysseyMedia PulserProductions RogueState TheC...,2014-01-01,82.0,Released,Beware Of Frost Bites,,,,


In [15]:
index = data1[data1.id.str.contains('[^0-9]')].index

In [16]:
data1.drop(index=index, axis=0, inplace=True)

In [17]:
data1['id'] = data1.id.astype('int64')

In [18]:
len(data1['id'].unique()) == len(data1)

False

In [19]:
data1 = data1.drop_duplicates('id')

In [20]:
len(data1['id'].unique()) == len(data1)

True

### original_language

In [21]:
data1[data1['original_language'].isnull()]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count
19574,Documentary,283101,,Documentary about the production of The Third ...,0.017007,"[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...",Shadowing the Third Man,0.0,0.0
21602,Romance Drama,103902,,An Outback farmer takes in an Afghani woman wh...,0.359818,"[{'iso_639_1': 'en', 'name': 'English'}]",Unfinished Sky,6.4,8.0
22832,War Western,359195,,A group of Union Army soldiers is charged with...,0.070647,"[{'iso_639_1': 'en', 'name': 'English'}]",13 Fighting Men,0.0,0.0
32141,Comedy,147050,,George and Gracie enter an elegant drawing roo...,0.073418,"[{'iso_639_1': 'en', 'name': 'English'}]",Lambchops,6.5,2.0
37407,Animation,257095,,The first fairy tale transformed into a full-l...,0.036841,"[{'iso_639_1': 'cs', 'name': 'Český'}]",Prince Bayaya,5.0,1.0
41047,Documentary,332742,,"Until the late 1970s, the Pakistani city of La...",0.373688,"[{'iso_639_1': 'ur', 'name': 'اردو'}, {'iso_63...",Song of Lahore,6.5,2.0
41872,Documentary,144410,,Two dancers perform in succession facing a sta...,1.58434,"[{'iso_639_1': 'xx', 'name': 'No Language'}]",Annabelle Serpentine Dance,5.7,20.0
44057,Drama,380438,,,0.001283,"[{'iso_639_1': 'fr', 'name': 'Français'}]",Lettre d'une inconnue,0.0,0.0
44410,Animation Documentary,381096,,The traditional crafts of crochet and knitting...,0.067624,"[{'iso_639_1': 'sv', 'name': 'svenska'}, {'iso...",Yarn,0.0,0.0
44576,,381525,,American investment fund buys Austrian compani...,0.030766,"[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...",WiNWiN,0.0,0.0


In [22]:
data1['original_language'].value_counts() / len(data1) * 100

en    70.985847
fr     5.361741
it     3.365395
ja     2.964805
de     2.374926
        ...    
tg     0.002201
rw     0.002201
fy     0.002201
hy     0.002201
la     0.002201
Name: original_language, Length: 89, dtype: float64

- 누락값은 모두 en으로 대체

In [23]:
index = data1[data1['original_language'].isnull()].index

In [24]:
data1.loc[index, 'original_language'] = 'en'

### overview

In [25]:
data1[data1['overview'] == 'Released']

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count


In [26]:
data1['overview'] = data1['overview'].fillna('')

In [27]:
data1[data1.overview.str.contains('comedy')]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count
44,Fantasy Drama Comedy Thriller,577,en,Susan wants to work in television and will the...,10.4485,"[{'iso_639_1': 'en', 'name': 'English'}]",To Die For,6.7,177.0
51,Comedy Romance,11448,en,"When Lenny and his wife, Amanda, adopt a baby,...",15.8991,"[{'iso_639_1': 'en', 'name': 'English'}]",Mighty Aphrodite,6.7,145.0
117,Comedy Drama,78406,en,"Ruben and Robby are twin brothers, adopted by ...",1.52241,"[{'iso_639_1': 'en', 'name': 'English'}]",Steal Big Steal Little,5.1,7.0
127,Comedy Romance,110972,en,Pie in the Sky is a 1996 American romantic com...,0.699066,"[{'iso_639_1': 'en', 'name': 'English'}]",Pie in the Sky,7.0,2.0
154,Action Comedy,1775,en,Canadian Bacon is the only fictitious film fro...,5.39156,"[{'iso_639_1': 'en', 'name': 'English'}]",Canadian Bacon,5.9,57.0
...,...,...,...,...,...,...,...,...,...
45216,Comedy,7014,fr,"Made without proper language, just gibberish a...",2.018911,"[{'iso_639_1': 'xx', 'name': 'No Language'}]",Themroc,7.1,9.0
45266,Comedy,373247,en,A black comedy set during the aftermath of the...,0.947866,"[{'iso_639_1': 'en', 'name': 'English'}]",Down Under,7.2,9.0
45269,Comedy,454787,en,"A comedy about depression, alcoholism, suicide...",1.135015,"[{'iso_639_1': 'en', 'name': 'English'}]",Chris Gethard: Career Suicide,7.5,4.0
45288,ScienceFiction Comedy Fantasy,36391,en,A comedy. The story follows a young scientist ...,0.529836,"[{'iso_639_1': 'pl', 'name': 'Polski'}]",King Size,7.3,8.0


### popularity

In [28]:
data1[data1['popularity'].isnull()]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count
19729,Action Thriller Drama,82663,en,British soldiers force a recently captured IRA...,,,,,
29502,Animation ScienceFiction,122662,ja,Third film of the Mardock Scramble series.,,,,,
35586,TVMovie Action Horror ScienceFiction,249260,en,A group of skiers are terrorized during spring...,,,,,


In [29]:
index = data1[data1['popularity'].isnull()].index

In [30]:
data1.drop(index=index, axis=0, inplace=True)

In [31]:
data1['popularity'] = data1['popularity'].astype('float')

### spoken_languages

In [32]:
data1[data1['spoken_languages'].isnull()]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count


In [33]:
data1['spoken_languages'] = data1['spoken_languages'].apply(literal_eval)

In [34]:
data1['spoken_languages'] = data1['spoken_languages'].apply(lambda x : [d['name'] for d in x]).apply(lambda x : " ".join(x))

### title

In [35]:
data1[data1['title'].isnull()]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count


In [36]:
data1[data1['title'].str.contains('[0-9]')]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count
46,Crime Mystery Thriller,807,en,Two homicide detectives are on a desperate hun...,18.457430,English,Se7en,8.1,5915.0
65,Action ScienceFiction,11525,en,Jobe is resuscitated by Jonathan Walker. He wa...,2.495350,English Magyar,Lawnmower Man 2: Beyond Cyberspace,2.8,32.0
147,Drama,568,en,The true story of technical troubles that scut...,18.761467,English,Apollo 13,7.3,1637.0
166,Adventure,9073,en,Jesse becomes reunited with Willy three years ...,7.575098,English Bahasa indonesia Bahasa melayu,Free Willy 2 - The Adventure Home,5.5,130.0
201,Action Thriller,3512,en,A passenger train has been hijacked by an elec...,7.490247,English,Under Siege 2: Dark Territory,5.6,217.0
...,...,...,...,...,...,...,...,...,...
45329,Comedy Drama,43656,en,Can you solve the mystery of the 13 moons?,2.407001,English,13 Moons,5.8,4.0
45348,Action Adventure Thriller,44918,en,On the 100th anniversary of the original voyag...,4.359886,English,Titanic 2,3.4,55.0
45381,Horror Thriller Mystery,264269,ru,"Sveta and Maxim, a happy newlyweds, are moving...",0.217441,Pусский,Apartment 18,4.4,4.0
45458,Horror,289923,en,A film archivist revisits the story of Rustin ...,0.386450,English,The Burkittsville 7,7.0,1.0


In [37]:
len(data1['title'].unique()) == len(data1)

False

### vote_average

In [38]:
data1[data1['vote_average'].isnull()]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count


### vote_count

In [39]:
data1[data1['vote_count'].isnull()]

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count


In [40]:
data1['vote_count'] = data1['vote_count'].astype('int64')

In [41]:
data1.isnull().sum()

genres               0
id                   0
original_language    0
overview             0
popularity           0
spoken_languages     0
title                0
vote_average         0
vote_count           0
dtype: int64

### new column(score)

vote_average값을 변경해주어야 합니다.   
현재 vote_average는 조금 **불공정**하게 되어 있습니다.

왜냐하면, vote 수가 적은데(예를 들어 3개) 3개 전부 5점이라고 하면 vote가 5점으로 되어 있기 때문입니다.  
하지만, vote 수가 많을수록 vote_average가 떨어질 수 밖에 없습니다. 많은 사람들이 평가를 하니까요.  

그래서 이런 불공정을 처리하기 위해 imdb에서 처리한 방법이 있습니다.  
해당 이슈는 url : https://www.quora.com/How-does-IMDbs-rating-system-work 에서 확인할 수 있습니다.

그에 대한 답은 아래와 같습니다.

![1](https://user-images.githubusercontent.com/24634054/71774470-d1470c80-2fb2-11ea-8a1e-aa018dd6d25a.JPG)

- r : 개별 영화 평점
- v : 개별 영화에 평점을 투표한 횟수
- m : 250위 안에 들어야 하는 최소 투표 (정하기 나름인듯. 난 500이라고 하면 500으로 해도 되고.)
- c : 전체 영화에 대한 평균 평점

여기서 m은 **500위로 가정하고 진행하겠습니다.** 

먼저 m부터 찾아보죠. 500위 정도로 들어오게 하려면 vote_count가 상위 몇 %이어야 할까요?  
이는 quantile을 이용해서 구할 수 있습니다.

In [42]:
data1['vote_count'].quantile(0.10)

1.0

In [43]:
data1['vote_count'].quantile(0.50)

10.0

In [44]:
data1['vote_count'].quantile(0.95)

434.0

In [45]:
tmp_data = data1[data1['vote_count'] >= data1['vote_count'].quantile(0.95)]
tmp_data.shape

(2274, 9)

상위 90%로 했을 때 481개가 들어옵니다.   
89%로 하면 529개가 들어오게 됩니다. 저는 90%로 가정하고 진행하도록 하겠습니다.

In [46]:
del tmp_data

In [47]:
m = data1['vote_count'].quantile(0.95)
data1_reduced = data1[data1['vote_count'] >= m]

In [48]:
C = data1['vote_average'].mean()

In [49]:
print(C)
print(m)

5.618329297820528
434.0


In [50]:
def weighted_rating(x, m=m, C=C):
    v = x['vote_count']
    R = x['vote_average']
    
    return (v / (v + m) * R) + (m / (m + v) * C)

In [51]:
data1_reduced['score'] = data1_reduced.apply(weighted_rating, axis=1)

## credits

In [52]:
data2 = pd.read_csv('./dataset/credits.csv')

In [53]:
columns_use = joblib.load("./dataset/credits_columns_use.pkl")
columns_tbd = joblib.load("./dataset/credits_columns_tbd.pkl")

In [54]:
data2 = data2[columns_use]

In [55]:
data2.shape

(45476, 3)

In [56]:
data2.isnull().sum()

cast    0
crew    0
id      0
dtype: int64

### cast

In [57]:
data2['cast'] = data2['cast'].apply(literal_eval)

In [58]:
data2['cast'] = data2['cast'].apply(lambda x : [d['name'] for d in x]).apply(lambda x : [d.replace(' ', '') for d in x]).apply(lambda x : " ".join(x))

In [59]:
data2['cast'][0]

'TomHanks TimAllen DonRickles JimVarney WallaceShawn JohnRatzenberger AnniePotts JohnMorris ErikvonDetten LaurieMetcalf R.LeeErmey SarahFreeman PennJillette'

### crew

In [60]:
data2['crew'] = data2['crew'].apply(literal_eval)

In [61]:
data2['crew'] = data2['crew'].apply(lambda x : [d['name'] for d in x if d['job'] == 'Director']).apply(lambda x : [d.replace(' ', '') for d in x]).apply(lambda x : " ".join(x))

In [62]:
data2['crew'][0]

'JohnLasseter'

### merge

In [63]:
data = pd.merge(data1_reduced, data2)
data.head()

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew
0,Animation Comedy Family,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,English,Toy Story,7.7,5415,7.545539,TomHanks TimAllen DonRickles JimVarney Wallace...,JohnLasseter
1,Adventure Fantasy Family,8844,en,When siblings Judy and Peter discover an encha...,17.015539,English Français,Jumanji,6.9,2413,6.704621,RobinWilliams JonathanHyde KirstenDunst Bradle...,JoeJohnston
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.310584,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann
3,Adventure Action Thriller,710,en,James Bond must unmask the mysterious head of ...,14.686036,English Pусский Español,GoldenEye,6.6,1194,6.338302,PierceBrosnan SeanBean IzabellaScorupco FamkeJ...,MartinCampbell
4,Drama Crime,524,en,The life of the gambling paradise – Las Vegas ...,10.137389,English,Casino,7.8,1343,7.267167,RobertDeNiro SharonStone JoePesci JamesWoods D...,MartinScorsese


In [64]:
data.shape

(2274, 12)

In [65]:
data.isnull().sum()

genres               0
id                   0
original_language    0
overview             0
popularity           0
spoken_languages     0
title                0
vote_average         0
vote_count           0
score                0
cast                 0
crew                 0
dtype: int64

## keywords

In [66]:
data3 = pd.read_csv('./dataset/keywords.csv')

In [67]:
columns_use = joblib.load("./dataset/keywords_columns_use.pkl")
columns_tbd = joblib.load("./dataset/keywords_columns_tbd.pkl")

In [68]:
data3 = data3[columns_use]

In [69]:
data3.shape

(46419, 2)

In [70]:
data3.isnull().sum()

id          0
keywords    0
dtype: int64

### keywords

In [71]:
data3['keywords'] = data3['keywords'].apply(literal_eval)

In [72]:
data3['keywords'] = data3['keywords'].apply(lambda x : [d['name'] for d in x]).apply(lambda x : " ".join(x))

### merge

In [73]:
data = pd.merge(data, data3, on='id')
data.head()

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords
0,Animation Comedy Family,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,English,Toy Story,7.7,5415,7.545539,TomHanks TimAllen DonRickles JimVarney Wallace...,JohnLasseter,jealousy toy boy friendship friends rivalry bo...
1,Adventure Fantasy Family,8844,en,When siblings Judy and Peter discover an encha...,17.015539,English Français,Jumanji,6.9,2413,6.704621,RobinWilliams JonathanHyde KirstenDunst Bradle...,JoeJohnston,board game disappearance based on children's b...
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.310584,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann,robbery detective bank obsession chase shootin...
3,Adventure Action Thriller,710,en,James Bond must unmask the mysterious head of ...,14.686036,English Pусский Español,GoldenEye,6.6,1194,6.338302,PierceBrosnan SeanBean IzabellaScorupco FamkeJ...,MartinCampbell,cuba falsely accused secret identity computer ...
4,Drama Crime,524,en,The life of the gambling paradise – Las Vegas ...,10.137389,English,Casino,7.8,1343,7.267167,RobertDeNiro SharonStone JoePesci JamesWoods D...,MartinScorsese,poker drug abuse 1970s overdose illegal prosti...


In [74]:
data.shape

(2291, 13)

In [75]:
data.isnull().sum()

genres               0
id                   0
original_language    0
overview             0
popularity           0
spoken_languages     0
title                0
vote_average         0
vote_count           0
score                0
cast                 0
crew                 0
keywords             0
dtype: int64

## links

In [76]:
data4 = pd.read_csv('./dataset/links.csv')

In [77]:
columns_use = joblib.load("./dataset/links_columns_use.pkl")
columns_tbd = joblib.load("./dataset/links_columns_tbd.pkl")

In [78]:
data4 = data4[columns_use]

In [79]:
data4.shape

(45843, 2)

In [80]:
data4.isnull().sum()

movieId      0
tmdbId     219
dtype: int64

#### tmdbId

In [81]:
data4[data4.tmdbId.isnull()]

Unnamed: 0,movieId,tmdbId
140,142,
598,604,
708,720,
709,721,
718,730,
...,...,...
24673,115254,
24775,115715,
24802,115821,
28570,128734,


In [82]:
data4.dropna(axis=0, inplace=True)

In [83]:
data4.shape

(45624, 2)

### merge

In [84]:
data = pd.merge(data, data4, left_on='id', right_on='tmdbId').drop('tmdbId', axis=1)
data.head()

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId
0,Animation Comedy Family,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,English,Toy Story,7.7,5415,7.545539,TomHanks TimAllen DonRickles JimVarney Wallace...,JohnLasseter,jealousy toy boy friendship friends rivalry bo...,1
1,Adventure Fantasy Family,8844,en,When siblings Judy and Peter discover an encha...,17.015539,English Français,Jumanji,6.9,2413,6.704621,RobinWilliams JonathanHyde KirstenDunst Bradle...,JoeJohnston,board game disappearance based on children's b...,2
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.310584,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann,robbery detective bank obsession chase shootin...,6
3,Adventure Action Thriller,710,en,James Bond must unmask the mysterious head of ...,14.686036,English Pусский Español,GoldenEye,6.6,1194,6.338302,PierceBrosnan SeanBean IzabellaScorupco FamkeJ...,MartinCampbell,cuba falsely accused secret identity computer ...,10
4,Drama Crime,524,en,The life of the gambling paradise – Las Vegas ...,10.137389,English,Casino,7.8,1343,7.267167,RobertDeNiro SharonStone JoePesci JamesWoods D...,MartinScorsese,poker drug abuse 1970s overdose illegal prosti...,16


In [85]:
data.shape

(2291, 14)

In [86]:
data.isnull().sum()

genres               0
id                   0
original_language    0
overview             0
popularity           0
spoken_languages     0
title                0
vote_average         0
vote_count           0
score                0
cast                 0
crew                 0
keywords             0
movieId              0
dtype: int64

## ratings

In [87]:
data5 = pd.read_csv('./dataset/ratings.csv')

In [88]:
columns_use = joblib.load("./dataset/ratings_columns_use.pkl")
columns_tbd = joblib.load("./dataset/ratings_columns_tbd.pkl")

In [89]:
data5 = data5[columns_use]

In [90]:
data5.shape

(26024289, 3)

In [91]:
data5.isnull().sum()

userId     0
movieId    0
rating     0
dtype: int64

### userId
- content based filtering 시에 필요 없으므로 삭제

In [92]:
data5.drop('userId', axis=1, inplace=True)

### rating

In [93]:
data5_groupby = data5.groupby('movieId').mean()
data5_groupby

Unnamed: 0_level_0,rating
movieId,Unnamed: 1_level_1
1,3.888157
2,3.236953
3,3.175550
4,2.875713
5,3.079565
...,...
176267,4.000000
176269,3.500000
176271,5.000000
176273,1.000000


In [94]:
data5_groupby = data5_groupby.reset_index()
data5_groupby

Unnamed: 0,movieId,rating
0,1,3.888157
1,2,3.236953
2,3,3.175550
3,4,2.875713
4,5,3.079565
...,...,...
45110,176267,4.000000
45111,176269,3.500000
45112,176271,5.000000
45113,176273,1.000000


### merge

In [95]:
data = pd.merge(data, data5_groupby)
data.head()

Unnamed: 0,genres,id,original_language,overview,popularity,spoken_languages,title,vote_average,vote_count,score,cast,crew,keywords,movieId,rating
0,Animation Comedy Family,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,English,Toy Story,7.7,5415,7.545539,TomHanks TimAllen DonRickles JimVarney Wallace...,JohnLasseter,jealousy toy boy friendship friends rivalry bo...,1,3.888157
1,Adventure Fantasy Family,8844,en,When siblings Judy and Peter discover an encha...,17.015539,English Français,Jumanji,6.9,2413,6.704621,RobinWilliams JonathanHyde KirstenDunst Bradle...,JoeJohnston,board game disappearance based on children's b...,2,3.236953
2,Action Crime Drama Thriller,949,en,"Obsessive master thief, Neil McCauley leads a ...",17.924927,English Español,Heat,7.7,1886,7.310584,AlPacino RobertDeNiro ValKilmer JonVoight TomS...,MichaelMann,robbery detective bank obsession chase shootin...,6,3.841764
3,Adventure Action Thriller,710,en,James Bond must unmask the mysterious head of ...,14.686036,English Pусский Español,GoldenEye,6.6,1194,6.338302,PierceBrosnan SeanBean IzabellaScorupco FamkeJ...,MartinCampbell,cuba falsely accused secret identity computer ...,10,3.431841
4,Drama Crime,524,en,The life of the gambling paradise – Las Vegas ...,10.137389,English,Casino,7.8,1343,7.267167,RobertDeNiro SharonStone JoePesci JamesWoods D...,MartinScorsese,poker drug abuse 1970s overdose illegal prosti...,16,3.795511


In [96]:
data.shape

(2291, 15)

In [97]:
data.isnull().sum()

genres               0
id                   0
original_language    0
overview             0
popularity           0
spoken_languages     0
title                0
vote_average         0
vote_count           0
score                0
cast                 0
crew                 0
keywords             0
movieId              0
rating               0
dtype: int64

# Data 저장

In [98]:
data.to_csv('./dataset/merged_data.csv', index = False)