In [1]:
#! pip install spacy
#! pip install Levenshtein
#! python -m spacy download en_core_web_sm

In [2]:
# The first dataset contains metadata on 45,000 movies as ratings and reviews.
# https://www.kaggle.com/rounakbanik/the-movies-dataset
# The second dataset contains movie summaries scraped from wikipedia.
# http://www.cs.cmu.edu/~ark/personas/
# The third data set contains additional metadata we can cross reference.
# https://www.imdb.com/interfaces/

In [3]:
import pandas as pd
import ast
import numpy as np
import random
from sklearn import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from collections import Counter

In [4]:
#these are in directory 'Capstone Resources/Kaggle' and they are the data from:
# https://www.kaggle.com/rounakbanik/the-movies-dataset

#load the ratings_df to warm up, not sure we'll need it though
ratings_df = pd.read_csv('Capstone Resources/Kaggle/ratings.csv')
links_df = pd.read_csv('Capstone Resources/Kaggle/links.csv')
ratings_df = ratings_df.merge(links_df[['movieId', 'imdbId']], on='movieId') #add imdbID column to ratings_df
#imdbId column will match to metadata_df below to get the movie's title

In [5]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp,imdbId
0,1,110,1.0,1425941529,112573
1,11,110,3.5,1231676989,112573
2,22,110,5.0,1111937009,112573
3,24,110,5.0,979870012,112573
4,29,110,3.0,1044020005,112573


In [6]:
#this is in directory 'Capstone Resources/Kaggle' and they are the data from:
# https://www.kaggle.com/rounakbanik/the-movies-dataset

metadata_df = pd.read_csv('Capstone Resources/Kaggle/movies_metadata.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [7]:
metadata_df.iloc[0:10]

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0
5,False,,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,949,tt0113277,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,1995-12-15,187436818.0,170.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,A Los Angeles Crime Saga,Heat,False,7.7,1886.0
6,False,,58000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",,11860,tt0114319,en,Sabrina,An ugly duckling having undergone a remarkable...,...,1995-12-15,0.0,127.0,"[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...",Released,You are cordially invited to the most surprisi...,Sabrina,False,6.2,141.0
7,False,,0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,45325,tt0112302,en,Tom and Huck,"A mischievous young boy, Tom Sawyer, witnesses...",...,1995-12-22,0.0,97.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The Original Bad Boys.,Tom and Huck,False,5.4,45.0
8,False,,35000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,9091,tt0114576,en,Sudden Death,International action superstar Jean Claude Van...,...,1995-12-22,64350171.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Terror goes into overtime.,Sudden Death,False,5.5,174.0
9,False,"{'id': 645, 'name': 'James Bond Collection', '...",58000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.mgm.com/view/movie/757/Goldeneye/,710,tt0113189,en,GoldenEye,James Bond must unmask the mysterious head of ...,...,1995-11-16,352194034.0,130.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,No limits. No fears. No substitutes.,GoldenEye,False,6.6,1194.0


In [8]:
#columns in metadata

metadata_df.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [9]:
#columns to keep

columns = ['adult', 'belongs_to_collection', 'budget', 'genres', 'id',
       'imdb_id', 'original_language', 'overview',
       'popularity', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'tagline', 'title',
       'vote_average', 'vote_count']

In [10]:
metadata_df = metadata_df[columns]

In [11]:
#some additional clean up

#remove any entry where the title isn't named
metadata_df.dropna(subset=['title'], inplace=True)

#let's not recommend anything X rated
metadata_df['adult'] = metadata_df['adult'].astype(str)
metadata_df = metadata_df[metadata_df['adult'] == 'False']
metadata_df.drop(columns=['adult'], inplace=True)

#modify imdb_id column to match ratings_df
metadata_df['imdb_id'] = metadata_df['imdb_id'].str[2:]
metadata_df.rename(columns={'imdb_id':'imdbId'}, inplace=True)

#change id to an integer
metadata_df['id'] = metadata_df['id'].astype('str').astype('int')

#modify genres column into a list
metadata_df['genres'] = metadata_df['genres'].astype('str')
metadata_df['genres'] = metadata_df['genres'].apply(ast.literal_eval)
metadata_df['genres'] = [[j['name'] for j in i] for i in metadata_df['genres']]

#modify belongs_to_collection column into a string if it exists
metadata_df['belongs_to_collection'] = metadata_df['belongs_to_collection'].astype('str')
this = []
for i in metadata_df['belongs_to_collection']:
    if i == 'nan':
        this.append('None')
    else:
        try:
            dictionary = ast.literal_eval(i)
            this.append(dictionary['name'])
        except:
            this.append('None')
metadata_df['belongs_to_collection'] = this

In [12]:
metadata_df.head()

Unnamed: 0,belongs_to_collection,budget,genres,id,imdbId,original_language,overview,popularity,release_date,revenue,runtime,spoken_languages,tagline,title,vote_average,vote_count
0,Toy Story Collection,30000000,"[Animation, Comedy, Family]",862,114709,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",,Toy Story,7.7,5415.0
1,,65000000,"[Adventure, Fantasy, Family]",8844,113497,en,When siblings Judy and Peter discover an encha...,17.015539,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Roll the dice and unleash the excitement!,Jumanji,6.9,2413.0
2,Grumpy Old Men Collection,0,"[Romance, Comedy]",15602,113228,en,A family wedding reignites the ancient feud be...,11.7129,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,6.5,92.0
3,,16000000,"[Comedy, Drama, Romance]",31357,114885,en,"Cheated on, mistreated and stepped on, the wom...",3.859495,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Friends are the people who let you be yourself...,Waiting to Exhale,6.1,34.0
4,Father of the Bride Collection,0,[Comedy],11862,113041,en,Just when George Banks has recovered from his ...,8.387519,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,5.7,173.0


In [13]:
#this is in directory 'Capstone Resources/Kaggle' and they are the data from:
# https://www.kaggle.com/rounakbanik/the-movies-dataset

credits_df = pd.read_csv('Capstone Resources/Kaggle/credits.csv')

In [14]:

#convert cast to a list of names
credits_df['cast'] = credits_df['cast'].apply(ast.literal_eval)
credits_df['cast'] = [[j['name'] for j in i] for i in credits_df['cast']]

#create a column for a list of each of the major roles
credits_df['crew'] = credits_df['crew'].apply(ast.literal_eval)
credits_df['director'] = [[j['name'] for j in i if j['job'] == 'Director'] for i in credits_df['crew']]
credits_df['writer'] = [[j['name'] for j in i if j['job'] == 'Screenplay'] for i in credits_df['crew']]
credits_df['producer'] = [[j['name'] for j in i if j['job'] == 'Producer'] for i in credits_df['crew']]
credits_df['executive producer'] = [[j['name'] for j in i if j['job'] == 'Executive Producer'] for i in credits_df['crew']]
credits_df['score'] = [[j['name'] for j in i if j['job'] == 'Music'] for i in credits_df['crew']]

credits_df.drop(columns=['crew'], inplace=True)

credits_df.head()

Unnamed: 0,cast,id,director,writer,producer,executive producer,score
0,"[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",862,[John Lasseter],"[Joss Whedon, Andrew Stanton, Joel Cohen, Alec...","[Bonnie Arnold, Ralph Guggenheim]","[Ed Catmull, Steve Jobs]",[Randy Newman]
1,"[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",8844,[Joe Johnston],"[Jonathan Hensleigh, Greg Taylor, Jim Strain]","[Scott Kroopf, William Teitler]","[Larry J. Franco, Ted Field, Robert W. Cort]",[]
2,"[Walter Matthau, Jack Lemmon, Ann-Margret, Sop...",15602,[Howard Deutch],[],[],[],[]
3,"[Whitney Houston, Angela Bassett, Loretta Devi...",31357,[Forest Whitaker],"[Ronald Bass, Terry McMillan]","[Ronald Bass, Ezra Swerdlow, Deborah Schindler...",[Terry McMillan],[]
4,"[Steve Martin, Diane Keaton, Martin Short, Kim...",11862,[Charles Shyer],"[Nancy Meyers, Albert Hackett]",[Nancy Meyers],[],[]


In [15]:
#merge cast and crew with metadata
metadata_df = metadata_df.merge(credits_df, on='id')

In [16]:
metadata_df.head()

Unnamed: 0,belongs_to_collection,budget,genres,id,imdbId,original_language,overview,popularity,release_date,revenue,...,tagline,title,vote_average,vote_count,cast,director,writer,producer,executive producer,score
0,Toy Story Collection,30000000,"[Animation, Comedy, Family]",862,114709,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,1995-10-30,373554033.0,...,,Toy Story,7.7,5415.0,"[Tom Hanks, Tim Allen, Don Rickles, Jim Varney...",[John Lasseter],"[Joss Whedon, Andrew Stanton, Joel Cohen, Alec...","[Bonnie Arnold, Ralph Guggenheim]","[Ed Catmull, Steve Jobs]",[Randy Newman]
1,,65000000,"[Adventure, Fantasy, Family]",8844,113497,en,When siblings Judy and Peter discover an encha...,17.015539,1995-12-15,262797249.0,...,Roll the dice and unleash the excitement!,Jumanji,6.9,2413.0,"[Robin Williams, Jonathan Hyde, Kirsten Dunst,...",[Joe Johnston],"[Jonathan Hensleigh, Greg Taylor, Jim Strain]","[Scott Kroopf, William Teitler]","[Larry J. Franco, Ted Field, Robert W. Cort]",[]
2,Grumpy Old Men Collection,0,"[Romance, Comedy]",15602,113228,en,A family wedding reignites the ancient feud be...,11.7129,1995-12-22,0.0,...,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,6.5,92.0,"[Walter Matthau, Jack Lemmon, Ann-Margret, Sop...",[Howard Deutch],[],[],[],[]
3,,16000000,"[Comedy, Drama, Romance]",31357,114885,en,"Cheated on, mistreated and stepped on, the wom...",3.859495,1995-12-22,81452156.0,...,Friends are the people who let you be yourself...,Waiting to Exhale,6.1,34.0,"[Whitney Houston, Angela Bassett, Loretta Devi...",[Forest Whitaker],"[Ronald Bass, Terry McMillan]","[Ronald Bass, Ezra Swerdlow, Deborah Schindler...",[Terry McMillan],[]
4,Father of the Bride Collection,0,[Comedy],11862,113041,en,Just when George Banks has recovered from his ...,8.387519,1995-02-10,76578911.0,...,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,5.7,173.0,"[Steve Martin, Diane Keaton, Martin Short, Kim...",[Charles Shyer],"[Nancy Meyers, Albert Hackett]",[Nancy Meyers],[],[]


In [17]:
#this is in directory 'Capstone Resources/CMU' and they are the data from:
#http://www.cs.cmu.edu/~ark/personas/

#df with summaries
summary_df = pd.read_csv('Capstone Resources/CMU/plot_summaries.txt', sep='\t', header=None)
summary_df.rename(columns={0:'wikiId', 1:'Summary'}, inplace=True)

In [18]:
#this is in directory 'Capstone Resources/CMU' and they are the data from:
# http://www.cs.cmu.edu/~ark/personas/

#df to get title from wikiId
summarykey_df = pd.read_csv('Capstone Resources/CMU/movie.metadata.tsv', sep='\t', header=None)

#keeping only wikiId and Title columns
summarykey_df = summarykey_df[[0, 2]]
summarykey_df.rename(columns={0:'wikiId', 2:'title'}, inplace=True)

In [19]:
#add title from summarykey_df to summary_df
summary_df = summary_df.merge(summarykey_df, on='wikiId')
#summary_df.drop(columns=['wikiId'], inplace=True)
summary_df['title'] = summary_df['title'].astype(str)

summary_df.head()

Unnamed: 0,wikiId,Summary,title
0,23890098,"Shlykov, a hard-working taxi driver and Lyosha...",Taxi Blues
1,31186339,The nation of Panem consists of a wealthy Capi...,The Hunger Games
2,20663735,Poovalli Induchoodan is sentenced for six yea...,Narasimham
3,2231378,"The Lemon Drop Kid , a New York City swindler,...",The Lemon Drop Kid
4,595909,Seventh-day Adventist Church pastor Michael Ch...,A Cry in the Dark


In [20]:
before_summary_df = len(metadata_df)
metadata_df = metadata_df.merge(summary_df, on = 'title')
after_summary_df = len(metadata_df)
print('Loss due to merge with summary_df: ', before_summary_df - after_summary_df)
print('Remaining: ', after_summary_df)

Loss due to merge with summary_df:  20841
Remaining:  24685


In [21]:
metadata_df = metadata_df[metadata_df['overview'].notna()]

#Add release year to title if database contains more than one title
year = []
for date in metadata_df['release_date'].astype('str'):
    year.append(date[0:4])
metadata_df['release_year'] = year

duplicates_list = list(metadata_df['title'][metadata_df['title'].duplicated(keep=False)].unique())
for i in range(len(metadata_df)):
    if metadata_df['title'].iloc[i] in duplicates_list:
        metadata_df['title'].iloc[i] = metadata_df['title'].iloc[i] + '(' + metadata_df['release_year'].iloc[i] + ')'

duplicates_list = list(metadata_df['title'][metadata_df['title'].duplicated(keep=False)].unique())
for title in duplicates_list:
    
    overview = list(metadata_df[metadata_df['title'] == title]['overview'])
    summary = list(metadata_df[metadata_df['title'] == title]['Summary'])

    sim_list = []
    for i in range(len(overview)):
        vect = TfidfVectorizer(min_df=1, stop_words="english")                                                                                                                                                                                                   
        tfidf = vect.fit_transform([overview[i], summary[i]])
        sim_list.append((tfidf * tfidf.T).toarray()[0][1])

    indices = metadata_df[metadata_df['title'] == title].index
    
    to_drop = [True for i in range(len(sim_list))]
    to_drop[np.argmax(sim_list)] = False
    
    index_to_drop = indices[to_drop]
    metadata_df.drop(index = index_to_drop, inplace=True)

metadata_df.reset_index(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [22]:
#final number of movies in df

final_metadata_len = len(metadata_df)
print('Loss due to removal of duplicate titles: ', after_summary_df - final_metadata_len)
print('Remaining: ', final_metadata_len)

Loss due to removal of duplicate titles:  4458
Remaining:  20227


In [23]:
import networkx as nx
from tqdm import tqdm

def create_network(df, column, min_num_occ = 2):
    df = df[['title', column]]
    df = df.explode(column)
    df.set_index('title', inplace=True)
    unique_vals = df[column].value_counts()[list(df[column].value_counts() > min_num_occ)]
    edge_list = set()
    for i in tqdm(unique_vals.index):
        movies = df.loc[df[column] == i].index
        for j in movies:
            for k in movies:
                edge_list.add((j,k))
    G = nx.Graph()
    G.add_edges_from(edge_list)
    return G
G = create_network(metadata_df, 'cast', 2)

100%|██████████| 24803/24803 [29:05<00:00, 14.21it/s]    


In [24]:
def cast_paths(movie1, movie2):
    #shortest_path = nx.shortest_path_length(G, source=movie1, target=movie2)
    cast_score = dict.fromkeys(metadata_df['title'], 0)
    for path in nx.all_simple_paths(G, source=movie1, target=movie2, cutoff = 2):#shortest_path):
        cast_score[path[0]] += 2
    for path in nx.all_simple_paths(G, source=movie1, target=movie2, cutoff = 3):
        for movie in path:
            if cast_score[movie] == 0:
                cast_score[movie] += 1
                
    cast_score[movie1] = 0
    cast_score[movie2] = 0
    
    return cast_score

In [25]:
tfidf_summary = TfidfVectorizer(stop_words='english').fit_transform(metadata_df['Summary'])
cosine_similarity = linear_kernel(tfidf_summary, tfidf_summary)

In [38]:
def get_cos_sim(title, cos_sim = cosine_similarity):
    index = metadata_df.index[metadata_df['title'] == title]
    cs = cosine_similarity[index][0]
    cs[index] = 0
    return cs

def list_to_dict(values):
    keys = metadata_df['title']
    dictionary = dict(zip(keys, values))
    #dictionary = sorted(dictionary.items(), key=lambda item: item[1], reverse=True)
    return dictionary
    
def cos_sim_match(movie1, movie2):
    cs_movie1 = get_cos_sim(title = movie1)
    cs_movie2 = get_cos_sim(title = movie2)
    summary_score = list_to_dict(cs_movie1*np.max(cs_movie2) + cs_movie2*np.max(cs_movie1))
    return dict(summary_score)

In [27]:
flat_genres = [y for x in metadata_df['genres'] for y in x]
all_genres = set(flat_genres)

genres_dict = {}
for genre in all_genres:
    genres_dict[genre] = dict.fromkeys(all_genres, 1)
    for i in metadata_df['genres']:
        if genre in i:
            for j in i:
                genres_dict[genre][j] = genres_dict[genre][j]+1
    genres_dict[genre] = sorted(genres_dict[genre].items(), key=lambda item: item[1], reverse=True)

In [28]:
def randomized_genre_pick(genre1, genre2):
    genre1_pick = genres_dict[random.choice(genre1[0])][1][0]
    genre2_pick = genres_dict[random.choice(genre2[0])][1][0]
    return set([genre1_pick, genre2_pick])

def match_genres(movie1, movie2):
    genre1 = list(metadata_df.loc[metadata_df['title'] == movie1, 'genres'])
    genre2 = list(metadata_df.loc[metadata_df['title'] == movie2, 'genres'])
    intersect = set(genre1[0]) & set(genre2[0])

    #if there are no intersecting genres then we'll randomly pick some based on genres commonly found together
    emergency_count = 0
    while len(intersect) == 0:
        intersect = randomized_genre_pick(genre1, genre2)
        emergency_count += 1
        if emergency_count > 10:
            intersect = set(['drama'])
            break
        
    genre_score = dict.fromkeys(metadata_df['title'], 1)
    for i in metadata_df.index:
        if intersect.issubset(metadata_df['genres'].iloc[i]):
            genre_score[metadata_df['title'].iloc[i]] = 2
    
    genre_score[movie1] = 0
    genre_score[movie2] = 0
    
    return genre_score

#match_genres('Jumanji', 'Grumpier Old Men')

In [29]:
def range_score(movie1, movie2, prop):
    movie1 = float(metadata_df[metadata_df['title'] == movie1][prop])
    movie2 = float(metadata_df[metadata_df['title'] == movie2][prop])
    
    max_score = max(movie1, movie2)
    min_score = min(movie1, movie2)
    
    score = dict.fromkeys(metadata_df['title'], 1)
    for index in metadata_df.index:
        this_move_score = float(metadata_df[prop].iloc[index])
        if this_move_score > min_score and this_move_score < max_score:
            score[metadata_df['title'].iloc[index]] = 2
    
    score[movie1] = 0
    score[movie2] = 0 
    
    return score

In [30]:
import Levenshtein as lev

def fuzzy_match(movie):
    ratio_list = []
    for i in metadata_df['title']:
        if i[-1] == ')' and i[-6] == '(':
            ratio_list.append(lev.ratio(movie.lower(), i[0:-6].lower()))
        else:
            ratio_list.append(lev.ratio(movie.lower(), i.lower()))
    return metadata_df['title'].iloc[np.argmax(ratio_list)]

In [41]:
def movie_matcher(movie1, movie2):
    
    movie1 = fuzzy_match(movie1)
    movie2 = fuzzy_match(movie2)

    summary_score = cos_sim_match(movie1, movie2)
    cast_score = cast_paths(movie1, movie2)
    genre_score = match_genres(movie1, movie2)
    vote_score = range_score(movie1, movie2, 'vote_average')
    popularity_score = range_score(movie1, movie2, 'popularity')
    
    w1 = 5
    w2 = 1
    w3 = 3
    w4 = 1
    w5 = 1

    summary_score.update((x,y*w1) for x,y in summary_score.items())
    cast_score.update((x,y*w2) for x,y in cast_score.items())
    genre_score.update((x,y*w3) for x,y in genre_score.items())
    vote_score.update((x,y*w3) for x,y in vote_score.items())
    popularity_score.update((x,y*w3) for x,y in popularity_score.items())

    final_score = Counter(summary_score) + Counter(cast_score) + Counter(genre_score) \
                    + Counter(vote_score) + Counter(popularity_score)
    final_score = sorted(final_score.items(), key=lambda item: item[1], reverse=True)

    return final_score[0:10]#summary_score, cast_score, genre_score
movie_matcher('Toy Story 2', 'The Avengers')

[('The Santa Clause', 19.091242517500902),
 ('To Die For(1995)', 19.06321668330582),
 ('Sleepless in Seattle', 19.063090982314407),
 ('My Favorite Martian', 19.03929852209362),
 ("Bridget Jones's Diary", 19.038328904022066),
 ("Charlie Wilson's War", 19.03782612123299),
 ('The Terminal', 19.03769058027057),
 ('Larry Crowne', 19.036264928307258),
 ('Art School Confidential', 19.031385273159557),
 ('Space Jam', 19.03045102311177)]