# Rank Most Similar Movies

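Using various distance metrics, rank the movies most similar to a query movie. The metric computed in the cells below is the Jaccard similarity between the movies' sets of actors.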

In [1]:
%matplotlib inline

In [2]:
import pandas as pd

import json

In [3]:
# Build two structures from the JSON-lines movie file:
#   actor_name_map   - actor id -> actor name, as listed in the file
#   movie_actor_list - one {"movie": title, "actors": set of actor ids} dict per line
actor_name_map = {}
movie_actor_list = []

# JSON interchange text is UTF-8 by specification; passing the encoding explicitly
# keeps names with non-ASCII characters from depending on the platform's locale default.
with open("../data/imdb_recent_movies.json", "r", encoding="utf-8") as in_file:
    for line in in_file:
        # Skip blank lines (e.g. an extra empty line at the end of the file),
        # which json.loads would otherwise reject with a JSONDecodeError.
        if not line.strip():
            continue

        this_movie = json.loads(line)

        # Ids and names are paired positionally; a later occurrence of an id
        # overwrites the earlier name.
        for actor_id, actor_name in zip(this_movie['actor_ids'], this_movie['actor_names']):
            actor_name_map[actor_id] = actor_name

        # Store the cast as a set so set-based similarity can be computed later.
        movie_actor_list.append({
            "movie": this_movie["title_name"],
            "actors": set(this_movie['actor_ids'])
        })

In [4]:
# Tabulate the movie records so the query movie can be looked up by title.
df = pd.DataFrame(movie_actor_list)

# Display every row whose title equals the query title.
df.loc[df["movie"].eq("Star Wars: Episode II - Attack of the Clones")]

Unnamed: 0,movie,actors
81,Star Wars: Episode II - Attack of the Clones,"{nm0000489, nm0000191, nm0159789}"


In [5]:
# Select the query movie by title instead of a hard-coded list position, so the
# lookup cannot silently pick a different movie if the data file's order changes.
# next() returns the first record with this title and raises StopIteration if
# the title is absent.
target_title = "Star Wars: Episode II - Attack of the Clones"
target_movie = next(
    record for record in movie_actor_list if record["movie"] == target_title
)
target_movie

{'movie': 'Star Wars: Episode II - Attack of the Clones',
 'actors': {'nm0000191', 'nm0000489', 'nm0159789'}}

In [6]:
# Score every movie in movie_actor_list (the query movie included) against the
# query movie with the Jaccard similarity of their actor-id sets:
# size of the intersection divided by size of the union.
# Note: despite the name `distances`, each entry stores a similarity,
# so a higher value means the casts overlap more.
target_actors = target_movie["actors"]

distances = [
    {
        "movie": candidate,
        "similarity": len(target_actors & candidate["actors"])
        / len(target_actors | candidate["actors"]),
    }
    for candidate in movie_actor_list
]

In [7]:
# Order the scored movies from most to least similar and report the first ten.
ranked = sorted(distances, key=lambda entry: entry["similarity"], reverse=True)
for match in ranked[:10]:
    matched_movie = match["movie"]
    # Title followed by its similarity score.
    print(matched_movie["movie"], match["similarity"])
    # One tab-indented line per cast member, resolved from id to name.
    for actor_id in matched_movie["actors"]:
        print("\t", actor_name_map[actor_id])

Star Wars: Episode II - Attack of the Clones 1.0
	 Christopher Lee
	 Ewan McGregor
	 Hayden Christensen
Star Wars: Episode III - Revenge of the Sith 0.5
	 Samuel L. Jackson
	 Ewan McGregor
	 Hayden Christensen
Faster 0.3333333333333333
	 Ewan McGregor
Whales of Atlantis: In Search of Moby Dick 0.3333333333333333
	 Christopher Lee
Troy's Story 0.3333333333333333
	 Ewan McGregor
Miss Potter 0.3333333333333333
	 Ewan McGregor
The Final Fix 0.3333333333333333
	 Ewan McGregor
Perfect Sense 0.3333333333333333
	 Ewan McGregor
Fastest 0.3333333333333333
	 Ewan McGregor
Charge 0.3333333333333333
	 Ewan McGregor
