In [116]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

from movie_helper.movie import load_movies

In [117]:
# Movie titles are spelled out once so every critic's ratings use exactly the
# same strings as keys.
LADY = "Lady in the Water"
SNAKES = "Snakes on a Plane"
LUCK = "Just My Luck"
SUPERMAN = "Superman Returns"
DUPREE = "You, Me and Dupree"
LISTENER = "The Night Listener"

# critic name -> {movie title -> rating}. Not every critic rated every movie.
critics = {
    "Lisa Rose": {LADY: 2.5, SNAKES: 3.5, LUCK: 3.0, SUPERMAN: 3.5, DUPREE: 2.5, LISTENER: 3.0},
    "Gene Seymour": {LADY: 3.0, SNAKES: 3.5, LUCK: 1.5, SUPERMAN: 5.0, LISTENER: 3.0, DUPREE: 3.5},
    "Michael Phillips": {LADY: 2.5, SNAKES: 3.0, SUPERMAN: 3.5, LISTENER: 4.0},
    "Claudia Puig": {SNAKES: 3.5, LUCK: 3.0, LISTENER: 4.5, SUPERMAN: 4.0, DUPREE: 2.5},
    "Mick LaSalle": {LADY: 3.0, SNAKES: 4.0, LUCK: 2.0, SUPERMAN: 3.0, LISTENER: 3.0, DUPREE: 2.0},
    "Jack Matthews": {LADY: 3.0, SNAKES: 4.0, LISTENER: 3.0, SUPERMAN: 5.0, DUPREE: 3.5},
    "Toby": {SNAKES: 4.5, DUPREE: 1.0, SUPERMAN: 4.0},
}

# Outer keys (critics) become columns and inner keys (movies) become the row
# index; movies a critic did not rate show up as NaN.
df = pd.DataFrame(critics)
df

Unnamed: 0,Lisa Rose,Gene Seymour,Michael Phillips,Claudia Puig,Mick LaSalle,Jack Matthews,Toby
Lady in the Water,2.5,3.0,2.5,,3.0,3.0,
Snakes on a Plane,3.5,3.5,3.0,3.5,4.0,4.0,4.5
Just My Luck,3.0,1.5,,3.0,2.0,,
Superman Returns,3.5,5.0,3.5,4.0,3.0,5.0,4.0
"You, Me and Dupree",2.5,3.5,,2.5,2.0,3.5,1.0
The Night Listener,3.0,3.0,4.0,4.5,3.0,3.0,


In [119]:
# NearestNeighbors complains when there is NA, so unrated cells are set to 0.
# Caveat: after this step "not rated" is indistinguishable from a rating of 0
# in every distance computed from this frame.
# The result is rebound instead of using inplace=True so the object that was
# displayed by the earlier cell is not mutated behind the reader's back.
df = df.fillna(0)

In [129]:
# Fit a cosine-distance neighbour index over the rows of df (one row per
# movie). fit() returns the estimator itself, so it can be chained; the bare
# name on the last line shows the fitted estimator as the cell output.
nn = NearestNeighbors(metric="cosine", n_neighbors=5).fit(df)
nn

In [133]:
# Neighbours of every row the model was fitted on. Calling kneighbors()
# without X queries the indexed points themselves, and in that mode
# scikit-learn does not count a point as its own neighbour. That is more
# robust than querying with df and dropping column 0 of the result: the point
# itself is only guaranteed to be in column 0 when no other row sits at
# distance 0 from it (e.g. duplicate rating vectors).
# One fewer neighbour than the model's n_neighbors is requested, so each row
# yields n_neighbors - 1 neighbours, none of which is the row itself.
distances, indices = nn.kneighbors(n_neighbors=nn.n_neighbors - 1)
distances, indices

(array([[0.16351355, 0.16700473, 0.18366493, 0.18431127],
        [0.02012194, 0.12323169, 0.13542826, 0.18431127],
        [0.21161357, 0.24014412, 0.29742666, 0.31977023],
        [0.02012194, 0.0846977 , 0.10782985, 0.16351355],
        [0.0846977 , 0.12323169, 0.16948491, 0.18366493],
        [0.10782985, 0.13542826, 0.16700473, 0.16948491]]),
 array([[3, 5, 4, 1],
        [3, 4, 5, 0],
        [5, 4, 1, 3],
        [1, 4, 5, 0],
        [3, 1, 5, 0],
        [3, 1, 0, 4]]))

In [135]:
# Row position of the movie whose neighbours are inspected below. Indexing
# with a one-element list keeps the result a one-row DataFrame.
target = 2
df.iloc[[target]]

Unnamed: 0,Lisa Rose,Gene Seymour,Michael Phillips,Claudia Puig,Mick LaSalle,Jack Matthews,Toby
Just My Luck,3.0,1.5,0.0,3.0,2.0,0.0,0.0


In [162]:
# Print the target movie's neighbours, nearest first, each with a
# distance-weighted average "rating".
print(f"Similar movies to '{df.index[target]}':")
for i, neighbour in enumerate(indices[target]):
    movie = df.index[neighbour]

    # Each of the neighbour's own neighbours contributes with weight
    # 1 / (1 + cosine distance).
    # NOTE(review): `pos` is a row position returned by kneighbors, but it is
    # used below as a *column* position in df.iloc, i.e. it selects a critic,
    # not a movie. Confirm this is the intended lookup.
    total_weight = 0
    weighted_total = 0
    for pos, d in zip(indices[neighbour], distances[neighbour]):
        weight = 1 / (1 + d)
        total_weight += weight
        weighted_total += weight * df.iloc[neighbour, pos]

    rating = weighted_total / total_weight
    print(f"{i+1}. {movie} Rating: {rating:0.1f}")

Similar movies to 'Just My Luck':
1. The Night Listener Rating: 3.4
2. You, Me and Dupree Rating: 3.0
3. Snakes on a Plane Rating: 3.7
4. Superman Returns Rating: 4.1


## Nearest Neighbors movie recommendations

In [164]:
# Load the movie catalogue through the project helper (imported in the
# notebook's top import cell) and preview it. The preview shows one row per
# movie with id/title/date columns followed by 0/1 genre flag columns.
movies = load_movies()
movies.head()

Unnamed: 0,movie_id,movie_title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [165]:
# Genre flag columns of the catalogue; these form the feature vector used to
# compare movies.
genre_columns = [
    "unknown",
    "Action",
    "Adventure",
    "Animation",
    "Children's",
    "Comedy",
    "Crime",
    "Documentary",
    "Drama",
    "Fantasy",
    "Film-Noir",
    "Horror",
    "Musical",
    "Mystery",
    "Romance",
    "Sci-Fi",
    "Thriller",
    "War",
    "Western",
]

# Keep only the genre flags, in the order listed above.
movie_genres = movies[genre_columns]
movie_genres.head()

Unnamed: 0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [166]:
# Fit a cosine-distance neighbour index over the genre flags. This rebinds
# `nn`, replacing the model fitted on the critics data earlier in the
# notebook. The bare name on the last line shows the fitted estimator.
nn = NearestNeighbors(metric="cosine", n_neighbors=10).fit(movie_genres)
nn

In [167]:
# Case-insensitive search for titles that begin with "star" (str.match only
# matches at the start of the string); show the first five hits.
starts_with_star = movies["movie_title"].str.match("star", case=False)
movies[starts_with_star].head()

Unnamed: 0,movie_id,movie_title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
49,50,Star Wars (1977),01-Jan-1977,,http://us.imdb.com/M/title-exact?Star%20Wars%2...,0,1,1,0,0,...,0,0,0,0,0,1,1,0,1,0
61,62,Stargate (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Stargate%20(1...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
221,222,Star Trek: First Contact (1996),22-Nov-1996,,http://us.imdb.com/M/title-exact?Star%20Trek:%...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
226,227,Star Trek VI: The Undiscovered Country (1991),01-Jan-1991,,http://us.imdb.com/M/title-exact?Star%20Trek%2...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
227,228,Star Trek: The Wrath of Khan (1982),01-Jan-1982,,http://us.imdb.com/M/title-exact?Star%20Trek:%...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0


In [175]:
# Row position of the movie to recommend for. This is a position in `movies`,
# not its movie_id; echo the title as a sanity check.
movie_index = 49
movies["movie_title"].iloc[movie_index]

'Star Wars (1977)'

In [176]:
# Query the genre index with a one-row frame for the chosen movie. The result
# arrays have a single row, so row 0 holds all of its neighbours.
neighbour_distances, neighbour_indices = nn.kneighbors(movie_genres.iloc[[movie_index]])

# Exclude the same id: drop the queried movie if it appears among its own
# neighbours, keeping distances and indices aligned.
is_other = neighbour_indices[0] != movie_index
indices = neighbour_indices[0][is_other]
distances = neighbour_distances[0][is_other]

In [177]:
# Look up the neighbouring movies by row position and attach a similarity
# score of 1 / (1 + cosine distance); identical genre vectors score 1.0.
# assign() returns a new frame, so `movies` itself is left unchanged.
recs = movies.iloc[indices].assign(scores=1 / (1 + distances))
recs

Unnamed: 0,movie_id,movie_title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children's,...,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,scores
180,181,Return of the Jedi (1983),14-Mar-1997,,http://us.imdb.com/M/title-exact?Return%20of%2...,0,1,1,0,0,...,0,0,0,0,1,1,0,1,0,1.0
171,172,"Empire Strikes Back, The (1980)",01-Jan-1980,,http://us.imdb.com/M/title-exact?Empire%20Stri...,0,1,1,0,0,...,0,0,0,0,1,1,0,1,0,0.919854
497,498,"African Queen, The (1951)",01-Jan-1951,,http://us.imdb.com/M/title-exact?African%20Que...,0,1,1,0,0,...,0,0,0,0,1,0,0,1,0,0.904508
270,271,Starship Troopers (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Starship+Troo...,0,1,1,0,0,...,0,0,0,0,0,1,0,1,0,0.904508
221,222,Star Trek: First Contact (1996),22-Nov-1996,,http://us.imdb.com/M/title-exact?Star%20Trek:%...,0,1,1,0,0,...,0,0,0,0,0,1,0,0,0,0.816058
81,82,Jurassic Park (1993),01-Jan-1993,,http://us.imdb.com/M/title-exact?Jurassic%20Pa...,0,1,1,0,0,...,0,0,0,0,0,1,0,0,0,0.816058
372,373,Judge Dredd (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Judge%20Dredd...,0,1,1,0,0,...,0,0,0,0,0,1,0,0,0,0.816058
227,228,Star Trek: The Wrath of Khan (1982),01-Jan-1982,,http://us.imdb.com/M/title-exact?Star%20Trek:%...,0,1,1,0,0,...,0,0,0,0,0,1,0,0,0,0.816058
120,121,Independence Day (ID4) (1996),03-Jul-1996,,http://us.imdb.com/M/title-exact?Independence%...,0,1,0,0,0,...,0,0,0,0,0,1,0,1,0,0.816058
