In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load 'upcoming_movies_2019.csv', keeping only the two columns used below:
# the movie title ('name') and its genre string ('genre').
df = pd.read_csv(
    'upcoming_movies_2019.csv',
    usecols=['name', 'genre'],
)

In [2]:
# Split each genre string on '|' (missing genres stay NaN), replace the missing
# ones with "", then store every entry as text -- i.e. the printed form of its
# list, such as "['Action, Comedy ']".
# NOTE(review): the rows displayed below contain no '|' (genres look
# comma-separated), so the split appears to only wrap each string in a
# one-element list -- confirm the intended delimiter.
df['genre'] = (
    df['genre']
    .str.split('|')
    .fillna("")
    .astype('str')
)
df

Unnamed: 0,genre,name
0,"['Action, Comedy ']",Stuber
1,"['Animation, Adventure, Drama ']",The Lion King
2,"['Comedy, Drama ']",Once Upon a Time ... in Hollywood
3,"['Action, Adventure, Comedy ']",Fast & Furious Presents: Hobbs & Shaw
4,['Horror '],Scary Stories to Tell in the Dark
5,"['Adventure, Family ']",Dora and the Lost City of Gold
6,"['Action, Crime, Drama ']",The Kitchen
7,"['Comedy, Drama ']",The Art of Racing in the Rain
8,"['Drama, History, War ']",The Nightingale
9,"['Animation, Adventure, Comedy ']",The Angry Birds Movie 2


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn each genre string into TF-IDF weights over single words and adjacent
# word pairs (ngram_range=(1, 2)), skipping English stop words.
# min_df=1 keeps every term that occurs in at least one document -- the same
# vocabulary the former min_df=0 produced -- but, unlike the integer 0, it
# passes scikit-learn's parameter validation (an integer min_df must be >= 1).
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df['genre'])
# Shape is (number of rows in df, number of distinct terms).
tfidf_matrix.shape

(54, 56)

In [4]:
from sklearn.metrics.pairwise import linear_kernel

# Dot product between every pair of TF-IDF rows. The vectorizer above keeps
# its default L2 row normalisation, so these dot products are the cosine
# similarities between movies (1.0 on the diagonal).
cosine_sim = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)
# Preview the top-left 4x4 corner of the matrix.
cosine_sim[0:4, 0:4]

array([[1.        , 0.        , 0.22639589, 0.34206607],
       [0.        , 1.        , 0.10800532, 0.13911066],
       [0.22639589, 0.10800532, 1.        , 0.19073169],
       [0.34206607, 0.13911066, 0.19073169, 1.        ]])

In [5]:
# Series of movie titles, in the same row order as df
titles = df['name']
# Reverse lookup: movie title -> its index label in df
indices = pd.Series(data=df.index, index=titles)

# Function that gets movie recommendations based on the cosine similarity score of movie genres
def genre_recommendations(title, top_n=20):
    """Return the titles of the movies whose genres are most similar to `title`.

    Relies on the module-level lookup tables `indices` (title -> row number),
    `cosine_sim` (pairwise similarity matrix) and `titles` (row -> title).
    # assumes the values in `indices` are valid 0-based row positions for both
    # `cosine_sim` and `titles` (true for a default RangeIndex) -- TODO confirm

    Args:
        title: Movie name exactly as it appears in `indices`.
        top_n: Maximum number of recommendations to return (default 20, the
            size that used to be hard-coded).

    Returns:
        A slice of `titles` ordered from most to least similar. The queried
        movie itself is never part of the result.

    Raises:
        KeyError: If `title` is not present in `indices`.
    """
    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series of
    # positions instead of a scalar; fall back to the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Drop the queried movie by position rather than assuming it sorts first:
    # another movie with identical genres also scores 1.0, and the stable sort
    # would place it ahead whenever it has a lower row number.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [position for position, _ in sim_scores[:top_n]]
    return titles.iloc[movie_indices]

In [11]:
# Show the three closest genre matches for one example title
genre_recommendations('The Angry Birds Movie 2').iloc[:3]

12    Playmobil: The Movie
17              Abominable
36               Frozen II
Name: name, dtype: object