In [1]:
import pandas as pd
import numpy as np

## Reading in Data

In [2]:
# Every genre table lives in the same folder; keeping that location in one
# place means moving the data only requires editing this line.
GENRE_DIR = '../data/genre_groups'


def load_genre(file_stem):
    """Load one genre table from ``GENRE_DIR``.

    Parameters
    ----------
    file_stem : str
        CSV file name without the ``.csv`` extension, e.g. ``'action'``.

    Returns
    -------
    pandas.DataFrame
        The contents of ``GENRE_DIR/<file_stem>.csv`` as parsed by
        ``pd.read_csv`` with its default options.
    """
    return pd.read_csv(GENRE_DIR + '/' + file_stem + '.csv')


action = load_genre('action')
adventure = load_genre('adventure')
animation = load_genre('animation')
bio = load_genre('biography')  # variable name is shorter than the file name
comedy = load_genre('comedy')
crime = load_genre('crime')
drama = load_genre('drama')
family = load_genre('family')
fantasy = load_genre('fantasy')
horror = load_genre('horror')
music = load_genre('music')
musical = load_genre('musical')
mystery = load_genre('mystery')
noir = load_genre('noir')
romance = load_genre('romance')
scifi = load_genre('scifi')
thriller = load_genre('thriller')
western = load_genre('western')

## Cleaning User Plots

In [3]:
import spacy

from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter

In [4]:
# Print the installed spaCy version so the environment used for this run is recorded.
print(spacy.__version__)

2.3.2


In [5]:
# Load the spaCy pipeline named "en_core_web_lg"; `nlp` is later called on each plot string.
nlp = spacy.load("en_core_web_lg")

In [57]:
def extract(doc, top_n=5, pos_tags=('NOUN',)):
    """Return the most frequent keyword tokens of a parsed document as one string.

    A token is counted when its text is not in ``STOP_WORDS``, its text is not
    a substring of ``string.punctuation``, and its ``pos_`` is one of
    ``pos_tags``. Counting is case-sensitive on ``token.text``.

    Parameters
    ----------
    doc : iterable
        Tokens exposing ``.text`` and ``.pos_`` (e.g. the result of ``nlp(text)``).
    top_n : int or None, default 5
        How many of the most frequent keywords to keep; ``None`` keeps all.
    pos_tags : iterable of str, default ('NOUN',)
        Part-of-speech tags that qualify a token as a keyword.

    Returns
    -------
    str
        The selected keywords, most frequent first (ties in first-seen order),
        each followed by a single space. The trailing space is kept on purpose
        so the result is identical to what earlier versions returned; an empty
        selection yields ``''``.
    """
    wanted_pos = set(pos_tags)

    # STOP_WORDS is used directly: membership on the set is O(1) and avoids
    # copying it into a list on every call.
    counts = Counter(
        token.text
        for token in doc
        if token.text not in STOP_WORDS
        and token.text not in punctuation
        and token.pos_ in wanted_pos
    )

    return ''.join(word + ' ' for word, _ in counts.most_common(top_n))

In [58]:
# Pull the two columns out of the action frame as plain Python lists
# (same row order) so they can be walked in parallel later.
plots = list(action['user_plots'])
titles = list(action['title'])

In [59]:
# Start the working frame as a one-column DataFrame holding the action titles.
act = action['title'].to_frame()

In [60]:
# Move the 'title' column into the index so later rows are labelled by film title.
act = act.set_index('title')

In [61]:
# Run every plot through the spaCy pipeline and keep only its keyword string,
# in the same order as `plots`.
major_keywords = [extract(nlp(text)) for text in plots]

In [62]:
# Map each title to the keyword string at the same position; a title that
# occurs more than once keeps the keywords of its last occurrence.
plot_dict = dict(zip(titles, major_keywords))

In [12]:
# Display the title -> keyword-string mapping.
# NOTE(review): the saved output below lists ten keywords per title while
# `extract` keeps five, so it presumably comes from an earlier run — re-run to refresh.
plot_dict

{'The Dark Knight': 'mob chaos crime people mastermind line hero vigilante city injustice ',
 'The Lord of the Rings: The Return of the King': 'Ring forces army earth men battle quest city capital fate ',
 'Inception': 'dreams mind team dream inception secrets idea world job information ',
 'The Lord of the Rings: The Fellowship of the Ring': 'Ring earth power fate quest journey centuries evil hands task ',
 'The Lord of the Rings: The Two Towers': 'allies Ring power creature people assault destruction earth help fellowship ',
 'The Matrix': 'computer hacker truth life night stranger underworld deception cyber intelligence ',
 'The Empire Strikes Back': 'ice friends world walkers planet galaxy attack training forces duel ',
 'Star Wars': 'galaxy rebellion planet farm plans forces pilot droids battle station ',
 'Gladiator': 'emperor gladiator death son power general order family slavery execution ',
 'Léon: The Professional': 'family father girl revenge brother drug apartment life hitm

In [63]:
# Attach the keyword strings as a new 'keywords' column (one entry per row, in order).
act = act.assign(keywords=major_keywords)

In [64]:
# Director strings for every action row, as a plain list.
dirs = list(action['join_director'])

In [65]:
# Cast strings for every action row, as a plain list.
cast = list(action['join_cast'])

In [66]:
# Sanity check: the three lists are combined element-wise below, so their sizes should agree.
tuple(map(len, (dirs, major_keywords, cast)))

(1665, 1665, 1665)

In [67]:
# One text blob per film: keyword string, then cast, then director(s),
# separated by single spaces.
combo = [
    keywords + ' ' + actors + ' ' + director
    for keywords, actors, director in zip(major_keywords, cast, dirs)
]

In [68]:
# Number of combined text blobs; expected to equal the number of films.
len(combo)

1665

In [69]:
# Attach the combined keyword/cast/director text as a new 'combo' column.
act = act.assign(combo=combo)

In [70]:
# Show only the 'combo' column (as a DataFrame) to eyeball the combined text.
act.loc[:, ['combo']]

Unnamed: 0_level_0,combo
title,Unnamed: 1_level_1
The Dark Knight,mob chaos crime people mastermind ChristianBa...
The Lord of the Rings: The Return of the King,Ring forces army earth men ElijahWood IanMcKe...
Inception,dreams mind team dream inception LeonardoDiCa...
The Lord of the Rings: The Fellowship of the Ring,Ring earth power fate quest ElijahWood IanMcK...
The Lord of the Rings: The Two Towers,allies Ring power creature people ElijahWood ...
...,...
3 Ninjas: High Noon at Mega Mountain,park ninjas brothers hero kid VictorWong Math...
Dragonball Evolution,warrior quest time set orbs JustinChatwin Cho...
Battlefield Earth,race man humanity year Humanity JohnTravolta ...
Alone in the Dark,detective mystery friend face demons Christia...


In [81]:
# Preview the first 100 rows of the raw action frame.
action.head(100)

Unnamed: 0,index,imdb_id,title,overviews,join_director,join_cast,vote_average,vote_count,genre,user_plots,...,Fantasy,Mystery,Thriller,Western,Family,Sci-Fi,Romance,Film-Noir,Musical,Music
0,2,tt0468569,The Dark Knight,Batman raises the stakes in his war on crime. ...,ChristopherNolan,ChristianBale HeathLedger MichaelCaine,8.5,24993,Action Crime Drama,['When the menace known as the Joker wreaks ha...,...,0,0,0,0,0,0,0,0,0,0
1,5,tt0167260,The Lord of the Rings: The Return of the King,Aragorn is revealed as the heir to the ancient...,PeterJackson,ElijahWood IanMcKellen LivTyler,8.5,17557,Action Adventure Drama,"[""Gandalf and Aragorn lead the World of Men ag...",...,0,0,0,0,0,0,0,0,0,0
2,8,tt1375666,Inception,"Cobb, a skilled thief who commits corporate es...",ChristopherNolan,LeonardoDiCaprio KenWatanabe JosephGordon-Levitt,8.3,28959,Action Adventure Sci-Fi,['A thief who steals corporate secrets through...,...,0,0,0,0,0,1,0,0,0,0
3,10,tt0120737,The Lord of the Rings: The Fellowship of the Ring,"Young hobbit Frodo Baggins, after inheriting a...",PeterJackson,ElijahWood IanMcKellen LivTyler,8.4,19028,Action Adventure Drama,['A meek Hobbit from the Shire and eight compa...,...,0,0,0,0,0,0,0,0,0,0
4,13,tt0167261,The Lord of the Rings: The Two Towers,Frodo and Sam are trekking to Mordor to destro...,PeterJackson,ElijahWood IanMcKellen LivTyler,8.3,16432,Action Adventure Drama,"[""While Frodo and Sam edge closer to Mordor wi...",...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,543,tt5463162,Deadpool 2,Wisecracking mercenary Deadpool battles the ev...,DavidLeitch,RyanReynolds JoshBrolin MorenaBaccarin,7.5,13173,Action Adventure Comedy,['Foul-mouthed mutant mercenary Wade Wilson (a...,...,0,0,0,0,0,0,0,0,0,0
96,546,tt4912910,Mission: Impossible - Fallout,"When an IMF mission ends badly, the world is f...",ChristopherMcQuarrie,TomCruise HenryCavill VingRhames,7.4,5878,Action Adventure Thriller,"['Ethan Hunt and his IMF team, along with some...",...,0,0,1,0,0,0,0,0,0,0
97,550,tt2802144,Kingsman: The Secret Service,The story of a super-secret spy organization t...,MatthewVaughn,TaronEgerton ColinFirth MarkStrong,7.6,13050,Action Adventure Comedy,"[""A spy organisation recruits a promising stre...",...,0,0,0,0,0,0,0,0,0,0
98,555,tt1843866,Captain America: The Winter Soldier,After the cataclysmic events in New York with ...,AnthonyRusso JoeRusso,ChrisEvans ScarlettJohansson SebastianStan,7.7,14474,Action Adventure Sci-Fi,['As Steve Rogers struggles to embrace his rol...,...,0,0,0,0,0,1,0,0,0,0


In [None]:
#%%time
#for title in plot_dict:
#    key_words = plot_dict[title]
#    act[title] = [nlp(key_words).similarity(nlp(words)) for words in major_keywords]

## Vectorizing

In [71]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import pairwise_kernels

In [72]:
# The corpus to vectorise: the 'combo' text of every film, as a Series.
test = act.loc[:, 'combo']

In [73]:
# Word-level TF-IDF over unigrams and bigrams with English stop words removed.
# min_df is 1 rather than 0: every vocabulary term occurs in at least one
# document, so the resulting vocabulary is the same, and newer scikit-learn
# releases validate constructor arguments and reject an integer min_df below 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2),
                     min_df=1, stop_words='english')
# Learn the vocabulary from the corpus and return the document-term matrix.
matrix = tf.fit_transform(test)

In [74]:
# (number of documents, number of TF-IDF features)
matrix.shape

(1665, 18315)

In [75]:
# Pairwise dot products between all TF-IDF rows. TfidfVectorizer L2-normalises
# rows by default, so these dot products are cosine similarities.
# `linear_kernel` is imported by name: the bare name `sklearn` is never bound
# by the `from sklearn... import ...` statements, so `sklearn.metrics...`
# raises NameError on a fresh kernel.
cos = linear_kernel(matrix, matrix)

# Titles in row order; taken positionally so the labels line up with the
# matrix rows regardless of how `action` is indexed.
title = list(action['title'])

# Square similarity table labelled by film title on both axes
# (index is named 'title', columns carry the same labels).
cos_df = pd.DataFrame(cos, index=pd.Index(title, name='title'), columns=title)

In [76]:
def recommend_me(title, n=10):
    """Return the films most similar to ``title`` according to ``cos_df``.

    Parameters
    ----------
    title : str
        A label present in the columns (and index) of ``cos_df``.
    n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        One column named ``title`` holding the similarity scores, indexed by
        film title and sorted from most to least similar. The queried title
        itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not a column of ``cos_df``.
    """
    # Selecting with a list keeps a DataFrame; if the label occurs on several
    # columns (films sharing a title) keep the first so sorting by it is unambiguous.
    scores = cos_df.loc[:, [title]].iloc[:, [0]]

    # Drop the queried film by label instead of discarding whichever row sorts
    # first: with tied scores the film itself is not guaranteed to be on top.
    others = scores.drop(index=title)

    return others.sort_values(by=title, ascending=False).head(n)

In [85]:
# Example query: the titles ranked most similar to 'Rush'.
recommend_me('Rush')

Unnamed: 0_level_0,Rush
title,Unnamed: 1_level_1
In the Heart of the Sea,0.111663
12 Strong,0.068449
Fast & Furious 6,0.060488
The Death and Life of Bobby Z,0.057104
7 Days in Entebbe,0.056533
TRON: Legacy,0.056228
Cowboys & Aliens,0.055277
X-Men Origins: Wolverine,0.055054
Rollerball,0.055038
"It's a Mad, Mad, Mad, Mad World",0.054115


In [54]:
# Preview the first 20 rows of the raw action frame.
action.head(20)

Unnamed: 0,index,imdb_id,title,overviews,join_director,join_cast,vote_average,vote_count,genre,user_plots,...,Fantasy,Mystery,Thriller,Western,Family,Sci-Fi,Romance,Film-Noir,Musical,Music
0,2,tt0468569,The Dark Knight,Batman raises the stakes in his war on crime. ...,ChristopherNolan,ChristianBale HeathLedger MichaelCaine,8.5,24993,Action Crime Drama,['When the menace known as the Joker wreaks ha...,...,0,0,0,0,0,0,0,0,0,0
1,5,tt0167260,The Lord of the Rings: The Return of the King,Aragorn is revealed as the heir to the ancient...,PeterJackson,ElijahWood IanMcKellen LivTyler,8.5,17557,Action Adventure Drama,"[""Gandalf and Aragorn lead the World of Men ag...",...,0,0,0,0,0,0,0,0,0,0
2,8,tt1375666,Inception,"Cobb, a skilled thief who commits corporate es...",ChristopherNolan,LeonardoDiCaprio KenWatanabe JosephGordon-Levitt,8.3,28959,Action Adventure Sci-Fi,['A thief who steals corporate secrets through...,...,0,0,0,0,0,1,0,0,0,0
3,10,tt0120737,The Lord of the Rings: The Fellowship of the Ring,"Young hobbit Frodo Baggins, after inheriting a...",PeterJackson,ElijahWood IanMcKellen LivTyler,8.4,19028,Action Adventure Drama,['A meek Hobbit from the Shire and eight compa...,...,0,0,0,0,0,0,0,0,0,0
4,13,tt0167261,The Lord of the Rings: The Two Towers,Frodo and Sam are trekking to Mordor to destro...,PeterJackson,ElijahWood IanMcKellen LivTyler,8.3,16432,Action Adventure Drama,"[""While Frodo and Sam edge closer to Mordor wi...",...,0,0,0,0,0,0,0,0,0,0
5,14,tt0133093,The Matrix,"Set in the 22nd century, The Matrix tells the ...",LillyWachowski LanaWachowski,KeanuReeves LaurenceFishburne Carrie-AnneMoss,8.1,19103,Action Sci-Fi,['When a beautiful stranger leads computer hac...,...,0,0,0,0,0,1,0,0,0,0
6,16,tt0080684,The Empire Strikes Back,"The epic saga continues as Luke Skywalker, in ...",IrvinKershner,MarkHamill HarrisonFord CarrieFisher,8.4,12915,Action Adventure Fantasy,['After the Rebels are brutally overpowered by...,...,1,0,0,0,0,0,0,0,0,0
7,25,tt0076759,Star Wars,Princess Leia is captured and held hostage by ...,GeorgeLucas,MarkHamill HarrisonFord CarrieFisher,8.2,15469,Action Adventure Fantasy,"[""Luke Skywalker joins forces with a Jedi Knig...",...,1,0,0,0,0,0,0,0,0,0
8,32,tt0172495,Gladiator,"In the year 180, the death of emperor Marcus A...",RidleyScott,RussellCrowe JoaquinPhoenix ConnieNielsen,8.2,13550,Action Adventure Drama,['A former Roman General sets out to exact ven...,...,0,0,0,0,0,0,0,0,0,0
9,35,tt0110413,Léon: The Professional,"Léon, the top hit man in New York, has earned ...",LucBesson,JeanReno NataliePortman GaryOldman,8.3,10824,Action Crime Drama,"[""Mathilda, a 12-year-old girl, is reluctantly...",...,0,0,0,0,0,0,0,0,0,0
