# Top $10{,}000$ movies

Generates train (in-sample) and test (out-of-sample) embeddings using TF-IDF
trained on the top $10{,}000$ movies by popularity.

In [9]:
import logging
import os
import sys

import pandas as pd

In [10]:
# Make the parent of the working directory importable so the local
# `recommender` package resolves. The membership check keeps re-runs of this
# cell from appending duplicate entries to sys.path.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)
from recommender.model import Embedder, Recommender

In [11]:
# Configure the root logger at INFO level so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)

In [12]:
# Load the movie table indexed by `id`, ordered from most to least popular.
medias = (
    pd.read_csv(os.path.join('data', 'movies.csv'), index_col='id')
    .sort_values(by='popularity', ascending=False)
)

In [13]:
# Number of leading rows of `medias` that form the training (in-sample) split.
TOP_N = 10000

# Positional split: the first TOP_N rows are `train`, every remaining row is `test`.
train, test = medias.iloc[:TOP_N], medias.iloc[TOP_N:]

In [14]:
# Relative path under which the embedder is saved and from which the
# embeddings are loaded.
MODEL_PATH = os.path.join(
    'embeddings',
    'movie',
    'top-10k',
)

In [15]:
%%script false --no-raise-error

# NOTE: the `%%script false --no-raise-error` magic on the first line hands this
# cell to the shell's `false` command instead of the Python kernel, so none of
# the code below runs. Remove the magic line to train an Embedder on `train`
# and save it under MODEL_PATH.
embedder = Embedder()
embedder.train(train, vocab_size=5000, embedding_dim=64)
embedder.save(MODEL_PATH)

In [16]:
%%script false --no-raise-error

# NOTE: disabled by the `%%script false --no-raise-error` magic on the first
# line; the cell is not executed by the Python kernel. Remove the magic line to
# reload the saved embedder and embed all of `medias`, writing to MODEL_PATH.
embedder = Embedder.load(MODEL_PATH)
embedder.embed_batches(medias, 10000, MODEL_PATH)

In [18]:
# Load the stored embeddings with their ids and verify there is exactly as many
# embedding rows as rows in `medias` before building the recommender.
embeddings, ids = Embedder.load_embeddings(MODEL_PATH)
assert embeddings.shape[0] == medias.shape[0], (
    f'embedding/movie count mismatch: {embeddings.shape[0]} embeddings '
    f'for {medias.shape[0]} movies'
)
rec = Recommender(embeddings, ids)

## In-sample performance

In [56]:
# Query id 10138, which the printed output identifies as "Iron Man 2".
rec.recommend_pprint(10138, medias['title'])

Top 10 similar movies to "Iron Man 2":
* Captain America: Civil War
* Raiders of the Sun
* Kamen Rider Drive Saga: Kamen Rider Chaser
* Steel
* Kamen Rider Blade: Missing Ace
* Kamen Rider Gaim the Movie: The Great Soccer Match! The Golden Fruit Cup!
* Kamen Rider V3 vs. Destron Mutants
* Kamen Rider × Super Sentai × Space Sheriff: Super Hero Wars Z
* Avengers: Infinity War
* Iron Man 3


In [54]:
# Query id 111, which the printed output identifies as "Scarface".
rec.recommend_pprint(111, medias['title'])

Top 10 similar movies to "Scarface":
* The Weed of Crime
* The Janitor
* To Award (Posthumously)
* Drug Hunter
* Drug War
* No Way Back
* Elite Squad: The Enemy Within
* Rinchi
* Elite Squad
* Fearless Match


## Out-of-sample performance

In [70]:
# Query id 48246, which the printed output identifies as
# "Alvin and the Chipmunks Meet the Wolfman".
rec.recommend_pprint(48246, medias['title'])

Top 10 similar movies to "Alvin and the Chipmunks Meet the Wolfman":
* Alvin and the Chipmunks: The Squeakquel
* Mad Monster Party?
* Mickey Mouse Clubhouse: Road Rally
* VeggieTales: An Easter Carol
* Walt Disney Treasures: More Silly Symphonies
* Music Land
* Rock-A-Doodle
* My Little Pony: Equestria Girls
* Mexico: Pátzcuaro, Veracruz and Acapulco
* The Emperor's Newest Clothes


In [71]:
# True would mean id 48246 is one of the TOP_N rows in `train` (in-sample).
48246 in train.index

False

In [73]:
# Query id 42668, which the printed output identifies as "Alexander the Great".
rec.recommend_pprint(42668, medias['title'])

Top 10 similar movies to "Alexander the Great":
* Rules of Engagement
* 1911
* Red Cliff
* The Messenger: The Story of Joan of Arc
* The Dragon Kid
* Red Cliff II
* Kingdom of Heaven
* Night Ambush
* The Red Baron
* By the Will of Chingis Khan


In [74]:
# True would mean id 42668 is one of the TOP_N rows in `train` (in-sample).
42668 in train.index

False

In [77]:
# Query id 760336, which the printed output identifies as
# "Munich: The Edge of War".
rec.recommend_pprint(760336, medias['title'])

Top 10 similar movies to "Munich: The Edge of War":
* Taras. Return
* Das Zeugenhaus
* Urho – Between the Hammer and the Sickle
* The Sixth of July
* Final Portrait
* The Secret Diary of Symon Petliura
* Julius Caesar
* The Leader's Way. The Sky Of My Childhood
* Spy of Napoleon
* Once Upon a Time in Croatia
