# Experiments on dataset `m-st5000`

In [1]:
import logging
import os
import sys

import pandas as pd

In [2]:
# Put the parent directory on the import path; presumably that is where the
# project-local `recommender` package lives relative to this notebook.
sys.path.append(os.pardir)
from recommender.model import Recommender

In [3]:
# Configure the root logger at INFO so the training progress messages
# (the `INFO:root:` lines in the cell outputs) are displayed.
logging.basicConfig(level=logging.INFO)

In [4]:
# Read both TMDB tables, each indexed by its movie identifier column.
cast_and_crew = pd.read_csv(
    os.path.join('data', 'm-st5000', 'tmdb_5000_credits.csv'),
    index_col='movie_id',
)
movies = pd.read_csv(
    os.path.join('data', 'm-st5000', 'tmdb_5000_movies.csv'),
    index_col='id',
)

# `movies` keeps its own 'title' column (it is used for display later), so the
# credits copy is removed before the index-on-index join; presumably this
# avoids an overlapping-column clash in `DataFrame.join`.
movies = movies.join(cast_and_crew.drop(columns=['title']))

# Only the merged frame is needed from here on.
del cast_and_crew

In [5]:
# Base directory under which each experiment's `model_path` is built.
EXPT_DIR = os.path.join('models', 'st5000')

## Experiment 1

* Complete vocabulary
* No dimensionality reduction

In [6]:
# Save/load location for Experiment 1. The 'vall-dall' tag presumably stands
# for "full vocabulary, all dimensions" -- confirm the naming convention.
model_path = os.path.join(EXPT_DIR, 'vall-dall')

In [7]:
# Experiment 1: no vocabulary cap and no embedding reduction (both None).
trainer = Recommender()
trainer.train(movies, embedding_dim=None, vocab_size=None)
# NOTE(review): `inference=True` presumably stores only what is needed to
# serve recommendations -- confirm against Recommender.save.
trainer.save(model_path, inference=True)

# Drop the reference to the fitted model; it is reloaded from disk below.
del trainer

INFO:root:Preprocessing 'genres'
INFO:root:Encoding 'genres'
INFO:root:Preprocessing 'overview'
INFO:root:Injecting 'keywords' into 'overview'
INFO:root:Encoding 'overview'...
INFO:root:Encoding 'overview' took 42.654 s, for a vocabulary size of 18551.
INFO:root:Computing cosine similarity...
INFO:root:Computing cosine similarity took 14.5051 s.


In [8]:
# Reload the saved model and print neighbours for three probe movies,
# separated by blank lines.
rec = Recommender.load(model_path, inference=True)

rec.recommend_pprint(movies['title'], 425)    # "Ice Age"
print()
rec.recommend_pprint(movies['title'], 411)    # "The Chronicles of Narnia: The Lion, ..."
print()
rec.recommend_pprint(movies['title'], 24428)  # "The Avengers"

del rec

Top 10 similar movies to "Ice Age":
* Ice Age: The Meltdown
* Ice Age: Dawn of the Dinosaurs
* Ice Age: Continental Drift
* Kung Fu Panda
* Antz
* TMNT
* Bee Movie
* Up
* Cars
* Curious George

Top 10 similar movies to "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe":
* The Chronicles of Narnia: Prince Caspian
* The Wizard of Oz
* The Chronicles of Narnia: The Voyage of the Dawn Treader
* Harry Potter and the Chamber of Secrets
* Harry Potter and the Philosopher's Stone
* The Indian in the Cupboard
* The Spiderwick Chronicles
* Oz: The Great and Powerful
* Harry Potter and the Half-Blood Prince
* Pan

Top 10 similar movies to "The Avengers":
* Avengers: Age of Ultron
* Fantastic Four
* Captain America: Civil War
* Iron Man 2
* Captain America: The First Avenger
* X-Men
* Iron Man
* Iron Man 3
* Captain America: The Winter Soldier
* Ant-Man


## Experiment 2

* Vocabulary size: $10000$
* No dimensionality reduction

In [9]:
# Save/load location for Experiment 2. The 'v10k-dall' tag presumably stands
# for "10k vocabulary, all dimensions" -- confirm the naming convention.
model_path = os.path.join(EXPT_DIR, 'v10k-dall')

In [10]:
# Experiment 2: vocabulary capped at 10000 terms, no embedding reduction.
trainer = Recommender()
trainer.train(movies, embedding_dim=None, vocab_size=10000)
# NOTE(review): `inference=True` presumably stores only what is needed to
# serve recommendations -- confirm against Recommender.save.
trainer.save(model_path, inference=True)

# Drop the reference to the fitted model; it is reloaded from disk below.
del trainer

INFO:root:Preprocessing 'genres'
INFO:root:Encoding 'genres'
INFO:root:Preprocessing 'overview'
INFO:root:Injecting 'keywords' into 'overview'
INFO:root:Encoding 'overview'...
INFO:root:Encoding 'overview' took 35.5649 s, for a vocabulary size of 10000.
INFO:root:Computing cosine similarity...
INFO:root:Computing cosine similarity took 8.0549 s.


In [11]:
# Reload the saved model and print neighbours for three probe movies,
# separated by blank lines.
rec = Recommender.load(model_path, inference=True)

rec.recommend_pprint(movies['title'], 425)    # "Ice Age"
print()
rec.recommend_pprint(movies['title'], 411)    # "The Chronicles of Narnia: The Lion, ..."
print()
rec.recommend_pprint(movies['title'], 24428)  # "The Avengers"

del rec

Top 10 similar movies to "Ice Age":
* Ice Age: Dawn of the Dinosaurs
* Ice Age: The Meltdown
* Ice Age: Continental Drift
* Kung Fu Panda
* Bee Movie
* Antz
* Up
* TMNT
* Cars
* The Nut Job

Top 10 similar movies to "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe":
* The Chronicles of Narnia: Prince Caspian
* The Chronicles of Narnia: The Voyage of the Dawn Treader
* The Wizard of Oz
* Harry Potter and the Chamber of Secrets
* Harry Potter and the Philosopher's Stone
* The Spiderwick Chronicles
* The Indian in the Cupboard
* Oz: The Great and Powerful
* Harry Potter and the Half-Blood Prince
* Pan

Top 10 similar movies to "The Avengers":
* Avengers: Age of Ultron
* Fantastic Four
* Captain America: Civil War
* Iron Man
* Iron Man 2
* Captain America: The First Avenger
* X-Men
* Iron Man 3
* Guardians of the Galaxy
* The Incredible Hulk


## Experiment 3

* Vocabulary size: $5000$
* No dimensionality reduction

In [12]:
# Save/load location for Experiment 3. The 'v5k-dall' tag presumably stands
# for "5k vocabulary, all dimensions" -- confirm the naming convention.
model_path = os.path.join(EXPT_DIR, 'v5k-dall')

In [13]:
# Experiment 3: vocabulary capped at 5000 terms, no embedding reduction.
trainer = Recommender()
trainer.train(movies, embedding_dim=None, vocab_size=5000)
# NOTE(review): `inference=True` presumably stores only what is needed to
# serve recommendations -- confirm against Recommender.save.
trainer.save(model_path, inference=True)

# Drop the reference to the fitted model; it is reloaded from disk below.
del trainer

INFO:root:Preprocessing 'genres'
INFO:root:Encoding 'genres'
INFO:root:Preprocessing 'overview'
INFO:root:Injecting 'keywords' into 'overview'
INFO:root:Encoding 'overview'...
INFO:root:Encoding 'overview' took 37.3858 s, for a vocabulary size of 5000.
INFO:root:Computing cosine similarity...
INFO:root:Computing cosine similarity took 3.7418 s.


In [14]:
# Reload the saved model and print neighbours for three probe movies,
# separated by blank lines.
rec = Recommender.load(model_path, inference=True)

rec.recommend_pprint(movies['title'], 425)    # "Ice Age"
print()
rec.recommend_pprint(movies['title'], 411)    # "The Chronicles of Narnia: The Lion, ..."
print()
rec.recommend_pprint(movies['title'], 24428)  # "The Avengers"

del rec

Top 10 similar movies to "Ice Age":
* Ice Age: Continental Drift
* Ice Age: The Meltdown
* Ice Age: Dawn of the Dinosaurs
* Kung Fu Panda
* Flushed Away
* TMNT
* Up
* Cars
* Antz
* Bee Movie

Top 10 similar movies to "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe":
* The Chronicles of Narnia: Prince Caspian
* The Chronicles of Narnia: The Voyage of the Dawn Treader
* The Wizard of Oz
* Harry Potter and the Chamber of Secrets
* Harry Potter and the Philosopher's Stone
* The Spiderwick Chronicles
* Percy Jackson: Sea of Monsters
* Harry Potter and the Half-Blood Prince
* Oz: The Great and Powerful
* Pan

Top 10 similar movies to "The Avengers":
* Avengers: Age of Ultron
* Ant-Man
* Fantastic Four
* Captain America: Civil War
* Captain America: The First Avenger
* Iron Man 2
* Iron Man 3
* Iron Man
* Guardians of the Galaxy
* X-Men


## Experiment 4

* Vocabulary size: $3000$
* No dimensionality reduction

In [15]:
# Save/load location for Experiment 4. The 'v3k-dall' tag presumably stands
# for "3k vocabulary, all dimensions" -- confirm the naming convention.
model_path = os.path.join(EXPT_DIR, 'v3k-dall')

In [16]:
# Experiment 4: vocabulary capped at 3000 terms, no embedding reduction.
trainer = Recommender()
trainer.train(movies, embedding_dim=None, vocab_size=3000)
# NOTE(review): `inference=True` presumably stores only what is needed to
# serve recommendations -- confirm against Recommender.save.
trainer.save(model_path, inference=True)

# Drop the reference to the fitted model; it is reloaded from disk below.
del trainer

INFO:root:Preprocessing 'genres'
INFO:root:Encoding 'genres'
INFO:root:Preprocessing 'overview'
INFO:root:Injecting 'keywords' into 'overview'
INFO:root:Encoding 'overview'...
INFO:root:Encoding 'overview' took 38.5387 s, for a vocabulary size of 3000.
INFO:root:Computing cosine similarity...
INFO:root:Computing cosine similarity took 4.0739 s.


In [17]:
# Reload the saved model and print neighbours for three probe movies,
# separated by blank lines.
rec = Recommender.load(model_path, inference=True)

rec.recommend_pprint(movies['title'], 425)    # "Ice Age"
print()
rec.recommend_pprint(movies['title'], 411)    # "The Chronicles of Narnia: The Lion, ..."
print()
rec.recommend_pprint(movies['title'], 24428)  # "The Avengers"

del rec

Top 10 similar movies to "Ice Age":
* Ice Age: Continental Drift
* Ice Age: The Meltdown
* Ice Age: Dawn of the Dinosaurs
* Kung Fu Panda
* A Bug's Life
* Antz
* Cars
* Bee Movie
* The Curse of the Were-Rabbit
* Up

Top 10 similar movies to "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe":
* The Wizard of Oz
* The Chronicles of Narnia: Prince Caspian
* The Chronicles of Narnia: The Voyage of the Dawn Treader
* Harry Potter and the Chamber of Secrets
* Harry Potter and the Philosopher's Stone
* Harry Potter and the Half-Blood Prince
* Oz: The Great and Powerful
* The Spiderwick Chronicles
* Percy Jackson: Sea of Monsters
* Pan

Top 10 similar movies to "The Avengers":
* Avengers: Age of Ultron
* Fantastic Four
* Ant-Man
* Captain America: Civil War
* Iron Man 3
* Iron Man 2
* The Incredible Hulk
* Captain America: The First Avenger
* Iron Man
* Guardians of the Galaxy


## Experiment 5

* Vocabulary size: $1000$
* No dimensionality reduction

In [18]:
# Save/load location for Experiment 5. The 'v1k-dall' tag presumably stands
# for "1k vocabulary, all dimensions" -- confirm the naming convention.
model_path = os.path.join(EXPT_DIR, 'v1k-dall')

In [19]:
# Experiment 5: vocabulary capped at 1000 terms, no embedding reduction.
trainer = Recommender()
trainer.train(movies, embedding_dim=None, vocab_size=1000)
# NOTE(review): `inference=True` presumably stores only what is needed to
# serve recommendations -- confirm against Recommender.save.
trainer.save(model_path, inference=True)

# Drop the reference to the fitted model; it is reloaded from disk below.
del trainer

INFO:root:Preprocessing 'genres'
INFO:root:Encoding 'genres'
INFO:root:Preprocessing 'overview'
INFO:root:Injecting 'keywords' into 'overview'
INFO:root:Encoding 'overview'...
INFO:root:Encoding 'overview' took 38.6074 s, for a vocabulary size of 1000.
INFO:root:Computing cosine similarity...
INFO:root:Computing cosine similarity took 1.1334 s.


In [20]:
# Reload the saved model and print neighbours for three probe movies,
# separated by blank lines.
rec = Recommender.load(model_path, inference=True)

rec.recommend_pprint(movies['title'], 425)    # "Ice Age"
print()
rec.recommend_pprint(movies['title'], 411)    # "The Chronicles of Narnia: The Lion, ..."
print()
rec.recommend_pprint(movies['title'], 24428)  # "The Avengers"

del rec

Top 10 similar movies to "Ice Age":
* Ice Age: The Meltdown
* Ice Age: Continental Drift
* Ice Age: Dawn of the Dinosaurs
* Kung Fu Panda
* A Bug's Life
* Bee Movie
* Rio
* Flushed Away
* Cars
* Curious George

Top 10 similar movies to "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe":
* The Wizard of Oz
* The Chronicles of Narnia: Prince Caspian
* Oz: The Great and Powerful
* Harry Potter and the Chamber of Secrets
* Percy Jackson: Sea of Monsters
* The Chronicles of Narnia: The Voyage of the Dawn Treader
* Harry Potter and the Philosopher's Stone
* The Spiderwick Chronicles
* Pan
* Harry Potter and the Half-Blood Prince

Top 10 similar movies to "The Avengers":
* Fantastic Four
* Avengers: Age of Ultron
* Iron Man
* Iron Man 3
* Iron Man 2
* Ant-Man
* The Incredible Hulk
* Captain America: Civil War
* Captain America: The First Avenger
* X-Men


## Experiment 6

* Vocabulary size: $5$
* No dimensionality reduction

In [21]:
# Save/load location for Experiment 6. The 'v5-dall' tag presumably stands
# for "5-term vocabulary, all dimensions" -- confirm the naming convention.
model_path = os.path.join(EXPT_DIR, 'v5-dall')

In [25]:
# Experiment 6: vocabulary capped at just 5 terms, no embedding reduction.
trainer = Recommender()
trainer.train(movies, embedding_dim=None, vocab_size=5)
# Unlike the other experiments this one saves with `inference=False`;
# presumably so the vocabulary survives the save/load round trip (it is read
# back after loading) -- confirm against Recommender.save.
trainer.save(model_path, inference=False)

# Drop the reference to the fitted model; it is reloaded from disk below.
del trainer

INFO:root:Preprocessing 'genres'
INFO:root:Encoding 'genres'
INFO:root:Preprocessing 'overview'
INFO:root:Injecting 'keywords' into 'overview'
INFO:root:Encoding 'overview'...
INFO:root:Encoding 'overview' took 35.2017 s, for a vocabulary size of 5.
INFO:root:Computing cosine similarity...
INFO:root:Computing cosine similarity took 0.447 s.


['id' 'life' 'love' 'new' 'woman']


In [23]:
# Reload the non-inference save, show the learned vocabulary, then print
# neighbours for three probe movies, separated by blank lines.
# NOTE(review): the recorded output for this cell contains no "Vocab:" line
# and the cell counters around it are out of order, so the stored outputs look
# stale -- re-run the notebook top to bottom to confirm.
rec = Recommender.load(model_path, inference=False)

print(f'Vocab: {rec.vocab}')

rec.recommend_pprint(movies['title'], 425)    # "Ice Age"
print()
rec.recommend_pprint(movies['title'], 411)    # "The Chronicles of Narnia: The Lion, ..."
print()
rec.recommend_pprint(movies['title'], 24428)  # "The Avengers"

del rec

Top 10 similar movies to "Ice Age":
* Penguins of Madagascar
* Minions
* Rio 2
* A Bug's Life
* Cars 2
* Ice Age
* The Lion of Judah
* Stuart Little 2
* Wreck-It Ralph
* Rugrats in Paris: The Movie

Top 10 similar movies to "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe":
* Alice in Wonderland
* The Chronicles of Narnia: The Lion, the Witch and the Wardrobe
* City of Ember
* The Chronicles of Narnia: The Voyage of the Dawn Treader
* Harry Potter and the Goblet of Fire
* The Spiderwick Chronicles
* Harry Potter and the Half-Blood Prince
* Harry Potter and the Prisoner of Azkaban
* Oz: The Great and Powerful
* Percy Jackson: Sea of Monsters

Top 10 similar movies to "The Avengers":
* Star Wars: Episode III - Revenge of the Sith
* X-Men: First Class
* The Incredible Hulk
* Captain America: The First Avenger
* Ant-Man
* Captain America: The Winter Soldier
* Return of the Jedi
* The Empire Strikes Back
* Star Wars
* TRON: Legacy
