# Experiments on dataset `m-st5000`

In [1]:
import logging
import os
import sys

import pandas as pd

In [2]:
# Put the parent directory on the import path so `recommender` can be imported
# (relative entries on sys.path are resolved against the working directory).
# Guarded so that re-running this cell does not pile up duplicate entries.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)
from recommender.model import Recommender

In [3]:
# Configure the root logger to emit records at INFO level and above.
# NOTE(review): presumably this surfaces progress messages from `recommender` — confirm it logs via `logging`.
logging.basicConfig(level=logging.INFO)

In [4]:
# Load the two m-st5000 CSV files, each indexed by its movie-id column.
data_dir = os.path.join('data', 'm-st5000')
credits_df = pd.read_csv(os.path.join(data_dir, 'tmdb_5000_credits.csv'), index_col='movie_id')
movies = pd.read_csv(os.path.join(data_dir, 'tmdb_5000_movies.csv'), index_col='id')

# Both tables have a `title` column; drop the credits copy so the index-based
# join does not hit overlapping column names, then merge credits into `movies`.
movies = movies.join(credits_df.drop(columns=['title']))

# Only the combined `movies` frame is needed from here on.
del data_dir, credits_df

In [None]:
# Base directory under which every experiment in this notebook builds its model path.
EXPT_DIR = os.path.join('models', 'st5000')

## Experiment 1

* Complete vocabulary
* No dimensionality reduction

In [None]:
# Save/load location for Experiment 1 (complete vocabulary, no dimensionality reduction).
model_path = os.path.join(EXPT_DIR, 'vall-dall')

In [None]:
# Experiment 1: train on `movies` with the complete vocabulary and no
# dimensionality reduction, then write the model to `model_path`.
rec = Recommender()
rec.train(
    movies,
    vocab_size=None,     # complete vocabulary
    embedding_dim=None,  # no dimensionality reduction
)
rec.save(
    model_path,
    inference=True,
)

# Drop the in-memory model; the evaluation cell reloads it from disk.
del rec

In [None]:
# Reload the Experiment 1 model from disk and pretty-print recommendations
# for three sample movie ids, with a blank line between listings.
rec = Recommender.load(model_path, inference=True)

for movie_id in (425, 411):
    rec.recommend_pprint(movies['title'], movie_id)
    print()
# Last listing: no trailing blank line.
rec.recommend_pprint(movies['title'], 24428)

del rec, movie_id

## Experiment 2

* Vocabulary size: $10000$
* No dimensionality reduction

In [None]:
# Save/load location for Experiment 2 (vocabulary size 10000, no dimensionality reduction).
model_path = os.path.join(EXPT_DIR, 'v10k-dall')

In [None]:
# Experiment 2: train on `movies` with a 10000-term vocabulary and no
# dimensionality reduction, then write the model to `model_path`.
rec = Recommender()
rec.train(
    movies,
    vocab_size=10000,
    embedding_dim=None,  # no dimensionality reduction
)
rec.save(
    model_path,
    inference=True,
)

# Drop the in-memory model; the evaluation cell reloads it from disk.
del rec

In [None]:
# Reload the Experiment 2 model from disk and pretty-print recommendations
# for three sample movie ids, with a blank line between listings.
rec = Recommender.load(model_path, inference=True)

for movie_id in (425, 411):
    rec.recommend_pprint(movies['title'], movie_id)
    print()
# Last listing: no trailing blank line.
rec.recommend_pprint(movies['title'], 24428)

del rec, movie_id

## Experiment 3

* Vocabulary size: $5000$
* No dimensionality reduction

In [None]:
# Save/load location for Experiment 3 (vocabulary size 5000, no dimensionality reduction).
model_path = os.path.join(EXPT_DIR, 'v5k-dall')

In [None]:
# Experiment 3: train on `movies` with a 5000-term vocabulary and no
# dimensionality reduction, then write the model to `model_path`.
rec = Recommender()
rec.train(
    movies,
    vocab_size=5000,
    embedding_dim=None,  # no dimensionality reduction
)
rec.save(
    model_path,
    inference=True,
)

# Drop the in-memory model; the evaluation cell reloads it from disk.
del rec

In [None]:
# Reload the Experiment 3 model from disk and pretty-print recommendations
# for three sample movie ids, with a blank line between listings.
rec = Recommender.load(model_path, inference=True)

for movie_id in (425, 411):
    rec.recommend_pprint(movies['title'], movie_id)
    print()
# Last listing: no trailing blank line.
rec.recommend_pprint(movies['title'], 24428)

del rec, movie_id

## Experiment 4

* Vocabulary size: $3000$
* No dimensionality reduction

In [None]:
# Save/load location for Experiment 4 (vocabulary size 3000, no dimensionality reduction).
model_path = os.path.join(EXPT_DIR, 'v3k-dall')

In [None]:
# Experiment 4: train on `movies` with a 3000-term vocabulary and no
# dimensionality reduction, then write the model to `model_path`.
rec = Recommender()
rec.train(
    movies,
    vocab_size=3000,
    embedding_dim=None,  # no dimensionality reduction
)
rec.save(
    model_path,
    inference=True,
)

# Drop the in-memory model; the evaluation cell reloads it from disk.
del rec

In [None]:
# Reload the Experiment 4 model from disk and pretty-print recommendations
# for three sample movie ids, with a blank line between listings.
rec = Recommender.load(model_path, inference=True)

for movie_id in (425, 411):
    rec.recommend_pprint(movies['title'], movie_id)
    print()
# Last listing: no trailing blank line.
rec.recommend_pprint(movies['title'], 24428)

del rec, movie_id

## Experiment 5

* Vocabulary size: $1000$
* No dimensionality reduction

In [None]:
# Save/load location for Experiment 5 (vocabulary size 1000, no dimensionality reduction).
model_path = os.path.join(EXPT_DIR, 'v1k-dall')

In [None]:
# Experiment 5: train on `movies` with a 1000-term vocabulary and no
# dimensionality reduction, then write the model to `model_path`.
rec = Recommender()
rec.train(
    movies,
    vocab_size=1000,
    embedding_dim=None,  # no dimensionality reduction
)
rec.save(
    model_path,
    inference=True,
)

# Drop the in-memory model; the evaluation cell reloads it from disk.
del rec

In [None]:
# Reload the Experiment 5 model from disk and pretty-print recommendations
# for three sample movie ids, with a blank line between listings.
rec = Recommender.load(model_path, inference=True)

for movie_id in (425, 411):
    rec.recommend_pprint(movies['title'], movie_id)
    print()
# Last listing: no trailing blank line.
rec.recommend_pprint(movies['title'], 24428)

del rec, movie_id

## Experiment 6

* Vocabulary size: $100$
* No dimensionality reduction

In [None]:
# Save/load location for Experiment 6 (vocabulary size 100, no dimensionality reduction).
model_path = os.path.join(EXPT_DIR, 'v100-dall')

In [None]:
# Experiment 6: train on `movies` with a 100-term vocabulary and no
# dimensionality reduction, then write the model to `model_path`.
rec = Recommender()
rec.train(
    movies,
    vocab_size=100,
    embedding_dim=None,  # no dimensionality reduction
)
rec.save(
    model_path,
    inference=True,
)

# Drop the in-memory model; the evaluation cell reloads it from disk.
del rec

In [None]:
# Reload the Experiment 6 model from disk and pretty-print recommendations
# for three sample movie ids, with a blank line between listings.
rec = Recommender.load(model_path, inference=True)

for movie_id in (425, 411):
    rec.recommend_pprint(movies['title'], movie_id)
    print()
# Last listing: no trailing blank line.
rec.recommend_pprint(movies['title'], 24428)

del rec, movie_id

## Experiment 7

* Vocabulary size: $10$
* No dimensionality reduction

In [None]:
# Save/load location for Experiment 7 (vocabulary size 10, no dimensionality reduction).
model_path = os.path.join(EXPT_DIR, 'v10-dall')

In [None]:
# Experiment 7: train on `movies` with a 10-term vocabulary and no
# dimensionality reduction, then write the model to `model_path`.
rec = Recommender()
rec.train(
    movies,
    vocab_size=10,
    embedding_dim=None,  # no dimensionality reduction
)
# NOTE(review): saved with inference=False, unlike the other experiments —
# presumably so `rec.vocab` is still available after reloading; confirm
# against Recommender.save.
rec.save(
    model_path,
    inference=False,
)

# Drop the in-memory model; the evaluation cell reloads it from disk.
del rec

In [None]:
# Reload the Experiment 7 model from disk (inference=False, matching how it
# was saved), show its vocabulary, then pretty-print recommendations for
# three sample movie ids with a blank line between listings.
rec = Recommender.load(model_path, inference=False)

print(f'Vocab: {rec.vocab}')

for movie_id in (425, 411):
    rec.recommend_pprint(movies['title'], movie_id)
    print()
# Last listing: no trailing blank line.
rec.recommend_pprint(movies['title'], 24428)

del rec, movie_id