# Import Libs

In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Data

In [2]:
movies = pd.read_csv('data/preprocessed.csv')
df = movies.drop(['name'], axis=1)
df = df[['movie_id', 'genres', 'rating', 'runtime', 'story_line']]
df.set_index('movie_id')

Unnamed: 0_level_0,genres,rating,runtime,story_line
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2077677,99,6.4,86,1581.0
106332,182,8.1,171,1826.2
116421,16,5.4,91,900.0
3170832,195,8.1,118,2081.4
204175,116,5.4,94,607.4
...,...,...,...,...
2083231,26,5.1,104,455.0
2158531,128,6.6,90,1666.4
79945,76,6.4,132,1173.2
1653649,28,6.5,100,1704.0


In [3]:
movies_ = pd.read_csv('data/movies.csv')
movies_.rename(columns={'movieId': 'movie_id', 'title': 'name'}, inplace=True)
movies_

Unnamed: 0,movie_id,name,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


# Item-to-item

## Loading model

In [4]:
import pickle

loaded_model = pickle.load(open('data/knnpickle_file', 'rb'))

## Input

In [5]:
# input
movie_id = df['movie_id'].sample(1, random_state=0).values[0]

print('Movie ID:', movie_id)

Movie ID: 3164256


## Suggesting

In [6]:
# load data-to-model
movie_index = df[df['movie_id'] == movie_id].index[0]
# check
test = df.set_index('movie_id').values[movie_index].reshape(1,-1)
result = loaded_model.kneighbors(test, return_distance=True)
print(result[1][0])

[ 18 233 613 656 525 288]


In [7]:
movie_info = movies

in_ = [movie_index]
out_ = [i for i in result[1][0][1:]]

print('INPUT:\n', movie_info.loc[in_,:]['name'])
print('\n\nOUTPUT:\n\n', movie_info.loc[out_,:]['name'])

INPUT:
 820    Rock the Kasbah
Name: name, dtype: object


OUTPUT:

 233     The Silence of the Lambs
613           This Is Spinal Tap
656               Bound by Honor
525                    Lawn Dogs
288    Karski i wladcy ludzkosci
Name: name, dtype: object


# Session-based

## Loading model

In [8]:
from gensim.models import Word2Vec

session_model = Word2Vec.load('models_vecsize/64w9_model.model')
print(session_model)

Word2Vec(vocab=3416, size=64, alpha=0.025)


## Input

In [9]:
import random

random.seed(0)
l = list(session_model.wv.vocab.keys())

cur_session = random.sample(l, 5)
print('Current Session:', cur_session)

Current Session: ['2098', '2831', '3668', '2146', '2034']


## Suggesting

In [10]:
sug = session_model.wv.most_similar(cur_session, topn=10)
sug_ = [ele[0] for ele in sug]
print(sug_)

['2242', '902', '3073', '1014', '3144', '2942', '1010', '2047', '2079', '2339']


In [11]:
in_ = [int(i) for i in (cur_session)]
out_ = [int(i) for i in (sug_)]

movie_info = movies_

print('INPUT:\n\n', movie_info.loc[in_,:]['name'])
print('\n\nOUTPUT:\n\n', movie_info.loc[out_,:]['name'])

INPUT:

 2098        Wrong Man, The (1956)
2831             Body Heat (1981)
3668    Fun and Fancy Free (1947)
2146              Rounders (1998)
2034        Dead Zone, The (1983)
Name: name, dtype: object


OUTPUT:

 2242    Tales from the Darkside: The Movie (1990)
902                      Wizard of Oz, The (1939)
3073                              Magnolia (1999)
1014                Fox and the Hound, The (1981)
3144                          Saphead, The (1920)
2942                            Nighthawks (1981)
1010                                 Dumbo (1941)
2047           Autumn Sonata (Höstsonaten) (1978)
2079        Attack of the Killer Tomatoes! (1978)
2339                       You've Got Mail (1998)
Name: name, dtype: object
