# Recomendator demo

In [None]:
import collections
import os
import random
import sys

import numpy as np
import pandas as pd

In [2]:
# Absolute path of the parent of the current working directory (the project root).
PATH_TO_PROJECT = os.path.abspath("../")

# Make the local `recomendator` package importable. The membership check keeps
# sys.path from accumulating duplicate entries when this cell is re-run.
if PATH_TO_PROJECT not in sys.path:
    sys.path.append(PATH_TO_PROJECT)
from recomendator import Ranginator, Collaborator, metrics

Using TensorFlow backend.


In [3]:
# Locate each "ml-latest-small" CSV under the project root and load it into its
# own DataFrame: ratings, movies and tags.
PATH_TO_RATINGS = os.path.join(PATH_TO_PROJECT, 'datasets/ml-latest-small/ratings.csv')
ratings = pd.read_csv(PATH_TO_RATINGS)

PATH_TO_MOVIES = os.path.join(PATH_TO_PROJECT, 'datasets/ml-latest-small/movies.csv')
movies = pd.read_csv(PATH_TO_MOVIES)

PATH_TO_TAGS = os.path.join(PATH_TO_PROJECT, 'datasets/ml-latest-small/tags.csv')
tags = pd.read_csv(PATH_TO_TAGS)

## Let's look at the dataset
You may download this dataset from [here](http://files.grouplens.org/datasets/movielens/).

In [4]:
# Print the first ten rows of each table, each under a tab-indented banner.
for banner, frame in (
    ("\n\n\t\t\tmovies\n\n", movies),
    ("\n\n\t\t\tratings\n\n", ratings),
):
    print(banner)
    print(frame.head(10))



			movies


   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   
5        6                         Heat (1995)   
6        7                      Sabrina (1995)   
7        8                 Tom and Huck (1995)   
8        9                 Sudden Death (1995)   
9       10                    GoldenEye (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
5                        Action|Crime|Thriller  
6                               Comedy|Romance  
7                           Adventure|Child

## Example Ranginator 
Counts the difference between two objects on a given metric. 
<br>
The default metric is `sim_distance`. 
<br>
All available metrics can be found [here](https://github.com/dimabendera/recomendator/blob/master/recomendator/metrics/metrics.py)
 <br>
[More about Ranginator...](https://github.com/dimabendera/recomendator/blob/master/recomendator/recomendators/ranginator.py)

In [5]:
# Build a Ranginator with its default settings (no metric argument is passed).
ranginator = Ranginator()

In [6]:
# Fit on three parallel columns of the ratings table: user ids, movie ids and rating values.
ranginator.fit(ratings["userId"], ratings["movieId"], ratings["rating"])

In [7]:
# Query the fitted model for id 1, asking for N=15 results; the cell output lists
# the returned (value, id) tuples.
ranginator.predict([1], N=15)

[[(1.0, 291),
  (1.0, 253),
  (1.0, 85),
  (1.0, 77),
  (0.8, 550),
  (0.8, 511),
  (0.8, 472),
  (0.8, 278),
  (0.6666666666666666, 366),
  (0.6666666666666666, 12),
  (0.5, 523),
  (0.5, 519),
  (0.5, 481),
  (0.5, 388),
  (0.5, 360)]]

Explanation: these are the users most similar to the user with id 1.

## Example Collaborator 
Finds similar items, in the spirit of "people who watched this also watched…". It uses Ranginator. 
 <br>
[More about Collaborator...](https://github.com/dimabendera/recomendator/blob/master/recomendator/recomendators/collaborator.py)

In [8]:
# Build a Collaborator configured with metrics.sim_pearson as its metric.
collaborator = Collaborator(metric=metrics.sim_pearson)

In [9]:
# Fit on three parallel columns of the ratings table: user ids, movie ids and rating values.
collaborator.fit(ratings["userId"], ratings["movieId"], ratings["rating"])

In [10]:
# Ask the collaborator for candidates for id 2, then keep only the second element
# of every candidate whose first element is above the 0.9 cut-off.
SCORE_CUTOFF = 0.9
rawPredictions = collaborator.predict([2])
similarItems = [
    candidate[1]
    for candidate in rawPredictions[0]
    if candidate[0] > SCORE_CUTOFF
]
similarItems

[[(1.0, 416), (1.0, 370), (1.0, 363), (1.0, 341), (1.0, 313), (1.0, 240), (1.0, 148), (1.0, 143), (1.0, 93), (1.0, 60)]]


[122918,
 98491,
 69069,
 59900,
 46976,
 30816,
 26810,
 8827,
 7444,
 7169,
 6619,
 5951,
 5540,
 5254,
 5066,
 5026,
 5010,
 4979,
 4941,
 4915,
 4180,
 4006,
 3994,
 3863,
 3771,
 3740,
 3727,
 3704,
 3703,
 3701,
 3697,
 3550,
 3527,
 3262,
 3153,
 3032,
 2867,
 2710,
 2542,
 2530,
 2529,
 2467,
 2431,
 2287,
 2268,
 2144,
 2140,
 2138,
 2117,
 2028,
 2020,
 1997,
 1923,
 1885,
 1625,
 1587,
 1449,
 1394,
 1275,
 1274,
 1258,
 1235,
 1222,
 1215,
 1213,
 1200,
 1129,
 1059,
 968,
 880,
 866,
 858,
 610,
 553,
 542,
 524,
 516,
 432,
 383,
 370,
 344,
 338,
 328,
 317,
 292,
 276,
 267,
 255,
 248,
 216,
 174,
 110,
 104,
 101,
 34,
 19,
 2,
 81847,
 750,
 223,
 593]

Let's explain. <br>
The user has seen these films:

In [11]:
# Print the title entry for every movie id stored under key 2 in the collaborator's storage.
for seenMovieId in collaborator.storage.storage[2].keys():
    isSeenMovie = movies['movieId'] == seenMovieId
    print(movies.loc[isSeenMovie, "title"].to_string())

277    Shawshank Redemption, The (1994)
291    Tommy Boy (1995)
1284    Good Will Hunting (1997)
2674    Gladiator (2000)
4615    Kill Bill: Vol. 1 (2003)
5305    Collateral (2004)
6253    Talladega Nights: The Ballad of Ricky Bobby (2...
6315    Departed, The (2006)
6710    Dark Knight, The (2008)
6801    Step Brothers (2008)
7010    Inglourious Basterds (2009)
7154    Zombieland (2009)
7258    Shutter Island (2010)
7323    Exit Through the Gift Shop (2010)
7372    Inception (2010)
7415    Town, The (2010)
7436    Inside Job (2010)
7590    Louis C.K.: Hilarious (2010)
7697    Warrior (2011)
7768    Dark Knight Rises, The (2012)
7776    Girl with the Dragon Tattoo, The (2011)
8063    Django Unchained (2012)
8305    Wolf of Wall Street, The (2013)
8376    Interstellar (2014)
8466    Whiplash (2014)
8509    The Drop (2014)
8550    Ex Machina (2015)
8681    Mad Max: Fury Road (2015)
8828    The Jinx: The Life and Deaths of Robert Durst ...


We recommend them:

In [12]:
# Print the title entry for each recommended movie id, in list order.
for recommendedId in similarItems:
    isRecommended = movies['movieId'] == recommendedId
    print(movies.loc[isRecommended, "title"].to_string())

8695    Guardians of the Galaxy 2 (2017)
8043    Paperman (2012)
7041    Fired Up (2009)
6763    You Don't Mess with the Zohan (2008)
6256    Stranger than Fiction (2006)
5739    Phantom of the Opera, The (2004)
5580    Bad Boy Bubby (1993)
5316    Bill Cosby, Himself (1983)
4939    13 Going on 30 (2004)
4812    Chasing Liberty (2004)
4481    Uptown Girls (2003)
4136    Morvern Callar (2002)
3939    Clash of the Titans (1981)
3764    Blade II (2002)
3678    Walk to Remember, A (2002)
3656    Brotherhood of the Wolf (Pacte des loups, Le) ...
3646    Black Hawk Down (2001)
3628    Royal Tenenbaums, The (2001)
3599    Flash Gordon (1980)
3585    Beastmaster, The (1982)
3110    Reform School Girls (1986)
2991    Transformers: The Movie (1986)
2982    Unbreakable (2000)
2888    Cell, The (2000)
2823    The Golden Voyage of Sinbad (1973)
2798    Big Trouble in Little China (1986)
2786    Near Dark (1987)
2766    Mad Max Beyond Thunderdome (1985)
2765    Road Warrior, The (Mad Max 2) (1981)
2

# Item by item

Convert our data:

In [18]:
# Count, for every ordered pair of distinct movies, how many users rated BOTH
# of them above 4.0.
#
# The rating filter is applied once to the whole frame and the rows are then
# grouped by user. The previous form, ratings[user_mask][rating_mask], indexed
# an already-filtered frame with a full-length boolean mask (pandas warns and
# re-aligns it) and rescanned the whole table once per user.
likedRatings = ratings.loc[ratings["rating"] > 4.0, ["userId", "movieId"]]

c = collections.Counter()
for _, likedSeries in likedRatings.groupby("userId")["movieId"]:
    likedMovieIds = likedSeries.tolist()
    for movieId in likedMovieIds:
        for otherMovieId in likedMovieIds:
            if movieId != otherMovieId:
                c[(movieId, otherMovieId)] += 1

# Flatten the counter into three parallel lists:
# first movie id, second movie id, and the pair's count.
X_movie = []
y_movie = []
coefs = []
for (firstMovieId, secondMovieId), pairCount in c.items():
    X_movie.append(firstMovieId)
    y_movie.append(secondMovieId)
    coefs.append(pairCount)
del c  # free the counter once it has been flattened into the lists

  This is separate from the ipykernel package so we can avoid doing imports until


In [19]:
# Rebind `ranginator` to a new Ranginator that uses metrics.sim_pearson, and fit it
# on the movie-pair lists: first movie ids, second movie ids, and the pair counts.
ranginator = Ranginator(metric=metrics.sim_pearson)
ranginator.fit(X_movie, y_movie, coefs)

In [20]:
# Request the 30 top results for the query movie id and display them.
QUERY_MOVIE_ID = 5816  # Harry Potter
similarItems = ranginator.predict([QUERY_MOVIE_ID], N=30)
similarItems

[[(0.9240925040551332, 4896),
  (0.8479107373148811, 647),
  (0.844836274621972, 40815),
  (0.8391637849228034, 54001),
  (0.8380081151949728, 8368),
  (0.8063019720507922, 830),
  (0.806028379420027, 88125),
  (0.7966763195252236, 81834),
  (0.792494285683096, 69844),
  (0.74448116122135, 2025),
  (0.7332578717910011, 41566),
  (0.6917170202292143, 4306),
  (0.6915389849645263, 4886),
  (0.6651148182227588, 5349),
  (0.6509140962411836, 6539),
  (0.6497430761157058, 5378),
  (0.6493539193266219, 2394),
  (0.6488363334684162, 33493),
  (0.64489715161454, 34405),
  (0.643744681396224, 31685),
  (0.6362633456578646, 3793),
  (0.6353643959308812, 45722),
  (0.6343545155644557, 6333),
  (0.6334372939672133, 5952),
  (0.63101182908416, 8636),
  (0.6306541393413291, 7153),
  (0.6232261089883168, 4993),
  (0.6200606642809044, 364),
  (0.619907255131367, 5218),
  (0.6089265419467582, 2012)]]

In [21]:
# Keep only the second element (the id) of each result tuple, preserving the
# order in which predict returned them.
similarItemsSorted = []
for scoredItem in similarItems[0]:
    similarItemsSorted.append(scoredItem[1])
similarItemsSorted

[4896,
 647,
 40815,
 54001,
 8368,
 830,
 88125,
 81834,
 69844,
 2025,
 41566,
 4306,
 4886,
 5349,
 6539,
 5378,
 2394,
 33493,
 34405,
 31685,
 3793,
 45722,
 6333,
 5952,
 8636,
 7153,
 4993,
 364,
 5218,
 2012]

Explanation:

In [22]:
# Print the title entry for each similar movie id, in ranked order.
for rankedMovieId in similarItemsSorted:
    isRankedMovie = movies['movieId'] == rankedMovieId
    print(movies.loc[isRankedMovie, "title"].to_string())

3574    Harry Potter and the Sorcerer's Stone (a.k.a. ...
545    Courage Under Fire (1996)
6062    Harry Potter and the Goblet of Fire (2005)
6522    Harry Potter and the Order of the Phoenix (2007)
5166    Harry Potter and the Prisoner of Azkaban (2004)
644    First Wives Club, The (1996)
7644    Harry Potter and the Deathly Hallows: Part 2 (...
7465    Harry Potter and the Deathly Hallows: Part 1 (...
7078    Harry Potter and the Half-Blood Prince (2009)
1500    Lolita (1997)
6075    Chronicles of Narnia: The Lion, the Witch and ...
3194    Shrek (2001)
3568    Monsters, Inc. (2001)
3819    Spider-Man (2002)
4427    Pirates of the Caribbean: The Curse of the Bla...
3832    Star Wars: Episode II - Attack of the Clones (...
1795    Prince of Egypt, The (1998)
5896    Star Wars: Episode III - Revenge of the Sith (...
5954    Serenity (2005)
5794    Hitch (2005)
2836    X-Men (2000)
6221    Pirates of the Caribbean: Dead Man's Chest (2006)
4334    X2: X-Men United (2003)
4137    Lord of 

# Textonator

In [None]:
# Count, for every ordered pair of distinct movies, how many users rated BOTH
# of them above 4.0.
#
# The rating filter is applied once to the whole frame and the rows are then
# grouped by user. The previous form, ratings[user_mask][rating_mask], indexed
# an already-filtered frame with a full-length boolean mask (pandas warns and
# re-aligns it) and rescanned the whole table once per user.
likedRatings = ratings.loc[ratings["rating"] > 4.0, ["userId", "movieId"]]

c = collections.Counter()
for _, likedSeries in likedRatings.groupby("userId")["movieId"]:
    likedMovieIds = likedSeries.tolist()
    for movieId in likedMovieIds:
        for otherMovieId in likedMovieIds:
            if movieId != otherMovieId:
                c[(movieId, otherMovieId)] += 1

In [None]:
# Build one text description per movie:
# "<title> <genres with '|' replaced by spaces> <tag> <tag> ...".
#
# Tags are grouped by movie once up front instead of filtering the whole tags
# frame for every movie, and each movie's distinct tags are sorted so the
# resulting text is the same on every run (iteration order of a set of strings
# is not stable between interpreter sessions).
tagsByMovie = {
    taggedMovieId: sorted({str(tag) for tag in movieTags})
    for taggedMovieId, movieTags in tags.groupby("movieId")["tag"]
}

movieInfoDict = {}
for movieId, title, genres in zip(movies["movieId"], movies["title"], movies["genres"]):
    description = "{} {}".format(title, genres.replace("|", " "))
    # Movies without any tag simply keep the "<title> <genres>" text.
    for movieTag in tagsByMovie.get(movieId, []):
        description += " {}".format(movieTag)
    movieInfoDict[movieId] = description


In [None]:
# Positive examples: walk the pair counter `c`, look up the text description of
# both movies in each pair and label the pair 1.
# NOTE(review): the size check runs after the append, so up to 101 pairs are
# kept — confirm whether 100 was intended.
X1_movie, X2_movie, y = [], [], []
for firstId, secondId in c:
    X1_movie.append(movieInfoDict[firstId])
    X2_movie.append(movieInfoDict[secondId])
    y.append(1)
    if len(y) >= 101:
        break

In [None]:
# Negative examples: draw random pairs of distinct movies that do not appear
# together (in either order) in the pair counter `c`. Both orderings of an
# accepted pair are recorded.
#
# - A dedicated, seeded generator makes the sampled pairs identical on every run.
# - The movie ids are extracted once; the previous form accessed `movies.values`
#   twice per loop iteration just to pick one row and read its id.
NEGATIVE_SAMPLING_SEED = 42
negativeRng = random.Random(NEGATIVE_SAMPLING_SEED)
allMovieIds = movies["movieId"].tolist()

c_negative = collections.Counter()
while len(c) > len(c_negative):
    firstId = negativeRng.choice(allMovieIds)
    secondId = negativeRng.choice(allMovieIds)
    if firstId == secondId:
        continue
    if (firstId, secondId) in c or (secondId, firstId) in c:
        continue
    c_negative[firstId, secondId] += 1
    c_negative[secondId, firstId] += 1
    # Stop early once more than 100 entries have been collected.
    if len(c_negative) > 100:
        break

In [None]:
# Append the negative pairs (label 0) to the lists that already hold the positives.
# This cell only appends, so running it twice adds the negatives twice.
negativePairs = list(c_negative)
X1_movie.extend(movieInfoDict[firstId] for firstId, _ in negativePairs)
X2_movie.extend(movieInfoDict[secondId] for _, secondId in negativePairs)
y.extend([0] * len(negativePairs))

In [None]:
# Create the text-pair model, passing epochs=100.
# NOTE(review): `Textonator` is not imported in any cell shown here — confirm it is
# brought into scope (presumably from the recomendator package) before this runs.
textonator = Textonator(epochs=100)

In [None]:
# Fit on the paired movie descriptions and their labels (1 for pairs taken from
# the pair counter, 0 for the randomly drawn pairs).
textonator.fit(X1_movie, X2_movie, y)

In [None]:
# Query the model with a single pair of identical one-word texts.
textonator.predict(["comedy"], ["comedy"])