# Item-Based Memory Model

In [6]:
import numpy as np
import pandas as pd
from scipy import spatial
from sklearn.metrics.pairwise import cosine_similarity

Read in the data into a dataframe:

In [7]:
ratings = pd.read_csv('./ml-100k/u.data', sep='\t', header=None)
ratings.columns = ['user_id', 'movie_id', 'rating', 'timestamp']
movie_titles = pd.read_csv('./ml-100k/u.item', sep='|', header=None, encoding="ISO-8859-1")
movie_titles.columns = """movie_id | movie_title | release_date | video_release_date |
              IMDb URL | unknown | Action | Adventure | Animation |
              Childrens | Comedy | Crime | Documentary | Drama | Fantasy |
              Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi |
              Thriller | War | Western""".replace('\n', '').replace(' ', '').split('|')

In [8]:
ratings = pd.merge(ratings, movie_titles[['movie_id', 'movie_title']], left_on='movie_id', right_on='movie_id')
ratings

Unnamed: 0,user_id,movie_id,rating,timestamp,movie_title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)
...,...,...,...,...,...
99995,840,1674,4,891211682,Mamma Roma (1962)
99996,655,1640,3,888474646,"Eighth Day, The (1996)"
99997,655,1637,3,888984255,Girls Town (1996)
99998,655,1630,3,887428735,"Silence of the Palace, The (Saimt el Qusur) (1..."


In [9]:
np.unique(ratings.movie_id).shape

(1682,)

Create a user-item matrix where rows are users and columns are items. Empty cell are filled with `nan`

In [10]:
user_item_matrix = np.empty((np.unique(ratings.user_id).shape[0], np.unique(ratings.movie_id).shape[0]))
user_item_matrix[:] = np.nan
user_item_matrix

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [11]:
for row in ratings.iterrows():
    row_index, row = row
    user_item_matrix[row['user_id'] - 1, row['movie_id'] - 1] = int(row['rating'])

In [12]:
user_item_matrix

array([[ 5.,  3.,  4., ..., nan, nan, nan],
       [ 4., nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [ 5., nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan,  5., nan, ..., nan, nan, nan]])

Normalise the ratings by calculating the average rating for each user and subtracting it from their respective ratings

In [13]:
def normalise(array: np.array) -> np.array:
    rating_idx = np.argwhere(~np.isnan(array))
    ratings = array[rating_idx]
    ratings = ratings - np.mean(ratings)
    array[rating_idx] = ratings
    return array

In [14]:
user_item_matrix_norm = np.apply_along_axis(normalise, 1, np.copy(user_item_matrix))

In [15]:
user_item_matrix_norm

array([[ 1.38970588, -0.61029412,  0.38970588, ...,         nan,
                nan,         nan],
       [ 0.29032258,         nan,         nan, ...,         nan,
                nan,         nan],
       [        nan,         nan,         nan, ...,         nan,
                nan,         nan],
       ...,
       [ 0.95454545,         nan,         nan, ...,         nan,
                nan,         nan],
       [        nan,         nan,         nan, ...,         nan,
                nan,         nan],
       [        nan,  1.58928571,         nan, ...,         nan,
                nan,         nan]])

Create a similarity matrix between items.

In [16]:
sim_matrix = np.zeros((user_item_matrix.shape[1], user_item_matrix.shape[1]))

In [17]:
for i1 in range(user_item_matrix_norm.shape[1]):
    i1_array = user_item_matrix_norm[:,i1]
    for i2 in range(user_item_matrix_norm.shape[1]):
        if i1 != i2:
            i2_array = user_item_matrix_norm[:,i2]
            col_cat = np.array([i1_array, i2_array])
            col_cat = col_cat[:, ~np.isnan(col_cat).any(axis=0)]

            if col_cat.size != 0:
                sim = cosine_similarity([col_cat[0,]], [col_cat[1,]]).flat[0]
                sim_matrix[i1, i2] = sim
            else:
                sim_matrix[i1, i2] = 0
        else:
            sim_matrix[i1, i2] = 1
    
    if i1% 100 == 0:
        print(i1)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600


In [18]:
sim_matrix.shape

(1682, 1682)

In [19]:
# Make a prediction for a given user_id and item_id
# Note user ids and item ids are offset by -1 so they start at zero index
def predict(user_id, item_id):
    sim_scores = sim_matrix[item_id, ]
    sim_items = np.argsort(sim_scores)[::-1][:20]
    sim_items = np.array([i for i in sim_items if i != item_id])
    mask = ~np.isnan(user_item_matrix[user_id, sim_items])
    user_ratings = user_item_matrix[user_id, sim_items][mask]

    pred = np.sum(user_ratings * sim_scores[sim_items][mask]) / np.sum(sim_scores[sim_items][mask])
    
    print(pred)


In [20]:
predict(3, 3)

5.0


In [21]:
ratings[ratings.user_id == 4].sort_values('rating', ascending=False).head(20)

Unnamed: 0,user_id,movie_id,rating,timestamp,movie_title
6518,4,327,5,892002352,Cop Land (1997)
37023,4,303,5,892002352,Ulee's Gold (1997)
92411,4,359,5,892002352,"Assignment, The (1997)"
89023,4,329,5,892002352,Desperate Measures (1998)
85076,4,361,5,892002353,Incognito (1997)
76276,4,300,5,892001445,Air Force One (1997)
69761,4,354,5,892002353,"Wedding Singer, The (1998)"
64690,4,324,5,892002353,Lost Highway (1997)
56372,4,301,5,892002353,In & Out (1997)
50790,4,50,5,892003526,Star Wars (1977)


In [38]:
# Here the movie ID is that of the movie lens data set
# e.g. Toy story movie id = 1
def get_similar_movies(movie_id):
    sim_movies = np.argsort(sim_matrix[movie_id - 1, ])[::-1][:10]
    for s in sim_movies:
        print(movie_titles[movie_titles.movie_id == s+1].movie_title.values[0])

In [30]:
# Star Trek: First Contact (1996)
get_similar_movies(222)

That Old Feeling (1997)
Scream of Stone (Schrei aus Stein) (1991)
Love Is All There Is (1996)
Angela (1995)
Last Summer in the Hamptons (1995)
Truth or Consequences, N.M. (1997)
Intimate Relations (1996)
Beyond Bedlam (1993)
Here Comes Cookie (1935)
Reluctant Debutante, The (1958)


In [33]:
# Toy Story
get_similar_movies(1)

Toy Story (1995)
Visitors, The (Visiteurs, Les) (1993)
Stars Fell on Henrietta, The (1995)
Here Comes Cookie (1935)
Sleepover (1995)
Shopping (1994)
Nemesis 2: Nebula (1995)
Beyond Bedlam (1993)
Relative Fear (1994)
Love Is All There Is (1996)


In [34]:
# Star Wars
get_similar_movies(50)

Bitter Sugar (Azucar Amargo) (1996)
Two Friends (1986) 
Savage Nights (Nuits fauves, Les) (1992)
Boys, Les (1997)
Hana-bi (1997)
Men With Guns (1997)
Butcher Boy, The (1998)
He Walked by Night (1948)
I Don't Want to Talk About It (De eso no se habla) (1993)
Some Mother's Son (1996)


In [80]:
movie_titles[movie_titles.movie_id == 862+1]

Unnamed: 0,movie_id,movie_title,release_date,video_release_date,IMDbURL,unknown,Action,Adventure,Animation,Childrens,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
862,863,"Garden of Finzi-Contini, The (Giardino dei Fin...",08-Nov-1996,,http://us.imdb.com/M/title-exact?Giardino%20de...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [73]:
movie_titles[movie_titles.movie_title.str.contains('Star')]

Unnamed: 0,movie_id,movie_title,release_date,video_release_date,IMDbURL,unknown,Action,Adventure,Animation,Childrens,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
49,50,Star Wars (1977),01-Jan-1977,,http://us.imdb.com/M/title-exact?Star%20Wars%2...,0,1,1,0,0,...,0,0,0,0,0,1,1,0,1,0
61,62,Stargate (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Stargate%20(1...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
123,124,Lone Star (1996),21-Jun-1996,,http://us.imdb.com/M/title-exact?Lone%20Star%2...,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
145,146,Unhook the Stars (1996),30-Oct-1996,,http://us.imdb.com/M/title-exact?Unhook%20the%...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
221,222,Star Trek: First Contact (1996),22-Nov-1996,,http://us.imdb.com/M/title-exact?Star%20Trek:%...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
226,227,Star Trek VI: The Undiscovered Country (1991),01-Jan-1991,,http://us.imdb.com/M/title-exact?Star%20Trek%2...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
227,228,Star Trek: The Wrath of Khan (1982),01-Jan-1982,,http://us.imdb.com/M/title-exact?Star%20Trek:%...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
228,229,Star Trek III: The Search for Spock (1984),01-Jan-1984,,http://us.imdb.com/M/title-exact?Star%20Trek%2...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
229,230,Star Trek IV: The Voyage Home (1986),01-Jan-1986,,http://us.imdb.com/M/title-exact?Star%20Trek%2...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
270,271,Starship Troopers (1997),01-Jan-1997,,http://us.imdb.com/M/title-exact?Starship+Troo...,0,1,1,0,0,...,0,0,0,0,0,0,1,0,1,0
