# Part 1 - Build the model

# Read in the data

In [2]:
import pandas as pd

ratings = pd.read_csv('ratings.csv', index_col=0)

# Inspect the data

In [3]:
ratings.fillna('')

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


In [4]:
ratings

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


# Calculating item similarities

## How we'll do this

### Similarity == Angle between ratings vectors
![](angle.png)
### Calculting the angle (cosine similarity)
![](similarity.png)
### Helpful to precalculate these
![](precalculate.png)

## Normalize the ratings

In [5]:
means = ratings.mean()
means

Bob       6.000
Jane      7.750
Alice     7.000
Lesley    8.600
Frank     6.975
Kate      5.750
dtype: float64

In [6]:
normalized = ratings - means
normalized

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,-1.0,0.25,,0.5,1.425,-0.75
Carcassonne,4.0,0.25,0.0,-1.0,,0.25
Cards Against Humanity,,-0.75,-2.0,,0.025,-1.75
Pandemic Legacy: Season 1,,,,0.5,1.225,
Dominion,-3.0,0.25,2.0,,-2.675,2.25


In [22]:
normalized.pow(2)

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,1.0,0.0625,,0.25,2.030625,0.5625
Carcassonne,16.0,0.0625,0.0,1.0,,0.0625
Cards Against Humanity,,0.5625,4.0,,0.000625,3.0625
Pandemic Legacy: Season 1,,,,0.25,1.500625,
Dominion,9.0,0.0625,4.0,,7.155625,5.0625


In [23]:
normalized.pow(2).sum(axis=1)

Game
Arkham Horror                 3.905625
Carcassonne                  17.125000
Cards Against Humanity        7.625625
Pandemic Legacy: Season 1     1.750625
Dominion                     25.280625
dtype: float64

In [24]:
normalized.pow(2).sum(axis=1).pow(0.5)

Game
Arkham Horror                1.976265
Carcassonne                  4.138236
Cards Against Humanity       2.761453
Pandemic Legacy: Season 1    1.323112
Dominion                     5.027984
dtype: float64

## Precompute item square root of sum of squares

In [25]:
sqrt_sum_squares = normalized.pow(2).sum(axis=1).pow(0.5)
rendering = normalized.copy()
rendering['Sqrt Sum Squares'] = sqrt_sum_squares
rendering

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate,Sqrt Sum Squares
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Arkham Horror,-1.0,0.25,,0.5,1.425,-0.75,1.976265
Carcassonne,4.0,0.25,0.0,-1.0,,0.25,4.138236
Cards Against Humanity,,-0.75,-2.0,,0.025,-1.75,2.761453
Pandemic Legacy: Season 1,,,,0.5,1.225,,1.323112
Dominion,-3.0,0.25,2.0,,-2.675,2.25,5.027984


## Calculate similarities

In [26]:
similarity = pd.DataFrame(index = ratings.index.values, columns = ratings.index.values)
similarity

Unnamed: 0,Arkham Horror,Carcassonne,Cards Against Humanity,Pandemic Legacy: Season 1,Dominion
Arkham Horror,,,,,
Carcassonne,,,,,
Cards Against Humanity,,,,,
Pandemic Legacy: Season 1,,,,,
Dominion,,,,,


In [27]:
for i in similarity.index.values:
    for j in similarity.index.values:
        numerator = (normalized.loc[i] * normalized.loc[j]).sum()
        denominator = sqrt_sum_squares.loc[i] * sqrt_sum_squares.loc[j]
        similarity.loc[i,j] = numerator / denominator
        
similarity

Unnamed: 0,Arkham Horror,Carcassonne,Cards Against Humanity,Pandemic Legacy: Season 1,Dominion
Arkham Horror,1.0,-0.565524,0.212671,0.763198,-0.245242
Carcassonne,-0.565524,1.0,-0.0546924,-0.0913184,-0.546691
Cards Against Humanity,0.212671,-0.0546924,1.0,0.00838189,-0.59
Pandemic Legacy: Season 1,0.763198,-0.0913184,0.00838189,1.0,-0.492572
Dominion,-0.245242,-0.546691,-0.59,-0.492572,1.0


# Part 2 - Apply the model


# Recommend similar items

In [28]:
def most_similar_to(item):
    ranked = similarity[item].sort_values(ascending=False)
    return (ranked.index[1], ranked[1])
    
most_similar_to('Arkham Horror')   

('Pandemic Legacy: Season 1', 0.76319778593577769)

# Predict user ratings
![](predict.png)

### Calculate the numerator

In [14]:
normalized

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


In [29]:
normalized['Bob'] * similarity.loc['Cards Against Humanity']

Game
Arkham Horror               -0.212671
Carcassonne                  -0.21877
Cards Against Humanity            NaN
Pandemic Legacy: Season 1         NaN
Dominion                         1.77
dtype: object

### Calculate the denominator

In [16]:
normalized['Bob'].abs()

Game
Arkham Horror                 5.0
Carcassonne                  10.0
Cards Against Humanity        NaN
Pandemic Legacy: Season 1     NaN
Dominion                      3.0
Name: Bob, dtype: float64

### Calculate the predicted normalized rating

In [32]:
(normalized['Bob'] * similarity.loc['Cards Against Humanity']).sum() / normalized['Bob'].abs().sum()

0.16731972021500008

### Calculated the predicted rating

In [34]:
(normalized['Bob'] * similarity.loc['Cards Against Humanity']).sum() / normalized['Bob'].abs().sum() + means['Bob']

6.1673197202149996

### Predict for any user + item

In [35]:
def predict_rating_for(user, item):
    return (ratings[user] * similarity.loc[item]).sum() / ratings[user].abs().sum() + means[user]
    
predict_rating_for('Bob', 'Cards Against Humanity')

5.9303574377218036

In [36]:
ratings_with_predictions = ratings.copy()
for user in ratings_with_predictions.columns.values:
    for item in ratings_with_predictions.index.values:
        if pd.isnull(ratings_with_predictions.loc[item, user]):
            ratings_with_predictions.loc[item, user] = predict_rating_for(user, item) 

In [37]:
ratings

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


In [38]:
ratings_with_predictions

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,6.757024,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,6.679916,6.0
Cards Against Humanity,5.930357,7.0,5.0,8.661857,7.0,4.0
Pandemic Legacy: Season 1,6.079172,7.798166,6.760454,9.1,8.2,5.722219
Dominion,3.0,8.0,9.0,8.178723,4.3,8.0
