# Read in the data

In [1]:
import pandas as pd

ratings = pd.read_csv('ratings.csv', index_col=0)

# Inspect the data

In [2]:
ratings.fillna('')

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


In [3]:
ratings

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


# Calculating item similarities

## Normalize the ratings

In [4]:
means = ratings.mean()
means

Bob       6.000
Jane      7.750
Alice     7.000
Lesley    8.600
Frank     6.975
Kate      5.750
dtype: float64

In [22]:
normalized = ratings - means
normalized

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,-1.0,0.25,,0.5,1.425,-0.75
Carcassonne,4.0,0.25,0.0,-1.0,,0.25
Cards Against Humanity,,-0.75,-2.0,,0.025,-1.75
Pandemic Legacy: Season 1,,,,0.5,1.225,
Dominion,-3.0,0.25,2.0,,-2.675,2.25


In [6]:
ratings.pow(2)

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,25.0,64.0,,82.81,70.56,25.0
Carcassonne,100.0,64.0,49.0,57.76,,36.0
Cards Against Humanity,,49.0,25.0,,49.0,16.0
Pandemic Legacy: Season 1,,,,82.81,67.24,
Dominion,9.0,64.0,81.0,,18.49,64.0


In [7]:
ratings.pow(2).sum(axis=1)

Game
Arkham Horror                267.37
Carcassonne                  306.76
Cards Against Humanity       139.00
Pandemic Legacy: Season 1    150.05
Dominion                     236.49
dtype: float64

In [8]:
ratings.pow(2).sum(axis=1).pow(0.5)

Game
Arkham Horror                16.351453
Carcassonne                  17.514565
Cards Against Humanity       11.789826
Pandemic Legacy: Season 1    12.249490
Dominion                     15.378231
dtype: float64

## Precompute item square root of sum of squares

In [24]:
sqrt_sum_squares = ratings.pow(2).sum(axis=1).pow(0.5)
rendering = normalized.copy()
rendering['Sqrt Sum Squares'] = sqrt_sum_squares
rendering

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate,Sqrt Sum Squares
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Arkham Horror,-1.0,0.25,,0.5,1.425,-0.75,16.351453
Carcassonne,4.0,0.25,0.0,-1.0,,0.25,17.514565
Cards Against Humanity,,-0.75,-2.0,,0.025,-1.75,11.789826
Pandemic Legacy: Season 1,,,,0.5,1.225,,12.24949
Dominion,-3.0,0.25,2.0,,-2.675,2.25,15.378231


## Calculate similarities

In [10]:
similarity = pd.DataFrame(index = ratings.index.values, columns = ratings.index.values)
similarity

Unnamed: 0,Arkham Horror,Carcassonne,Cards Against Humanity,Pandemic Legacy: Season 1,Dominion
Arkham Horror,,,,,
Carcassonne,,,,,
Cards Against Humanity,,,,,
Pandemic Legacy: Season 1,,,,,
Dominion,,,,,


In [26]:
for i in similarity.index.values:
    for j in similarity.index.values:
        nominator = (ratings.loc[i] * ratings.loc[j]).sum()
        denominator = sqrt_sum_squares.loc[i] * sqrt_sum_squares.loc[j]
        similarity.loc[i,j] = nominator / denominator
        
similarity

Unnamed: 0,Arkham Horror,Carcassonne,Cards Against Humanity,Pandemic Legacy: Season 1,Dominion
Arkham Horror,1.0,0.744303,0.69924,0.757326,0.616886
Carcassonne,0.744303,1.0,0.556918,0.322357,0.761111
Cards Against Humanity,0.69924,0.556918,1.0,0.397454,0.899581
Pandemic Legacy: Season 1,0.757326,0.322357,0.397454,1.0,0.187179
Dominion,0.616886,0.761111,0.899581,0.187179,1.0


# Recommend similar items

In [28]:
def most_similar_to(item):
    ranked = similarity[item].sort_values(ascending=False)
    return (ranked.index[1], ranked[1])
    
most_similar_to('Arkham Horror')   

('Pandemic Legacy: Season 1', 0.75732555784658306)

# Predict user ratings

In [13]:
ratings

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


In [14]:
ratings['Bob'] * similarity.loc['Cards Against Humanity']

Game
Arkham Horror                 3.4962
Carcassonne                  5.56918
Cards Against Humanity           NaN
Pandemic Legacy: Season 1        NaN
Dominion                     2.69874
dtype: object

In [15]:
ratings['Bob'].abs()

Game
Arkham Horror                 5.0
Carcassonne                  10.0
Cards Against Humanity        NaN
Pandemic Legacy: Season 1     NaN
Dominion                      3.0
Name: Bob, dtype: float64

In [16]:
(ratings['Bob'] * similarity.loc['Cards Against Humanity']).sum() / ratings['Bob'].abs().sum()

0.65356217615516188

In [17]:
def predict_rating_for(user, item):
    return (ratings[user] * similarity.loc[item]).sum() / ratings[user].abs().sum()
    
predict_rating_for('Bob', 'Cards Against Humanity')

0.65356217615516188

In [18]:
ratings_with_predictions = ratings.copy()
for user in ratings_with_predictions.columns.values:
    for item in ratings_with_predictions.index.values:
        if pd.isnull(ratings_with_predictions.loc[item, user]):
            ratings_with_predictions.loc[item, user] = predict_rating_for(user, item) 

In [19]:
ratings

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,,6.0
Cards Against Humanity,,7.0,5.0,,7.0,4.0
Pandemic Legacy: Season 1,,,,9.1,8.2,
Dominion,3.0,8.0,9.0,,4.3,8.0


In [20]:
ratings_with_predictions

Unnamed: 0_level_0,Bob,Jane,Alice,Lesley,Frank,Kate
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arkham Horror,5.0,8.0,0.678967,9.1,8.4,5.0
Carcassonne,10.0,8.0,7.0,7.6,0.575867,6.0
Cards Against Humanity,0.653562,7.0,5.0,0.550872,7.0,4.0
Pandemic Legacy: Season 1,0.420652,0.41668,0.282304,9.1,8.2,0.382957
Dominion,3.0,8.0,9.0,0.507808,4.3,8.0
