<a href="https://colab.research.google.com/github/albanda/CE888/blob/master/lab5-recommender/rec_features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hybrid Models for Recommendation Systems

Load Pandas, we are going to need it for manipulating data

In [0]:
import pandas as pd
import numpy as np
from IPython.display import Image
np.set_printoptions(precision = 3)

Now load the data

In [0]:
user_ratings_df = pd.read_csv("https://raw.githubusercontent.com/albanda/CE888/master/lab5-recommender/user_ratings.csv")
user_features_df = pd.read_csv("https://raw.githubusercontent.com/albanda/CE888/master/lab5-recommender/user_features.csv")
item_features_df = pd.read_csv("https://raw.githubusercontent.com/albanda/CE888/master/lab5-recommender/item_features.csv")

In [0]:
user_features_df["key"] = 0
user_features_df["user_id"] = range(user_features_df.shape[0])
item_features_df["key"] = 0
item_features_df["item_id"] = range(item_features_df.shape[0])

merged_df = pd.merge(user_features_df, item_features_df, left_index=True, on="key")
merged_df.drop(labels='key', axis=1, inplace=True)  # drop the "key" column


In [4]:
merged_df[["item_id", "user_id"]]

Unnamed: 0,item_id,user_id
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
0,0,1
1,1,1
2,2,1
3,3,1
4,4,1


In [0]:
merged_df["rating"] = map(lambda ids: user_ratings_df.values[ids[1]][ids[2]], 
                          merged_df[["user_id", "item_id"]].itertuples())

train = merged_df.dropna()
test = merged_df[merged_df.isnull().any(axis=1)]


In [8]:
n_latent_factors = 2

user_ratings = user_ratings_df.values
latent_user_preferences = np.random.random((user_ratings.shape[0], n_latent_factors))
latent_item_features = np.random.random((user_ratings.shape[1], n_latent_factors))

user_features = user_features_df.values
item_features = item_features_df.values

user_features = np.concatenate([np.ones(shape=(user_features.shape[0],1)), user_features], axis=1)
item_features = np.concatenate([np.ones(shape=(item_features.shape[0],1)), item_features], axis=1)

user_features_weights = np.random.random((user_ratings.shape[0], user_features.shape[1]))
item_features_weights = np.random.random((user_ratings.shape[1], item_features.shape[1]))

print(user_features)

[[1. 1. 0. 0. 0.]
 [1. 0. 1. 0. 1.]
 [1. 0. 0. 0. 2.]
 [1. 1. 0. 0. 3.]
 [1. 0. 1. 0. 4.]
 [1. 0. 0. 0. 5.]
 [1. 0. 0. 0. 6.]
 [1. 1. 0. 0. 7.]
 [1. 0. 1. 0. 8.]
 [1. 1. 0. 0. 9.]]


In [0]:
def predict_rating(user_id, item_id):
    """
    Predict a rating given a user_id and an item_id.
    """
    user_preference = latent_user_preferences[user_id]
    item_preference = latent_item_features[item_id]
    
    user_score = user_features_weights[user_id].dot(user_features[user_id])
    item_score = item_features_weights[item_id].dot(item_features[item_id])
    #print(user_preference.dot(item_preference), user_score, item_score)
    return user_preference.dot(item_preference) + user_score + item_score


def train(user_id, item_id, rating, alpha=0.001, 
          latent_feature_weight_decay=0.1,
          user_weight_decay=0.01, item_weight_decay=0.0001):
    #print(item_id)
    prediction_rating = predict_rating(user_id, item_id)
    err = prediction_rating - rating
    #print(err)
    user_pref_values = latent_user_preferences[user_id][:]
    latent_user_preferences[user_id] -= alpha * err * (latent_item_features[item_id] + latent_feature_weight_decay * latent_user_preferences[user_id])
    latent_item_features[item_id] -= alpha * err * (user_pref_values + latent_feature_weight_decay * latent_item_features[item_id])
    
    user_features_weights[user_id] -=alpha * err * (user_features[user_id] + user_weight_decay * user_features_weights[user_id])
    item_features_weights[item_id] -=alpha * err * (item_features_weights[item_id] + item_weight_decay * item_features_weights[item_id])
    
    return err


def sgd(iterations=30000):
    """ 
    Iterate over all users and all items and train for 
    a certain number of iterations
    """
    for iteration in range(iterations):
        error = []
        for user_id in range(latent_user_preferences.shape[0]):
            for item_id in range(latent_item_features.shape[0]):
                rating = user_ratings[user_id][item_id]
                if not np.isnan(rating):
                    err = train(user_id, item_id, rating)
                    error.append(err)
    mse = (np.array(error) ** 2).mean()          
    print(mse)


In [10]:
for _ in range(10): 
    sgd()  # Note decreasing values with increasing iterations

0.32261243782864374
0.28066926578646123
0.2771998964825736
0.27588242050757866
0.27517851696080026
0.27474212546944327
0.2744497984393649
0.27424606378568467
0.27410223745777107
0.27400204487913044


In [11]:
predictions = np.zeros(shape=(latent_user_preferences.shape[0], latent_item_features.shape[0]))
print(user_features_weights)
print(item_features_weights)
for user_id in range(latent_user_preferences.shape[0]):
    for item_id in range(latent_item_features.shape[0]):
        predictions[user_id, item_id] =  predict_rating(user_id, item_id)
  

[[ 1.748e+00  2.227e+00  2.428e-01  4.247e-01  9.384e-01]
 [-4.244e-01  8.108e-01 -1.207e+00  6.888e-01 -5.249e-01]
 [ 1.418e+00  9.786e-01  3.333e-01  1.817e-01  1.417e+00]
 [ 7.184e-01  4.949e-01  7.616e-01  5.079e-01  1.196e+00]
 [ 3.673e-01  7.081e-01  4.383e-01  1.505e-01  2.253e-01]
 [ 3.244e-01  1.674e-02  5.467e-01  1.088e-01 -1.709e+00]
 [ 7.459e-02  3.708e-01  3.287e-01  9.784e-01  9.081e-02]
 [ 4.745e-01  4.161e-01  7.249e-02  5.527e-01  2.194e-01]
 [ 1.729e-01  2.942e-01  1.567e-01  1.781e-01 -3.221e-01]
 [ 6.527e-02  4.328e-02  7.411e-01  1.527e-03  1.599e-03]]
[[0.929 0.985 3.812 3.428 1.484]
 [0.332 0.59  0.598 0.446 0.245]
 [0.268 0.111 0.24  0.233 0.362]
 [0.873 0.488 0.13  1.85  1.209]
 [0.015 0.041 0.031 0.023 0.037]]


In [0]:
values = [zip(user_ratings[i], predictions[i]) for i in range(predictions.shape[0])]
comparison_data = pd.DataFrame(values)
comparison_data.columns = user_ratings_df.columns

In [14]:
comparison_data


Unnamed: 0,The Call of Cthulhu,Frankenstein,Dracula,Neuromancer,Space Odyssey
0,"(8.0, 7.8722159060439685)","(2.0, 2.4015007152902252)","(nan, 17.23167283803451)","(5.0, 4.728696311383871)","(4.0, 3.9974613453869194)"
1,"(3.0, 2.913582791970542)","(2.0, 2.2766743023732756)","(nan, -21.981784292294837)","(7.0, 6.811953568159626)","(7.0, 6.99835284541697)"
2,"(9.0, 8.742796730056938)","(nan, 5.028267998227328)","(7.0, 7.043125919522596)","(8.0, 8.183539393493147)","(5.0, 5.022599822189027)"
3,"(nan, 8.97487726737108)","(nan, 5.0010229842696)","(7.0, 7.000073239738195)","(8.0, 7.99843188992668)","(9.0, 9.000053962455278)"
4,"(nan, 5.500233867389502)","(1.0, 0.688389517805895)","(8.0, 8.013085383414248)","(3.0, 3.2759489722090094)","(7.0, 7.010484475449793)"
5,"(2.0, 2.010872829518508)","(3.0, 2.990339014231661)","(5.0, 4.99843855802554)","(nan, 8.99758260020766)","(nan, -68.92512036187104)"
6,"(4.0, 4.544496236951191)","(2.0, 0.2864593959129367)","(nan, 3.110814998744761)","(2.0, 3.1265884929873256)","(7.0, 7.010052053302367)"
7,"(7.0, 6.518715982361011)","(1.0, 2.7472670437867435)","(2.0, 2.0430203405931464)","(7.0, 5.817244734570055)","(9.0, 8.983361062458037)"
8,"(3.0, 3.124398612218365)","(3.0, 2.6131397433571486)","(nan, -20.426194745785974)","(7.0, 7.247499919772771)","(3.0, 3.0020768084946483)"
9,"(4.0, 4.264889180280363)","(nan, -0.03233074623283194)","(5.0, 4.976135615825362)","(3.0, 2.8247220278855973)","(3.0, 2.9785206304857756)"
