In [263]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from itertools import product as cartesian_product

## Program Arguments

In [264]:
LMAX = 6      # Maximum transitive path length
USER = 1090   # User to check
K = 5

## Preprocessing

In [265]:
df = pd.read_csv('ratings.csv')
df.tail(5)

Unnamed: 0,userid,itemid,rating,Time,Location,Companion
5038,1082,tt0413267,1,Weekend,Home,Partner
5039,1082,tt1637706,2,Weekend,Home,Partner
5040,1082,tt0343660,1,Weekend,Home,Partner
5041,1082,tt1133985,1,Weekend,Home,Partner
5042,1082,tt1099212,1,Weekend,Home,Partner


In [266]:
encoder = preprocessing.LabelEncoder()

user_item_context_encodings = []
maximum_rating = df[df.columns[2]].max()

encoded_df = df.copy()
# Encode userid, itemid, and contextual informations for item splitting
for column_index in range(len(df.columns)):
    
    # Column attribute is not rating
    if column_index != 2:
        
        # Fit encoder
        encoder.fit(df[df.columns[column_index]])
        encoded_df[df.columns[column_index]] = encoder.transform(
                df[df.columns[column_index]]
            )
    
    # Column is nor user or rating
    if column_index != 2:
            user_item_context_encodings.append(
                dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))
            )

In [267]:
display(encoded_df)

Unnamed: 0,userid,itemid,rating,Time,Location,Companion
0,96,58,2,2,2,3
1,96,33,4,2,2,3
2,96,1,5,2,2,3
3,96,0,3,2,2,3
4,96,10,3,2,2,3
...,...,...,...,...,...,...
5038,70,35,1,1,1,2
5039,70,62,2,1,1,2
5040,70,25,1,1,1,2
5041,70,50,1,1,1,2


## Item Splitting

In [268]:
# Cartesian product all items and contexts

users = user_item_context_encodings[0].values()
items = user_item_context_encodings[1].values()
contexts = [
    context_trans.values() for context_trans in user_item_context_encodings[2:]
]

context_T = list(cartesian_product(items, *contexts))

In [269]:
# Generate new user-item matrix for new items
rating_matrix = np.zeros((len(users), len(context_T)), dtype=object)

In [270]:
for row in encoded_df.iterrows():
    data = tuple(row[1])
    user = data[0]
    item = data[1]
    rating = data[2]
    context_item = (item, *data[3:])
    
    index = context_T.index(context_item)
    
    rating_matrix[user][index] = rating/maximum_rating

In [271]:
display(rating_matrix)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0.2, 0, 0, ..., 0, 0, 0],
       ...,
       [1.0, 0.8, 1.0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=object)

## Graph Similarity Calculation (User-Based)

In [272]:
rating_matrix_transposed = np.transpose(rating_matrix)

In [273]:
L = 2

WWT = np.matmul(rating_matrix, rating_matrix_transposed)
M = np.matmul(rating_matrix, rating_matrix_transposed)

while L != LMAX:
    M = np.matmul(WWT, M)
    L = L + 2

In [274]:
display(M)

array([[319887.94342400023, 310866.42374400015, 228599.42560000002, ...,
        500325.6400640003, 156418.95948800008, 115412.14131200004],
       [310866.42374399997, 326760.6565120002, 232016.7691520001, ...,
        492466.1069439999, 157922.43334399996, 116548.61279999999],
       [228599.42560000013, 232016.769152, 173603.19462399997, ...,
        376846.6775680003, 119292.55020800003, 84620.83385600001],
       ...,
       [500325.640064, 492466.106944, 376846.6775680002, ...,
        897376.6468479999, 258292.61318400013, 184038.68224000005],
       [156418.95948800008, 157922.43334400008, 119292.55020800002, ...,
        258292.61318400007, 93437.86163200001, 56122.17920000002],
       [115412.14131200004, 116548.6128, 84620.83385599998, ...,
        184038.68224, 56122.17920000002, 43899.90822400003]], dtype=object)

## Recommendation Generation

In [276]:
wanted_user = user_item_context_encodings[0][USER]

M[wanted_user]

array([70128.83097600003, 68965.61766400002, 51260.10764800003,
       164223.62630400006, 109443.39372799998, 136824.82566399997,
       34750.930816, 101064.268928, 38599.988032, 73166.71251200003,
       82744.65094400001, 147764.15366400007, 95158.81676800005,
       21435.279680000007, 52766.02700800001, 84522.87513600003,
       59682.64998400002, 29112.688704000007, 49961.371327999994,
       76732.46003200003, 41715.96, 135189.39897600008, 38882.99590400002,
       43335.654143999986, 220575.8040320001, 223988.30943999998,
       113409.55334400001, 71951.871936, 66938.70483200003,
       49886.17401600001, 47180.644991999994, 67247.57036799997,
       59070.50246400002, 116605.64441600005, 165441.61516800002,
       119461.27878400002, 89918.83865599998, 57346.099520000025,
       29373.870528000014, 112393.06656000005, 34587.861376,
       92542.65887999999, 154814.516736, 137268.89068800004,
       140766.834432, 166928.465984, 68315.69171200001, 65747.86982400001,
       58