# Context-Aware Recommendation Algorithm by PAPERDAA-2

In [697]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from itertools import product as cartesian_product

# Training

## Program Arguments

In [698]:
LMAX = 64     # Maximum transitive path length
K = 60

## Preprocessing

In [699]:
df = pd.read_csv('fold1.csv')
df.head(5)

Unnamed: 0,userid,itemid,rating,Time,Location,Companion
0,1003,tt0454876,1,Weekday,Cinema,Alone
1,1003,tt0120912,1,Weekday,Cinema,Alone
2,1003,tt3793764,1,Weekday,Cinema,Alone
3,1003,tt0114148,1,Weekday,Cinema,Alone
4,1003,tt0110357,1,Weekday,Cinema,Alone


In [700]:
encoder = preprocessing.LabelEncoder()

user_item_context_encodings = []
user_item_context_reverse_encodings = []
maximum_rating = df[df.columns[2]].max()

encoded_df = df.copy()
# Encode userid, itemid, and contextual informations for item splitting
for column_index in range(len(df.columns)):
    
    # Column attribute is not rating
    if column_index != 2:
        
        # Fit encoder
        encoder.fit(df[df.columns[column_index]])
        encoded_df[df.columns[column_index]] = encoder.transform(
                df[df.columns[column_index]]
            )
    
    # Column is nor user or rating
    if column_index != 2:
            user_item_context_encodings.append(
                dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))
            )
            user_item_context_reverse_encodings.append(
                dict(zip(encoder.transform(encoder.classes_), encoder.classes_))
            )

print(user_item_context_encodings[1], end="\n\n")

print(user_item_context_reverse_encodings[1])

{'tt0088763': 0, 'tt0109830': 1, 'tt0110357': 2, 'tt0110475': 3, 'tt0111161': 4, 'tt0114148': 5, 'tt0114369': 6, 'tt0120338': 7, 'tt0120912': 8, 'tt0125439': 9, 'tt0133093': 10, 'tt0138097': 11, 'tt0147800': 12, 'tt0169547': 13, 'tt0181689': 14, 'tt0211915': 15, 'tt0213149': 16, 'tt0232500': 17, 'tt0266543': 18, 'tt0268380': 19, 'tt0289879': 20, 'tt0293662': 21, 'tt0315733': 22, 'tt0319262': 23, 'tt0327084': 24, 'tt0343660': 25, 'tt0356910': 26, 'tt0362165': 27, 'tt0367594': 28, 'tt0376541': 29, 'tt0378194': 30, 'tt0382625': 31, 'tt0388795': 32, 'tt0405422': 33, 'tt0407304': 34, 'tt0413267': 35, 'tt0441773': 36, 'tt0454848': 37, 'tt0454876': 38, 'tt0462538': 39, 'tt0489099': 40, 'tt0800369': 41, 'tt0816199': 42, 'tt0944835': 43, 'tt0945513': 44, 'tt0993846': 45, 'tt1041829': 46, 'tt1055369': 47, 'tt1068680': 48, 'tt1099212': 49, 'tt1133985': 50, 'tt1190080': 51, 'tt1232829': 52, 'tt1291150': 53, 'tt1369829': 54, 'tt1375666': 55, 'tt1453405': 56, 'tt1478338': 57, 'tt1499658': 58, 'tt156

In [701]:
display(encoded_df)

Unnamed: 0,userid,itemid,rating,Time,Location,Companion
0,2,38,1,0,0,0
1,2,8,1,0,0,0
2,2,74,1,0,0,0
3,2,5,1,0,0,0
4,2,2,1,0,0,0
...,...,...,...,...,...,...
7009,65,35,1,1,1,2
7010,65,62,2,1,1,2
7011,65,25,1,1,1,2
7012,65,50,1,1,1,2


## Item Splitting

In [702]:
# Cartesian product all items and contexts

users = user_item_context_encodings[0].values()
items = user_item_context_encodings[1].values()
contexts = [
    context_trans.values() for context_trans in user_item_context_encodings[2:]
]

context_T = list(cartesian_product(items, *contexts))

In [703]:
# Generate new user-item matrix for new items
rating_matrix = np.zeros((len(users), len(context_T)), dtype=object)

In [704]:
for row in encoded_df.iterrows():
    data = tuple(row[1])
    user = data[0]
    item = data[1]
    rating = data[2]
    context_item = (item, *data[3:])
    
    index = context_T.index(context_item)
    
    rating_matrix[user][index] = rating/maximum_rating

In [705]:
display(rating_matrix)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0.2, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [1.0, 0.8, 1.0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=object)

## Graph Similarity Calculation (User-Based)

In [706]:
rating_matrix_transposed = np.transpose(rating_matrix)

In [707]:
L = 0

WWT = np.matmul(rating_matrix, rating_matrix_transposed)
M = np.matmul(rating_matrix, rating_matrix_transposed)

while L != LMAX:
    M = np.matmul(WWT, M)
    L = L + 2

In [708]:
display(M)

array([[2.0237070794127122e+70, 1.9021091831483966e+70,
        6.2630634360575696e+69, ..., 4.1071533836889e+70,
        5.193301979878106e+70, 1.7600611425969454e+70],
       [1.9021091831483947e+70, 1.787817705017248e+70,
        5.886736579053427e+69, ..., 3.860368052073188e+70,
        4.881253566705946e+70, 1.6543048637552317e+70],
       [6.263063436057566e+69, 5.886736579053426e+69,
        1.938322207062486e+69, ..., 1.2711010567230068e+70,
        1.607247422009859e+70, 5.4471196538455e+69],
       ...,
       [4.107153383688897e+70, 3.8603680520731864e+70,
        1.2711010567230069e+70, ..., 8.335548752830638e+70,
        1.0539908618300876e+71, 3.572078752874245e+70],
       [5.193301979878103e+70, 4.8812535667059455e+70,
        1.6072474220098604e+70, ..., 1.053990861830088e+71,
        1.33272180784875e+71, 4.5167253342893486e+70],
       [1.7600611425969448e+70, 1.6543048637552317e+70,
        5.4471196538455004e+69, ..., 3.572078752874246e+70,
        4.51672533428934

# Prediction

In [709]:
USER = 1016   # User to check
N = 10

In [710]:
# # Context translation
# translated_context = []

# for cnt_index in range(len(CONTEXT)):
#     # 0 --> User
#     # 1 --> Item
#     # >= 2 --> context
#     map_index = cnt_index + 2
#     translation_table = user_item_context_encodings[map_index]
    
#     translated_context.append(translation_table[CONTEXT[cnt_index]])

# translated_context = tuple(translated_context)
# display(translated_context)

## Ratings Prediction

In [711]:
# Get K most similar users
wanted_user = user_item_context_encodings[0][USER]

scores = M[wanted_user]
K_similar_users = np.argpartition(scores, -(K+1))[-(K+1):]

if wanted_user in K_similar_users:
    K_similar_users = K_similar_users[K_similar_users != wanted_user]
else:
    K_similar_users = K_similar_users[:-1]

for usr in K_similar_users:
    print(user_item_context_reverse_encodings[0][usr], scores[usr])

1064 3.0088390844481863e+70
1018 3.063597409891779e+70
1075 3.126710021778636e+70
1037 3.21376470744407e+70
1045 3.3633130592679916e+70
1056 3.4373436534436326e+70
1084 3.602023775308071e+70
1114 3.6516222299606774e+70
1011 3.707634963264652e+70
1112 3.787164048406158e+70
1054 3.867252294681388e+70
1035 3.9731575916660455e+70
1113 3.9973225828947753e+70
1109 4.011394002757031e+70
1014 4.012661634463937e+70
1040 4.095512121463542e+70
1105 4.280744870156414e+70
1068 4.408595616998372e+70
1047 4.472856586424395e+70
1034 4.4775688136045637e+70
1039 4.5848318974036184e+70
1065 4.598242415378376e+70
1060 4.6661564469133324e+70
1055 4.6920350831529694e+70
1044 5.041127837267102e+70
1049 5.0480067519547914e+70
1061 5.1815930234859014e+70
1015 5.54293579640467e+70
1005 5.647823728783339e+70
1119 5.799892607524853e+70
1006 6.42504362608687e+70
1008 6.454333771843442e+70
1077 6.46208150809425e+70
1004 6.620537999532373e+70
1033 6.780390581424756e+70
1041 6.883446264653437e+70
1078 7.0217043221589

### Get list of rated items

In [712]:
inferred_ratings = []
user_rated_items = rating_matrix[wanted_user]

In [713]:
user_item_context_encodings[1]['tt1707386']

64

### KNN

In [714]:
for item in range(len(user_rated_items)):
    rating = user_rated_items[item]
    
    # Item has not been rated
    if rating == 0:
        rating_sum = 0
        neighbor_count = 0
        
        for neighbor in K_similar_users:
            neighbor_rating = rating_matrix[neighbor][item]
            
            # If neighbor has rated the item
            if neighbor_rating != 0:
                rating_sum += neighbor_rating
                neighbor_count += 1
            
        inferred_rating = rating_sum / neighbor_count if neighbor_count != 0 else 0
        
        if inferred_rating != 0:
            inferred_ratings.append((item, inferred_rating))

In [715]:
mapped_ratings = []
for i in inferred_ratings:
    # Get actual item encoding (from the cartesian product result)
    item_context = context_T[i[0]]
    
    item = user_item_context_reverse_encodings[1][item_context[0]]
    
    new_entry = [item]
    
    for j in range(1, len(item_context)):
        new_entry.append(item_context[j])
    
    new_entry.append(i[1])
    
    mapped_ratings.append(new_entry)

In [716]:
predicted_df = pd.DataFrame(
    mapped_ratings, 
    columns=[
        'Item', 
        *encoded_df.columns[3:], 
        'predicted_rating']
).sort_values(by='predicted_rating', ascending=False)
display(predicted_df)

Unnamed: 0,Item,Time,Location,Companion,predicted_rating
85,tt0120338,1,1,2,1.0
797,ttnanana2,0,0,2,1.0
776,tt4411490,0,0,2,1.0
451,tt0816199,1,0,1,1.0
317,tt0378194,0,1,2,1.0
...,...,...,...,...,...
718,tt3203616,1,0,0,0.2
752,tt3637328,1,1,2,0.2
715,tt2574698,1,0,0,0.2
713,tt2574698,0,1,0,0.2


## Recommendation Generation

In [717]:
# Select N items to be recommended
chosen = []

for data in predicted_df.iterrows():
    data_tup = tuple(data[1])
        
    if len(chosen) < N:
        chosen.append(data_tup)

for i in chosen:
    print(
        i[0], 
        user_item_context_reverse_encodings[2][i[1]],
        user_item_context_reverse_encodings[3][i[2]],
        user_item_context_reverse_encodings[4][i[3]],
        i[4]
    )

tt0120338 Weekend Home Partner 1.0
ttnanana2 Weekday Cinema Partner 1.0
tt4411490 Weekday Cinema Partner 1.0
tt0816199 Weekend Cinema Family 1.0
tt0378194 Weekday Home Partner 1.0
tt2096673 Weekday Cinema Partner 1.0
tt0110357 Weekday Home Partner 1.0
tt0125439 Weekend Home Alone 1.0
tt0268380 Weekend Cinema Family 1.0
tt2096673 Weekday Home Alone 1.0


## Rating Prediction Translated Result

In [718]:
mapped_ratings = []
for i in inferred_ratings:
    
    # Get actual item encoding (from the cartesian product result)
    item_context = context_T[i[0]]
    
    item = user_item_context_reverse_encodings[1][item_context[0]]
    
    new_entry = [item]
    
    # Translate remaining context
    for j in range(1, len(item_context)):
        # 1 --> item
        # >= 2 --> context
        translator = user_item_context_reverse_encodings[j+1]
        new_entry.append(translator[item_context[j]])
    
    new_entry.append(i[1])
    
    mapped_ratings.append(new_entry)

In [719]:
predicted = pd.DataFrame(
    mapped_ratings, 
    columns=[
        'Item', 
        *encoded_df.columns[3:], 
        'predicted_rating']
).sort_values(by='predicted_rating', ascending=False)
display(predicted)

Unnamed: 0,Item,Time,Location,Companion,predicted_rating
85,tt0120338,Weekend,Home,Partner,1.0
797,ttnanana2,Weekday,Cinema,Partner,1.0
776,tt4411490,Weekday,Cinema,Partner,1.0
451,tt0816199,Weekend,Cinema,Family,1.0
317,tt0378194,Weekday,Home,Partner,1.0
...,...,...,...,...,...
718,tt3203616,Weekend,Cinema,Alone,0.2
752,tt3637328,Weekend,Home,Partner,0.2
715,tt2574698,Weekend,Cinema,Alone,0.2
713,tt2574698,Weekday,Home,Alone,0.2


In [720]:
predicted[
    (predicted['Item'] == 'tt0110357') &
    (predicted['Time'] == 'Weekend') &
    (predicted['Location'] == 'Cinema') &
    (predicted['Companion'] == 'Partner')
]

Unnamed: 0,Item,Time,Location,Companion,predicted_rating
