# Context-Aware Recommendation Algorithm by PAPERDAA-2

In [32]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from itertools import product as cartesian_product

# Training

## Program Arguments

In [33]:
LMAX = 6      # Maximum transitive path length
K = 2

## Preprocessing

In [34]:
df = pd.read_csv('ratings.txt')
df.tail(5)

Unnamed: 0,userid,itemid,rating,Time,Location,Companion
5038,1082,tt0413267,1,Weekend,Home,Partner
5039,1082,tt1637706,2,Weekend,Home,Partner
5040,1082,tt0343660,1,Weekend,Home,Partner
5041,1082,tt1133985,1,Weekend,Home,Partner
5042,1082,tt1099212,1,Weekend,Home,Partner


In [35]:
encoder = preprocessing.LabelEncoder()

user_item_context_encodings = []
user_item_context_reverse_encodings = []
maximum_rating = df[df.columns[2]].max()

encoded_df = df.copy()
# Encode userid, itemid, and contextual informations for item splitting
for column_index in range(len(df.columns)):
    
    # Column attribute is not rating
    if column_index != 2:
        
        # Fit encoder
        encoder.fit(df[df.columns[column_index]])
        encoded_df[df.columns[column_index]] = encoder.transform(
                df[df.columns[column_index]]
            )
    
    # Column is nor user or rating
    if column_index != 2:
            user_item_context_encodings.append(
                dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))
            )
            user_item_context_reverse_encodings.append(
                dict(zip(encoder.transform(encoder.classes_), encoder.classes_))
            )

In [36]:
display(encoded_df)

Unnamed: 0,userid,itemid,rating,Time,Location,Companion
0,96,58,2,2,2,3
1,96,33,4,2,2,3
2,96,1,5,2,2,3
3,96,0,3,2,2,3
4,96,10,3,2,2,3
...,...,...,...,...,...,...
5038,70,35,1,1,1,2
5039,70,62,2,1,1,2
5040,70,25,1,1,1,2
5041,70,50,1,1,1,2


## Item Splitting

In [37]:
# Cartesian product all items and contexts

users = user_item_context_encodings[0].values()
items = user_item_context_encodings[1].values()
contexts = [
    context_trans.values() for context_trans in user_item_context_encodings[2:]
]

context_T = list(cartesian_product(items, *contexts))

In [38]:
# Generate new user-item matrix for new items
rating_matrix = np.zeros((len(users), len(context_T)), dtype=object)

In [39]:
for row in encoded_df.iterrows():
    data = tuple(row[1])
    user = data[0]
    item = data[1]
    rating = data[2]
    context_item = (item, *data[3:])
    
    index = context_T.index(context_item)
    
    rating_matrix[user][index] = rating/maximum_rating

In [40]:
display(rating_matrix)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0.2, 0, 0, ..., 0, 0, 0],
       ...,
       [1.0, 0.8, 1.0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=object)

## Graph Similarity Calculation (User-Based)

In [41]:
rating_matrix_transposed = np.transpose(rating_matrix)

In [42]:
L = 2

WWT = np.matmul(rating_matrix, rating_matrix_transposed)
M = np.matmul(rating_matrix, rating_matrix_transposed)

while L != LMAX:
    M = np.matmul(WWT, M)
    L = L + 2

In [43]:
display(M)

array([[319887.94342400023, 310866.42374400015, 228599.42560000002, ...,
        500325.6400640003, 156418.95948800008, 115412.14131200004],
       [310866.42374399997, 326760.6565120002, 232016.7691520001, ...,
        492466.1069439999, 157922.43334399996, 116548.61279999999],
       [228599.42560000013, 232016.769152, 173603.19462399997, ...,
        376846.6775680003, 119292.55020800003, 84620.83385600001],
       ...,
       [500325.640064, 492466.106944, 376846.6775680002, ...,
        897376.6468479999, 258292.61318400013, 184038.68224000005],
       [156418.95948800008, 157922.43334400008, 119292.55020800002, ...,
        258292.61318400007, 93437.86163200001, 56122.17920000002],
       [115412.14131200004, 116548.6128, 84620.83385599998, ...,
        184038.68224, 56122.17920000002, 43899.90822400003]], dtype=object)

# Prediction

In [44]:
USER = 1090   # User to check
CONTEXT = ('Weekend', 'Home', 'Partner')
N = 10

In [45]:
# Context translation
translated_context = []

for cnt_index in range(len(CONTEXT)):
    # 0 --> User
    # 1 --> Item
    # >= 2 --> context
    map_index = cnt_index + 2
    translation_table = user_item_context_encodings[map_index]
    
    translated_context.append(translation_table[CONTEXT[cnt_index]])

translated_context = tuple(translated_context)
display(translated_context)

(1, 1, 2)

## Ratings Prediction

In [46]:
# Get K most similar users
wanted_user = user_item_context_encodings[0][USER]

scores = M[wanted_user]
K_similar_users = np.argpartition(scores, -(K+1))[-(K+1):]

if wanted_user in K_similar_users:
    K_similar_users = K_similar_users[K_similar_users != wanted_user]
else:
    K_similar_users = K_similar_users[:-1]

### Get list of rated items

In [47]:
inferred_ratings = []
user_rated_items = rating_matrix[wanted_user]

### KNN

In [48]:
for item in range(len(user_rated_items)):
    rating = user_rated_items[item]
    
    # Item has not been rated
    if rating == 0:
        rating_sum = 0
        neighbor_count = 0
        
        for neighbor in K_similar_users:
            neighbor_rating = rating_matrix[neighbor][item]
            
            # If neighbor has rated the item
            if neighbor_rating != 0:
                rating_sum += neighbor_rating
                neighbor_count += 1
            
        inferred_rating = rating_sum / neighbor_count if neighbor_count != 0 else 0
        
        if inferred_rating != 0:
            inferred_ratings.append((item, inferred_rating))

In [49]:
mapped_ratings = []
for i in inferred_ratings:
    
    # Get actual item encoding (from the cartesian product result)
    item_context = context_T[i[0]]
    
    item = user_item_context_reverse_encodings[1][item_context[0]]
    
    new_entry = [item]
    
    for j in range(1, len(item_context)):
        new_entry.append(item_context[j])
    
    new_entry.append(i[1])
    
    mapped_ratings.append(new_entry)

In [50]:
predicted_df = pd.DataFrame(
    mapped_ratings, 
    columns=[
        'Item', 
        *encoded_df.columns[3:], 
        'predicted_rating']
).sort_values(by='predicted_rating', ascending=False)
display(predicted_df)

Unnamed: 0,Item,Time,Location,Companion,predicted_rating
203,tt1375666,1,0,2,1.0
63,tt0266543,1,0,1,1.0
58,tt0213149,1,0,2,1.0
92,tt0343660,1,0,0,1.0
115,tt0378194,1,0,2,1.0
...,...,...,...,...,...
74,tt0289879,1,0,1,0.2
216,tt1499658,1,0,1,0.2
30,tt0120338,2,2,3,0.2
29,tt0120338,1,0,0,0.2


## Recommendation Generation

In [51]:
# Select N items to be recommended
chosen = []

for data in predicted_df.iterrows():
    data_tup = tuple(data[1])
    
    if tuple(data[1][1:-1]) == translated_context:
        
        if len(chosen) <= N:
            chosen.append(data_tup)

for i in chosen:
    print(i[:-1])

('tt1478338', 1, 1, 2)
('tt0266543', 1, 1, 2)
('tt0993846', 1, 1, 2)
('tt1232829', 1, 1, 2)
('tt0268380', 1, 1, 2)
('tt0327084', 1, 1, 2)
('tt3793764', 1, 1, 2)
('tt4411490', 1, 1, 2)
('tt2096673', 1, 1, 2)
('tt1707386', 1, 1, 2)
('tt0356910', 1, 1, 2)


## Rating Prediction Translated Result

In [52]:
mapped_ratings = []
for i in inferred_ratings:
    
    # Get actual item encoding (from the cartesian product result)
    item_context = context_T[i[0]]
    
    item = user_item_context_reverse_encodings[1][item_context[0]]
    
    new_entry = [item]
    
    # Translate remaining context
    for j in range(1, len(item_context)):
        # 1 --> item
        # >= 2 --> context
        translator = user_item_context_reverse_encodings[j+1]
        new_entry.append(translator[item_context[j]])
    
    new_entry.append(i[1])
    
    mapped_ratings.append(new_entry)

In [53]:
pd.DataFrame(
    mapped_ratings, 
    columns=[
        'Item', 
        *encoded_df.columns[3:], 
        'predicted_rating']
).sort_values(by='predicted_rating', ascending=False)

Unnamed: 0,Item,Time,Location,Companion,predicted_rating
203,tt1375666,Weekend,Cinema,Partner,1.0
63,tt0266543,Weekend,Cinema,Family,1.0
58,tt0213149,Weekend,Cinema,Partner,1.0
92,tt0343660,Weekend,Cinema,Alone,1.0
115,tt0378194,Weekend,Cinema,Partner,1.0
...,...,...,...,...,...
74,tt0289879,Weekend,Cinema,Family,0.2
216,tt1499658,Weekend,Cinema,Family,0.2
30,tt0120338,,,,0.2
29,tt0120338,Weekend,Cinema,Alone,0.2
