In [3]:
import pandas as pd
import numpy as np
import csv
import networkx as nx
import torch
import torch.nn as nn
import pickle
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_absolute_error
from math import sqrt

# Input user-item relations and user-user relations data

In [104]:
class Attention(nn.Module):
    """Three-layer MLP that scores each neighbour row against a target embedding.

    The scores are normalised with a softmax across the neighbour axis (dim 0),
    so the returned column of weights sums to one.
    """

    def __init__(self, embedding_dims):
        super().__init__()
        self.embed_dim = embedding_dims
        # Consumes the concatenation [neighbour representation, target embedding].
        self.att1 = nn.Linear(2 * self.embed_dim, self.embed_dim)
        # Hidden layer that keeps the width at embed_dim.
        self.att2 = nn.Linear(self.embed_dim, self.embed_dim)
        # Projects every row to one unnormalised attention score.
        self.att3 = nn.Linear(self.embed_dim, 1)
        # Not used by forward(); kept so the module's attributes stay the same.
        self.softmax = nn.Softmax(0)

    def forward(self, node1, u_rep, num_neighs):
        """Return attention weights of shape (num_neighs, 1).

        node1:      neighbour representations, one row per neighbour.
        u_rep:      embedding of the target node; it is tiled num_neighs times.
        num_neighs: number of rows in node1.
        """
        tiled_target = u_rep.repeat(num_neighs, 1)
        hidden = torch.cat((node1, tiled_target), 1)
        # Two identical stages: linear -> ReLU -> dropout (active in training only).
        for layer in (self.att1, self.att2):
            hidden = F.dropout(F.relu(layer(hidden)), training=self.training)
        raw_scores = self.att3(hidden)
        return F.softmax(raw_scores, dim=0)

In [115]:
class UV_Aggregator(nn.Module):
    """
    User-item aggregation, used for both user latent encodings (uv=True)
    and item latent encodings (uv=False).

    For every node in the batch, the embeddings of the entities in its
    interaction history are fused with the matching rating embeddings,
    weighted by attention against the node's own embedding, summed, and
    finally combined with the node's own embedding.

    Args:
        v2e, r2e, u2e: item, rating and user embedding tables (their
            ``.weight`` is indexed directly).
        embed_dim: width of every embedding / hidden layer.
        user_or_item_lists: mapping node id -> ids of the interacted entities
            (items when uv=True, users when uv=False).
        user_or_item_ratings_lists: mapping node id -> indices into ``r2e``,
            aligned with ``user_or_item_lists`` (assumed to be rating indices,
            not raw rating values -- TODO confirm against the dataset).
        cuda: device on which the aggregation buffer is allocated.
        uv: True when ``nodes`` are users, False when they are items.
    """
    def __init__(self, v2e, r2e, u2e, embed_dim, user_or_item_lists, user_or_item_ratings_lists, cuda, uv):
        super(UV_Aggregator, self).__init__()
        self.uv = uv
        self.v2e = v2e
        self.r2e = r2e
        self.u2e = u2e
        self.device = cuda
        self.embed_dim = embed_dim
        # linear1 + linear2 fuse [entity embedding, rating embedding] into x_ia.
        self.linear1 = nn.Linear(self.embed_dim * 2, self.embed_dim)
        self.linear2 = nn.Linear(self.embed_dim, self.embed_dim)
        # Attention of every x_ia against the embedding of the node being encoded.
        self.att = Attention(self.embed_dim)
        self.user_or_item_lists = user_or_item_lists
        self.user_or_item_ratings_lists = user_or_item_ratings_lists
        # Combines the node's own embedding with its aggregated history.
        self.linear3 = nn.Linear(self.embed_dim * 2, self.embed_dim)

    def forward(self, nodes):
        """Encode a batch of node ids; returns a (len(nodes), embed_dim) tensor."""
        # own_table holds the embeddings of the nodes being encoded,
        # other_table the embeddings of the entities in their history.
        if self.uv:
            own_table, other_table = self.u2e.weight, self.v2e.weight
        else:
            own_table, other_table = self.v2e.weight, self.u2e.weight

        neigh_feats = torch.empty(len(nodes), self.embed_dim,
                                  dtype=torch.float, device=self.device)

        for i, node in enumerate(nodes):
            node_id = int(node)
            history = self.user_or_item_lists[node_id]
            ratings = self.user_or_item_ratings_lists[node_id]

            e_uv = other_table[history]
            e_r = self.r2e.weight[ratings]
            x = F.relu(self.linear1(torch.cat((e_uv, e_r), 1)))
            o_history = F.relu(self.linear2(x))     # x_ia, just before attention

            att_w = self.att(o_history, own_table[node_id], len(history))
            # Attention-weighted sum over the history: (d, n) @ (n, 1) -> (d,)
            neigh_feats[i] = torch.mm(o_history.t(), att_w).squeeze(1)

        # Self-connection: the self features must come from the same table as
        # the nodes themselves (u2e for users, v2e for items). Always reading
        # u2e would index the user table with item ids when uv=False.
        self_feats = own_table[nodes]
        combined = torch.cat([self_feats, neigh_feats], dim=1)
        return F.relu(self.linear3(combined))

In [None]:
class Social_Aggregator(nn.Module):
    """
    Social Aggregator: for aggregating embeddings of social neighbors.

    For every user in the batch, the embeddings of the user's social
    neighbours are summed with attention weights computed against the user's
    own embedding. The result is concatenated with the user's item-space
    features (from ``features``) and passed through a linear layer + ReLU.

    Args:
        features: callable taking a CPU LongTensor of user ids and returning
            their features transposed, i.e. shaped (embed_dim, batch); the
            result is transposed back in ``forward``.
        u2e: user embedding table (its ``.weight`` is indexed directly).
        embed_dim: width of every embedding / hidden layer.
        social_adj_lists: mapping user id -> iterable of neighbouring user ids.
        base_model: optional module behind ``features``. Storing it as an
            attribute registers it as a sub-module, so its parameters are
            returned by ``parameters()``.
        cuda: device for the aggregation buffer and the self features.
            It has a default only because it follows ``base_model`` in the
            signature; callers normally pass it by keyword.
    """

    def __init__(self, features, u2e, embed_dim, social_adj_lists, base_model=None, cuda="cpu"):
        super(Social_Aggregator, self).__init__()

        self.features = features
        self.device = cuda
        self.u2e = u2e
        self.embed_dim = embed_dim
        self.att = Attention(self.embed_dim)
        if base_model is not None:
            self.base_model = base_model
        self.social_adj_lists = social_adj_lists
        self.linear1 = nn.Linear(2 * self.embed_dim, self.embed_dim)

    def forward(self, nodes):
        """Encode a batch of user ids; returns a (len(nodes), embed_dim) tensor."""
        # Item-space features of the users themselves. `features` returns them
        # transposed, so transpose back to (batch, embed_dim).
        self_feats = self.features(torch.LongTensor(nodes.cpu().numpy())).to(self.device)
        self_feats = self_feats.t()

        # Attention-weighted aggregation over each user's social neighbours.
        neigh_feats = torch.empty(len(nodes), self.embed_dim,
                                  dtype=torch.float, device=self.device)
        for i, node in enumerate(nodes):
            node_id = int(node)
            neighbours = list(self.social_adj_lists[node_id])
            e_u = self.u2e.weight[neighbours]      # neighbour embeddings, (n, d)
            u_rep = self.u2e.weight[node_id]       # embedding of the user itself

            att_w = self.att(e_u, u_rep, len(neighbours))
            # (d, n) @ (n, 1) -> (d,)
            neigh_feats[i] = torch.mm(e_u.t(), att_w).squeeze(1)

        # Self-connection: concatenate own features with the social aggregation.
        combined = torch.cat([self_feats, neigh_feats], dim=1)
        combined = F.relu(self.linear1(combined))
        return combined

In [None]:
class Recommendation(nn.Module):
    def __init__(self, enc_u, enc_v_history, r2e, embed_dim):
        super(Recommendation, self).__init__()
        self.enc_u = enc_u
        self.enc_v_history = enc_v_history
        self.embed_dim = embed_dim

        self.w_ur1 = nn.Linear(self.embed_dim, self.embed_dim)
        self.w_ur2 = nn.Linear(self.embed_dim, self.embed_dim)
        
        self.w_vr1 = nn.Linear(self.embed_dim, self.embed_dim)
        self.w_vr2 = nn.Linear(self.embed_dim, self.embed_dim)

        self.w_uv1 = nn.Linear(self.embed_dim * 2, self.embed_dim)
        self.w_uv2 = nn.Linear(self.embed_dim, 16)
        self.w_uv3 = nn.Linear(16, 1)
        
        self.r2e = r2e
        self.bn1 = nn.BatchNorm1d(self.embed_dim, momentum=0.5)
        self.bn2 = nn.BatchNorm1d(self.embed_dim, momentum=0.5)
        self.bn3 = nn.BatchNorm1d(self.embed_dim, momentum=0.5)
        self.bn4 = nn.BatchNorm1d(16, momentum=0.5)
        self.criterion = nn.MSELoss()

    def forward(self, nodes_u, nodes_v):
        embeds_u = self.enc_u(nodes_u)
        embeds_v = self.enc_v_history(nodes_v)

        x_u = F.relu(self.bn1(self.w_ur1(embeds_u)))
        x_u = F.dropout(x_u, training=self.training)
        x_u = self.w_ur2(x_u)
        
        x_v = F.relu(self.bn2(self.w_vr1(embeds_v)))
        x_v = F.dropout(x_v, training=self.training)
        x_v = self.w_vr2(x_v)

        x_uv = torch.cat((x_u, x_v), 1)
        x = F.relu(self.bn3(self.w_uv1(x_uv)))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.bn4(self.w_uv2(x)))
        x = F.dropout(x, training=self.training)
        scores = self.w_uv3(x)
        return scores.squeeze()

    def loss(self, nodes_u, nodes_v, labels_list):
        scores = self.forward(nodes_u, nodes_v)
        return self.criterion(scores, labels_list)

    

# Initialization of toy_dataset

In [92]:
# Hyper-parameters
batch_size = 128
embed_dim = 64
lr = 0.001
epochs = 2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load dataset files that come from a trusted source.
with open("toy_dataset.pickle", 'rb') as data_file:
    (history_u_lists, history_ur_lists, history_v_lists, history_vr_lists,
     train_u, train_v, train_r, test_u, test_v, test_r,
     social_adj_lists, ratings_list) = pickle.load(data_file)

# Toy dataset contents:
#   history_u_lists, history_ur_lists: user's purchased history (item set in
#       training set), and his/her rating score (dict)
#   history_v_lists, history_vr_lists: user set who have interacted with the
#       item, and rating score (dict)
#   train_u, train_v, train_r: training set (user, item, rating)
#   test_u, test_v, test_r: testing set (user, item, rating); used here for
#       validation
#   social_adj_lists: user's connected neighborhoods
#   ratings_list: rating value from 0.5 to 4.0 (8 opinion embeddings)

# Creating Embedding Matrix
num_users = len(history_u_lists)
num_items = len(history_v_lists)
num_ratings = len(ratings_list)

u2e = nn.Embedding(num_users, embed_dim).to(device)
v2e = nn.Embedding(num_items, embed_dim).to(device)
r2e = nn.Embedding(num_ratings, embed_dim).to(device)

train_set = TensorDataset(torch.LongTensor(train_u), torch.LongTensor(train_v), torch.FloatTensor(train_r))
test_set = TensorDataset(torch.LongTensor(test_u), torch.LongTensor(test_v), torch.FloatTensor(test_r))
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

# User latent features
# Item aggregation in the user-item space; uv=True marks the user side.
enc_u_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, history_u_lists, history_ur_lists, cuda=device, uv=True)
# Social relations. The lambda hands the item-space features over transposed;
# base_model registers enc_u_history so that its parameters are optimised too.
enc_u = Social_Aggregator(lambda nodes: enc_u_history(nodes).t(), u2e, embed_dim, social_adj_lists, base_model=enc_u_history, cuda=device)

# Item latent features
enc_v_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, history_v_lists, history_vr_lists, cuda=device, uv=False)

# model
graphrec = Recommendation(enc_u, enc_v_history, r2e, embed_dim).to(device)
optimizer = torch.optim.RMSprop(graphrec.parameters(), lr=lr, alpha=0.9)

# Early-stopping trackers read by the training loop. best_rmse has to exist
# before the loop runs, otherwise its first use raises a NameError.
best_rmse = 9999.0
best_mae = 9999.0
endure_count = 0

In [None]:
def train(model, device, train_loader, optimizer, epoch, best_rmse, best_mae):
    """Run one training epoch over ``train_loader``.

    Args:
        model: module exposing ``loss(nodes_u, nodes_v, labels)``.
        device: device every batch is moved to.
        train_loader: iterable yielding (user ids, item ids, ratings) batches.
        optimizer: optimizer over the model's parameters.
        epoch: epoch number, used only in the progress message.
        best_rmse, best_mae: best validation metrics so far, used only in the
            progress message.

    Returns:
        0, always.
    """
    model.train()
    running_loss = 0.0
    batches_since_report = 0
    for i, (batch_nodes_u, batch_nodes_v, labels_list) in enumerate(train_loader):
        optimizer.zero_grad()
        loss = model.loss(batch_nodes_u.to(device), batch_nodes_v.to(device), labels_list.to(device))
        # Every batch builds a fresh graph, so it does not need to be retained
        # after the backward pass.
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        batches_since_report += 1
        if i % 100 == 0:
            # Average over the batches actually accumulated; the first report
            # (i == 0) covers a single batch, not 100.
            print('[%d, %5d] loss: %.3f, The best rmse/mae: %.6f / %.6f' % (
                epoch, i, running_loss / batches_since_report, best_rmse, best_mae))
            running_loss = 0.0
            batches_since_report = 0
    return 0


def test(model, device, test_loader):
    model.eval()
    tmp_pred = []
    target = []
    with torch.no_grad():
        for test_u, test_v, tmp_target in test_loader:
            test_u, test_v, tmp_target = test_u.to(device), test_v.to(device), tmp_target.to(device)
            val_output = model.forward(test_u, test_v)
            tmp_pred.append(list(val_output.data.cpu().numpy()))
            target.append(list(tmp_target.data.cpu().numpy()))
    tmp_pred = np.array(sum(tmp_pred, []))
    target = np.array(sum(target, []))
    mae = mean_absolute_error(tmp_pred, target)
    return mae


In [None]:
# Early-stopping trackers, initialised here so that this cell runs on a fresh
# kernel (best_rmse is otherwise read before it is ever assigned).
best_rmse = 9999.0
best_mae = 9999.0
endure_count = 0

for epoch in range(1, epochs + 1):

    train(graphrec, device, train_loader, optimizer, epoch, best_rmse, best_mae)
    expected_rmse, mae = test(graphrec, device, test_loader)

    # early stopping: track the best RMSE and count epochs without improvement
    if best_rmse > expected_rmse:
        best_rmse = expected_rmse
        best_mae = mae
        endure_count = 0
    else:
        endure_count += 1
    print("rmse: %.4f, mae:%.4f " % (expected_rmse, mae))

    # stop after more than 5 consecutive epochs without a better RMSE
    if endure_count > 5:
        break