In [20]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from scipy.sparse import dok_matrix
import json
from importlib import import_module
import os

In [21]:
def zero_based_mapping(data, mapping_path='/opt/ml/movie-recommendation/data/train/zero_mapping.json'):
    """Re-index the ``user`` and ``item`` columns of ``data`` through a JSON lookup table.

    The JSON file must contain two dictionaries under the keys ``'user'`` and
    ``'item'``; each one maps the string form of a raw id to its new index.

    Args:
        data: DataFrame with ``user`` and ``item`` columns. Both columns are
            overwritten in place and the same object is returned.
        mapping_path: Location of the JSON mapping file. The default is the
            path this notebook has always read from.

    Returns:
        Tuple ``(data, n_user, n_item)``; the two counts are the number of
        entries in the user and item dictionaries respectively.

    Raises:
        KeyError: If an id found in ``data`` has no entry in the mapping.
    """
    with open(mapping_path, 'r') as f:
        mapping = json.load(f)

    user_map = mapping['user']
    item_map = mapping['item']

    # JSON object keys are always strings, hence the str() on every raw id.
    data['user'] = data['user'].map(lambda raw_id: user_map[str(raw_id)])
    data['item'] = data['item'].map(lambda raw_id: item_map[str(raw_id)])

    return data, len(user_map), len(item_map)

In [22]:
def recall_at_k(actual, predicted, topk):
    """Mean recall@k over the users that have at least one ground-truth item.

    Args:
        actual: Sequence where ``actual[i]`` holds the relevant items of user ``i``.
        predicted: Sequence where ``predicted[i]`` holds the ranked
            recommendations for user ``i``; only the first ``topk`` are used.
        topk: Cut-off applied to every prediction list.

    Returns:
        The average, over users with a non-empty ``actual[i]``, of
        ``|actual ∩ predicted[:topk]| / |actual|``. Returns ``0.0`` when no
        user has any ground-truth item (the original code divided by zero).
    """
    sum_recall = 0.0
    true_users = 0

    for i in range(len(predicted)):
        act_set = set(actual[i])
        if not act_set:
            # Users without ground truth do not contribute to the average.
            continue
        pred_set = set(predicted[i][:topk])
        sum_recall += len(act_set & pred_set) / float(len(act_set))
        true_users += 1

    if true_users == 0:
        return 0.0
    return sum_recall / true_users

In [23]:
def get_valid_score(all_preds, data) :
    """Compute recall@10 on a validation frame from per-batch model scores.

    ``all_preds`` is a list of score tensors which, once concatenated, must
    line up row-for-row with ``data``. ``data`` needs ``user``, ``item`` and
    ``rating`` columns; a ``preds`` column is written onto it as a side effect.
    """
    # Attach the flattened scores to the frame (mutates the caller's frame).
    data['preds'] = torch.cat(all_preds).detach().cpu().numpy()

    predicted, actual = [], []

    for _, rows in data.groupby(by='user'):
        # The ten highest-scored items are this user's recommendations.
        best_rows = rows.nlargest(10, ['preds'])
        predicted.append(np.array(best_rows['item']))

        # Rows with rating == 1 form the ground truth.
        positive_rows = rows[rows['rating'] == 1]
        actual.append(np.array(positive_rows['item']))

    return recall_at_k(actual, predicted, 10)

In [24]:
class BPRDataset(Dataset):
    """Dataset yielding ``(user, item_i, item_j)`` triples.

    In training mode ``item_j`` is a sampled item the user has not interacted
    with; ``negative_sampling()`` must be called before iterating because it
    is what fills ``self.features`` with three-column rows. In evaluation mode
    ``item_j`` simply repeats ``item_i``.

    Args:
        data_path: CSV with ``user`` and ``item`` columns and an optional
            ``rating`` column. Ignored when ``all_cases`` is true.
        num_negative: Negatives drawn per observed interaction.
        is_training: Enables the interaction matrix and negative sampling.
        all_cases: When true, the data is every (user, unseen item) pair
            derived from ``train_ratings.csv`` instead of ``data_path``.
    """

    def __init__(self, data_path, num_negative=5, is_training=True, all_cases=False):
        super(BPRDataset, self).__init__()

        if all_cases:
            self.data = self.get_all_cases()
        else:
            raw = pd.read_csv(data_path)
            columns = ['user', 'item']
            if 'rating' in raw.columns:
                columns.append('rating')
            self.data = raw[columns].sort_values(by=['user'])

        self.data, self.n_user, self.n_item = zero_based_mapping(self.data)

        if is_training:
            self.get_sparse_matrix()

        self.num_negative = num_negative
        self.is_training = is_training
        self.features = self.data.values

    def _interactions(self):
        """Return the (user, item) pairs as a two-column array.

        Selecting the two columns explicitly keeps the ``for u, i in ...``
        loops working when the frame also carries a ``rating`` column; the
        original iterated over ``self.data.values`` and failed to unpack
        three-column rows.
        """
        return self.data[['user', 'item']].values

    def negative_sampling(self):
        """Replace ``self.features`` with freshly sampled ``[user, pos, neg]`` rows.

        For every observed pair, ``num_negative`` items are drawn uniformly
        from ``[0, n_item)`` and redrawn until the (user, item) pair is absent
        from ``train_matrix``. Note that this loop cannot terminate for a
        user who has interacted with every item.
        """
        assert self.is_training, 'no need to sampling when testing'
        negative_samples = []

        for u, i in self._interactions():
            for _ in range(self.num_negative):
                j = np.random.randint(self.n_item)
                while (u, j) in self.train_matrix:
                    j = np.random.randint(self.n_item)
                negative_samples.append([u, i, j])

        self.features = negative_samples

    def __len__(self):
        # Training length assumes negative_sampling() has expanded every
        # interaction into num_negative rows.
        if self.is_training:
            return self.num_negative * len(self.data)
        return len(self.data)

    def __getitem__(self, idx):
        row = self.features[idx]
        user = row[0]
        item_i = row[1]
        # Evaluation rows carry no sampled negative, so item_i is repeated.
        item_j = row[2] if self.is_training else row[1]
        return user, item_i, item_j

    def get_sparse_matrix(self):
        """Build ``self.train_matrix``, a DOK matrix with 1.0 at every observed pair."""
        train_matrix = dok_matrix((self.n_user, self.n_item), dtype=np.float32)
        for u, i in self._interactions():
            train_matrix[u, i] = 1.0
        self.train_matrix = train_matrix

    def get_all_cases(self):
        """Return a frame of every (user, item) pair absent from ``train_ratings.csv``.

        The result has ``user`` and ``item`` columns, is sorted by user and
        has a fresh 0..n-1 index.
        """
        train_df = pd.read_csv('/opt/ml/movie-recommendation/data/train/train_ratings.csv')

        items = set(train_df['item'])
        observed_items_per_user = list(train_df.groupby('user')['item'])

        # For each user keep only the items they have not interacted with.
        unseen_items_dfs = list()

        for user, observed_items in observed_items_per_user:
            observed_items = set(observed_items)
            unseen_item = list(items - observed_items)

            user_id = [user] * len(unseen_item)
            unseen_items_dfs.append(pd.DataFrame(zip(user_id, unseen_item), columns=['user', 'item']))

        test_df = pd.concat(unseen_items_dfs, axis=0, sort=False)

        test_df = test_df.sort_values(by=['user'])
        test_df.reset_index(drop=True, inplace=True)
        return test_df

In [25]:
class ContextualBPRDataset(BPRDataset):
    """BPRDataset variant that also returns a context vector for each item.

    Every sample is ``(user, item_i, item_j, context_i, context_j)`` where the
    two context entries are rows of the table built by ``get_item_context``.
    """

    def __init__(self, data_path, num_negative=5, is_training=True, all_cases=False):
        super().__init__(
            data_path=data_path,
            num_negative=num_negative,
            is_training=is_training,
            all_cases=all_cases,
        )

        self.item_context = self.get_item_context()

    def __getitem__(self, idx):
        row = self.features[idx]
        user, item_i = row[0], row[1]

        # Outside training there is no sampled negative; reuse the positive.
        if self.is_training:
            item_j = row[2]
        else:
            item_j = row[1]

        return user, item_i, item_j, self.item_context[item_i], self.item_context[item_j]

    def get_item_context(self):
        """Load ``item2attributes.json`` and build one context row per item.

        Each JSON entry is a list: its first element is copied as-is into
        column 0, and every remaining element is used as an index into an
        18-slot 0/1 vector. The returned array therefore has 19 columns and
        one row for each item id in ``range(6807)``.
        """
        data_dir = '/opt/ml/movie-recommendation/data/train/'

        with open(data_dir+'item2attributes.json', 'r') as f:
            item2attributes = json.load(f)

        def encode(raw):
            # Multi-hot encode everything after the leading attribute.
            flags = [0] * 18
            for slot in raw[1:]:
                flags[slot] = 1
            return [raw[0]] + flags

        rows = [encode(item2attributes[str(item_id)]) for item_id in range(6807)]

        return np.array(rows)

In [26]:
class BPR(nn.Module):
    """Dot-product scorer over user and item embeddings (BPR-style model).

    user_num: number of users;
    item_num: number of items;
    factor_num: number of predictive factors.
    """

    def __init__(self, user_num, item_num, factor_num):
        super().__init__()

        self.embed_user = nn.Embedding(user_num, factor_num)
        self.embed_item = nn.Embedding(item_num, factor_num)

        # Overwrite the default initialisation with small Gaussian noise,
        # user table first and item table second.
        for table in (self.embed_user, self.embed_item):
            nn.init.normal_(table.weight, std=0.01)

    def forward(self, user, item_i, item_j):
        """Return the scores of ``item_i`` and ``item_j`` for each ``user``."""
        user_vec = self.embed_user(user)
        vec_i = self.embed_item(item_i)
        vec_j = self.embed_item(item_j)

        def score(item_vec):
            # Inner product along the factor dimension.
            return (user_vec * item_vec).sum(dim=-1)

        return score(vec_i), score(vec_j)

In [27]:
class EmbeddingLayer(nn.Module):
    """One shared embedding table for several one-hot fields plus one multi-hot field.

    Input rows are laid out as ``field_num - 1`` index columns (one per
    one-hot field) followed by the 0/1 columns of a single multi-hot field.

    Args:
        input_dim: Total number of categories over all fields; the table gets
            ``input_dim + 1`` rows, the extra one being the padding row.
        embedding_dim: Width of every embedding vector.
        field_num: Number of fields, counting the multi-hot field.
        offsets: One row offset per field. ``offsets[-1]`` is the offset of
            the multi-hot field and is also used as the padding row.
    """

    def __init__(self, input_dim, embedding_dim, field_num, offsets):
        super(EmbeddingLayer, self).__init__()

        self.field_num = field_num

        # Registered as a buffer so it follows the module through .to()/.cuda()
        # instead of being pinned to 'cuda', which crashed on CPU-only machines.
        # persistent=False keeps it out of state_dict, so checkpoints saved
        # before this change still load.
        offsets = torch.tensor(offsets, dtype=torch.long)
        self.register_buffer('offsets', offsets, persistent=False)

        self.embedding = nn.Embedding(input_dim + 1, embedding_dim, padding_idx=int(offsets[-1]))

    def forward(self, x):
        """Embed ``x`` into a ``(batch, field_num, embedding_dim)`` tensor.

        ``x`` is expected to be an integer tensor on the same device as the
        module. It is not modified; the multi-hot slice is cloned first.
        """
        one_hot_x = x[:, :self.field_num - 1]
        multi_hot_x = x[:, self.field_num - 1:].clone()

        embed_x = self.embedding(one_hot_x + self.offsets[:-1])

        # Replace every active flag by (column index + 1). Inactive flags stay
        # 0 and so land on the padding row, which contributes a zero vector.
        indices = multi_hot_x.nonzero()
        multi_hot_x[indices[:, 0], indices[:, 1]] = indices[:, 1] + 1
        embed = self.embedding(multi_hot_x + self.offsets[-1])

        # Collapse the multi-hot field into a single vector by summation.
        sum_embed = torch.sum(embed, dim=1)

        return torch.cat([embed_x, sum_embed.unsqueeze(1)], dim=1)

In [28]:
class ContextualBPR(BPR):
    """BPR extended with an item bias and a user-by-context interaction term.

    The score of an item is the sum of two parts: the plain BPR dot product
    plus a per-item bias, and the dot product between a per-user context
    embedding and the item's concatenated context-field embeddings plus a
    summed context bias.

    Args:
        user_num: Number of users.
        item_num: Number of items.
        factor_num: Embedding width used for every field.
        context_dims: Category count of each context field; the last field is
            handled as multi-hot by ``EmbeddingLayer``.
    """

    def __init__(self, user_num, item_num, factor_num, context_dims=[12,18]):
        super(ContextualBPR, self).__init__(user_num, item_num, factor_num)

        context_dims = list(context_dims)
        field_num = len(context_dims)
        context_num = int(sum(context_dims))

        # Each field starts where the previous ones end. The original used
        # [0] + context_dims[:-1], i.e. raw field sizes, which is only correct
        # for up to two fields; for the default [12, 18] both give [0, 12].
        offsets = [0]
        for dim in context_dims[:-1]:
            offsets.append(offsets[-1] + dim)

        self.total_embed_num = factor_num * field_num

        self.bias_item = nn.Parameter(torch.zeros(item_num))
        self.context_bias = EmbeddingLayer(context_num, 1, field_num, offsets)

        self.embed_context = EmbeddingLayer(context_num, factor_num, field_num, offsets)
        self.embed_user_context = nn.Embedding(user_num, self.total_embed_num)

    def forward(self, user, item_i, item_j, context_i, context_j):
        """Return the combined scores for the ``i`` and ``j`` items."""
        bpr_i, bpr_j = self.bpr(user, item_i, item_j)
        cbpr_i, cbpr_j = self.cbpr(user, context_i, context_j)

        return bpr_i + cbpr_i, bpr_j + cbpr_j

    def bpr(self, user, item_i, item_j):
        """Collaborative part: user-item dot product plus item bias."""
        user = self.embed_user(user)
        b_i = self.bias_item[item_i]
        b_j = self.bias_item[item_j]

        item_i = self.embed_item(item_i)
        item_j = self.embed_item(item_j)

        prediction_i = (user * item_i).sum(dim=-1) + b_i
        prediction_j = (user * item_j).sum(dim=-1) + b_j

        return prediction_i, prediction_j

    def cbpr(self, user, context_i, context_j):
        """Context part: user-context dot product plus summed context bias."""
        # context_bias yields (batch, field_num, 1); summing the fields leaves
        # (batch, 1). squeeze(-1) drops only that trailing axis, so a batch of
        # one stays one-dimensional.
        context_i_bias = torch.sum(self.context_bias(context_i), dim=1).squeeze(-1)
        context_j_bias = torch.sum(self.context_bias(context_j), dim=1).squeeze(-1)

        # Flatten the per-field embeddings into one vector per sample.
        context_i = self.embed_context(context_i).view(-1, self.total_embed_num)
        context_j = self.embed_context(context_j).view(-1, self.total_embed_num)

        context_user = self.embed_user_context(user)

        prediction_i = (context_user * context_i).sum(dim=-1) + context_i_bias
        prediction_j = (context_user * context_j).sum(dim=-1) + context_j_bias

        return prediction_i, prediction_j
    

In [29]:
class FM(nn.Module):
    """Factorization machine over one-hot fields plus a trailing multi-hot field.

    Args:
        input_dims: Category count of each field, in input-column order.
        embedding_dim: Width of each field embedding.
        cumulative_offsets: How field offsets into the shared embedding table
            are computed.

            * ``False`` (default, legacy): ``[0] + input_dims[:-1]``. The
              offsets are raw field sizes, so with more than two fields the
              row ranges overlap; e.g. ``[31360, 6807, 12, 18]`` gives
              ``[0, 31360, 6807, 12]``. Kept as the default so checkpoints
              trained with this layout keep their meaning.
            * ``True``: running sums of the sizes, giving every field its own
              disjoint row range. Models must be retrained when switching.
    """

    def __init__(self, input_dims, embedding_dim, cumulative_offsets=False):
        super(FM, self).__init__()
        input_dims = list(input_dims)
        self.field_num = len(input_dims)
        total_input_dim = int(sum(input_dims))

        if cumulative_offsets:
            offsets = [0]
            for dim in input_dims[:-1]:
                offsets.append(offsets[-1] + dim)
            self.offsets = offsets
        else:
            # NOTE(review): legacy layout, see the class docstring.
            self.offsets = [0] + input_dims[:-1]

        self.bias = nn.Parameter(torch.zeros((1,)))
        # First-order weights: a 1-dimensional embedding per category.
        self.fc = EmbeddingLayer(total_input_dim, 1, self.field_num, self.offsets)

        self.embedding = EmbeddingLayer(total_input_dim, embedding_dim, self.field_num, self.offsets)
        self.embedding_dim = self.field_num * embedding_dim

    def fm(self, x, embed_x):
        """Return the raw FM score with shape ``(batch, 1)``.

        Adds the global bias, the summed first-order weights and the pairwise
        term ``0.5 * sum((sum_f v_f)^2 - sum_f v_f^2)`` computed from
        ``embed_x``.
        """
        fm_y = self.bias + torch.sum(self.fc(x), dim=1)
        square_of_sum = torch.sum(embed_x, dim=1) ** 2
        sum_of_square = torch.sum(embed_x ** 2, dim=1)
        fm_y += 0.5 * torch.sum(square_of_sum - sum_of_square, dim=1, keepdim=True)
        return fm_y

    def forward(self, x):
        """Return the sigmoid of the FM score for every row of ``x``."""
        # embedding component
        embed_x = self.embedding(x)
        # fm component
        fm_y = self.fm(x, embed_x).squeeze(1)

        y = torch.sigmoid(fm_y)
        return y

In [30]:
def load_model(saved_model, device):
    """Build the FM network, load ``best.pth`` from ``saved_model`` and freeze it.

    ``device`` is only used as ``map_location`` while reading the checkpoint.
    Every parameter of the returned model has ``requires_grad`` set to False.
    """
    model = FM(input_dims=[31360,6807,12,18], embedding_dim=10)

    checkpoint_file = os.path.join(saved_model, 'best.pth')
    state = torch.load(checkpoint_file, map_location=device)
    model.load_state_dict(state)

    # Freeze all weights in one call.
    model.requires_grad_(False)

    return model

In [31]:
class ContextualBPRv2(BPR):
    """BPR with an item bias and a context term built from frozen FM features.

    A pre-trained FM is loaded and frozen. For each (user, item, context) row
    its per-dimension interaction output is used as a feature vector, which
    is projected linearly and matched against a per-user context embedding.

    Args:
        user_num: Number of users.
        item_num: Number of items.
        factor_num: Width of the BPR and context embeddings.
        context_dim: Width of the FM feature vector; it has to equal the
            ``embedding_dim`` of the loaded FM (10 in ``load_model``).
    """

    def __init__(self, user_num, item_num, factor_num, context_dim=10):
        super(ContextualBPRv2, self).__init__(user_num, item_num, factor_num)

        # Loading onto the CPU avoids relying on a notebook-level `device`
        # variable. load_state_dict copies into the freshly built (CPU)
        # parameters either way, and the caller's .to(device) moves the whole
        # module afterwards.
        self.fm = load_model('/opt/ml/movie-recommendation/BPR/model/exp13/', torch.device('cpu'))

        # The original `assert 'Freezing doesn\'t work'` asserted a non-empty
        # string and could never fail; this checks the parameters for real.
        assert not any(p.requires_grad for p in self.fm.parameters()), 'Freezing doesn\'t work'

        self.bias_item = nn.Parameter(torch.zeros(item_num))
        self.context_bias = nn.Linear(context_dim, 1, bias=False)

        self.embed_context = nn.Linear(context_dim, factor_num, bias=False)
        self.embed_user_context = nn.Embedding(user_num, factor_num)

    def forward(self, user, item_i, item_j, context_i, context_j):
        """Return the combined scores for the ``i`` and ``j`` items."""
        bpr_i, bpr_j = self.bpr(user, item_i, item_j)

        # FM input row = [user, item, context columns...].
        context_i = self.extract_features(torch.cat([user.unsqueeze(1), item_i.unsqueeze(1), context_i], dim=1))
        context_j = self.extract_features(torch.cat([user.unsqueeze(1), item_j.unsqueeze(1), context_j], dim=1))

        cbpr_i, cbpr_j = self.cbpr(user, context_i, context_j)

        return bpr_i + cbpr_i, bpr_j + cbpr_j

    def bpr(self, user, item_i, item_j):
        """Collaborative part: user-item dot product plus item bias."""
        user = self.embed_user(user)
        b_i = self.bias_item[item_i]
        b_j = self.bias_item[item_j]

        item_i = self.embed_item(item_i)
        item_j = self.embed_item(item_j)

        prediction_i = (user * item_i).sum(dim=-1) + b_i
        prediction_j = (user * item_j).sum(dim=-1) + b_j

        return prediction_i, prediction_j

    def cbpr(self, user, context_i, context_j):
        """Context part: user-feature dot product plus a linear feature bias.

        ``context_i`` / ``context_j`` are the 2-D ``(batch, context_dim)``
        outputs of ``extract_features``.
        """
        # The linear layer yields (batch, 1); drop only the trailing axis so
        # a batch of one keeps its batch dimension.
        context_i_bias = self.context_bias(context_i).squeeze(-1)
        context_j_bias = self.context_bias(context_j).squeeze(-1)

        context_i = self.embed_context(context_i)
        context_j = self.embed_context(context_j)

        context_user = self.embed_user_context(user)

        prediction_i = (context_user * context_i).sum(dim=-1) + context_i_bias
        prediction_j = (context_user * context_j).sum(dim=-1) + context_j_bias

        return prediction_i, prediction_j

    def extract_features(self, x):
        """Return the frozen FM's per-dimension output for ``x``.

        This is the FM pairwise term kept per embedding dimension (not summed
        to a scalar), with the FM's first-order score broadcast onto every
        dimension. Result shape is ``(batch, embedding_dim)``.
        """
        embed_x = self.fm.embedding(x)
        bias = self.fm.bias + torch.sum(self.fm.fc(x), dim=1)
        square_of_sum = torch.sum(embed_x, dim=1) ** 2
        sum_of_square = torch.sum(embed_x ** 2, dim=1)
        features = 0.5 * (square_of_sum - sum_of_square) + bias

        return features

In [11]:
# Build the training and validation datasets from the BPR split.
data_dir = '/opt/ml/movie-recommendation/data/train/bpr/'
train_csv = data_dir + 'train.csv'
valid_csv = data_dir + 'valid.csv'

# Training draws 10 negatives per interaction; validation does no sampling.
train_dataset = ContextualBPRDataset(data_path=train_csv, num_negative=10)
valid_dataset = ContextualBPRDataset(data_path=valid_csv, is_training=False)

In [12]:
# Use the GPU when one is available.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

model = ContextualBPRv2(
    user_num=train_dataset.n_user,
    item_num=train_dataset.n_item,
    factor_num=10,
    context_dim=10,
)
model = model.to(device)

# Settings shared by both loaders.
loader_options = dict(batch_size=1024, num_workers=4)

train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_options)

# Validation order must stay fixed: scores are later matched to rows by position.
valid_loader = DataLoader(valid_dataset, shuffle=False, drop_last=False, **loader_options)

# Only parameters that still require gradients are optimised.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

In [15]:
# Train with the BPR pairwise loss and report validation recall@10 after every epoch.
epochs = 10
for epoch in range(epochs):
    model.train()
    # Draw a fresh set of negative items for this epoch.
    train_loader.dataset.negative_sampling()

    for batch in train_loader:
        user, item_i, item_j, context_i, context_j = [t.to(device) for t in batch]

        model.zero_grad()
        prediction_i, prediction_j = model(user, item_i, item_j, context_i, context_j)
        # logsigmoid is the numerically stable form of sigmoid().log(): the
        # latter underflows to log(0) = -inf for large negative margins.
        loss = -nn.functional.logsigmoid(prediction_i - prediction_j).sum()
        loss.backward()
        optimizer.step()

    print(f"Calculating validation results...{epoch}/{epochs}")

    model.eval()
    with torch.no_grad():
        all_preds = []
        for batch in valid_loader:
            user, item_i, item_j, context_i, context_j = [t.to(device) for t in batch]

            prediction_i, prediction_j = model(user, item_i, item_j, context_i, context_j)

            # Only the score of the real item is needed; in evaluation mode
            # item_j is just a copy of item_i.
            all_preds.append(prediction_i)

        val_recall_k = get_valid_score(all_preds, valid_dataset.data)

    print(f"[Val] recall@10 : {val_recall_k:4.4%}")

Calculating validation results...
[Val] recall@10 : 45.2105%
Calculating validation results...
[Val] recall@10 : 45.4323%
Calculating validation results...
[Val] recall@10 : 45.5011%
Calculating validation results...
[Val] recall@10 : 45.5560%
Calculating validation results...
[Val] recall@10 : 45.5850%
Calculating validation results...
[Val] recall@10 : 45.5024%
Calculating validation results...
[Val] recall@10 : 45.5563%
Calculating validation results...
[Val] recall@10 : 45.6143%
Calculating validation results...
[Val] recall@10 : 45.5441%


KeyboardInterrupt: 