We made about 20 submissions on Kaggle and show a subset of them here. Our best submission was trial 19, which achieved a Kaggle loss score of 0.41989.

In [14]:
import pandas as pd
import numpy as np
import torch 
import torch.autograd as autograd 
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

In [69]:
# Read the item feature table and the observed interactions, attach each
# item's features to its interactions, and label every observed row as 1.
item = pd.read_csv('item_feature.csv')
train = pd.read_csv('training.csv')
df = pd.merge(train, item, how='left', on='item_id')
df['label'] = 1

In [70]:
# Random negative sampling: draw 3 random (user, item, context) triples per
# observed interaction. np.random.randint treats `high` as exclusive, so 1 is
# added to keep the largest id of each column eligible for sampling.
n_neg = int(len(df) * 3)
u = np.random.randint(low=0, high=df.user_id.max() + 1, size=n_neg)
i = np.random.randint(low=0, high=df.item_id.max() + 1, size=n_neg)
c = np.random.randint(low=0, high=df.context_feature_id.max() + 1, size=n_neg)

In [71]:
# Put the sampled ids into a frame with the same columns as the positives,
# attach item features, and label the rows as negatives.
sample = pd.DataFrame({'user_id': u, 'item_id': i, 'context_feature_id': c})
sample = sample.merge(item, on='item_id', how='left')
sample['label'] = 0
# Positives come first in the concatenation, so when a sampled (user, item)
# pair collides with an observed one, drop_duplicates keeps the positive row.
combined = pd.concat([df, sample])
df = combined.drop_duplicates(subset=['user_id', 'item_id']).reset_index(drop=True)

In [72]:
df

Unnamed: 0,user_id,item_id,context_feature_id,item_feature_id,label
0,0,28366,2,7,1
1,0,16109,2,7,1
2,0,11500,3,7,1
3,0,20750,2,7,1
4,0,8759,2,7,1
...,...,...,...,...,...
3772931,13279,39020,1,138,0
3772932,161997,28197,2,142,0
3772933,189188,25692,2,138,0
3772934,37728,3128,1,148,0


In [73]:
# Separate the labelled rows so each class can be sampled independently.
pos, neg = (df[df.label == lbl].reset_index(drop=True) for lbl in (1, 0))

In [74]:
# Draw a fresh train/validation split. `training` calls this again every few
# epochs so that the model sees different splits over the course of a run.
def data_sample(pos, neg):
    """Return a shuffled ``(train, val)`` pair drawn from ``pos`` and ``neg``.

    Every row of ``pos`` and of ``neg`` is independently sent to the training
    side with probability 0.8 and to the validation side otherwise. The
    negatives on each side are then down-sampled by ``len(pos) / len(neg)``
    so that both sides hold roughly as many negatives as positives.

    Args:
        pos: DataFrame of positive rows.
        neg: DataFrame of negative rows (same columns as ``pos``).

    Returns:
        ``(train, val)``: two DataFrames with shuffled rows and a fresh
        0..n-1 index.
    """
    def _split(frame):
        # Random 80/20 row assignment.
        msk = np.random.rand(len(frame)) < 0.8
        return frame[msk], frame[~msk]

    train_pos, val_pos = _split(pos)
    train_neg, val_neg = _split(neg)

    # DataFrame.sample refuses frac > 1 without replacement, and the ratio is
    # undefined when there are no negatives; keep all negatives in the first
    # case and none in the second instead of raising.
    frac = min(1.0, len(pos) / len(neg)) if len(neg) else 0.0
    train_neg = train_neg.sample(frac=frac)
    val_neg = val_neg.sample(frac=frac)

    train = pd.concat([train_pos, train_neg]).sample(frac=1).reset_index(drop=True)
    val = pd.concat([val_pos, val_neg]).sample(frac=1).reset_index(drop=True)

    return train, val

In [58]:
train, val = data_sample(pos, neg)

### Model 1: Basic Matrix Factorization Model

In [75]:
class MF(nn.Module):
    """Matrix factorization with user and item bias terms.

    The predicted probability for a (user, item) pair is the sigmoid of the
    dot product of their embeddings plus the two bias terms.
    """

    def __init__(self, num_users, num_items, emb_size=100, seed=23):
        super().__init__()
        # Seeds torch's global RNG so the initial weights are reproducible.
        torch.manual_seed(seed)
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.item_bias = nn.Embedding(num_items, 1)
        # Small uniform init: embeddings in [0, 0.05], biases in [-0.01, 0.01].
        init_ranges = (
            (self.user_emb, 0, 0.05),
            (self.item_emb, 0, 0.05),
            (self.user_bias, -0.01, 0.01),
            (self.item_bias, -0.01, 0.01),
        )
        for table, lo, hi in init_ranges:
            table.weight.data.uniform_(lo, hi)

    def forward(self, u, v):
        """Return the interaction probability for each (u[k], v[k]) pair."""
        user_vec = self.user_emb(u)
        item_vec = self.item_emb(v)
        score = (user_vec * item_vec).sum(1)
        score = score + self.user_bias(u).squeeze() + self.item_bias(v).squeeze()
        return torch.sigmoid(score)

In [76]:
def train_one_epoch(model, train_df, optimizer):
    """Run one full-batch optimisation step and report loss and accuracy.

    All of ``train_df`` goes through ``model`` in a single forward pass, so
    an "epoch" here is exactly one optimizer step.

    Args:
        model: module called as ``model(u, v)``; its output is fed to
            ``F.binary_cross_entropy`` and therefore must lie in [0, 1].
        train_df: DataFrame with ``user_id``, ``item_id`` and ``label`` columns.
        optimizer: torch optimizer that updates ``model``'s parameters.

    Returns:
        ``(train_loss, train_acc)``; both are measured on the predictions
        made before the weight update.
    """
    model.train()
    y = torch.FloatTensor(train_df.label.values)
    u = torch.LongTensor(train_df.user_id.values)
    v = torch.LongTensor(train_df.item_id.values)

    y_hat = model(u, v)
    train_loss = F.binary_cross_entropy(y_hat, y)

    # Accuracy at the 0.5 threshold, computed directly in torch rather than
    # by handing tensors to sklearn.
    hits = (y_hat.detach() > 0.5).to(y.dtype).eq(y)
    train_acc = hits.sum().item() / y.numel()

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    return train_loss.item(), train_acc

def valid_metrics(model, valid_df):
    """Compute validation loss, accuracy and ROC AUC for ``model``.

    Args:
        model: module called as ``model(u, v)``; its output is fed to
            ``F.binary_cross_entropy`` and therefore must lie in [0, 1].
        valid_df: DataFrame with ``user_id``, ``item_id`` and ``label`` columns.

    Returns:
        ``(valid_loss, valid_acc, auc)``. Accuracy uses a 0.5 threshold.
        The model is left in eval mode.
    """
    model.eval()
    u = torch.LongTensor(valid_df.user_id.values)
    v = torch.LongTensor(valid_df.item_id.values)
    y = torch.FloatTensor(valid_df.label.values)

    # Evaluation only: no gradients are needed, so skip building the
    # autograd graph for the whole validation set.
    with torch.no_grad():
        y_hat = model(u, v)
        valid_loss = F.binary_cross_entropy(y_hat, y)

    hits = (y_hat > 0.5).to(y.dtype).eq(y)
    valid_acc = hits.sum().item() / y.numel()
    auc = roc_auc_score(y.numpy(), y_hat.numpy())
    return valid_loss.item(), valid_acc, auc

def training(model, pos, neg, epochs=10, lr=0.01, wd=0.0,
             print_every=5, resample_every=3):
    """Train ``model`` with Adam, periodically re-drawing the train/val split.

    A new Adam optimizer is created on every call, so calling this again on
    the same model continues from its weights with fresh optimizer state.

    Args:
        model: module accepted by ``train_one_epoch`` / ``valid_metrics``.
        pos: DataFrame of positive rows, passed to ``data_sample``.
        neg: DataFrame of negative rows, passed to ``data_sample``.
        epochs: number of ``train_one_epoch`` calls.
        lr: Adam learning rate.
        wd: Adam weight decay.
        print_every: print metrics when ``i % print_every == 0``;
            0 or None disables printing.
        resample_every: draw a new split when ``i % resample_every == 0``
            (including right after epoch 0); 0 or None keeps the first split.

    NOTE(review): because the split is re-drawn, rows used for validation in
    one round can be trained on in a later one, so the validation metrics do
    not come from a fixed hold-out set.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    train, val = data_sample(pos, neg)
    for i in range(epochs):
        train_loss, train_acc = train_one_epoch(model, train, optimizer)
        valid_loss, valid_acc, auc = valid_metrics(model, val)
        if print_every and i % print_every == 0:
            print("train loss %.3f train acc %.3f valid loss %.3f valid acc %.3f roc auc acc %.3f" % (train_loss,train_acc,valid_loss, valid_acc, auc))
        if resample_every and i % resample_every == 0:
            train, val = data_sample(pos, neg)

In [11]:
model = MF(df.user_id.max()+1, df.item_id.max()+1, emb_size=50) 
#optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
training(model, pos, neg, epochs=51, lr=0.1, wd=1e-6)

train loss 0.693 train acc 0.500 valid loss 0.687 valid acc 0.568 roc auc acc 0.496
train loss 0.302 train acc 0.885 valid loss 0.318 valid acc 0.875 roc auc acc 0.941
train loss 0.284 train acc 0.909 valid loss 0.291 valid acc 0.909 roc auc acc 0.967
train loss 0.208 train acc 0.956 valid loss 0.257 valid acc 0.928 roc auc acc 0.975
train loss 0.192 train acc 0.961 valid loss 0.216 valid acc 0.947 roc auc acc 0.986
train loss 0.188 train acc 0.960 valid loss 0.193 valid acc 0.956 roc auc acc 0.991
train loss 0.161 train acc 0.971 valid loss 0.207 valid acc 0.943 roc auc acc 0.987
train loss 0.176 train acc 0.963 valid loss 0.198 valid acc 0.951 roc auc acc 0.990
train loss 0.186 train acc 0.961 valid loss 0.192 valid acc 0.958 roc auc acc 0.992
train loss 0.169 train acc 0.971 valid loss 0.211 valid acc 0.946 roc auc acc 0.988
train loss 0.181 train acc 0.965 valid loss 0.201 valid acc 0.953 roc auc acc 0.991


In [12]:
training(model, pos, neg, epochs=51, lr=0.01, wd=1e-6)

train loss 0.187 train acc 0.962 valid loss 0.189 valid acc 0.961 roc auc acc 0.993
train loss 0.181 train acc 0.966 valid loss 0.185 valid acc 0.964 roc auc acc 0.995
train loss 0.182 train acc 0.967 valid loss 0.183 valid acc 0.967 roc auc acc 0.996
train loss 0.178 train acc 0.969 valid loss 0.185 valid acc 0.965 roc auc acc 0.995
train loss 0.178 train acc 0.968 valid loss 0.181 valid acc 0.967 roc auc acc 0.996
train loss 0.178 train acc 0.969 valid loss 0.178 valid acc 0.969 roc auc acc 0.996
train loss 0.175 train acc 0.970 valid loss 0.181 valid acc 0.967 roc auc acc 0.996
train loss 0.175 train acc 0.970 valid loss 0.178 valid acc 0.969 roc auc acc 0.996
train loss 0.175 train acc 0.970 valid loss 0.176 valid acc 0.970 roc auc acc 0.997
train loss 0.173 train acc 0.971 valid loss 0.178 valid acc 0.969 roc auc acc 0.996
train loss 0.174 train acc 0.971 valid loss 0.177 valid acc 0.969 roc auc acc 0.997


In [15]:
# Score the (user, item) pairs in the Kaggle test file with `model` and put
# the predictions into an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model.eval()  # be explicit about inference mode rather than relying on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.42271720177773114

In [16]:
prob.to_csv('trial4.csv',index = False)

### Model 2 (best so far)
### Add ReLU and Dropout 

In [77]:
class MF2(nn.Module):
    """Biased matrix factorization with ReLU on users and dropout on items.

    User embeddings pass through a ReLU and item embeddings through dropout
    (p = 0.1) before the dot product; the result plus both bias terms goes
    through a sigmoid.
    """

    def __init__(self, num_users, num_items, emb_size=20):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.item_bias = nn.Embedding(num_items, 1)
        # Small uniform init: embeddings in [0, 0.05], biases in [-0.01, 0.01].
        init_ranges = (
            (self.user_emb, 0, 0.05),
            (self.item_emb, 0, 0.05),
            (self.user_bias, -0.01, 0.01),
            (self.item_bias, -0.01, 0.01),
        )
        for table, lo, hi in init_ranges:
            table.weight.data.uniform_(lo, hi)
        self.classifier = nn.Sigmoid()
        self.nonlin = nn.ReLU()
        self.drop = nn.Dropout(p=0.1)

    def forward(self, u, v):
        """Return the interaction probability for each (u[k], v[k]) pair."""
        user_vec = self.nonlin(self.user_emb(u))
        item_vec = self.drop(self.item_emb(v))
        score = (user_vec * item_vec).sum(1)
        score = score + self.user_bias(u).squeeze() + self.item_bias(v).squeeze()
        return self.classifier(score)

In [19]:
model2 = MF2(df.user_id.max()+1, df.item_id.max()+1, emb_size=75) 
training(model2, pos, neg, epochs=101, lr=0.1, wd=1e-6)

train loss 0.693 train acc 0.500 valid loss 0.617 valid acc 0.832 roc auc acc 0.866
train loss 0.291 train acc 0.884 valid loss 0.331 valid acc 0.877 roc auc acc 0.942
train loss 0.292 train acc 0.907 valid loss 0.295 valid acc 0.907 roc auc acc 0.968
train loss 0.248 train acc 0.929 valid loss 0.282 valid acc 0.910 roc auc acc 0.967
train loss 0.235 train acc 0.928 valid loss 0.252 valid acc 0.917 roc auc acc 0.974
train loss 0.234 train acc 0.927 valid loss 0.238 valid acc 0.925 roc auc acc 0.978
train loss 0.213 train acc 0.941 valid loss 0.250 valid acc 0.920 roc auc acc 0.974
train loss 0.217 train acc 0.941 valid loss 0.234 valid acc 0.932 roc auc acc 0.980
train loss 0.218 train acc 0.942 valid loss 0.221 valid acc 0.941 roc auc acc 0.985
train loss 0.199 train acc 0.952 valid loss 0.234 valid acc 0.932 roc auc acc 0.981
train loss 0.206 train acc 0.950 valid loss 0.221 valid acc 0.941 roc auc acc 0.985
train loss 0.210 train acc 0.948 valid loss 0.213 valid acc 0.946 roc auc ac

In [20]:
training(model2, pos, neg, epochs=51, lr=0.01, wd=1e-6)

train loss 0.205 train acc 0.951 valid loss 0.205 valid acc 0.951 roc auc acc 0.990
train loss 0.202 train acc 0.953 valid loss 0.204 valid acc 0.953 roc auc acc 0.991
train loss 0.203 train acc 0.953 valid loss 0.202 valid acc 0.954 roc auc acc 0.992
train loss 0.200 train acc 0.955 valid loss 0.203 valid acc 0.953 roc auc acc 0.991
train loss 0.200 train acc 0.954 valid loss 0.201 valid acc 0.954 roc auc acc 0.992
train loss 0.200 train acc 0.954 valid loss 0.199 valid acc 0.955 roc auc acc 0.992
train loss 0.198 train acc 0.956 valid loss 0.200 valid acc 0.955 roc auc acc 0.992
train loss 0.198 train acc 0.956 valid loss 0.198 valid acc 0.956 roc auc acc 0.992
train loss 0.198 train acc 0.956 valid loss 0.197 valid acc 0.956 roc auc acc 0.993
train loss 0.196 train acc 0.957 valid loss 0.198 valid acc 0.956 roc auc acc 0.992
train loss 0.197 train acc 0.957 valid loss 0.196 valid acc 0.957 roc auc acc 0.993


In [21]:
# Score the (user, item) pairs in the Kaggle test file with `model2` and put
# the predictions into an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model2.eval()  # dropout must be off for prediction; do not rely on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model2(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.4084245578614786

In [22]:
prob.to_csv('trial5.csv',index = False)

### Tuning parameters
#### Change the resample frequency from every 3 epochs to 2.
#### Play around with other parameters: epoch, embedding size, learning rate, weight decay...
#### We show trials 8, 11, 13, 14, and 15 here. Trial 13 performs best among them, with a loss score of 0.42518.

In [78]:
def train_one_epoch(model, train_df, optimizer):
    """Run one full-batch optimisation step and report loss and accuracy.

    All of ``train_df`` goes through ``model`` in a single forward pass, so
    an "epoch" here is exactly one optimizer step.

    Args:
        model: module called as ``model(u, v)``; its output is fed to
            ``F.binary_cross_entropy`` and therefore must lie in [0, 1].
        train_df: DataFrame with ``user_id``, ``item_id`` and ``label`` columns.
        optimizer: torch optimizer that updates ``model``'s parameters.

    Returns:
        ``(train_loss, train_acc)``; both are measured on the predictions
        made before the weight update.
    """
    model.train()
    y = torch.FloatTensor(train_df.label.values)
    u = torch.LongTensor(train_df.user_id.values)
    v = torch.LongTensor(train_df.item_id.values)

    y_hat = model(u, v)
    train_loss = F.binary_cross_entropy(y_hat, y)

    # Accuracy at the 0.5 threshold, computed directly in torch rather than
    # by handing tensors to sklearn.
    hits = (y_hat.detach() > 0.5).to(y.dtype).eq(y)
    train_acc = hits.sum().item() / y.numel()

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    return train_loss.item(), train_acc

def valid_metrics(model, valid_df):
    """Compute validation loss, accuracy and ROC AUC for ``model``.

    Args:
        model: module called as ``model(u, v)``; its output is fed to
            ``F.binary_cross_entropy`` and therefore must lie in [0, 1].
        valid_df: DataFrame with ``user_id``, ``item_id`` and ``label`` columns.

    Returns:
        ``(valid_loss, valid_acc, auc)``. Accuracy uses a 0.5 threshold.
        The model is left in eval mode.
    """
    model.eval()
    u = torch.LongTensor(valid_df.user_id.values)
    v = torch.LongTensor(valid_df.item_id.values)
    y = torch.FloatTensor(valid_df.label.values)

    # Evaluation only: no gradients are needed, so skip building the
    # autograd graph for the whole validation set.
    with torch.no_grad():
        y_hat = model(u, v)
        valid_loss = F.binary_cross_entropy(y_hat, y)

    hits = (y_hat > 0.5).to(y.dtype).eq(y)
    valid_acc = hits.sum().item() / y.numel()
    auc = roc_auc_score(y.numpy(), y_hat.numpy())
    return valid_loss.item(), valid_acc, auc

def training(model, pos, neg, epochs=10, lr=0.01, wd=0.0,
             print_every=10, resample_every=2):
    """Train ``model`` with Adam, periodically re-drawing the train/val split.

    A new Adam optimizer is created on every call, so calling this again on
    the same model continues from its weights with fresh optimizer state.

    Args:
        model: module accepted by ``train_one_epoch`` / ``valid_metrics``.
        pos: DataFrame of positive rows, passed to ``data_sample``.
        neg: DataFrame of negative rows, passed to ``data_sample``.
        epochs: number of ``train_one_epoch`` calls.
        lr: Adam learning rate.
        wd: Adam weight decay.
        print_every: print metrics when ``i % print_every == 0``;
            0 or None disables printing.
        resample_every: draw a new split when ``i % resample_every == 0``
            (including right after epoch 0); 0 or None keeps the first split.

    NOTE(review): because the split is re-drawn, rows used for validation in
    one round can be trained on in a later one, so the validation metrics do
    not come from a fixed hold-out set.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    train, val = data_sample(pos, neg)
    for i in range(epochs):
        train_loss, train_acc = train_one_epoch(model, train, optimizer)
        valid_loss, valid_acc, auc = valid_metrics(model, val)
        if print_every and i % print_every == 0:
            print("train loss %.3f train acc %.3f valid loss %.3f valid acc %.3f roc auc acc %.3f" % (train_loss,train_acc,valid_loss, valid_acc, auc))
        if resample_every and i % resample_every == 0:
            train, val = data_sample(pos, neg)
        

In [24]:
model_8 = MF2(df.user_id.max()+1, df.item_id.max()+1, emb_size=75) 
training(model_8, pos, neg, epochs=126, lr=0.1, wd=1e-6)

train loss 0.693 train acc 0.500 valid loss 0.618 valid acc 0.831 roc auc acc 0.865
train loss 0.285 train acc 0.909 valid loss 0.306 valid acc 0.902 roc auc acc 0.964
train loss 0.233 train acc 0.929 valid loss 0.250 valid acc 0.919 roc auc acc 0.975
train loss 0.218 train acc 0.939 valid loss 0.234 valid acc 0.931 roc auc acc 0.980
train loss 0.207 train acc 0.948 valid loss 0.223 valid acc 0.939 roc auc acc 0.984
train loss 0.202 train acc 0.952 valid loss 0.218 valid acc 0.943 roc auc acc 0.987
train loss 0.200 train acc 0.954 valid loss 0.215 valid acc 0.945 roc auc acc 0.988
train loss 0.199 train acc 0.954 valid loss 0.213 valid acc 0.947 roc auc acc 0.988
train loss 0.198 train acc 0.955 valid loss 0.212 valid acc 0.947 roc auc acc 0.988
train loss 0.198 train acc 0.955 valid loss 0.211 valid acc 0.947 roc auc acc 0.988
train loss 0.198 train acc 0.955 valid loss 0.211 valid acc 0.947 roc auc acc 0.988
train loss 0.197 train acc 0.955 valid loss 0.211 valid acc 0.947 roc auc ac

In [25]:
training(model_8, pos, neg, epochs=126, lr=0.001, wd=1e-6)

train loss 0.202 train acc 0.953 valid loss 0.200 valid acc 0.955 roc auc acc 0.991
train loss 0.201 train acc 0.953 valid loss 0.199 valid acc 0.955 roc auc acc 0.992
train loss 0.201 train acc 0.954 valid loss 0.199 valid acc 0.955 roc auc acc 0.992
train loss 0.200 train acc 0.954 valid loss 0.199 valid acc 0.956 roc auc acc 0.992
train loss 0.200 train acc 0.955 valid loss 0.198 valid acc 0.956 roc auc acc 0.992
train loss 0.200 train acc 0.955 valid loss 0.198 valid acc 0.956 roc auc acc 0.992
train loss 0.199 train acc 0.955 valid loss 0.198 valid acc 0.956 roc auc acc 0.992
train loss 0.199 train acc 0.955 valid loss 0.198 valid acc 0.956 roc auc acc 0.993
train loss 0.199 train acc 0.955 valid loss 0.197 valid acc 0.956 roc auc acc 0.993
train loss 0.199 train acc 0.956 valid loss 0.197 valid acc 0.957 roc auc acc 0.993
train loss 0.198 train acc 0.956 valid loss 0.197 valid acc 0.957 roc auc acc 0.993
train loss 0.198 train acc 0.956 valid loss 0.197 valid acc 0.957 roc auc ac

In [26]:
# Score the (user, item) pairs in the Kaggle test file with `model_8` and put
# the predictions into an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model_8.eval()  # dropout must be off for prediction; do not rely on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model_8(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.4057002766233596

In [27]:
prob.to_csv('trial8.csv',index = False)

In [28]:
model_9 = MF2(df.user_id.max()+1, df.item_id.max()+1,emb_size=75) 
training(model_9, pos, neg, epochs=126, lr=0.1, wd=5e-07)

train loss 0.693 train acc 0.500 valid loss 0.599 valid acc 0.842 roc auc acc 0.865
train loss 0.224 train acc 0.933 valid loss 0.244 valid acc 0.926 roc auc acc 0.976
train loss 0.144 train acc 0.969 valid loss 0.159 valid acc 0.962 roc auc acc 0.991
train loss 0.128 train acc 0.979 valid loss 0.145 valid acc 0.971 roc auc acc 0.995
train loss 0.118 train acc 0.982 valid loss 0.132 valid acc 0.975 roc auc acc 0.996
train loss 0.113 train acc 0.985 valid loss 0.126 valid acc 0.978 roc auc acc 0.997
train loss 0.110 train acc 0.986 valid loss 0.123 valid acc 0.980 roc auc acc 0.998
train loss 0.109 train acc 0.986 valid loss 0.122 valid acc 0.981 roc auc acc 0.998
train loss 0.108 train acc 0.986 valid loss 0.121 valid acc 0.981 roc auc acc 0.998
train loss 0.108 train acc 0.987 valid loss 0.120 valid acc 0.981 roc auc acc 0.998
train loss 0.107 train acc 0.987 valid loss 0.119 valid acc 0.982 roc auc acc 0.998
train loss 0.107 train acc 0.987 valid loss 0.119 valid acc 0.981 roc auc ac

In [29]:
training(model_9, pos, neg, epochs=126, lr=0.00001, wd=5e-07)

train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.108 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.108 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.108 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.108 valid acc 0.987 roc auc acc 0.999
train loss 0.111 train acc 0.985 valid loss 0.109 valid acc 0.987 roc auc ac

In [30]:
# Score the (user, item) pairs in the Kaggle test file with `model_9` and put
# the predictions into an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model_9.eval()  # dropout must be off for prediction; do not rely on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model_9(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.3593167009714593

In [31]:
prob.to_csv('trial11.csv',index = False)

In [32]:
model_13 = MF2(df.user_id.max()+1, df.item_id.max()+1, emb_size=100) 
training(model_13, pos, neg, epochs=126, lr=.09, wd=1e-6)

train loss 0.694 train acc 0.500 valid loss 0.618 valid acc 0.831 roc auc acc 0.864
train loss 0.284 train acc 0.911 valid loss 0.306 valid acc 0.902 roc auc acc 0.965
train loss 0.231 train acc 0.932 valid loss 0.249 valid acc 0.921 roc auc acc 0.976
train loss 0.214 train acc 0.941 valid loss 0.232 valid acc 0.932 roc auc acc 0.981
train loss 0.205 train acc 0.949 valid loss 0.221 valid acc 0.940 roc auc acc 0.985
train loss 0.200 train acc 0.953 valid loss 0.216 valid acc 0.944 roc auc acc 0.987
train loss 0.198 train acc 0.955 valid loss 0.213 valid acc 0.946 roc auc acc 0.988
train loss 0.197 train acc 0.955 valid loss 0.213 valid acc 0.947 roc auc acc 0.988
train loss 0.197 train acc 0.956 valid loss 0.211 valid acc 0.948 roc auc acc 0.989
train loss 0.196 train acc 0.956 valid loss 0.211 valid acc 0.948 roc auc acc 0.989
train loss 0.196 train acc 0.956 valid loss 0.210 valid acc 0.949 roc auc acc 0.989
train loss 0.196 train acc 0.956 valid loss 0.209 valid acc 0.949 roc auc ac

In [33]:
# Score the (user, item) pairs in the Kaggle test file with `model_13` and
# put the predictions into an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model_13.eval()  # dropout must be off for prediction; do not rely on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model_13(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.4047432384598241

In [34]:
prob.to_csv('trial13.csv',index = False) 

In [35]:
model_14 = MF2(df.user_id.max()+1, df.item_id.max()+1, emb_size=60) 
training(model_14, pos, neg, epochs=251, lr=.1, wd=1e-6)

train loss 0.693 train acc 0.500 valid loss 0.624 valid acc 0.830 roc auc acc 0.868
train loss 0.286 train acc 0.906 valid loss 0.306 valid acc 0.900 roc auc acc 0.964
train loss 0.238 train acc 0.926 valid loss 0.252 valid acc 0.918 roc auc acc 0.974
train loss 0.221 train acc 0.936 valid loss 0.237 valid acc 0.928 roc auc acc 0.979
train loss 0.210 train acc 0.946 valid loss 0.225 valid acc 0.938 roc auc acc 0.984
train loss 0.203 train acc 0.951 valid loss 0.218 valid acc 0.942 roc auc acc 0.986
train loss 0.202 train acc 0.953 valid loss 0.215 valid acc 0.944 roc auc acc 0.987
train loss 0.200 train acc 0.953 valid loss 0.214 valid acc 0.945 roc auc acc 0.988
train loss 0.200 train acc 0.953 valid loss 0.212 valid acc 0.947 roc auc acc 0.988
train loss 0.199 train acc 0.954 valid loss 0.212 valid acc 0.947 roc auc acc 0.988
train loss 0.199 train acc 0.954 valid loss 0.212 valid acc 0.947 roc auc acc 0.988
train loss 0.199 train acc 0.954 valid loss 0.211 valid acc 0.947 roc auc ac

In [36]:
# Score the (user, item) pairs in the Kaggle test file with `model_14` and
# put the predictions into an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model_14.eval()  # dropout must be off for prediction; do not rely on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model_14(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.40841406977201516

In [37]:
prob.to_csv('trial14.csv',index = False)

In [54]:
model_15 = MF2(df.user_id.max()+1, df.item_id.max()+1, emb_size=125) 
training(model_15, pos, neg, epochs=251, lr=.09, wd=1e-6)

train loss 0.694 train acc 0.500 valid loss 0.611 valid acc 0.834 roc auc acc 0.863
train loss 0.284 train acc 0.913 valid loss 0.306 valid acc 0.905 roc auc acc 0.965
train loss 0.228 train acc 0.934 valid loss 0.248 valid acc 0.922 roc auc acc 0.976
train loss 0.212 train acc 0.943 valid loss 0.230 valid acc 0.933 roc auc acc 0.981
train loss 0.202 train acc 0.951 valid loss 0.220 valid acc 0.940 roc auc acc 0.985
train loss 0.198 train acc 0.954 valid loss 0.215 valid acc 0.945 roc auc acc 0.987
train loss 0.197 train acc 0.955 valid loss 0.213 valid acc 0.946 roc auc acc 0.988
train loss 0.195 train acc 0.956 valid loss 0.212 valid acc 0.947 roc auc acc 0.989
train loss 0.195 train acc 0.957 valid loss 0.211 valid acc 0.948 roc auc acc 0.989
train loss 0.195 train acc 0.957 valid loss 0.211 valid acc 0.948 roc auc acc 0.989
train loss 0.195 train acc 0.957 valid loss 0.210 valid acc 0.949 roc auc acc 0.989
train loss 0.195 train acc 0.957 valid loss 0.209 valid acc 0.949 roc auc ac

In [55]:
# Score the (user, item) pairs in the Kaggle test file with `model_15` and
# put the predictions into an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model_15.eval()  # dropout must be off for prediction; do not rely on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model_15(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.40132412129475464

In [56]:
prob.to_csv('trial15.csv',index = False)

## Model 3: Add more ReLU and Dropout layers
### Trial 16

In [79]:
class MF3(nn.Module):
    """Biased matrix factorization with ReLU and dropout on both sides.

    User and item embeddings each pass through their own ReLU and dropout
    (p = 0.05) before the dot product; the result plus both bias terms goes
    through a sigmoid.
    """

    def __init__(self, num_users, num_items, emb_size=20):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.item_bias = nn.Embedding(num_items, 1)
        # Small uniform init: embeddings in [0, 0.05], biases in [-0.01, 0.01].
        init_ranges = (
            (self.user_emb, 0, 0.05),
            (self.item_emb, 0, 0.05),
            (self.user_bias, -0.01, 0.01),
            (self.item_bias, -0.01, 0.01),
        )
        for table, lo, hi in init_ranges:
            table.weight.data.uniform_(lo, hi)
        self.classifier = nn.Sigmoid()
        self.nonlin = nn.ReLU()
        self.nonlin2 = nn.ReLU()
        self.drop = nn.Dropout(p=0.05)
        self.drop2 = nn.Dropout(p=0.05)

    def forward(self, u, v):
        """Return the interaction probability for each (u[k], v[k]) pair."""
        # User branch is evaluated before the item branch.
        user_vec = self.drop(self.nonlin(self.user_emb(u)))
        item_vec = self.drop2(self.nonlin2(self.item_emb(v)))
        score = (user_vec * item_vec).sum(1)
        score = score + self.user_bias(u).squeeze() + self.item_bias(v).squeeze()
        return self.classifier(score)

In [58]:
model_16 = MF3(df.user_id.max()+1, df.item_id.max()+1, emb_size=75) 
training(model_16, pos, neg, epochs=76, lr=.1, wd=1e-6)

train loss 0.693 train acc 0.500 valid loss 0.629 valid acc 0.773 roc auc acc 0.852
train loss 0.325 train acc 0.891 valid loss 0.336 valid acc 0.886 roc auc acc 0.953
train loss 0.261 train acc 0.906 valid loss 0.277 valid acc 0.900 roc auc acc 0.965
train loss 0.253 train acc 0.916 valid loss 0.266 valid acc 0.911 roc auc acc 0.970
train loss 0.241 train acc 0.928 valid loss 0.253 valid acc 0.922 roc auc acc 0.976
train loss 0.227 train acc 0.938 valid loss 0.238 valid acc 0.933 roc auc acc 0.981
train loss 0.221 train acc 0.943 valid loss 0.233 valid acc 0.937 roc auc acc 0.983
train loss 0.220 train acc 0.945 valid loss 0.231 valid acc 0.939 roc auc acc 0.984


In [59]:
training(model_16, pos, neg, epochs=76, lr=.001, wd=1e-6)

train loss 0.223 train acc 0.943 valid loss 0.221 valid acc 0.945 roc auc acc 0.987
train loss 0.222 train acc 0.944 valid loss 0.220 valid acc 0.945 roc auc acc 0.987
train loss 0.221 train acc 0.944 valid loss 0.219 valid acc 0.946 roc auc acc 0.987
train loss 0.220 train acc 0.945 valid loss 0.219 valid acc 0.946 roc auc acc 0.987
train loss 0.220 train acc 0.945 valid loss 0.218 valid acc 0.947 roc auc acc 0.988
train loss 0.220 train acc 0.945 valid loss 0.218 valid acc 0.947 roc auc acc 0.988
train loss 0.219 train acc 0.945 valid loss 0.218 valid acc 0.947 roc auc acc 0.988
train loss 0.219 train acc 0.946 valid loss 0.217 valid acc 0.947 roc auc acc 0.988


In [60]:
# Score the (user, item) pairs in the Kaggle test file with model_16; the
# bare `y_hat` on the last line displays the predicted probabilities.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
# NOTE(review): this forward pass runs with autograd enabled (the displayed
# tensor carries a grad_fn); torch.no_grad() would avoid building the graph.
y_hat = model_16(u,v)
y_hat

tensor([0.3365, 0.2597, 0.7683,  ..., 0.8741, 0.8741, 0.1681],
       grad_fn=<SigmoidBackward0>)

In [61]:
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns = {'index':'id',0:'rating'})
sum(prob.rating>0.5)/len(prob)

0.391179516761278

In [62]:
prob.to_csv('trial16.csv',index = False) # 12 and 13

## Revisit Model 2
### Trial 17 and 18

In [63]:
# Trial 17: Model 2 (MF2) with embedding size 90, trained for 101 epochs.
model_17 = MF2(df.user_id.max()+1, df.item_id.max()+1, emb_size=90) 
training(model_17, pos, neg, epochs=101, lr=.07, wd=1e-6) # trial 17

train loss 0.694 train acc 0.500 valid loss 0.639 valid acc 0.829 roc auc acc 0.867
train loss 0.285 train acc 0.901 valid loss 0.309 valid acc 0.894 roc auc acc 0.960
train loss 0.247 train acc 0.929 valid loss 0.256 valid acc 0.923 roc auc acc 0.977
train loss 0.221 train acc 0.938 valid loss 0.235 valid acc 0.931 roc auc acc 0.981
train loss 0.212 train acc 0.946 valid loss 0.224 valid acc 0.939 roc auc acc 0.985
train loss 0.203 train acc 0.951 valid loss 0.215 valid acc 0.944 roc auc acc 0.987
train loss 0.200 train acc 0.953 valid loss 0.211 valid acc 0.947 roc auc acc 0.989
train loss 0.198 train acc 0.955 valid loss 0.210 valid acc 0.948 roc auc acc 0.989
train loss 0.197 train acc 0.956 valid loss 0.208 valid acc 0.949 roc auc acc 0.990
train loss 0.196 train acc 0.956 valid loss 0.207 valid acc 0.950 roc auc acc 0.990
train loss 0.196 train acc 0.956 valid loss 0.208 valid acc 0.949 roc auc acc 0.990


In [64]:
# Score the (user, item) pairs in the Kaggle test file with model_17; the
# bare `y_hat` on the last line displays the predicted probabilities.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
# NOTE(review): this forward pass runs with autograd enabled (the displayed
# tensor carries a grad_fn); torch.no_grad() would avoid building the graph.
y_hat = model_17(u,v)
y_hat

tensor([0.3272, 0.2307, 0.6433,  ..., 0.7915, 0.7915, 0.1271],
       grad_fn=<SigmoidBackward0>)

In [65]:
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns = {'index':'id',0:'rating'})
sum(prob.rating>0.5)/len(prob)

0.41192495771988935

In [66]:
prob.to_csv('trial17.csv',index = False) 

In [67]:
training(model_17, pos, neg, epochs=51, lr=.0001, wd=1e-6) # trial 18

train loss 0.199 train acc 0.954 valid loss 0.198 valid acc 0.955 roc auc acc 0.992
train loss 0.200 train acc 0.954 valid loss 0.198 valid acc 0.955 roc auc acc 0.992
train loss 0.199 train acc 0.954 valid loss 0.198 valid acc 0.956 roc auc acc 0.992
train loss 0.199 train acc 0.954 valid loss 0.198 valid acc 0.956 roc auc acc 0.992
train loss 0.199 train acc 0.955 valid loss 0.199 valid acc 0.956 roc auc acc 0.992
train loss 0.199 train acc 0.955 valid loss 0.198 valid acc 0.955 roc auc acc 0.992


In [68]:
# Score the (user, item) pairs in the Kaggle test file with `model_17` (after
# the additional low-learning-rate training run) and put the predictions into
# an (id, rating) frame.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
model_17.eval()  # dropout must be off for prediction; do not rely on what ran last
with torch.no_grad():  # inference only: do not build the autograd graph
    y_hat = model_17(u, v)
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns={'index': 'id', 0: 'rating'})
# fraction of predicted probabilities above 0.5
(prob.rating > 0.5).mean()

0.411948555921182

In [69]:
prob.to_csv('trial18.csv',index = False)

### Trial 19

Tune the negative-sampling hyperparameter: draw 2 random negatives per positive interaction instead of 3.

In [80]:
# Start over from the raw files: read the item feature table and the observed
# interactions, attach item features, and label every observed row as 1.
item = pd.read_csv('item_feature.csv')
train = pd.read_csv('training.csv')
df = pd.merge(train, item, how='left', on='item_id')
df['label'] = 1

In [81]:
# Random negative sampling: draw 2 random (user, item, context) triples per
# observed interaction. np.random.randint treats `high` as exclusive, so 1 is
# added to keep the largest id of each column eligible for sampling.
n_neg = int(len(df) * 2)
u = np.random.randint(low=0, high=df.user_id.max() + 1, size=n_neg)
i = np.random.randint(low=0, high=df.item_id.max() + 1, size=n_neg)
c = np.random.randint(low=0, high=df.context_feature_id.max() + 1, size=n_neg)

In [82]:
# Put the sampled ids into a frame with the same columns as the positives,
# attach item features, and label the rows as negatives.
sample = pd.DataFrame({'user_id': u, 'item_id': i, 'context_feature_id': c})
sample = sample.merge(item, on='item_id', how='left')
sample['label'] = 0
# Positives come first in the concatenation, so when a sampled (user, item)
# pair collides with an observed one, drop_duplicates keeps the positive row.
combined = pd.concat([df, sample])
df = combined.drop_duplicates(subset=['user_id', 'item_id']).reset_index(drop=True)

In [83]:
# Separate the labelled rows so each class can be sampled independently.
pos, neg = (df[df.label == lbl].reset_index(drop=True) for lbl in (1, 0))

In [84]:
model_18 = MF2(df.user_id.max()+1, df.item_id.max()+1, emb_size=75) # our best result with test data
training(model_18, pos, neg, epochs=126, lr=.12, wd=1e-6)

train loss 0.693 train acc 0.500 valid loss 0.601 valid acc 0.837 roc auc acc 0.865
train loss 0.271 train acc 0.918 valid loss 0.292 valid acc 0.908 roc auc acc 0.967
train loss 0.219 train acc 0.935 valid loss 0.242 valid acc 0.922 roc auc acc 0.976
train loss 0.208 train acc 0.947 valid loss 0.227 valid acc 0.937 roc auc acc 0.982
train loss 0.198 train acc 0.954 valid loss 0.218 valid acc 0.942 roc auc acc 0.986
train loss 0.195 train acc 0.957 valid loss 0.215 valid acc 0.945 roc auc acc 0.987
train loss 0.194 train acc 0.957 valid loss 0.213 valid acc 0.946 roc auc acc 0.988
train loss 0.193 train acc 0.958 valid loss 0.211 valid acc 0.948 roc auc acc 0.988
train loss 0.192 train acc 0.958 valid loss 0.211 valid acc 0.948 roc auc acc 0.988
train loss 0.192 train acc 0.958 valid loss 0.209 valid acc 0.948 roc auc acc 0.989
train loss 0.192 train acc 0.959 valid loss 0.210 valid acc 0.948 roc auc acc 0.988
train loss 0.192 train acc 0.958 valid loss 0.209 valid acc 0.949 roc auc ac

In [85]:
# Score the (user, item) pairs in the Kaggle test file with model_18; the
# bare `y_hat` on the last line displays the predicted probabilities.
test = pd.read_csv('test_kaggle.csv')
u = torch.LongTensor(test.user_id.values)
v = torch.LongTensor(test.item_id.values)
# NOTE(review): this forward pass runs with autograd enabled (the displayed
# tensor carries a grad_fn); torch.no_grad() would avoid building the graph.
y_hat = model_18(u,v)
y_hat

tensor([0.4645, 0.2722, 0.8072,  ..., 0.9325, 0.9325, 0.1507],
       grad_fn=<SigmoidBackward0>)

In [86]:
prob = pd.Series(y_hat.detach().numpy()).reset_index().rename(columns = {'index':'id',0:'rating'})
sum(prob.rating>0.5)/len(prob)

0.4235090525322181

In [70]:
prob.to_csv('trial19.csv',index = False)