In [1]:
%matplotlib inline
import torch 
import torch.autograd as autograd 
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd

# Import Data

In [2]:
# Interaction log: one row per (user_id, item_id, context_feature_id).
training = pd.read_csv('../data/training.csv')
# Item side information: item_id -> item_feature_id.
item_feature = pd.read_csv('../data/item_feature.csv')
# Kaggle rows to score: id, user_id, item_id, context_feature_id.
test_kaggle = pd.read_csv('../data/test_kaggle.csv')
# Example of the expected submission file.
sample_submission = pd.read_csv('../data/sample_submission.csv')

In [3]:
# number of test users that never appear in the training data (cold-start users)
len(set(test_kaggle.user_id.unique()) - set(training.user_id.unique()))

30455

# Data Preparation: Implicit Ratings to training data

In [4]:
# training.drop(columns=['context_feature_id'], inplace=True)
training.head()

Unnamed: 0,user_id,item_id,context_feature_id
0,0,28366,2
1,0,16109,2
2,0,11500,3
3,0,20750,2
4,0,8759,2


In [5]:
from collections import defaultdict

# user_id -> set of item_ids the user interacted with (the "positive" items).
positive_item_dict = defaultdict(set)
# Pair the two id columns element-wise instead of materialising a 2-D array.
user_items_list = list(zip(training['user_id'].to_numpy(), training['item_id'].to_numpy()))
for interacting_user, interacted_item in user_items_list:
    positive_item_dict[interacting_user].add(interacted_item)

In [6]:
# Every distinct item that appears in at least one training interaction.
all_itemIds = set(training['item_id'].unique())
# Candidate pool for negative sampling: every item EXCEPT the rarest ones.
# value_counts() sorts from most to least frequent, so the tail holds the
# rarest items; those are excluded from the pool.
NUM_RAREST_ITEMS_EXCLUDED = 200
item_popularity = training['item_id'].value_counts()
# Clamp at 0 so a catalogue smaller than the cut-off yields an empty pool
# instead of a negative slice bound that would wrap around.
num_items_kept = max(len(item_popularity) - NUM_RAREST_ITEMS_EXCLUDED, 0)
most_common_items = set(item_popularity.index[:num_items_kept])

In [7]:
len(all_itemIds)

37978

In [8]:
len(most_common_items)

37778

In [9]:
import random
def find_negatives_items_id(df, user_id, positives=None, candidates=None, ratio=4):
    """Sample negative (non-interacted) item ids for one user.

    Parameters
    ----------
    df : unused; kept so existing call sites keep working.
    user_id : key looked up in ``positives``.
    positives : mapping user_id -> set of interacted item ids.
        Defaults to the notebook-level ``positive_item_dict``.
    candidates : set of item ids negatives may be drawn from.
        Defaults to the notebook-level ``most_common_items``.
    ratio : number of negatives requested per positive item (default 4).

    Returns
    -------
    list of item ids drawn without replacement from
    ``candidates - positives[user_id]``. At most ``ratio * len(positives)``
    ids are returned; fewer when the candidate pool is too small.
    """
    if positives is None:
        positives = positive_item_dict
    if candidates is None:
        candidates = most_common_items

    # items ids user interacted are considered as positives
    # (.get avoids inserting empty entries into a defaultdict for unknown users)
    item_ids_Interacted = positives.get(user_id, set())
    all_negative_items_id = list(candidates - item_ids_Interacted)
    # random.sample raises ValueError when asked for more elements than the
    # population holds, so cap the request for very active users.
    num_wanted = min(ratio * len(item_ids_Interacted), len(all_negative_items_id))
    return random.sample(all_negative_items_id, num_wanted)

In [10]:
from tqdm.notebook import tqdm

In [11]:
# Map every training user to a freshly sampled list of negative item ids.
all_UserIds = training['user_id'].unique()
negative_samples_dict = {
    user: find_negatives_items_id(training, user)
    for user in tqdm(all_UserIds)
}

  0%|          | 0/169698 [00:00<?, ?it/s]

In [12]:
len(all_UserIds)

169698

In [13]:
# rating = 1 for all the interacted items
training['rating'] = 1
training.head(5)

Unnamed: 0,user_id,item_id,context_feature_id,rating
0,0,28366,2,1
1,0,16109,2,1
2,0,11500,3,1
3,0,20750,2,1
4,0,8759,2,1


In [14]:
# Flatten {user_id: [negative item ids]} into one row per (user_id, item_id).
# The frame is built directly in long format: a wide frame with one column per
# user would be NaN-padded to the longest list and then have to be melted.
samples_per_user = np.asarray(
    [len(items) for items in negative_samples_dict.values()], dtype=np.int64
)
neg_user_ids = np.repeat(
    np.fromiter(negative_samples_dict.keys(), dtype=np.int64, count=len(negative_samples_dict)),
    samples_per_user,
)
neg_item_ids = np.fromiter(
    (item for items in negative_samples_dict.values() for item in items),
    dtype=np.int64,
    count=int(samples_per_user.sum()),
)
neg_samples_df = pd.DataFrame({'user_id': neg_user_ids, 'item_id': neg_item_ids})
neg_samples_df['rating'] = 0 ## giving negative samples as 0 probability
neg_samples_df.head(5)

Unnamed: 0,user_id,item_id,rating
0,0,22164,0
1,0,19310,0
2,0,38140,0
3,0,13516,0
4,0,20259,0


In [15]:
# adding negative samples to the training dataframe
# (DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Negative rows have no context_feature_id, so that column becomes NaN for them.)
new_train = pd.concat([training, neg_samples_df], ignore_index=True)
new_train.sample(5)

Unnamed: 0,user_id,item_id,context_feature_id,rating
846166,174507,37313,2.0,1
1518941,31899,31583,,0
2423880,84138,1026,,0
3507637,146992,4004,,0
4138861,183669,9199,,0


In [16]:
new_train.shape

(4422217, 4)

In [17]:
item_feature.head()

Unnamed: 0,item_id,item_feature_id
0,0,139
1,1,55
2,2,11
3,3,138
4,4,138


In [18]:
# Attach item_feature_id to every interaction row.
# validate='many_to_one' fails loudly if item_feature lists an item twice
# (which would duplicate interaction rows); the row-count check catches
# interactions silently dropped by the inner join because their item has no
# feature row.
rows_before_merge = len(new_train)
new_train = new_train.merge(item_feature, on='item_id', validate='many_to_one')
assert len(new_train) == rows_before_merge, "merge with item_feature dropped interaction rows"
new_train.head()

Unnamed: 0,user_id,item_id,context_feature_id,rating,item_feature_id
0,0,28366,2.0,1,7
1,1731,28366,1.0,1,7
2,10168,28366,2.0,1,7
3,18883,28366,1.0,1,7
4,19763,28366,2.0,1,7


In [19]:
new_train.shape

(4422217, 5)

In [20]:
new_train['context_feature_id'].value_counts()

2.0    485857
1.0    270187
3.0    145267
0.0     68934
Name: context_feature_id, dtype: int64

In [21]:
def _most_frequent(values):
    """Return the first mode of a Series (Series.mode skips NaN by default)."""
    return values.mode().iloc[0]


# One context value per user: the context that user was seen in most often.
user_id_context_mode = new_train.groupby(['user_id'])['context_feature_id'].apply(_most_frequent)
user_id_context_mode

user_id
0         2.0
1         2.0
2         1.0
3         1.0
5         1.0
         ... 
200147    2.0
200148    2.0
200149    2.0
200150    2.0
200152    2.0
Name: context_feature_id, Length: 169698, dtype: float64

In [22]:
# Bring each user's modal context onto every row of that user. The original
# per-row column collides by name and is renamed with the 'l_' suffix.
new_train = new_train.join(
    user_id_context_mode,
    on='user_id',
    how='left',
    lsuffix='l_',
)

In [23]:
new_train.head()

Unnamed: 0,user_id,item_id,context_feature_idl_,rating,item_feature_id,context_feature_id
0,0,28366,2.0,1,7,2.0
1,1731,28366,1.0,1,7,1.0
2,10168,28366,2.0,1,7,1.0
3,18883,28366,1.0,1,7,1.0
4,19763,28366,2.0,1,7,1.0


In [24]:
new_train.drop(columns=['context_feature_idl_'], inplace=True)

In [25]:
new_train.isnull().values.any()

False

In [26]:
new_train['context_feature_id'] = pd.to_numeric(new_train['context_feature_id'], downcast='integer')
new_train.head()

Unnamed: 0,user_id,item_id,rating,item_feature_id,context_feature_id
0,0,28366,1,7,2
1,1731,28366,1,7,1
2,10168,28366,1,7,1
3,18883,28366,1,7,1
4,19763,28366,1,7,1


# Splitting the dataset into Train and validation dataset

In [27]:
from sklearn.model_selection import train_test_split

# 80/20 shuffled split, stratified on the label so both parts keep the same
# positive/negative ratio; the fixed seed makes the split repeatable.
random_state = 3
train, val = train_test_split(
    new_train,
    test_size=0.2,
    random_state=random_state,
    shuffle=True,
    stratify=new_train['rating'],
)

In [28]:
train.shape, val.shape

((3537773, 5), (884444, 5))

In [29]:
val.head(5)

Unnamed: 0,user_id,item_id,rating,item_feature_id,context_feature_id
4278067,2828,13904,0,139,0
2753690,175511,15602,0,84,2
2168548,169268,5813,0,148,2
2632089,158707,23201,0,94,3
482265,153989,34678,1,147,1


In [30]:
#max(list(train.user_id.values)) + 1
train.user_id.max() + 1

200153

# Model

In [35]:
class Implicit_NeuralNets(nn.Module):
    """Embedding + MLP scorer for implicit-feedback (user, item) pairs.

    Four id inputs (user, item, item feature, user context) are embedded,
    concatenated and passed through one hidden layer. ``forward`` returns raw
    logits of shape (batch, 1); callers apply the sigmoid.

    Parameters
    ----------
    num_users, num_items, num_features_id, num_user_context_id : int
        Sizes of the four embedding tables (largest id + 1).
    emb_size : int
        Width of the user and item embeddings (default 50).
    feature_emb_size : int
        Width of the item-feature embedding (default 8).
    context_emb_size : int
        Width of the user-context embedding (default 2).
    hidden_size : int
        Width of the hidden layer (default 80).
    dropout : float
        Dropout probability applied before each linear layer (default 0.3).
    """

    def __init__(self, num_users, num_items, num_features_id, num_user_context_id,
                 emb_size=50, feature_emb_size=8, context_emb_size=2,
                 hidden_size=80, dropout=0.3):
        super(Implicit_NeuralNets, self).__init__()
        #user and items embeddings
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        #features
        self.item_feature_id = nn.Embedding(num_features_id, feature_emb_size)
        self.user_context_id = nn.Embedding(num_user_context_id, context_emb_size)
        # Input width of the MLP is derived from the embedding widths so the
        # two can never drift apart (50 + 50 + 8 + 2 = 110 with the defaults).
        concat_size = 2 * emb_size + feature_emb_size + context_emb_size
        self.linear1 = nn.Linear(concat_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, u, v, f, uc):
        """Return logits of shape (batch, 1) for 1-D id tensors u, v, f, uc."""
        user_vec = self.user_emb(u)
        item_vec = self.item_emb(v)
        # Named feature_vec rather than F so the torch.nn.functional alias
        # is not shadowed inside this method.
        feature_vec = self.item_feature_id(f)
        context_vec = self.user_context_id(uc)
        x = torch.cat([user_vec, item_vec, feature_vec, context_vec], 1)
        x = self.dropout(x)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear2(x)
        return x

In [36]:
train.count()

user_id               3537773
item_id               3537773
rating                3537773
item_feature_id       3537773
context_feature_id    3537773
dtype: int64

# Training the model on train dataset

In [37]:
def train_one_epoch(model, train_df, optimizer):
    """Run one full-batch optimisation step and return the training loss.

    Parameters
    ----------
    model : module called as ``model(users, items, item_features, contexts)``
        and returning logits of shape (batch, 1).
    train_df : DataFrame with columns ``user_id``, ``item_id``, ``rating``,
        ``item_feature_id`` and ``context_feature_id``.
    optimizer : torch optimizer over ``model``'s parameters.

    Returns
    -------
    float : binary cross-entropy of the batch, measured before the step.
    """
    model.train()
    # here we are not using data loaders because our data fits well in memory
    # Tensors are created on the model's device, so CPU and GPU both work.
    device = next(model.parameters()).device
    users = torch.tensor(train_df.user_id.to_numpy(), dtype=torch.long, device=device)
    items = torch.tensor(train_df.item_id.to_numpy(), dtype=torch.long, device=device)
    item_features = torch.tensor(train_df.item_feature_id.to_numpy(), dtype=torch.long, device=device)
    user_context_id = torch.tensor(train_df.context_feature_id.to_numpy(), dtype=torch.long, device=device)
    ratings = torch.tensor(train_df.rating.to_numpy(), dtype=torch.float32, device=device).unsqueeze(1)

    logits = model(users, items, item_features, user_context_id)
    # Loss on logits instead of sigmoid + binary_cross_entropy: the fused form
    # stays finite and keeps a usable gradient when the sigmoid saturates.
    loss = F.binary_cross_entropy_with_logits(logits, ratings)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

def valid_metrics(model, valid_df):
    """Compute validation loss and accuracy without tracking gradients.

    Parameters
    ----------
    model : module called as ``model(users, items, item_features, contexts)``
        and returning logits of shape (batch, 1).
    valid_df : DataFrame with columns ``user_id``, ``item_id``, ``rating``,
        ``item_feature_id`` and ``context_feature_id``.

    Returns
    -------
    (float, float) : binary cross-entropy, and the fraction of rows whose
    thresholded prediction (sigmoid > 0.5) equals ``rating``.
    """
    model.eval()
    device = next(model.parameters()).device
    users = torch.tensor(valid_df.user_id.to_numpy(), dtype=torch.long, device=device)
    items = torch.tensor(valid_df.item_id.to_numpy(), dtype=torch.long, device=device)
    item_features = torch.tensor(valid_df.item_feature_id.to_numpy(), dtype=torch.long, device=device)
    user_context_id = torch.tensor(valid_df.context_feature_id.to_numpy(), dtype=torch.long, device=device)
    ratings = torch.tensor(valid_df.rating.to_numpy(), dtype=torch.float32, device=device).unsqueeze(1)

    # Evaluation needs no autograd graph; without no_grad the activations of
    # the whole validation set are retained for a backward pass that never runs.
    with torch.no_grad():
        logits = model(users, items, item_features, user_context_id)
        valid_loss = F.binary_cross_entropy_with_logits(logits, ratings).item()
        y_pred = (torch.sigmoid(logits) > 0.5).float()
        valid_acc = (y_pred == ratings).float().mean().item()
    return valid_loss, valid_acc

def training_loop(model, train_df, valid_df, epochs=10, lr=0.01, wd=0.0):
    """Train ``model`` for ``epochs`` full-batch steps, printing metrics.

    A fresh AdamW optimizer (learning rate ``lr``, weight decay ``wd``) is
    created on every call. After each step the train loss, validation loss and
    validation accuracy are printed on one line. Returns None.
    """
    adamw = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    epoch = 0
    while epoch < epochs:
        train_loss = train_one_epoch(model, train_df, adamw)
        valid_loss, valid_acc = valid_metrics(model, valid_df)
        message = "train loss %.3f valid loss %.3f valid acc %.3f" % (train_loss, valid_loss, valid_acc)
        print(message)
        epoch += 1

# Initializing the model to create the user and item embeddings

In [38]:
(np.isnan(train.context_feature_id.values) == True).sum()

0

In [39]:
# Embedding tables are indexed by raw id, so each table must be large enough
# for the biggest id the model will ever see -- in train, validation AND the
# Kaggle test set (which contains users that never appear in training).
# Sizing from the `train` split alone risks an index-out-of-range lookup.
num_users = int(max(new_train.user_id.max(), test_kaggle.user_id.max())) + 1
num_items = int(max(new_train.item_id.max(), test_kaggle.item_id.max())) + 1
num_features = int(item_feature.item_feature_id.max()) + 1
num_user_context_id = int(max(new_train.context_feature_id.max(),
                              test_kaggle.context_feature_id.max())) + 1

model = Implicit_NeuralNets(num_users, num_items, num_features, num_user_context_id)
num_users, num_items, num_features, num_user_context_id

(200153, 39901, 195, 4)

# Training with train dataset and validating with val dataset

In [55]:
model = Implicit_NeuralNets(num_users, num_items, num_features, num_user_context_id)
training_loop(model, train, val, epochs=20, lr=0.01, wd=1e-5)

train loss 0.659 valid loss 0.573 valid acc 0.780
train loss 0.568 valid loss 0.530 valid acc 0.781
train loss 0.531 valid loss 0.516 valid acc 0.781
train loss 0.528 valid loss 0.515 valid acc 0.781
train loss 0.537 valid loss 0.513 valid acc 0.781
train loss 0.538 valid loss 0.506 valid acc 0.781
train loss 0.531 valid loss 0.496 valid acc 0.781
train loss 0.519 valid loss 0.487 valid acc 0.781
train loss 0.506 valid loss 0.480 valid acc 0.783
train loss 0.495 valid loss 0.474 valid acc 0.788
train loss 0.487 valid loss 0.470 valid acc 0.794
train loss 0.480 valid loss 0.464 valid acc 0.798
train loss 0.473 valid loss 0.457 valid acc 0.802
train loss 0.466 valid loss 0.448 valid acc 0.807
train loss 0.458 valid loss 0.436 valid acc 0.813
train loss 0.448 valid loss 0.423 valid acc 0.820
train loss 0.438 valid loss 0.410 valid acc 0.826
train loss 0.428 valid loss 0.397 valid acc 0.833
train loss 0.418 valid loss 0.385 valid acc 0.839
train loss 0.409 valid loss 0.373 valid acc 0.845


In [68]:
training_loop(model, train, val, epochs=20, lr=0.01, wd=1e-5)

train loss 0.399 valid loss 0.375 valid acc 0.856
train loss 0.394 valid loss 0.353 valid acc 0.861
train loss 0.378 valid loss 0.337 valid acc 0.864
train loss 0.369 valid loss 0.327 valid acc 0.869
train loss 0.362 valid loss 0.318 valid acc 0.875
train loss 0.354 valid loss 0.312 valid acc 0.880
train loss 0.345 valid loss 0.308 valid acc 0.883
train loss 0.339 valid loss 0.304 valid acc 0.886
train loss 0.333 valid loss 0.300 valid acc 0.889
train loss 0.327 valid loss 0.294 valid acc 0.891
train loss 0.322 valid loss 0.288 valid acc 0.893
train loss 0.316 valid loss 0.282 valid acc 0.895
train loss 0.312 valid loss 0.278 valid acc 0.896
train loss 0.307 valid loss 0.275 valid acc 0.899
train loss 0.304 valid loss 0.273 valid acc 0.900
train loss 0.300 valid loss 0.271 valid acc 0.901
train loss 0.297 valid loss 0.269 valid acc 0.902
train loss 0.294 valid loss 0.268 valid acc 0.903
train loss 0.291 valid loss 0.267 valid acc 0.903
train loss 0.289 valid loss 0.266 valid acc 0.904


In [69]:
training_loop(model, train, val, epochs=10, lr=0.01, wd=1e-5)

train loss 0.287 valid loss 0.266 valid acc 0.904
train loss 0.292 valid loss 0.261 valid acc 0.904
train loss 0.282 valid loss 0.265 valid acc 0.905
train loss 0.281 valid loss 0.267 valid acc 0.905
train loss 0.281 valid loss 0.262 valid acc 0.905
train loss 0.277 valid loss 0.258 valid acc 0.905
train loss 0.274 valid loss 0.257 valid acc 0.905
train loss 0.273 valid loss 0.257 valid acc 0.906
train loss 0.272 valid loss 0.256 valid acc 0.906
train loss 0.270 valid loss 0.255 valid acc 0.906


In [76]:
training_loop(model, train, val, epochs=10, lr=0.01, wd=1e-6)

train loss 0.268 valid loss 0.266 valid acc 0.906
train loss 0.273 valid loss 0.255 valid acc 0.906
train loss 0.265 valid loss 0.255 valid acc 0.906
train loss 0.265 valid loss 0.255 valid acc 0.906
train loss 0.265 valid loss 0.254 valid acc 0.906
train loss 0.262 valid loss 0.253 valid acc 0.906
train loss 0.259 valid loss 0.254 valid acc 0.907
train loss 0.258 valid loss 0.254 valid acc 0.907
train loss 0.257 valid loss 0.253 valid acc 0.907
train loss 0.255 valid loss 0.252 valid acc 0.907


In [77]:
training_loop(model, train, val, epochs=10, lr=0.01, wd=1e-4)

train loss 0.253 valid loss 0.257 valid acc 0.907
train loss 0.258 valid loss 0.252 valid acc 0.907
train loss 0.252 valid loss 0.253 valid acc 0.907
train loss 0.251 valid loss 0.253 valid acc 0.907
train loss 0.250 valid loss 0.250 valid acc 0.907
train loss 0.247 valid loss 0.249 valid acc 0.907
train loss 0.246 valid loss 0.250 valid acc 0.907
train loss 0.245 valid loss 0.250 valid acc 0.908
train loss 0.243 valid loss 0.249 valid acc 0.908
train loss 0.241 valid loss 0.249 valid acc 0.908


In [78]:
training_loop(model, train, val, epochs=10, lr=0.001, wd=0)

train loss 0.239 valid loss 0.248 valid acc 0.908
train loss 0.239 valid loss 0.248 valid acc 0.908
train loss 0.239 valid loss 0.248 valid acc 0.908
train loss 0.239 valid loss 0.248 valid acc 0.908
train loss 0.238 valid loss 0.248 valid acc 0.908
train loss 0.238 valid loss 0.248 valid acc 0.908
train loss 0.238 valid loss 0.248 valid acc 0.908
train loss 0.238 valid loss 0.248 valid acc 0.908
train loss 0.238 valid loss 0.248 valid acc 0.908
train loss 0.238 valid loss 0.248 valid acc 0.908


In [79]:
training_loop(model, train, val, epochs=10, lr=0.01, wd=5e-4)

train loss 0.237 valid loss 0.250 valid acc 0.908
train loss 0.238 valid loss 0.247 valid acc 0.909
train loss 0.238 valid loss 0.245 valid acc 0.909
train loss 0.234 valid loss 0.247 valid acc 0.908
train loss 0.233 valid loss 0.247 valid acc 0.908
train loss 0.231 valid loss 0.246 valid acc 0.909
train loss 0.229 valid loss 0.246 valid acc 0.910
train loss 0.229 valid loss 0.246 valid acc 0.910
train loss 0.228 valid loss 0.245 valid acc 0.910
train loss 0.225 valid loss 0.245 valid acc 0.910


# Hyperparameter Tuning

In [45]:
# model1:
model = Implicit_NeuralNets(num_users, num_items, num_features, num_user_context_id) #.cuda()
training_loop(model, train, val, epochs=40, lr=0.01, wd = 1e-5)

train loss 0.714 valid loss 0.646 valid acc 0.701
train loss 0.646 valid loss 0.606 valid acc 0.727
train loss 0.606 valid loss 0.583 valid acc 0.727
train loss 0.588 valid loss 0.572 valid acc 0.727
train loss 0.584 valid loss 0.567 valid acc 0.727
train loss 0.585 valid loss 0.562 valid acc 0.728
train loss 0.584 valid loss 0.555 valid acc 0.729
train loss 0.578 valid loss 0.546 valid acc 0.731
train loss 0.569 valid loss 0.537 valid acc 0.734
train loss 0.558 valid loss 0.528 valid acc 0.739
train loss 0.547 valid loss 0.519 valid acc 0.744
train loss 0.537 valid loss 0.511 valid acc 0.752
train loss 0.528 valid loss 0.503 valid acc 0.762
train loss 0.519 valid loss 0.493 valid acc 0.774
train loss 0.509 valid loss 0.483 valid acc 0.785
train loss 0.499 valid loss 0.471 valid acc 0.796
train loss 0.489 valid loss 0.458 valid acc 0.807
train loss 0.478 valid loss 0.445 valid acc 0.817
train loss 0.467 valid loss 0.431 valid acc 0.826
train loss 0.455 valid loss 0.417 valid acc 0.835


In [46]:
# model2: same settings as model1 but with weaker weight decay (wd = 1e-6)
model = Implicit_NeuralNets(num_users, num_items, num_features, num_user_context_id) #.cuda()
training_loop(model, train, val, epochs=40, lr=0.01, wd = 1e-6)

train loss 0.767 valid loss 0.673 valid acc 0.604
train loss 0.679 valid loss 0.624 valid acc 0.720
train loss 0.625 valid loss 0.593 valid acc 0.727
train loss 0.597 valid loss 0.577 valid acc 0.727
train loss 0.585 valid loss 0.568 valid acc 0.727
train loss 0.583 valid loss 0.563 valid acc 0.727
train loss 0.584 valid loss 0.559 valid acc 0.728
train loss 0.583 valid loss 0.552 valid acc 0.729
train loss 0.579 valid loss 0.544 valid acc 0.732
train loss 0.571 valid loss 0.535 valid acc 0.736
train loss 0.560 valid loss 0.525 valid acc 0.742
train loss 0.550 valid loss 0.516 valid acc 0.750
train loss 0.539 valid loss 0.507 valid acc 0.758
train loss 0.528 valid loss 0.498 valid acc 0.768
train loss 0.518 valid loss 0.489 valid acc 0.777
train loss 0.508 valid loss 0.479 valid acc 0.785
train loss 0.498 valid loss 0.468 valid acc 0.793
train loss 0.488 valid loss 0.456 valid acc 0.802
train loss 0.477 valid loss 0.443 valid acc 0.812
train loss 0.467 valid loss 0.430 valid acc 0.821


In [88]:
# model3: higher learning rate (lr = 0.035) and stronger weight decay (wd = 1e-4)
model = Implicit_NeuralNets(num_users, num_items, num_features, num_user_context_id) #.cuda()
training_loop(model, train, val, epochs=40, lr=0.035, wd = 1e-4)

train loss 0.669 valid loss 0.579 valid acc 0.727
train loss 0.602 valid loss 0.563 valid acc 0.727
train loss 0.594 valid loss 0.531 valid acc 0.733
train loss 0.552 valid loss 0.509 valid acc 0.754
train loss 0.522 valid loss 0.487 valid acc 0.782
train loss 0.499 valid loss 0.457 valid acc 0.809
train loss 0.473 valid loss 0.423 valid acc 0.833
train loss 0.445 valid loss 0.391 valid acc 0.854
train loss 0.417 valid loss 0.362 valid acc 0.866
train loss 0.392 valid loss 0.342 valid acc 0.873
train loss 0.373 valid loss 0.327 valid acc 0.879
train loss 0.357 valid loss 0.312 valid acc 0.885
train loss 0.338 valid loss 0.299 valid acc 0.890
train loss 0.321 valid loss 0.292 valid acc 0.893
train loss 0.310 valid loss 0.289 valid acc 0.895
train loss 0.305 valid loss 0.283 valid acc 0.896
train loss 0.298 valid loss 0.277 valid acc 0.896
train loss 0.290 valid loss 0.275 valid acc 0.897
train loss 0.287 valid loss 0.275 valid acc 0.897
train loss 0.284 valid loss 0.273 valid acc 0.898


# Training the model on the complete training dataset

In [80]:
test = pd.read_csv('../data/test_kaggle.csv')
test.head()

Unnamed: 0,id,user_id,item_id,context_feature_id
0,0,4,16835,2
1,1,4,22590,3
2,2,4,1978,1
3,3,4,28916,1
4,4,4,14427,2


In [81]:
# Attach item_feature_id to every test row.
# merge defaults to an inner join, so a test row whose item_id is missing from
# item_feature would be dropped here.
# NOTE(review): confirm len(test) is unchanged, otherwise ids go missing from the submission.
test = test.merge(item_feature, on = 'item_id')

# Prediction on test_dataset

In [86]:
test_ids = torch.LongTensor(test.id.values)
test_users = torch.LongTensor(test.user_id.values) # .cuda()
test_items = torch.LongTensor(test.item_id.values) #.cuda()
test_item_features = torch.LongTensor(test.item_feature_id.values) #.cuda()
# NOTE(review): training replaced each row's context with the user's most
# frequent context, while this uses the per-row test context -- confirm the
# mismatch is intended.
test_context_feature_id = torch.LongTensor(test.context_feature_id.values)

# Inference: switch dropout off explicitly rather than relying on the mode the
# last training call left behind, and skip building an autograd graph.
model.eval()
with torch.no_grad():
    test_y_hat = torch.sigmoid(model(test_users, test_items, test_item_features, test_context_feature_id))

In [87]:
len(test_y_hat)

381385

In [88]:
test_y_hat

tensor([[0.5578],
        [0.2370],
        [0.3410],
        ...,
        [0.2699],
        [0.3602],
        [0.8165]], grad_fn=<SigmoidBackward0>)

In [89]:
# Build the submission frame from plain NumPy arrays: convert the id tensor
# explicitly instead of relying on pandas to coerce a torch.Tensor.
d = {'id': test_ids.numpy(), 'rating': test_y_hat.detach().numpy().reshape(-1)}
test_knownUsers_df = pd.DataFrame(d)

In [90]:
test_knownUsers_df = test_knownUsers_df.groupby(['id']).mean()

In [91]:
test_knownUsers_df.to_csv('submission35.csv')