In [1]:
import pandas as pd
import numpy as np
import sklearn
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn import TransformerEncoder, TransformerDecoder, TransformerEncoderLayer, TransformerDecoderLayer
import pdb


In [2]:
grped_df = pd.read_pickle('train_siamese.pkl')

In [9]:
grped_df = grped_df.reset_index(drop=True)

In [3]:
class similarityModel(nn.Module):
    def __init__(self, nhead, dim_model, dim_ff):
        super(similarityModel, self).__init__()
        self.num_head = nhead
        self.dim_model = dim_model
        self.dim_feedforward = dim_ff
        self.encoder_layer = TransformerEncoderLayer(self.dim_model, self.num_head, self.dim_feedforward)
        #self.linear_layer = nn.Linear(self.dim_model, 2)
        
    def forward_one(self, inp):
        encoding = self.encoder_layer(inp)
        return encoding#self.linear_layer(encoding)
        
    def forward(self, inp1, inp2):
        out1 = self.forward_one(inp1)
        out2 = self.forward_one(inp2)
        return out1, out2


In [4]:
class contrastiveLoss(nn.Module):
    """
    Contrastive loss function.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    """

    def __init__(self, margin=2.0):
        super(contrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim = True)
        loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                      (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))


        return loss_contrastive

In [5]:
class similarityDataset(Dataset):
    def __init__(self, df):
        self.df = df
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        x = self.df.loc[index]
        if np.where(x['rating_score']==4)[0].size>1:
            DIFF = int(np.random.random() > 0.6)
            if not DIFF:
                rel_ind = np.where(x['rating_score']==4)[0]
                sampled_ind = np.random.choice(rel_ind, 2, replace=False)
                return x['title_em'][sampled_ind[0]], x['title_em'][sampled_ind[1]], torch.tensor([DIFF], dtype=torch.float32) 
        zero_ind = np.random.choice(np.where(x['rating_score']==0)[0])
        four_ind = np.random.choice(np.where(x['rating_score']==4)[0])
        return x['title_em'][zero_ind], x['title_em'][four_ind], torch.tensor([1], dtype=torch.float32)

In [10]:
dataset = similarityDataset(grped_df)

train_dataloader = DataLoader(dataset=dataset, batch_size=32,  shuffle = True)


model = similarityModel(nhead=16, dim_model=384, dim_ff= 2048)
criterion = contrastiveLoss(margin = 50)

optimizer = optim.Adam(model.parameters(), lr=0.0005)


In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

similarityModel(
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=384, out_features=384, bias=True)
    )
    (linear1): Linear(in_features=384, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=384, bias=True)
    (norm1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
)

In [16]:

for epoch in range(0,50):
    epoch_loss = 0
    for i, data in enumerate(train_dataloader,0):
        item1, item2 , label = data
        item1, item2 , label = item1.to(device), item2.to(device) , label.to(device)
        item1 = item1.unsqueeze(0)
        item2 = item2.unsqueeze(0)
        optimizer.zero_grad()
        output1,output2 = model(item1,item2)
        loss_contrastive = criterion(output1,output2,label)
        loss_contrastive.backward()
        optimizer.step()
        epoch_loss += loss_contrastive.item()
#         if i %10 == 0 :
#             print(f'running_loss: {epoch_loss}')
    print(f'epoch_loss: {epoch_loss/i}')
            


epoch_loss: 52.91680579961732
epoch_loss: 49.19379214353339
epoch_loss: 47.03952414490456
epoch_loss: 47.0314237572426
epoch_loss: 49.78848503911218
epoch_loss: 44.704648572345114
epoch_loss: 46.39190689353056
epoch_loss: 43.92085825010788
epoch_loss: 43.751387640487316
epoch_loss: 44.3611142801684
epoch_loss: 52.256461964097134
epoch_loss: 49.97697958835336
epoch_loss: 43.52263093549152
epoch_loss: 44.15555962850881
epoch_loss: 42.61065007365027
epoch_loss: 46.856279617132145
epoch_loss: 49.16236096759175
epoch_loss: 46.700584655584294
epoch_loss: 45.28972916270411
epoch_loss: 43.003936856292015
epoch_loss: 41.71152790202651
epoch_loss: 42.86970544415851
epoch_loss: 43.3229243589002
epoch_loss: 42.98785460272501
epoch_loss: 42.07598243757736
epoch_loss: 39.379825924718105
epoch_loss: 38.342534852582354
epoch_loss: 39.85954454333283
epoch_loss: 38.492509836374325
epoch_loss: 36.3857642218124
epoch_loss: 40.331451504729515
epoch_loss: 36.9612153629924
epoch_loss: 39.88009172262147
epoch

In [17]:
torch.save(model.state_dict(),'./siamese_similarity_hd.pth')

In [11]:
torch.cuda.is_available()

False