In [2]:
from dataset import MELD
import torch
from torch.utils.data import DataLoader
from transformers import BertModel
import math
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
from tqdm import tqdm

  torch.utils._pytree._register_pytree_node(


In [None]:
class PositionalEncoding(torch.nn.Module):
    """Additive sinusoidal positional encoding followed by dropout.

    A table ``pe`` of shape ``(max_len, d_model)`` is precomputed: even
    feature columns hold ``sin(pos * freq)`` and odd columns hold
    ``cos(pos * freq)``. It is stored as a buffer, so it follows the module
    across devices and is part of the ``state_dict`` but is not trained.

    Args:
        d_model: Feature size of the inputs (odd sizes are supported).
        dropout: Dropout probability applied after adding the encoding.
        max_len: Longest sequence (size of dim 0) the module can encode.
    """

    def __init__(self, d_model, dropout = 0.2, max_len = 5000):
        super().__init__()
        self.dropout = torch.nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even columns, so
        # the frequency vector has to be truncated for the cosine half.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding for positions ``0 .. x.size(0) - 1`` to ``x``.

        Args:
            x: Tensor of shape ``(seq_len, d_model)`` or, sequence-first,
                ``(seq_len, batch, d_model)``.

        Returns:
            Tensor with the same shape as ``x``.

        Raises:
            ValueError: If ``seq_len`` exceeds the ``max_len`` given at
                construction time.
        """
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f'sequence length {seq_len} exceeds max_len {self.pe.size(0)}'
            )
        pe = self.pe[:seq_len]
        if x.dim() == 3:
            # Insert a batch axis so the table broadcasts over the batch
            # instead of being aligned against it.
            pe = pe.unsqueeze(1)
        x = x + pe
        return self.dropout(x)

class TransformerBlock(torch.nn.Module):
    """Single self-attention block with positional encoding.

    The input is position-encoded, projected to query/key/value, passed
    through multi-head self-attention, and then through a linear layer. Each
    of the two sub-layers is followed by a residual connection and layer
    normalisation.

    Args:
        d_model: Feature size of the input and output.
        num_heads: Number of attention heads.
        dropout: Probability for the ``dropout`` attribute.
        max_len: Longest sequence the positional encoding supports.
    """

    def __init__(self, d_model=768, num_heads=8, dropout=0.1, max_len=100):
        super(TransformerBlock, self).__init__()
        self.key = torch.nn.Linear(d_model, d_model)
        self.query = torch.nn.Linear(d_model, d_model)
        self.value = torch.nn.Linear(d_model, d_model)
        # One LayerNorm instance is shared by both sub-layers; it is kept as
        # a single module so existing checkpoints keep the same keys.
        self.layer_norm = torch.nn.LayerNorm(d_model)
        # Kept for attribute compatibility; forward() does not apply it.
        self.dropout = torch.nn.Dropout(dropout)
        self.attention = torch.nn.MultiheadAttention(d_model, num_heads)
        self.fc = torch.nn.Linear(d_model, d_model)
        self.pos_emb = PositionalEncoding(d_model, max_len=max_len)

    def forward(self, x):
        """Apply the block to ``x``.

        Args:
            x: Tensor of shape ``[seq_len, d_model]``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # The position-encoded input must feed the attention projections and
        # the residual path; otherwise self-attention is order-agnostic.
        h = self.pos_emb(x)
        # The attention weights are never used, so skip computing them.
        attended = self.attention(
            self.query(h), self.key(h), self.value(h), need_weights=False
        )[0]
        y = self.layer_norm(attended + h)
        z = self.layer_norm(y + self.fc(y))
        return z


class Model1(torch.nn.Module):
    """Utterance classifier: frozen BERT features -> TransformerBlock -> MLP.

    Each utterance is encoded by BERT and mean-pooled over its tokens. The
    sequence of utterance vectors then goes through ``TransformerBlock`` and
    a two-layer head that produces 7 class scores per utterance.
    """

    def __init__(self):
        super(Model1, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # BERT is only used under no_grad, so mark its weights as frozen.
        self.bert.requires_grad_(False)
        self.bert.eval()
        self.transformer = TransformerBlock()
        self.fc = torch.nn.Linear(768, 128)
        self.fc2 = torch.nn.Linear(128, 7)
        self.dropout = torch.nn.Dropout(0.1)

    def train(self, mode=True):
        """Set train/eval mode on the head while keeping BERT in eval mode.

        Without this override, a plain ``model.train()`` would re-enable
        dropout inside the frozen BERT feature extractor.
        """
        super().train(mode)
        self.bert.eval()
        return self

    def _encode_utterances(self, x):
        """Return one mean-pooled BERT vector per utterance.

        Padding positions are excluded from the mean when ``x`` provides an
        ``attention_mask``; otherwise all positions are averaged.
        """
        with torch.no_grad():
            hidden = self.bert(**x).last_hidden_state
            mask = x.get('attention_mask')
            if mask is None:
                return hidden.mean(dim=1)
            mask = mask.unsqueeze(-1).to(hidden.dtype)
            # clamp avoids a division by zero for an all-padding row.
            return (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)

    def forward(self,x):
        '''
        x : mapping of tensors passed to BERT as keyword arguments
            (original note: [num_utter, seq_len]).

        Returns raw class logits of shape [num_utter, 7]. No softmax is
        applied because torch.nn.CrossEntropyLoss expects unnormalised
        scores; argmax predictions are unaffected, and callers that need
        probabilities can apply torch.softmax(logits, dim=1).
        '''
        x = self._encode_utterances(x)
        x = self.transformer(x)
        x = self.fc(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

class Model2(torch.nn.Module):
    """Utterance classifier: frozen BERT features -> GRU -> MLP.

    Each utterance is encoded by BERT and mean-pooled over its tokens. The
    sequence of utterance vectors then goes through a GRU and a two-layer
    head that produces 7 class scores per utterance.
    """

    def __init__(self):
        super(Model2, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # BERT is only used under no_grad, so mark its weights as frozen.
        self.bert.requires_grad_(False)
        self.bert.eval()
        self.gru = torch.nn.GRU(768,768)
        self.fc = torch.nn.Linear(768, 128)
        self.fc2 = torch.nn.Linear(128, 7)
        self.dropout = torch.nn.Dropout(0.1)

    def train(self, mode=True):
        """Set train/eval mode on the head while keeping BERT in eval mode.

        Without this override, a plain ``model.train()`` would re-enable
        dropout inside the frozen BERT feature extractor.
        """
        super().train(mode)
        self.bert.eval()
        return self

    def _encode_utterances(self, x):
        """Return one mean-pooled BERT vector per utterance.

        Padding positions are excluded from the mean when ``x`` provides an
        ``attention_mask``; otherwise all positions are averaged.
        """
        with torch.no_grad():
            hidden = self.bert(**x).last_hidden_state
            mask = x.get('attention_mask')
            if mask is None:
                return hidden.mean(dim=1)
            mask = mask.unsqueeze(-1).to(hidden.dtype)
            # clamp avoids a division by zero for an all-padding row.
            return (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)

    def forward(self,x):
        '''
        x : mapping of tensors passed to BERT as keyword arguments
            (original note: [num_utter, seq_len]).

        Returns raw class logits of shape [num_utter, 7]. No softmax is
        applied because torch.nn.CrossEntropyLoss expects unnormalised
        scores; argmax predictions are unaffected, and callers that need
        probabilities can apply torch.softmax(logits, dim=1).
        '''
        x = self._encode_utterances(x)
        x,_ = self.gru(x)
        x = self.fc(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [None]:
def _run_epoch(dataset, model, loss_fn, device, optimizer=None):
    """Run one pass over ``dataset`` and return ``(mean_loss, weighted_f1)``.

    Each item of ``dataset`` is an ``(x, y)`` pair where ``x`` is a mapping of
    tensors and ``y`` holds the integer class labels. When ``optimizer`` is
    given, a gradient step is taken after every item; otherwise the pass is
    evaluation-only.
    """
    total_loss = 0.0
    true_ys = []
    pred_ys = []
    for x, y in tqdm(dataset):
        x = {k: v.to(device) for k, v in x.items()}
        # as_tensor accepts numpy arrays, lists and tensors alike;
        # CrossEntropyLoss requires int64 class indices.
        y = torch.as_tensor(y).long().to(device)
        if optimizer is not None:
            optimizer.zero_grad()
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        if optimizer is not None:
            loss.backward()
            optimizer.step()
        total_loss += loss.item()
        true_ys.append(y.cpu().numpy())
        pred_ys.append(y_pred.argmax(dim=1).detach().cpu().numpy())
    mean_loss = total_loss / len(dataset)
    f1 = f1_score(np.concatenate(true_ys), np.concatenate(pred_ys), average='weighted')
    return mean_loss, f1


def train(train_dataset,val_dataset,model,num_epochs=10,lr=1e-4,device='cuda',save_path='model.pt'):
    """Train ``model`` and evaluate it on ``val_dataset`` after every epoch.

    Args:
        train_dataset: Iterable with ``len()`` yielding ``(x, y)`` items used
            for optimisation.
        val_dataset: Iterable with ``len()`` yielding ``(x, y)`` items used
            for validation.
        model: Module called as ``model(x)`` that returns per-class scores.
        num_epochs: Number of passes over ``train_dataset``.
        lr: Adam learning rate.
        device: Device the model and data are moved to.
        save_path: Where the ``state_dict`` is written after every epoch
            (the file is overwritten each time). ``None`` disables saving.

    Returns:
        ``(train_losses, val_losses, train_f1s, val_f1s)``: four lists with
        one entry per epoch.
    """
    # Move the model first so the optimizer is built from on-device parameters.
    model = model.to(device)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    train_losses = []
    train_f1s = []
    val_f1s = []
    val_losses = []
    for i in range(num_epochs):
        epoch_loss, epoch_f1 = _run_epoch(train_dataset, model, loss_fn, device, optimizer)
        train_losses.append(epoch_loss)
        train_f1s.append(epoch_f1)

        print('Validating')
        # Validation must run with dropout disabled. Remember each
        # sub-module's mode so that modules the caller deliberately left in
        # eval mode stay that way once training resumes.
        previous_modes = [(module, module.training) for module in model.modules()]
        model.eval()
        try:
            with torch.no_grad():
                epoch_loss, epoch_f1 = _run_epoch(val_dataset, model, loss_fn, device)
        finally:
            for module, was_training in previous_modes:
                module.training = was_training
        val_losses.append(epoch_loss)
        val_f1s.append(epoch_f1)

        print(f'Epoch {i} Train Loss : {train_losses[-1]} Val Loss : {val_losses[-1]} Train F1 : {train_f1s[-1]} Val F1 : {val_f1s[-1]}')
        if save_path is not None:
            torch.save(model.state_dict(), save_path)
    return train_losses,val_losses,train_f1s,val_f1s


In [None]:
# Build the training and validation MELD datasets from their respective files.
train_dataset, val_dataset = MELD('train_file.json'), MELD('val_file.json')

In [None]:
# Use the GPU when one is available instead of assuming CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The run is deliberately NOT wrapped in torch.autocast(float16): autocast is
# meant to cover only the forward pass and loss, and float16 training needs
# gradient scaling (GradScaler) to avoid gradient underflow. Wrapping the whole
# loop, including backward() and optimizer.step(), provides neither.
model = Model2()
train_losses,val_losses,train_f1s,val_f1s = train(train_dataset,val_dataset,model,
                                                  num_epochs=7,device=device)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch training vs. validation loss.
fig, ax = plt.subplots()
ax.plot(train_losses)
ax.plot(val_losses)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and validation loss')
ax.legend(['train loss','val_loss'])
plt.show()

In [None]:
# Per-epoch training vs. validation weighted F1.
fig, ax = plt.subplots()
ax.plot(train_f1s)
ax.plot(val_f1s)
ax.set_xlabel('Epoch')
ax.set_ylabel('Weighted F1')
ax.set_title('Training and validation F1')
ax.legend(['train f1s','val f1s'])
plt.show()