In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np, pandas as pd, pickle, time, argparse
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, classification_report, precision_recall_fscore_support

# Legacy typed-tensor aliases used for casting elsewhere in the notebook:
# bind the CUDA variants when PyTorch reports an available GPU, otherwise
# the CPU variants.  Only the selected branch is evaluated.
FloatTensor, LongTensor, ByteTensor = (
    (torch.cuda.FloatTensor, torch.cuda.LongTensor, torch.cuda.ByteTensor)
    if torch.cuda.is_available()
    else (torch.FloatTensor, torch.LongTensor, torch.ByteTensor)
)

In [10]:
class DailyDialogueDataset(Dataset):
    """Conversation-level dataset backed by a single pickle file.

    The pickle at ``path`` must unpack into exactly eight items, in this
    order: speakers, input sequences, max sequence lengths, act labels,
    emotion labels, train ids, test ids, valid ids.  The first five are
    indexed by conversation id; the last three are iterables of conversation
    ids that define the splits.
    """

    def __init__(self, split:str, path:str) -> None:
        """Load the pickle and select the conversation ids of one split.

        Args:
            split: One of ``'train'``, ``'test'`` or ``'valid'``.
            path: Location of the pickled data bundle.

        Raises:
            ValueError: If ``split`` is not one of the three known names.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # ``path`` at files from a trusted source.
        # The context manager closes the handle as soon as the data is read.
        with open(path, 'rb') as handle:
            (self.speakers, self.input_sequence, self.input_max_seq_length,
             self.act_labels, self.emotion_labels,
             self.train_id, self.test_id, self.valid_id) = pickle.load(handle)

        ids_by_split = {
            'train': self.train_id,
            'test': self.test_id,
            'valid': self.valid_id,
        }
        if split not in ids_by_split:
            # Fail here with a clear message instead of leaving ``self.keys``
            # undefined and crashing later on ``len(self.keys)``.
            raise ValueError(
                "split must be one of 'train', 'test' or 'valid', got %r" % (split,)
            )

        self.keys = list(ids_by_split[split])
        self.len = len(self.keys)

    def __getitem__(self, index):
        """Return the tensors and metadata of the ``index``-th conversation.

        Returns:
            A 7-tuple:
            0. ``LongTensor`` built from the stored input sequence,
            1. ``FloatTensor`` with one 2-way one-hot row per speaker entry
               (``'0'`` maps to ``[1, 0]``, anything else to ``[0, 1]``),
            2. ``FloatTensor`` of ones, one per act label (utterance mask),
            3. ``LongTensor`` of act labels,
            4. ``LongTensor`` of emotion labels,
            5. the stored max sequence length entry for the conversation,
            6. the conversation id itself.
        """
        conv = self.keys[index]

        speaker_one_hot = [[1, 0] if x == '0' else [0, 1] for x in self.speakers[conv]]
        utterance_mask = [1] * len(self.act_labels[conv])

        return (
            torch.LongTensor(self.input_sequence[conv]),
            torch.FloatTensor(speaker_one_hot),
            torch.FloatTensor(utterance_mask),
            torch.LongTensor(self.act_labels[conv]),
            torch.LongTensor(self.emotion_labels[conv]),
            self.input_max_seq_length[conv],
            conv,
        )

    def __len__(self) -> int:
        """Number of conversations in the selected split."""
        return self.len

class DailyDialoguePadCollate:
    """Collate callable that pads a list of per-conversation sample tuples.

    Each sample is a tuple whose fields are handled by position (such as the
    7-tuples returned by ``DailyDialogueDataset.__getitem__``):

    * field 0 is zero-padded along ``dim``, stacked, and its first two axes
      are swapped;
    * field 1 is padded with ``pad_sequence`` (sequence-first layout);
    * fields 2-4 are padded with ``pad_sequence`` (batch-first layout);
    * remaining fields are returned as plain Python lists.
    """

    def __init__(self, dim=0):
        """Store the axis along which field 0 is padded."""
        self.dim = dim

    def pad_tensor(self, vec, pad, dim):
        """Append int64 zeros to ``vec`` so that its size along ``dim`` is ``pad``."""
        pad_size = list(vec.shape)
        pad_size[dim] = pad - vec.size(dim)
        # Allocate the padding directly as int64 instead of creating a float
        # tensor and casting it.
        filler = torch.zeros(pad_size, dtype=torch.long)
        return torch.cat([vec, filler], dim=dim)

    def pad_collate(self, batch):
        """Pad every tensor in ``batch`` to the longest size along ``self.dim`` and stack."""
        tensors = list(batch)

        # find longest sequence
        max_len = max(t.shape[self.dim] for t in tensors)

        # pad according to max_len, then stack along a new leading batch axis
        padded = [self.pad_tensor(t, pad=max_len, dim=self.dim) for t in tensors]
        return torch.stack(padded, dim=0)

    def __call__(self, batch):
        """Collate ``batch`` (a list of sample tuples) field by field.

        Returns:
            A list with one entry per sample field, in field order.
        """
        # The DataFrame is only used to transpose "list of samples" into
        # "one column per field".
        dat = pd.DataFrame(batch)

        collated = []
        for i in dat:
            # Hand plain lists to the padding helpers: some PyTorch releases
            # reject a pandas Series passed to pad_sequence.
            column = dat[i].tolist()
            if i == 0:
                collated.append(self.pad_collate(column).transpose(1, 0).contiguous())
            elif i == 1:
                collated.append(pad_sequence(column))
            elif i < 5:
                collated.append(pad_sequence(column, True))
            else:
                collated.append(column)
        return collated

In [2]:
class CNNFeatureExtractor(nn.Module):
    """Utterance encoder: embedding -> parallel 1-D convolutions -> max-over-time
    pooling -> dropout -> linear projection -> ReLU.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Width of each embedding vector (conv input channels).
        ouput_size: Size of the feature vector produced per utterance
            (parameter name kept as-is for backward compatibility).
        filters: Output channels of every convolution.
        kernel_sizes: Iterable of kernel widths, one convolution per entry.
        dropout: Dropout probability applied before the linear projection.
    """

    def __init__(self, vocab_size, embedding_dim, ouput_size, filters, kernel_sizes, dropout) -> None:
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.convs = nn.ModuleList(
            [nn.Conv1d(in_channels=embedding_dim, out_channels=filters, kernel_size=K) for K in kernel_sizes]
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(len(kernel_sizes) * filters, ouput_size)
        self.feature_dim = ouput_size

    def init_pretrained_embeddings_from_numpy(self, pretrained_word_vectors):
        """Replace the embedding table with a frozen copy of a numpy matrix.

        Args:
            pretrained_word_vectors: 2-D numpy array whose second dimension
                equals the module's ``embedding_dim``.

        Raises:
            ValueError: If the array is not 2-D or its width does not match
                ``embedding_dim`` (the convolutions would fail on it later).
        """
        expected_dim = self.embedding.embedding_dim
        if pretrained_word_vectors.ndim != 2 or pretrained_word_vectors.shape[1] != expected_dim:
            raise ValueError(
                "pretrained_word_vectors must have shape (vocab, %d), got %r"
                % (expected_dim, tuple(pretrained_word_vectors.shape))
            )
        # Keep the new table on the device the current table lives on.
        weights = torch.from_numpy(pretrained_word_vectors).float().to(self.embedding.weight.device)
        self.embedding.weight = nn.Parameter(weights, requires_grad=False)

    def forward(self, x, unmask):
        """Encode every utterance of every dialogue in the batch.

        Args:
            x: Token ids of shape (num_utt, batch, num_words).
            unmask: Utterance mask of shape (batch, num_utt); multiplied into
                the output so that masked utterances yield zero features.

        Returns:
            Tensor of shape (num_utt, batch, feature_dim).
        """
        num_utt, batch, num_words = x.size()

        # Follow the module's own device rather than a global CUDA/CPU alias,
        # so a CPU model still works on a machine that has a GPU.
        device = self.embedding.weight.device
        tokens = x.to(device=device, dtype=torch.long)
        tokens = tokens.reshape(-1, num_words)  # (num_utt * batch, num_words)
        emb = self.embedding(tokens)  # (num_utt * batch, num_words, embedding_dim)
        emb = emb.transpose(-2, -1).contiguous()  # (num_utt * batch, embedding_dim, num_words)

        convoluted = [F.relu(conv(emb)) for conv in self.convs]
        # Max over the whole time axis: the kernel must be the length of
        # axis 2, not the full size tuple.  Squeeze only that axis so a
        # single-row input keeps its leading dimension.
        pooled = [F.max_pool1d(c, c.size(2)).squeeze(2) for c in convoluted]
        concated = torch.cat(pooled, 1)  # (num_utt * batch, len(kernel_sizes) * filters)
        features = F.relu(self.fc(self.dropout(concated)))  # (num_utt * batch, feature_dim)
        features = features.view(num_utt, batch, -1)  # (num_utt, batch, feature_dim)

        mask = unmask.to(device=features.device, dtype=torch.float)
        mask = mask.unsqueeze(-1).transpose(0, 1)  # (batch, num_utt) -> (num_utt, batch, 1)
        # Broadcasting over the last axis replaces the explicit repeat.
        return features * mask

In [3]:
class DailyDialogueModel(nn.Module):
    """Dialogue model stub that currently only owns a CNN utterance encoder.

    In the code visible here, only ``vocab_size``, ``embedding_dim`` and the
    ``cnn_*`` arguments are consumed; every other constructor argument is
    accepted but not yet used.
    NOTE(review): the remaining arguments are presumably reserved for
    recurrent/attention components still to be added; confirm before
    relying on them.
    """

    def __init__(self, D_m, D_g, D_p, D_e, D_h,
                 vocab_size, n_classes=7, embedding_dim=300,
                 cnn_output_size=100, cnn_filters=50, cnn_kernel_sizes=(3,4,5), cnn_dropout=0.5,
                 listener_state=False, context_attention='simple', D_a=100, dropout_rec=0.5,
                 dropout=0.5, att2=True) -> None:
        super().__init__()
        # Keyword arguments make the mapping onto the encoder's constructor
        # explicit ("ouput_size" is that constructor's own spelling).
        self.cnn_feat_extractor = CNNFeatureExtractor(
            vocab_size=vocab_size,
            embedding_dim=embedding_dim,
            ouput_size=cnn_output_size,
            filters=cnn_filters,
            kernel_sizes=cnn_kernel_sizes,
            dropout=cnn_dropout,
        )
        

In [4]:
# Read the embedding matrix from disk; the context manager closes the file
# handle once the array has been loaded.
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on files from a trusted source.
with open('../data/dailydialog/glv_embedding_matrix', 'rb') as glv_file:
    glv_pretrained = np.load(glv_file, allow_pickle=True)
# Derive the model's vocabulary size and embedding width from the matrix.
vocab_size, embedding_dim = glv_pretrained.shape
glv_pretrained.shape

(18726, 100)

In [8]:
# Load the pickled tokenizer object; the context manager closes the file
# handle after reading.
# NOTE(security): allow_pickle=True unpickles arbitrary objects; trusted
# files only.
# NOTE(review): the variable name is misspelled ("tokenzier") but is kept
# unchanged because other cells may reference it.
with open("../data/dailydialog/tokenizer.pkl", "rb") as tokenizer_file:
    tokenzier = np.load(tokenizer_file, allow_pickle=True)