In [None]:
import random
import numpy as np
import torch
import re
import glob
import io
import os
import time

import matplotlib.pyplot as plt
import pandas as pd 
import mpld3
from tqdm import tqdm

from transformers import BertTokenizer, BertModel

from torchtext import data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext.vocab import Vectors

%matplotlib inline
mpld3.enable_notebook()

In [None]:
# Seed applied to `random`, NumPy and PyTorch below (and re-applied in later cells).
SEED = 77
BATCH_SIZE = 64
# Maximum number of tokens kept per document by tokenize_and_cut.
MAX_SEQUENCE = 1024
MAX_VOCAB_SIZE = 25000
# A single output logit: the visible runs train one binary classifier per diagnosis.
OUTPUT_DIM = 1
# Embedding width of the randomly initialised CNN baseline.
EMBEDDING_DIM = 100
# Number of convolution filters per filter size, and the filter heights (in tokens).
N_FILTERS = 100
FILTER_SIZES = [2,3,4,5]
DROPOUT = 0.3
N_EPOCHS = 500
# Fraction of the training file kept for training; the rest becomes the validation split.
TRAIN_RATIO = 0.8
# Presumably one positive-class weight per diagnosis type (five values); in the visible
# cells it is only referenced from a commented-out `pos_weight` argument.
POS_WEIGHT = torch.tensor([1, 7, 8, 4, 9])
# Keys under which the averaged scores are stored in the metric dictionaries.
MICRO = 'micro'
MACRO = 'macro'
# Folder used for model checkpoints and as the cache directory for pre-trained vectors.
DATA_FOLDER="CNN-Bin"

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Use the GPU when one is available; the bare expression displays the choice.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

# Utilities

In [None]:
def tokenize_and_cut(sentence):
    """Tokenise a ``<sep>``-delimited document into one flat, length-capped token list.

    The text is split on runs of ``<sep>`` markers, every sentence containing
    ``<unk>`` is discarded, each remaining sentence is tokenised with the
    module-level ``tokenize`` callable, and the sentences are re-joined with a
    ``[SEP]`` token between them. At most ``MAX_SEQUENCE`` tokens are returned.
    """
    pieces = re.split(r'\s*<sep>(?:\s*<sep>)*\s*', sentence)
    kept = [piece for piece in pieces if '<unk>' not in piece]
    # Join tokens back into text so that the final split also yields the [SEP] markers.
    flattened = ' [SEP] '.join(' '.join(tokenize(piece)) for piece in kept)
    return flattened.split()[:MAX_SEQUENCE]

In [None]:
def count_parameters(model):
    """Return the total number of elements in the parameters of ``model`` that require gradients."""
    trainable_total = 0
    for parameter in model.parameters():
        if parameter.requires_grad:
            trainable_total += parameter.numel()
    return trainable_total

In [None]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds.

    Returns:
        A ``(minutes, seconds)`` tuple of ints, both truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [None]:
def analysis_plotter(fig, ax, train, valid, title, param_dict1, param_dict2):
    """Plot a training and a validation curve on ``ax`` and mark rises in the validation curve.

    Every point of ``valid`` that is larger than its predecessor is drawn as a
    scatter marker and gets an mpld3 tooltip labelled ``"<index - 1>: <value>"``.

    Args:
        fig: figure the tooltip plugin is connected to.
        ax: axes to draw on.
        train, valid: sequences of per-epoch values.
        title: axes title.
        param_dict1, param_dict2: keyword arguments forwarded to ``ax.plot`` for
            the training and validation curve respectively.
    """
    ax.plot(train, **param_dict1)
    ax.plot(valid, **param_dict2)
    ax.title.set_text(title)
    ax.legend()

    # Collect (index, value) pairs where the validation curve went up.
    rises = []
    previous = float('inf')
    for position, current in enumerate(valid):
        if current > previous:
            rises.append((position, current))
        previous = current

    markers = ax.scatter([position for position, _ in rises],
                         [value for _, value in rises])
    captions = [f'{position-1}: {value}' for position, value in rises]
    mpld3.plugins.connect(fig, mpld3.plugins.PointLabelTooltip(markers, labels=captions))

In [None]:
def f_measure(predictions, labels):
    """Compute per-class, micro- and macro-averaged precision/recall/F1 for one batch.

    Args:
        predictions: 2-D tensor of raw logits indexed ``[example, class]``; a
            sigmoid followed by rounding turns them into 0/1 decisions.
        labels: 2-D tensor of 0/1 targets with the same indexing.

    Returns:
        A ``(diagnoses, predicts)`` tuple. ``diagnoses`` maps ``'micro'``,
        ``'macro'`` and every class index that produced at least one
        tp/fp/fn to a dict holding the available counts (``'tp'``, ``'fp'``,
        ``'fn'``) and the scores ``'p'``, ``'r'``, ``'f'``. When no class was
        recorded the macro scores are the string ``"n/a"``. ``predicts`` is the
        rounded prediction matrix as nested lists.
    """
    MICRO = 'micro'
    MACRO = 'macro'

    rounded_preds = torch.round(torch.sigmoid(predictions))
    # Convert once to plain Python lists instead of calling .item() per element.
    predicts = rounded_preds.detach().tolist()
    label_rows = labels.detach().tolist()

    diagnoses = {MICRO: {}}
    for row_idx, pred_row in enumerate(predicts):
        for class_id, pred in enumerate(pred_row):
            if pred == 1:
                kind = 'tp' if label_rows[row_idx][class_id] == pred else 'fp'
            elif pred == 0 and label_rows[row_idx][class_id] == 1:
                kind = 'fn'
            else:
                # True negatives (and non-finite predictions) are not counted here.
                continue
            per_class = diagnoses.setdefault(class_id, {})
            per_class[kind] = per_class.get(kind, 0) + 1
            diagnoses[MICRO][kind] = diagnoses[MICRO].get(kind, 0) + 1

    diagnoses[MACRO] = {}
    class_ids = [key for key in diagnoses if key not in (MICRO, MACRO)]
    macro_sums = {'p': 0.0, 'r': 0.0, 'f': 0.0}
    for key in [MICRO] + class_ids:
        counts = diagnoses[key]
        tp = counts.get('tp', 0)
        predicted_pos = tp + counts.get('fp', 0)
        actual_pos = tp + counts.get('fn', 0)
        # An undefined ratio (empty denominator) is reported as 0.0.
        counts['p'] = tp / predicted_pos if predicted_pos else 0.0
        counts['r'] = tp / actual_pos if actual_pos else 0.0
        if counts['p'] and counts['r']:
            counts['f'] = 2 / (1 / counts['p'] + 1 / counts['r'])
        else:
            counts['f'] = 0.0
        if key != MICRO:
            for metric in macro_sums:
                macro_sums[metric] += counts[metric]

    for metric in ('f', 'p', 'r'):
        if class_ids:
            diagnoses[MACRO][metric] = macro_sums[metric] / float(len(class_ids))
        else:
            diagnoses[MACRO][metric] = "n/a"
    return diagnoses, predicts

In [None]:
class AverageMeter(object):
    """Keeps the most recent value together with a running weighted sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
def update_fscores(new, overall):
    """Fold the counts of one batch into running totals and recompute all scores.

    Args:
        new: metric dictionary for one batch, as returned by ``f_measure``
            (keys ``'micro'``, ``'macro'`` and class indices).
        overall: running dictionary of the same layout; ``{}`` for the first
            batch. It is updated in place.

    Returns:
        ``overall``, with per-class and micro tp/fp/fn totals updated and the
        ``'p'``/``'r'``/``'f'`` scores (including the macro average) recomputed
        from those totals. Macro scores are ``"n/a"`` when no class is present.
    """
    MICRO = 'micro'
    MACRO = 'macro'

    micro = overall.setdefault(MICRO, {})
    for key, batch_counts in new.items():
        # 'micro' and 'macro' are aggregates, not classes: adding them as well
        # would count every hit several times in the micro totals.
        if key in (MICRO, MACRO):
            continue
        per_class = overall.setdefault(key, {})
        for kind in ('tp', 'fp', 'fn'):
            delta = batch_counts.get(kind, 0)
            per_class[kind] = per_class.get(kind, 0) + delta
            micro[kind] = micro.get(kind, 0) + delta

    overall[MACRO] = {}
    class_ids = [key for key in overall if key not in (MICRO, MACRO)]
    macro_sums = {'p': 0.0, 'r': 0.0, 'f': 0.0}
    for key in [MICRO] + class_ids:
        counts = overall[key]
        tp = counts.get('tp', 0)
        predicted_pos = tp + counts.get('fp', 0)
        actual_pos = tp + counts.get('fn', 0)
        # An undefined ratio (empty denominator) is reported as 0.0.
        counts['p'] = tp / predicted_pos if predicted_pos else 0.0
        counts['r'] = tp / actual_pos if actual_pos else 0.0
        if counts['p'] and counts['r']:
            counts['f'] = 2 / (1 / counts['p'] + 1 / counts['r'])
        else:
            counts['f'] = 0.0
        if key != MICRO:
            for metric in macro_sums:
                macro_sums[metric] += counts[metric]

    for metric in ('f', 'p', 'r'):
        if class_ids:
            overall[MACRO][metric] = macro_sums[metric] / float(len(class_ids))
        else:
            overall[MACRO][metric] = "n/a"
    return overall

In [None]:
def train(model, iterator, optimizer, criterion, model_type, label):
    """Run one optimisation pass over ``iterator``.

    Args:
        model: network to train; it is switched to training mode.
        iterator: iterable of batches exposing ``all_text`` (or ``bh_text`` and
            ``ep_text``) plus an attribute named ``label``.
        optimizer: optimiser stepped once per batch.
        criterion: loss called as ``criterion(predictions, labels)``.
        model_type: ``0`` feeds ``batch.all_text`` to the model; any other value
            feeds ``batch.bh_text`` and ``batch.ep_text``.
        label: name of the batch attribute holding the targets.

    Returns:
        ``(mean batch loss, micro F-score)``, where the F-score is computed
        from the counts pooled over all batches of the epoch.
    """
    model.train()
    epoch_loss = 0
    fscores = {}
    for batch in iterator:
        optimizer.zero_grad()

        if model_type == 0:
            predictions = model(batch.all_text)
        else:
            predictions = model(batch.bh_text, batch.ep_text)

        # Targets get a trailing dimension so they line up with the (batch, 1) logits.
        labels = getattr(batch, label).unsqueeze(1)

        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        # Metrics only need the values, so keep them out of the autograd graph.
        batch_scores, _ = f_measure(predictions.detach(), labels)
        fscores = update_fscores(batch_scores, fscores)

    return epoch_loss / len(iterator), fscores['micro']["f"]

In [None]:
def evaluate(model, iterator, criterion, model_type, label):
    """Compute loss and micro F-score of ``model`` on ``iterator`` without updating it.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        iterator: iterable of batches exposing ``all_text`` (or ``bh_text`` and
            ``ep_text``) plus an attribute named ``label``.
        criterion: loss called as ``criterion(predictions, labels)``.
        model_type: ``0`` feeds ``batch.all_text`` to the model; any other value
            feeds ``batch.bh_text`` and ``batch.ep_text``.
        label: name of the batch attribute holding the targets.

    Returns:
        ``(mean batch loss, micro F-score)``. As in ``train``, the F-score is
        derived from the tp/fp/fn counts pooled over all batches, not from an
        average of per-batch F-scores (which would over-weight small batches).
    """
    model.eval()
    epoch_loss = 0
    fscores = {}

    with torch.no_grad():
        for batch in iterator:
            if model_type == 0:
                predictions = model(batch.all_text)
            else:
                predictions = model(batch.bh_text, batch.ep_text)

            # Targets get a trailing dimension so they line up with the (batch, 1) logits.
            labels = getattr(batch, label).unsqueeze(1)

            loss = criterion(predictions, labels)
            epoch_loss += loss.item()

            batch_scores, _ = f_measure(predictions, labels)
            fscores = update_fscores(batch_scores, fscores)

    return epoch_loss / len(iterator), fscores['micro']["f"]

In [None]:
def train_epoch(epoches, model, train_iterator, optimizer, criterion, model_type, model_name, label, 
                valid_iterator = None, interval = 50, early_stop = True, period = 30, gap = 0.005, threshold = 0.5):
    """Train for up to ``epoches`` epochs with validation-based checkpointing and early stopping.

    Args:
        epoches: maximum number of epochs.
        model, optimizer, criterion, model_type, label: forwarded to ``train``
            and ``evaluate``.
        train_iterator: batches used for optimisation.
        model_name: checkpoint file stem; ``<model_name>_loss.pt`` and
            ``<model_name>_fscore.pt`` are written to ``DATA_FOLDER``.
        valid_iterator: optional validation batches. Without them no
            checkpoint is written and early stopping is disabled, because both
            are driven by validation metrics.
        interval: progress is printed every ``interval`` epochs and after the
            last epoch.
        early_stop: enable early stopping.
        period: number of "bad" epochs tolerated since the last best F-score.
        gap: an epoch is "bad" when its validation F-score is more than ``gap``
            below the best one.
        threshold: early stopping only starts counting once the best
            validation F-score exceeds this value.

    Returns:
        ``(train_losses, valid_losses, train_accs, valid_accs)``: per-epoch
        histories; the validation lists stay empty without ``valid_iterator``.
    """
    best_valid_loss = float('inf')
    best_valid_fscore = 0
    train_losses, valid_losses = [], []
    train_accs, valid_accs = [], []
    observed_time = 0
    has_valid = bool(valid_iterator)

    for epoch in range(epoches):
        start_time = time.time()

        train_loss, train_acc = train(model, train_iterator, optimizer, criterion, model_type, label)
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        epoch_mins, epoch_secs = epoch_time(start_time, time.time())

        if has_valid:
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, model_type, label)
            valid_losses.append(valid_loss)
            valid_accs.append(valid_acc)

        if (epoch + 1) % interval == 0 or epoch == epoches - 1:
            print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train micro-F-score: {train_acc*100:.2f}%')
            if has_valid:
                print(f'\t Val. Loss: {valid_loss:.3f} |  Val. micro-F-score: {valid_acc*100:.2f}%')

        if not has_valid:
            # Everything below depends on validation metrics that do not exist.
            continue

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), os.path.join(DATA_FOLDER, model_name + '_loss.pt'))

        if early_stop and best_valid_fscore > threshold and best_valid_fscore - valid_acc > gap:
            observed_time += 1
            print(f'\rBest validation F-measure: {best_valid_fscore:.3f}/Current F-measure: {valid_acc:.3f} [Times: {observed_time}/{period}]')  
            if observed_time >= period:
                print(f'Early stop at epoch {epoch+1:02}.')
                break

        if valid_acc > best_valid_fscore:
            best_valid_fscore = valid_acc
            torch.save(model.state_dict(), os.path.join(DATA_FOLDER, model_name + '_fscore.pt'))
            # A new best F-score restarts the early-stopping counter.
            observed_time = 0
    return train_losses, valid_losses, train_accs, valid_accs

In [None]:
def test(model, iterator, model_type, label, model_name = None):    
    epoch_loss = 0
    epoch_acc = 0
    if model_name:
        model.load_state_dict(torch.load(os.path.join(DATA_FOLDER, model_name + '.pt')))

    model.eval()
    
    diagnoses = {}
    predicts = []
    MICRO = 'micro'
    MACRO = 'macro'
    diagnoses[MICRO] = {}
    with torch.no_grad():                    
        for batch in iterator:
            if model_type == 0:
                predictions = model(batch.all_text)
            else:
                predictions = model(batch.bh_text, batch.ep_text)
            rounded_preds = torch.round(torch.sigmoid(predictions))
            predicts.extend(rounded_preds.data.tolist())
            labels = getattr(batch, label).unsqueeze(1)
            
            for index, value in enumerate(rounded_preds):
                for did, dvalue in enumerate(rounded_preds[index]):
                    v = dvalue.item()                    
                    if v == 1:
                        if dvalue == labels[index, did]:
                            if did not in diagnoses:
                                diagnoses[did] = {}                                
                            diagnoses[did]['tp'] = diagnoses[did].get('tp', 0) + 1
                            diagnoses[MICRO]['tp'] = diagnoses[MICRO].get('tp', 0) + 1 
                        else:
                            if did not in diagnoses:
                                diagnoses[did] = {}
                            diagnoses[did]['fp'] = diagnoses[did].get('fp', 0) + 1
                            diagnoses[MICRO]['fp'] = diagnoses[MICRO].get('fp', 0) + 1
                    elif v == 0:
                        if 1 == labels[index, did].item():
                            if did not in diagnoses:
                                diagnoses[did] = {}
                            diagnoses[did]['fn'] = diagnoses[did].get('fn', 0) + 1
                            diagnoses[MICRO]['fn'] = diagnoses[MICRO].get('fn', 0) + 1
                        else:
                            if did not in diagnoses:
                                diagnoses[did] = {}
                            diagnoses[did]['tn'] = diagnoses[did].get('tn', 0) + 1
                            diagnoses[MICRO]['tn'] = diagnoses[MICRO].get('tn', 0) + 1
    diagnoses[MACRO] = {}
    for d in diagnoses:        
        if d is MACRO:
            continue
        try:
            diagnoses[d]['p']=diagnoses[d].get('tp', 0)/(diagnoses[d].get('tp', 0)+diagnoses[d].get('fp', 0))
            if d is not MICRO:
                diagnoses[MACRO]['p']=diagnoses[MACRO].get('p', 0.0)+diagnoses[d]['p']                
        except:            
            diagnoses[d]['p']=0.0
            
        try:
            diagnoses[d]['r']=diagnoses[d].get('tp', 0)/(diagnoses[d].get('tp', 0)+diagnoses[d].get('fn', 0))
            if d is not MICRO:
                diagnoses[MACRO]['r']=diagnoses[MACRO].get('r', 0.0)+diagnoses[d]['r']
        except:
            diagnoses[d]['r']=0.0
        
        try:
            diagnoses[d]['f']=2/(1/diagnoses[d]['p']+1/diagnoses[d]['r'])
            if d is not MICRO:
                diagnoses[MACRO]['f']=diagnoses[MACRO].get('f', 0.0)+diagnoses[d]['f']
        except:
            diagnoses[d]['f']=0.0
    diagnoses[MACRO]['f']=diagnoses[MACRO].get('f', 0.0)/float(len(diagnoses)-2)
    diagnoses[MACRO]['p']=diagnoses[MACRO].get('p', 0.0)/float(len(diagnoses)-2)
    diagnoses[MACRO]['r']=diagnoses[MACRO].get('r', 0.0)/float(len(diagnoses)-2)
    return diagnoses, predicts

In [None]:
def bert_tokenize_and_cut(sentence):
    """Tokenise a ``<sep>``-delimited document with the module-level BERT ``tokenizer``.

    The text is split on runs of ``<sep>`` markers and sentences containing
    ``<unk>`` are dropped. Each remaining sentence is cut to its first
    ``BERT_MAX_SEQUENCE - 2`` characters before tokenisation, and the token
    sequences are joined into one flat list with ``[SEP]`` between sentences.
    """
    pieces = re.split(r'\s*<sep>(?:\s*<sep>)*\s*', sentence)
    kept = [piece for piece in pieces if '<unk>' not in piece]
    per_sentence = [tokenizer.tokenize(piece[:BERT_MAX_SEQUENCE-2]) for piece in kept]
    # Join tokens back into text so that the final split also yields the [SEP] markers.
    flattened = ' [SEP] '.join(' '.join(sentence_tokens) for sentence_tokens in per_sentence)
    return flattened.split()

def my_convert_tokens_to_ids(sents_tokens):
    """Convert a flat ``[SEP]``-delimited token list into one capped list of vocabulary ids.

    The tokens are regrouped into sentences at ``[SEP]`` markers (matched
    case-insensitively), each non-empty sentence is wrapped in ``[CLS]`` ...
    ``[SEP]`` and mapped to ids with the module-level ``tokenizer``. Ids are
    concatenated until ``MAX_SEQUENCE - 1`` of them are collected, and
    ``BERT_EOS_IDX`` is appended as the final id.
    """
    text = " ".join(sents_tokens)
    sentences = re.split(r'(?i)\s*\[sep\](?:\s*\[sep\])*\s*', text)

    wrapped = []
    for sentence in sentences:
        candidate = ('[CLS] '+sentence+' [SEP]').split()
        # More than the two markers means the sentence has real content.
        if len(candidate) > 2:
            wrapped.append(candidate)

    id_lists = [tokenizer.convert_tokens_to_ids(candidate) for candidate in wrapped]

    budget = MAX_SEQUENCE - 1  # one slot is reserved for the closing id
    flat_ids = []
    for id_list in id_lists:
        flat_ids.extend(id_list[:budget - len(flat_ids)])
    flat_ids.append(BERT_EOS_IDX)
    return flat_ids

class BERTCNNBaseline(nn.Module):
    """CNN text classifier that uses hidden states of a supplied BERT model as token features.

    Relies on module-level names that are not defined in this class:
    ``bert_cache``, ``BERT_MAX_SEQUENCE``, ``MAX_SEQUENCE`` and ``device``.
    """
    def __init__(self, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx, bert, sep_token):
        """Store the BERT encoder and build the convolution, linear and dropout layers.

        Args:
            n_filters: number of output channels of every convolution.
            filter_sizes: iterable of filter heights; one Conv2d is built per entry.
            output_dim: number of outputs of the final linear layer.
            dropout: dropout probability applied to the pooled features.
            pad_idx: id used to pad sentences before they are fed to BERT.
            bert: encoder exposing ``config.to_dict()['hidden_size']``; it is put
                in evaluation mode here.
            sep_token: id that marks the end of a sentence in the input rows.
        """

        super().__init__()
        self.pad_idx = pad_idx
        self.bert = bert
        self.sep_token = sep_token
        self.bert.eval()
        # embedding() sums the stacked hidden states per token, so the feature
        # width equals BERT's hidden size.
        self.embedding_dim = bert.config.to_dict()['hidden_size']

        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, self.embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)

        self.dropout = nn.Dropout(dropout)
    def create_attention_masks(self, ids):
        """Return a float tensor on ``device`` that is 1.0 where an id is > 0 and 0.0 elsewhere.

        NOTE(review): this treats id 0 as padding regardless of ``self.pad_idx`` — confirm
        the two agree.
        """
        attention_masks = []
        for id in ids:
            id_mask = [float(i>0) for i in id]
            attention_masks.append(id_mask)
        return torch.tensor(attention_masks).to(device)

    def embedding(self, batch):
        """Return BERT-derived features for every row of ``batch``, stacked along dim 0.

        Each row is cut into sentences at ``self.sep_token``; the sentences are
        encoded by ``self.bert`` under ``torch.no_grad()``, the per-token vectors
        of all sentences are concatenated and zero-padded to ``MAX_SEQUENCE``
        rows. Results are memoised in the module-level ``bert_cache``, keyed by
        the row's ids.
        """
        # ID:102 is used to separate sentences
        # [batch size, sent len]
        batch_embeddings = []
        for sents in batch:
            # Cache key: the ids as a space-separated string with every " 0" run removed
            # (presumably the padding — assumes pad id 0; confirm).
            key = ' '.join(str(x) for x in sents.data.tolist())
            key = re.sub(r'(\s+0)+\s*', '', key)
            if key in bert_cache:
                sent_embeddings = bert_cache[key]
            else:
                # Positions of the separator id in this row; ids after the last
                # separator are not used below.
                sep_idxes = (sents == self.sep_token).nonzero().squeeze(1).data.tolist()
                seq_lengths = []
                sents_ids = []
                pv = -1
                for k, v in enumerate(sep_idxes):
                    # Fixed-length row pre-filled with padding; the sentence ids
                    # (including its separator) overwrite the front of it.
                    # NOTE(review): a sentence longer than BERT_MAX_SEQUENCE would make
                    # this list grow and the rows ragged — confirm inputs are bounded.
                    sent_embedding = [self.pad_idx]*BERT_MAX_SEQUENCE
                    if k == 0:
                        seq_lengths.append(v+1)
                        sent_embedding[:v+1] = sents[:v+1].data.tolist()
                    else:
                        seq_lengths.append(v-pv)
                        sent_embedding[:v-pv] = sents[pv+1:v+1].data.tolist()
                    sents_ids.append(sent_embedding)
                    pv = v
                attention_masks = self.create_attention_masks(sents_ids)
                sents_ids = torch.tensor(sents_ids).to(device)
                sent_embeddings = []
                with torch.no_grad():
                    # Expects the encoder call to return a 3-tuple whose last element is a
                    # sequence of hidden-state tensors — presumably BERT was configured to
                    # output hidden states; confirm where `bert` is created.
                    last_hidden_state, _, hidden_states = self.bert(sents_ids, attention_masks)
                    # Stack every hidden state except the last entry, then move the
                    # stacking axis to position 2 (see the axis note further down).
                    token_embeddings = torch.stack(hidden_states[:-1], dim=0)
                    token_embeddings = token_embeddings.permute(1, 2, 0, 3)
                    for id, tks in enumerate(token_embeddings):
                        token_vecs = []
                        # Only the real (unpadded) positions of the sentence are kept.
                        for i in range(seq_lengths[id]):
                            #cat_vec = torch.cat((tks[i][-1], tks[i][-2], tks[i][-3], tks[i][-4]), dim =0)
                            # Sum the stacked hidden states of token i into one vector.
                            sum_all_vec = torch.sum(tks[i][:], dim =0)
                            token_vecs.append(sum_all_vec)
                            #token_vecs.append(cat_vec)
                        token_vecs=torch.stack(token_vecs, 0)
                        sent_embeddings.append(token_vecs)
                    sent_embeddings = torch.cat(sent_embeddings, 0)
                    # Zero-pad to MAX_SEQUENCE rows.
                    # NOTE(review): more than MAX_SEQUENCE rows would request a negative
                    # size here — confirm inputs are capped upstream.
                    if sent_embeddings.shape[0] != MAX_SEQUENCE:
                        sent_embeddings = torch.cat((sent_embeddings, \
                                torch.zeros(MAX_SEQUENCE - sent_embeddings.shape[0], self.embedding_dim).to(device)), 0)
                    # # sentences, # words, # layers, # features
                bert_cache[key] = sent_embeddings
            batch_embeddings.append(sent_embeddings.to(device))
        batch_embeddings = torch.stack(batch_embeddings, 0)
        return batch_embeddings

    def forward(self, text):
        """Embed ``text`` with BERT, apply every convolution with max-over-time pooling and classify."""

        embedded = self.embedding(text)

        # Add a channel dimension for Conv2d.
        embedded = embedded.unsqueeze(1)

        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]

        # Max-pool each feature map over its whole remaining length.
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]

        cat = self.dropout(torch.cat(pooled, dim = 1))

        return self.fc(cat)

# Model Definition

In [None]:
class CNNBaseline(nn.Module):
    """Convolutional text classifier over a trainable embedding table.

    One convolution per filter size slides over the embedded tokens; each
    feature map is max-pooled over its whole length, the pooled features are
    concatenated, passed through dropout and projected to ``output_dim`` logits.
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)

        # Each kernel spans `height` tokens and the full embedding width.
        conv_layers = []
        for height in filter_sizes:
            conv_layers.append(nn.Conv2d(in_channels = 1,
                                         out_channels = n_filters,
                                         kernel_size = (height, embedding_dim)))
        self.convs = nn.ModuleList(conv_layers)

        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Map a batch of token-id rows to logits, one row of ``output_dim`` values per example."""
        # Add a channel dimension for Conv2d.
        feature_input = self.embedding(text).unsqueeze(1)

        pooled = []
        for conv in self.convs:
            activated = F.relu(conv(feature_input)).squeeze(3)
            # Max over the whole remaining length of the feature map.
            pooled.append(F.max_pool1d(activated, activated.shape[2]).squeeze(2))

        return self.fc(self.dropout(torch.cat(pooled, dim = 1)))

# Dataset Definition

In [None]:
class NTUHDataset(data.Dataset):
    """Dataset of tab-separated clinical notes with five binary diagnosis labels.

    Every line of the input file holds eight tab-separated columns: patient id,
    two text columns (``bh_text`` and ``ep_text``) and one label per entry of
    ``diagnosis_types``. An ``all_text`` column is derived by joining the two
    texts with ``<sep>``.
    """
    #urls = ['Datasets\\NTUH\\corpus.txt']
    name = 'ntuh'
    dirname = 'ntuh'
    # Field names of the label columns, in file order. The spelling 'biploar'
    # is the actual attribute name on examples and batches, so it must stay.
    diagnosis_types = ['major_depressive', 'schizophrenia', 'biploar', 'minor_depressive', 'dementia']

    @staticmethod
    def sort_key(ex):
        """Sort examples by the token count of their combined text."""
        return len(ex.all_text) # TODO add ep_text?

    def __init__(self, path, bh_text_field, ep_text_field, all_text_field,
                 major_label_field, sch_label_field, bipolar_label_field, minor_label_field, dementia_label_field,
                 **kwargs):
        """Read every file matching ``path + '.txt'`` and build one example per non-blank line.

        Args:
            path: file path without the ``.txt`` extension (glob patterns allowed).
            bh_text_field, ep_text_field, all_text_field: fields for the two raw
                text columns and for their ``<sep>``-joined combination.
            major_label_field, sch_label_field, bipolar_label_field,
            minor_label_field, dementia_label_field: fields for the five labels.
            **kwargs: forwarded to ``data.Dataset``.

        Raises:
            ValueError: if a non-blank line does not have exactly eight columns.
        """
        fields = [('patient_id', None), 
                  ('bh_text', bh_text_field),
                  ('ep_text', ep_text_field),
                  ('all_text', all_text_field),
                  ('major_depressive', major_label_field),
                  ('schizophrenia', sch_label_field),
                  ('biploar', bipolar_label_field),
                  ('minor_depressive', minor_label_field),
                  ('dementia', dementia_label_field)]
        examples = []

        for fname in glob.iglob(path + '.txt'):
            with io.open(fname, 'r', encoding="utf-8") as f:
                for line in f:
                    # Drop the line terminator so that the last label is '0'/'1'
                    # rather than '0\n'/'1\n', and tolerate blank lines.
                    line = line.rstrip('\r\n')
                    if not line.strip():
                        continue
                    pid, bh_text, ep_text, major_d, sc, bp, minor_d, de = line.split('\t')
                    all_text = "%s <sep> %s" % (bh_text, ep_text)
                    examples.append(data.Example.fromlist([pid, bh_text, ep_text, all_text, major_d, sc, bp, minor_d, de], 
                                                          fields))
        super(NTUHDataset, self).__init__(examples, fields, **kwargs)

    @classmethod
    def splits(cls, 
               bh_text_field, ep_text_field, all_text_field,
               major_label_field, sch_label_field, bipolar_label_field, minor_label_field, dementia_label_field,
               root=os.path.join('..', 'Datasets', 'NTUH'),
               train='train_preprocessing', test='test_preprocessing', **kwargs):
        """Create the train and test datasets from files located under ``root``.

        The default ``root`` is built with ``os.path.join`` so that it resolves
        to the same relative folder on Windows and on POSIX systems. No
        validation file is requested here.
        """
        return super(NTUHDataset, cls).splits(
            path = root, root=root, 
            bh_text_field = bh_text_field, ep_text_field = ep_text_field, all_text_field = all_text_field, 
            major_label_field = major_label_field, sch_label_field = sch_label_field, 
            bipolar_label_field = bipolar_label_field, minor_label_field = minor_label_field, 
            dementia_label_field = dementia_label_field,
            train=train, validation=None, test=test, **kwargs)

# Pre-process

In [None]:
# Whitespace tokenizer that tokenize_and_cut applies to every sentence.
tokenize = str.split

In [None]:
# Text fields for the two raw text columns and their combination; all are
# lower-cased, batch-first and tokenised with tokenize_and_cut.
BH_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
EP_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
ALL_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)

# One float label field per diagnosis column.
MAJ_LABEL = data.LabelField(dtype = torch.float)
SCH_LABEL = data.LabelField(dtype = torch.float)
BIP_LABEL = data.LabelField(dtype = torch.float)
MIN_LABEL = data.LabelField(dtype = torch.float)
DEM_LABEL = data.LabelField(dtype = torch.float)

In [None]:
# Load the train and test files, then carve a validation split out of the training data.
full_train_data, test_data = NTUHDataset.splits(BH_TEXT, EP_TEXT, ALL_TEXT, 
                                           MAJ_LABEL, SCH_LABEL, BIP_LABEL, MIN_LABEL, DEM_LABEL)

# random.seed() returns None, so seed first and hand the resulting generator
# state to split() explicitly; this yields the same shuffle as before.
random.seed(SEED)
train_data, valid_data = full_train_data.split(random_state = random.getstate(), split_ratio = TRAIN_RATIO)

In [None]:
# Build the token vocabularies from the training split only, capped at MAX_VOCAB_SIZE.
BH_TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
EP_TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
ALL_TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)

# Label vocabularies, also built from the training split.
MAJ_LABEL.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
SCH_LABEL.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
BIP_LABEL.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
MIN_LABEL.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
DEM_LABEL.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)

In [None]:
# Pin the mapping of the major-depressive label so that '0' -> 0 and '1' -> 1,
# overriding whatever order build_vocab produced. Only this label is pinned here.
MAJ_LABEL.vocab.itos = ['0', '1']
MAJ_LABEL.vocab.stoi['1'] = 1
MAJ_LABEL.vocab.stoi['0'] = 0

# Create Iterator

In [None]:
# Wrap the three splits in BucketIterators that yield BATCH_SIZE-example batches on `device`.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE, 
    device = device)

# Initialize Our Model

In [None]:
# Vocabulary size and the indices of the special tokens in the ALL_TEXT vocabulary.
INPUT_DIM = len(ALL_TEXT.vocab)
UNK_IDX = ALL_TEXT.vocab.stoi[ALL_TEXT.unk_token]
PAD_IDX = ALL_TEXT.vocab.stoi[ALL_TEXT.pad_token]
# tokenize_and_cut inserts '[SEP]'; the field is declared with lower = True, hence '[sep]'.
SEP_IDX = ALL_TEXT.vocab.stoi['[sep]']

print("Input dimension: %s\nUnknown word index: %s\nPadding index: %s\nSeperator index: %s" % \
      (INPUT_DIM, UNK_IDX, PAD_IDX, SEP_IDX))

In [None]:
# Re-seed all generators right before the model is constructed.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Randomly initialised CNN baseline; the bare expression displays its layers.
model = CNNBaseline(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
model

In [None]:
# Start the unknown, padding and separator tokens from all-zero embedding rows.
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[SEP_IDX] = torch.zeros(EMBEDDING_DIM)

# Train the Model

In [None]:
# Adam with default settings and a binary cross-entropy loss that takes raw logits.
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
# Move the model and the loss module to the selected device.
model = model.to(device)
criterion = criterion.to(device)

In [None]:
# Train on the first diagnosis type with model_type 0 (the model is fed batch.all_text).
# Checkpoints are written under DATA_FOLDER as 'rand_0_loss.pt' and 'rand_0_fscore.pt'.
train_losses, valid_losses, train_accs, valid_accs = \
    train_epoch(N_EPOCHS, model, train_iterator, optimizer, criterion, 0, 
                'rand_0', NTUHDataset.diagnosis_types[0], valid_iterator)

# Result Analysis

In [None]:
# Two stacked panels: loss curves on top, F-score curves below.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, train_losses, valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, train_accs, valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

In [None]:
# Report test scores twice: first for the weights currently held by `model`,
# then for the checkpoint with the best validation F-score.
for checkpoint in (None, 'rand_0_fscore'):
    test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[0], checkpoint)

    for f in test_f_scores:
        # Compare keys by value; identity (`is`) on strings only works by accident of interning.
        if f in (MICRO, MACRO):
            heading = f'{f}-average'
        else:
            heading = f'{NTUHDataset.diagnosis_types[0]}'
        print(f'{heading}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

## Word2vec

In [None]:
# Pre-trained word2vec vectors, looked up in / cached under DATA_FOLDER.
vectors = Vectors(name='word2vec_skipgram_model.bin', cache=DATA_FOLDER)
WV_EMBEDDING_DIM = vectors.vectors.shape[1]

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Separate fields for this experiment so its vocabularies are independent of the baseline's.
WV_ALL_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
WV_BH_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
WV_EP_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)

WV_MAJ_LABEL = data.LabelField(dtype = torch.float)
WV_SCH_LABEL = data.LabelField(dtype = torch.float)
WV_BIP_LABEL = data.LabelField(dtype = torch.float)
WV_MIN_LABEL = data.LabelField(dtype = torch.float)
WV_DEM_LABEL = data.LabelField(dtype = torch.float)

full_wv_train_data, wv_test_data = NTUHDataset.splits(WV_BH_TEXT, WV_EP_TEXT, WV_ALL_TEXT, 
                                           WV_MAJ_LABEL, WV_SCH_LABEL, WV_BIP_LABEL, 
                                                            WV_MIN_LABEL, WV_DEM_LABEL)

# random.seed() returns None, so seed first and hand the resulting generator
# state to split() explicitly; this yields the same shuffle as before.
random.seed(SEED)
wv_train_data, wv_valid_data = full_wv_train_data.split(random_state = random.getstate(), 
                                                                split_ratio = TRAIN_RATIO)

# Vocabularies are built from the training split only.
WV_ALL_TEXT.build_vocab(wv_train_data, 
                 max_size = MAX_VOCAB_SIZE)

WV_BH_TEXT.build_vocab(wv_train_data, 
                 max_size = MAX_VOCAB_SIZE)

WV_EP_TEXT.build_vocab(wv_train_data, 
                 max_size = MAX_VOCAB_SIZE)

WV_MAJ_LABEL.build_vocab(wv_train_data)
WV_SCH_LABEL.build_vocab(wv_train_data)
WV_BIP_LABEL.build_vocab(wv_train_data)
WV_MIN_LABEL.build_vocab(wv_train_data)
WV_DEM_LABEL.build_vocab(wv_train_data)

# Pin the major-depressive label mapping so that '0' -> 0 and '1' -> 1.
WV_MAJ_LABEL.vocab.itos = ['0', '1']
WV_MAJ_LABEL.vocab.stoi['1'] = 1
WV_MAJ_LABEL.vocab.stoi['0'] = 0

WV_ALL_INPUT_DIM = len(WV_ALL_TEXT.vocab)
WV_ALL_UNK_IDX = WV_ALL_TEXT.vocab.stoi[WV_ALL_TEXT.unk_token]
WV_ALL_PAD_IDX = WV_ALL_TEXT.vocab.stoi[WV_ALL_TEXT.pad_token]
# The fields lower-case their tokens, so the separator is stored as '[sep]'.
WV_ALL_SEP_IDX = WV_ALL_TEXT.vocab.stoi['[sep]']
WV_BH_SEP_IDX = WV_BH_TEXT.vocab.stoi['[sep]']
WV_EP_SEP_IDX = WV_EP_TEXT.vocab.stoi['[sep]']

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

wv_train_iterator, wv_valid_iterator, wv_test_iterator = data.BucketIterator.splits(
    (wv_train_data, wv_valid_data, wv_test_data), 
    batch_size = BATCH_SIZE, 
    device = device)

wv_model = CNNBaseline(WV_ALL_INPUT_DIM, WV_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, WV_ALL_PAD_IDX)

# Copy the pre-trained vector of every vocabulary token that word2vec knows;
# the remaining rows keep their random initialisation.
with torch.no_grad():
    for s in WV_ALL_TEXT.vocab.stoi:
        if s in vectors.stoi:
            wv_model.embedding.weight[WV_ALL_TEXT.vocab.stoi[s]].copy_(vectors.vectors[vectors.stoi[s]])
# The special tokens always start from zero vectors.
wv_model.embedding.weight.data[WV_ALL_UNK_IDX] = torch.zeros(WV_EMBEDDING_DIM)
wv_model.embedding.weight.data[WV_ALL_SEP_IDX] = torch.zeros(WV_EMBEDDING_DIM)
wv_model.embedding.weight.data[WV_ALL_PAD_IDX] = torch.zeros(WV_EMBEDDING_DIM)

wv_optimizer = optim.Adam([param for param in wv_model.parameters() if param.requires_grad])
wv_criterion = nn.BCEWithLogitsLoss()#pos_weight = POS_WEIGHT)
wv_model = wv_model.to(device)
wv_criterion = wv_criterion.to(device)

# Train on the first diagnosis type; checkpoints are written as 'wv_0_loss.pt' and 'wv_0_fscore.pt'.
wv_train_losses, wv_valid_losses, wv_train_accs, wv_valid_accs = \
     train_epoch(N_EPOCHS, wv_model, wv_train_iterator, wv_optimizer, wv_criterion, 0, 
                 'wv_0', NTUHDataset.diagnosis_types[0], wv_valid_iterator, early_stop = True, period = 30)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, wv_train_losses, wv_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, wv_train_accs, wv_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[0])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        # Uses the section's fixed diagnosis index, like every other report loop in the notebook.
        print(f'{NTUHDataset.diagnosis_types[0]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# NOTE(review): the extra 'wv_0_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[0], 'wv_0_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[0]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

## Glove

In [None]:
# Three text fields (BH / EP / ALL) sharing the same tokenizer and lower-casing.
GLOVE_BH_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
GLOVE_EP_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
GLOVE_ALL_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)

# One float label field per diagnosis.
GLOVE_MAJ_LABEL = data.LabelField(dtype = torch.float)
GLOVE_SCH_LABEL = data.LabelField(dtype = torch.float)
GLOVE_BIP_LABEL = data.LabelField(dtype = torch.float)
GLOVE_MIN_LABEL = data.LabelField(dtype = torch.float)
GLOVE_DEM_LABEL = data.LabelField(dtype = torch.float)

full_glove_train_data, glove_test_data = NTUHDataset.splits(GLOVE_BH_TEXT, GLOVE_EP_TEXT, GLOVE_ALL_TEXT,
                                           GLOVE_MAJ_LABEL, GLOVE_SCH_LABEL, GLOVE_BIP_LABEL,
                                                            GLOVE_MIN_LABEL, GLOVE_DEM_LABEL)

# random.seed() returns None, so `random_state = random.seed(SEED)` passed None and
# only worked through the seeding side effect. Seed explicitly and pass the
# resulting generator state instead.
random.seed(SEED)
glove_train_data, glove_valid_data = full_glove_train_data.split(split_ratio = TRAIN_RATIO,
                                                                 random_state = random.getstate())

# Build the three text vocabularies with identical settings (order: ALL, BH, EP).
_glove_vocab_options = dict(max_size = MAX_VOCAB_SIZE,
                            vectors = "glove.6B.300d",
                            unk_init = torch.Tensor.normal_)
for _glove_text_field in (GLOVE_ALL_TEXT, GLOVE_BH_TEXT, GLOVE_EP_TEXT):
    _glove_text_field.build_vocab(glove_train_data, **_glove_vocab_options)

# One label vocabulary per diagnosis, all from the training split.
for _glove_label_field in (GLOVE_MAJ_LABEL, GLOVE_SCH_LABEL, GLOVE_BIP_LABEL,
                           GLOVE_MIN_LABEL, GLOVE_DEM_LABEL):
    _glove_label_field.build_vocab(glove_train_data)

# Pin the major-depressive mapping by hand so that '0' -> 0 and '1' -> 1.
GLOVE_MAJ_LABEL.vocab.itos = ['0', '1']
GLOVE_MAJ_LABEL.vocab.stoi['1'] = 1
GLOVE_MAJ_LABEL.vocab.stoi['0'] = 0

# Re-seed every RNG.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
torch.backends.cudnn.deterministic = True

# Sizes and special-token indices read from the ALL-text vocabulary.
_glove_all_vocab = GLOVE_ALL_TEXT.vocab
GLOVE_INPUT_DIM = len(_glove_all_vocab)
GLOVE_PAD_IDX = _glove_all_vocab.stoi[GLOVE_ALL_TEXT.pad_token]
GLOVE_UNK_IDX = _glove_all_vocab.stoi[GLOVE_ALL_TEXT.unk_token]
GLOVE_SEP_IDX = _glove_all_vocab.stoi['[sep]']
GLOVE_EMBEDDING_DIM = _glove_all_vocab.vectors.shape[1]

print("Input dimension: %s\nUnknown word index: %s\nPadding index: %s\nSeparator index: %s" %
      (GLOVE_INPUT_DIM, GLOVE_UNK_IDX, GLOVE_PAD_IDX, GLOVE_SEP_IDX))

In [None]:
glove_train_iterator, glove_valid_iterator, glove_test_iterator = data.BucketIterator.splits(
    (glove_train_data, glove_valid_data, glove_test_data),
    batch_size = BATCH_SIZE,
    device = device)

glove_model = CNNBaseline(GLOVE_INPUT_DIM, GLOVE_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                          GLOVE_PAD_IDX)
# Load the pretrained GloVe table, then reset the unknown, padding and separator rows to zero.
glove_model.embedding.weight.data.copy_(GLOVE_ALL_TEXT.vocab.vectors)
glove_model.embedding.weight.data[GLOVE_UNK_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)
glove_model.embedding.weight.data[GLOVE_PAD_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)
glove_model.embedding.weight.data[GLOVE_SEP_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)

# Only trainable parameters are handed to Adam.
glove_optimizer = optim.Adam([param for param in glove_model.parameters() if param.requires_grad])
glove_criterion = nn.BCEWithLogitsLoss()  # unweighted loss (no pos_weight) for this diagnosis
glove_model = glove_model.to(device)
glove_criterion = glove_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
glove_train_losses, glove_valid_losses, glove_train_accs, glove_valid_accs = \
     train_epoch(N_EPOCHS, glove_model, glove_train_iterator, glove_optimizer, glove_criterion, 0,
                 'glove_0', NTUHDataset.diagnosis_types[0], glove_valid_iterator, early_stop=True, period =30)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, glove_train_losses, glove_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, glove_train_accs, glove_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[0])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[0]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# NOTE(review): the extra 'glove_0_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[0], 'glove_0_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[0]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### BERT

In [None]:
# Re-seed every RNG before the pretrained BERT objects are created.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
torch.backends.cudnn.deterministic = True

BERT_MODEL = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case = True)
bert = BertModel.from_pretrained(BERT_MODEL, output_hidden_states = True)

# Special tokens as strings (the tokenizer's separator doubles as the end-of-sequence marker here) ...
BERT_EOS_TOKEN, BERT_PAD_TOKEN, BERT_UNK_TOKEN = (
    tokenizer.sep_token, tokenizer.pad_token, tokenizer.unk_token)

# ... and their vocabulary ids, resolved in one call.
BERT_EOS_IDX, BERT_PAD_IDX, BERT_UNK_IDX = tokenizer.convert_tokens_to_ids(
    [BERT_EOS_TOKEN, BERT_PAD_TOKEN, BERT_UNK_TOKEN])

# Maximum input length the tokenizer registers for this checkpoint name.
BERT_MAX_SEQUENCE = tokenizer.max_model_input_sizes[BERT_MODEL]

# All three BERT text fields share one configuration: no torchtext vocabulary,
# tokens converted to ids during preprocessing, BERT's own pad/unk ids.
_bert_text_field_options = dict(
    batch_first = True,
    use_vocab = False,
    tokenize = bert_tokenize_and_cut,
    preprocessing = my_convert_tokens_to_ids,
    pad_token = BERT_PAD_IDX,
    unk_token = BERT_UNK_IDX,
    lower = True,
)

BERT_ALL_TEXT = data.Field(**_bert_text_field_options)
BERT_BH_TEXT = data.Field(**_bert_text_field_options)
BERT_EP_TEXT = data.Field(**_bert_text_field_options)

# One independent float label field per diagnosis.
BERT_MAJ_LABEL, BERT_SCH_LABEL, BERT_BIP_LABEL, BERT_MIN_LABEL, BERT_DEM_LABEL = (
    data.LabelField(dtype = torch.float) for _ in range(5))

full_bert_train_data, bert_test_data = NTUHDataset.splits(BERT_BH_TEXT, BERT_EP_TEXT, BERT_ALL_TEXT,
                                           BERT_MAJ_LABEL, BERT_SCH_LABEL, BERT_BIP_LABEL,
                                                            BERT_MIN_LABEL, BERT_DEM_LABEL)

# random.seed() returns None, so `random_state = random.seed(SEED)` passed None and
# only worked through the seeding side effect. Seed explicitly and pass the
# resulting generator state instead.
random.seed(SEED)
bert_train_data, bert_valid_data = full_bert_train_data.split(split_ratio = TRAIN_RATIO,
                                                              random_state = random.getstate())

# NOTE(review): torch.load unpickles the file, so only load a cache you produced yourself.
# `bert_cache` is not referenced again in this cell; presumably it is read as a
# global by the BERT model or helpers -- confirm.
bert_cache=torch.load('cnn_bert_cache_all.pt')

# One label vocabulary per diagnosis, all from the training split.
BERT_MAJ_LABEL.build_vocab(bert_train_data)
BERT_SCH_LABEL.build_vocab(bert_train_data)
BERT_BIP_LABEL.build_vocab(bert_train_data)
BERT_MIN_LABEL.build_vocab(bert_train_data)
BERT_DEM_LABEL.build_vocab(bert_train_data)

# Pin the major-depressive mapping by hand so that '0' -> 0 and '1' -> 1.
BERT_MAJ_LABEL.vocab.itos = ['0', '1']
BERT_MAJ_LABEL.vocab.stoi['1'] = 1
BERT_MAJ_LABEL.vocab.stoi['0'] = 0

bert_train_iterator, bert_valid_iterator, bert_test_iterator = data.BucketIterator.splits(
    (bert_train_data, bert_valid_data, bert_test_data),
    batch_size = BATCH_SIZE,
    device = device)

bert_model = BERTCNNBaseline(N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                          BERT_PAD_IDX, bert, BERT_EOS_IDX)

In [None]:
# Only trainable parameters are handed to Adam.
bert_optimizer = optim.Adam([param for param in bert_model.parameters() if param.requires_grad])
bert_criterion = nn.BCEWithLogitsLoss()  # unweighted loss (no pos_weight) for this diagnosis
bert_model = bert_model.to(device)
bert_criterion = bert_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
bert_train_losses, bert_valid_losses, bert_train_accs, bert_valid_accs = \
     train_epoch(N_EPOCHS, bert_model, bert_train_iterator, bert_optimizer, bert_criterion, 0,
                 'bert_0', NTUHDataset.diagnosis_types[0], bert_valid_iterator, early_stop= True, period = 30)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, bert_train_losses, bert_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, bert_train_accs, bert_valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[0])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[0]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# NOTE(review): the extra 'bert_0_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[0], 'bert_0_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[0]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# Schizophrenia

In [None]:
# Re-seed every RNG so the randomly initialised model is reproducible.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
torch.backends.cudnn.deterministic = True

model = CNNBaseline(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)

# The unknown, padding and separator rows start from the zero vector.
for _special_idx in (UNK_IDX, PAD_IDX, SEP_IDX):
    model.embedding.weight.data[_special_idx] = torch.zeros(EMBEDDING_DIM)

optimizer = optim.Adam(model.parameters())
# Positive-class weight for this diagnosis comes from POS_WEIGHT[1].
criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[1]).to(device)
model = model.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
train_losses, valid_losses, train_accs, valid_accs = train_epoch(
    N_EPOCHS, model, train_iterator, optimizer, criterion, 0,
    'rand_1', NTUHDataset.diagnosis_types[1], valid_iterator)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, train_losses, valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, train_accs, valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[1])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# NOTE(review): the extra 'rand_1_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[1], 'rand_1_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

## Word2vec

In [None]:
# Pretrained word2vec vectors; their width fixes the embedding dimension.
vectors = Vectors(name='word2vec_skipgram_model.bin', cache=DATA_FOLDER)
WV_EMBEDDING_DIM = vectors.vectors.shape[1]

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

wv_model = CNNBaseline(WV_ALL_INPUT_DIM, WV_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, WV_ALL_PAD_IDX)

# Copy the pretrained vector of every vocabulary token that `vectors` covers.
# no_grad is entered once for the whole copy instead of once per token.
with torch.no_grad():
    for wv_token, wv_row in WV_ALL_TEXT.vocab.stoi.items():
        if wv_token in vectors.stoi:
            wv_model.embedding.weight[wv_row].copy_(vectors.vectors[vectors.stoi[wv_token]])

# The unknown, separator and padding rows start from the zero vector.
wv_model.embedding.weight.data[WV_ALL_UNK_IDX] = torch.zeros(WV_EMBEDDING_DIM)
wv_model.embedding.weight.data[WV_ALL_SEP_IDX] = torch.zeros(WV_EMBEDDING_DIM)
wv_model.embedding.weight.data[WV_ALL_PAD_IDX] = torch.zeros(WV_EMBEDDING_DIM)

# Only trainable parameters are handed to Adam.
wv_optimizer = optim.Adam([param for param in wv_model.parameters() if param.requires_grad])
# Positive-class weight for this diagnosis comes from POS_WEIGHT[1].
wv_criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[1])
wv_model = wv_model.to(device)
wv_criterion = wv_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
wv_train_losses, wv_valid_losses, wv_train_accs, wv_valid_accs = \
     train_epoch(N_EPOCHS, wv_model, wv_train_iterator, wv_optimizer, wv_criterion, 0,
                 'wv_1', NTUHDataset.diagnosis_types[1], wv_valid_iterator, early_stop = True, period = 30)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, wv_train_losses, wv_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, wv_train_accs, wv_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[1])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# NOTE(review): the extra 'wv_1_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[1], 'wv_1_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### Glove

In [None]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

glove_model = CNNBaseline(GLOVE_INPUT_DIM, GLOVE_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                          GLOVE_PAD_IDX)
# Load the pretrained GloVe table, then reset the unknown, padding and separator rows to zero.
glove_model.embedding.weight.data.copy_(GLOVE_ALL_TEXT.vocab.vectors)
glove_model.embedding.weight.data[GLOVE_UNK_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)
glove_model.embedding.weight.data[GLOVE_PAD_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)
glove_model.embedding.weight.data[GLOVE_SEP_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)

# Only trainable parameters are handed to Adam.
glove_optimizer = optim.Adam([param for param in glove_model.parameters() if param.requires_grad])
# Positive-class weight for this diagnosis comes from POS_WEIGHT[1].
glove_criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[1])
glove_model = glove_model.to(device)
glove_criterion = glove_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
glove_train_losses, glove_valid_losses, glove_train_accs, glove_valid_accs = \
     train_epoch(N_EPOCHS, glove_model, glove_train_iterator, glove_optimizer, glove_criterion, 0,
                 'glove_1', NTUHDataset.diagnosis_types[1], glove_valid_iterator, early_stop=True, period =30)


In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, glove_train_losses, glove_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, glove_train_accs, glove_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[1])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# NOTE(review): the extra 'glove_1_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[1], 'glove_1_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### BERT

In [None]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# A fresh CNN head is built around the same `bert` object used by the earlier BERT run.
bert_model = BERTCNNBaseline(N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                          BERT_PAD_IDX, bert, BERT_EOS_IDX)

# Only trainable parameters are handed to Adam.
bert_optimizer = optim.Adam([param for param in bert_model.parameters() if param.requires_grad])
# Positive-class weight for this diagnosis comes from POS_WEIGHT[1].
bert_criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[1])
bert_model = bert_model.to(device)
bert_criterion = bert_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
bert_train_losses, bert_valid_losses, bert_train_accs, bert_valid_accs = \
     train_epoch(N_EPOCHS, bert_model, bert_train_iterator, bert_optimizer, bert_criterion, 0,
                 'bert_1', NTUHDataset.diagnosis_types[1], bert_valid_iterator, early_stop= True, period = 30)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, bert_train_losses, bert_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, bert_train_accs, bert_valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[1])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# NOTE(review): the extra 'bert_1_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[1], 'bert_1_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[1]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

## Bipolar

In [None]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Randomly initialised CNN. The bare `model` expression that followed was
# removed: mid-cell it neither displays nor changes anything.
model = CNNBaseline(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)

# The unknown, padding and separator rows start from the zero vector.
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[SEP_IDX] = torch.zeros(EMBEDDING_DIM)

optimizer = optim.Adam(model.parameters())
# Positive-class weight for this diagnosis comes from POS_WEIGHT[2].
criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[2])
model = model.to(device)
criterion = criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
train_losses, valid_losses, train_accs, valid_accs = \
    train_epoch(N_EPOCHS, model, train_iterator, optimizer, criterion, 0,
                'rand_2', NTUHDataset.diagnosis_types[2], valid_iterator)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, train_losses, valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, train_accs, valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[2])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# NOTE(review): the extra 'rand_2_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[2], 'rand_2_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

In [None]:
# Pretrained word2vec vectors; their width fixes the embedding dimension.
vectors = Vectors(name='word2vec_skipgram_model.bin', cache=DATA_FOLDER)
WV_EMBEDDING_DIM = vectors.vectors.shape[1]

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

wv_model = CNNBaseline(WV_ALL_INPUT_DIM, WV_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, WV_ALL_PAD_IDX)

# Copy the pretrained vector of every vocabulary token that `vectors` covers.
# no_grad is entered once for the whole copy instead of once per token.
with torch.no_grad():
    for wv_token, wv_row in WV_ALL_TEXT.vocab.stoi.items():
        if wv_token in vectors.stoi:
            wv_model.embedding.weight[wv_row].copy_(vectors.vectors[vectors.stoi[wv_token]])

# The unknown, separator and padding rows start from the zero vector.
wv_model.embedding.weight.data[WV_ALL_UNK_IDX] = torch.zeros(WV_EMBEDDING_DIM)
wv_model.embedding.weight.data[WV_ALL_SEP_IDX] = torch.zeros(WV_EMBEDDING_DIM)
wv_model.embedding.weight.data[WV_ALL_PAD_IDX] = torch.zeros(WV_EMBEDDING_DIM)

# Only trainable parameters are handed to Adam.
wv_optimizer = optim.Adam([param for param in wv_model.parameters() if param.requires_grad])
# Positive-class weight for this diagnosis comes from POS_WEIGHT[2].
wv_criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[2])
wv_model = wv_model.to(device)
wv_criterion = wv_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
wv_train_losses, wv_valid_losses, wv_train_accs, wv_valid_accs = \
     train_epoch(N_EPOCHS, wv_model, wv_train_iterator, wv_optimizer, wv_criterion, 0,
                 'wv_2', NTUHDataset.diagnosis_types[2], wv_valid_iterator, early_stop = True, period = 30)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, wv_train_losses, wv_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, wv_train_accs, wv_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[2])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)

# NOTE(review): the extra 'wv_2_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[2], 'wv_2_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### Glove

In [None]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

glove_model = CNNBaseline(GLOVE_INPUT_DIM, GLOVE_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                          GLOVE_PAD_IDX)
# Load the pretrained GloVe table, then reset the unknown, padding and separator rows to zero.
glove_model.embedding.weight.data.copy_(GLOVE_ALL_TEXT.vocab.vectors)
glove_model.embedding.weight.data[GLOVE_UNK_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)
glove_model.embedding.weight.data[GLOVE_PAD_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)
glove_model.embedding.weight.data[GLOVE_SEP_IDX] = torch.zeros(GLOVE_EMBEDDING_DIM)

# Only trainable parameters are handed to Adam.
glove_optimizer = optim.Adam([param for param in glove_model.parameters() if param.requires_grad])
# Positive-class weight for this diagnosis comes from POS_WEIGHT[2].
glove_criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[2])
glove_model = glove_model.to(device)
glove_criterion = glove_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
glove_train_losses, glove_valid_losses, glove_train_accs, glove_valid_accs = \
     train_epoch(N_EPOCHS, glove_model, glove_train_iterator, glove_optimizer, glove_criterion, 0,
                 'glove_2', NTUHDataset.diagnosis_types[2], glove_valid_iterator, early_stop=True, period =30)


In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, glove_train_losses, glove_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, glove_train_accs, glove_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[2])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# NOTE(review): the extra 'glove_2_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[2], 'glove_2_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### BERT

In [None]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# A fresh CNN head is built around the same `bert` object used by the earlier BERT runs.
bert_model = BERTCNNBaseline(N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                          BERT_PAD_IDX, bert, BERT_EOS_IDX)

# Only trainable parameters are handed to Adam.
bert_optimizer = optim.Adam([param for param in bert_model.parameters() if param.requires_grad])
# Positive-class weight for this diagnosis comes from POS_WEIGHT[2].
bert_criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[2])
bert_model = bert_model.to(device)
bert_criterion = bert_criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
bert_train_losses, bert_valid_losses, bert_train_accs, bert_valid_accs = \
     train_epoch(N_EPOCHS, bert_model, bert_train_iterator, bert_optimizer, bert_criterion, 0,
                 'bert_2', NTUHDataset.diagnosis_types[2], bert_valid_iterator, early_stop= True, period = 30)


In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, bert_train_losses, bert_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, bert_train_accs, bert_valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[2])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

In [None]:
# NOTE(review): the extra 'bert_2_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[2], 'bert_2_fscore')

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[2]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

## Minor_depressive

In [None]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Randomly initialised CNN. The bare `model` expression that followed was
# removed: mid-cell it neither displays nor changes anything.
model = CNNBaseline(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)

# The unknown, padding and separator rows start from the zero vector.
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[SEP_IDX] = torch.zeros(EMBEDDING_DIM)

optimizer = optim.Adam(model.parameters())
# Positive-class weight for this diagnosis comes from POS_WEIGHT[3].
criterion = nn.BCEWithLogitsLoss(pos_weight = POS_WEIGHT[3])
model = model.to(device)
criterion = criterion.to(device)

# train_epoch returns the loss and score histories that the next cell plots.
train_losses, valid_losses, train_accs, valid_accs = \
    train_epoch(N_EPOCHS, model, train_iterator, optimizer, criterion, 0,
                'rand_3', NTUHDataset.diagnosis_types[3], valid_iterator)

In [None]:
# Learning curves: losses on the top axis, the scores returned by train_epoch on the bottom one.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, train_losses, valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, train_accs, valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[3])

# Keys equal to MICRO/MACRO are the averaged rows; any other key is reported under
# this section's diagnosis name. Keys are compared by value: `is` on strings only
# works when both sides happen to be the same interned object.
for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# NOTE(review): the extra 'rand_3_fscore' argument presumably selects a saved
# checkpoint or output name inside test() -- confirm against its definition.
test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[3], 'rand_3_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### Word2vec

In [None]:
# Load the word2vec vectors; the embedding width is taken from the vector matrix.
vectors = Vectors(name='word2vec_skipgram_model.bin', cache=DATA_FOLDER)
WV_EMBEDDING_DIM = vectors.vectors.shape[1]

# Re-seed every RNG before the model is constructed.
torch.backends.cudnn.deterministic = True
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

wv_model = CNNBaseline(WV_ALL_INPUT_DIM, WV_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, WV_ALL_PAD_IDX)

# Overwrite the embedding row of every vocabulary token that also exists in the
# word2vec table; tokens without a vector keep their initial row.
with torch.no_grad():
    for s in WV_ALL_TEXT.vocab.stoi:
        if s in vectors.stoi:
            wv_model.embedding.weight[WV_ALL_TEXT.vocab.stoi[s]].copy_(vectors.vectors[vectors.stoi[s]])

# Blank the unk / sep / pad rows in a single indexed assignment.
wv_model.embedding.weight.data[[WV_ALL_UNK_IDX, WV_ALL_SEP_IDX, WV_ALL_PAD_IDX]] = torch.zeros(WV_EMBEDDING_DIM)

# Only parameters that still require gradients are handed to Adam.
wv_optimizer = optim.Adam([param for param in wv_model.parameters() if param.requires_grad])
wv_model = wv_model.to(device)
wv_criterion = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHT[3]).to(device)

# Train on diagnosis type 3 with early stopping enabled (period = 30).
wv_train_losses, wv_valid_losses, wv_train_accs, wv_valid_accs = train_epoch(
    N_EPOCHS, wv_model, wv_train_iterator, wv_optimizer, wv_criterion, 0,
    'wv_3', NTUHDataset.diagnosis_types[3], wv_valid_iterator,
    early_stop=True, period=30,
)

In [None]:
# Training vs. validation curves: loss on the first axes, F-measure on the second.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, wv_train_losses, wv_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, wv_train_accs, wv_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

# First pass: evaluate `wv_model` as it currently is in memory.
test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[3])

for f in test_f_scores:
    # Compare keys by value: `is` on strings only succeeds when both sides
    # happen to be the very same interned object.
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# Second pass with the extra 'wv_3_fscore' argument -- presumably the name of a
# saved checkpoint; confirm against `test`.
test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[3], 'wv_3_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### Glove

In [None]:
# Re-seed every RNG before the model is constructed.
torch.backends.cudnn.deterministic = True
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

glove_model = CNNBaseline(GLOVE_INPUT_DIM, GLOVE_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES,
                          OUTPUT_DIM, DROPOUT, GLOVE_PAD_IDX)

# Start from the vectors attached to the GloVe vocabulary, then blank the
# unk / pad / sep rows in a single indexed assignment.
glove_model.embedding.weight.data.copy_(GLOVE_ALL_TEXT.vocab.vectors)
glove_model.embedding.weight.data[[GLOVE_UNK_IDX, GLOVE_PAD_IDX, GLOVE_SEP_IDX]] = torch.zeros(GLOVE_EMBEDDING_DIM)

# Only parameters that still require gradients are handed to Adam.
glove_optimizer = optim.Adam([param for param in glove_model.parameters() if param.requires_grad])
glove_model = glove_model.to(device)
glove_criterion = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHT[3]).to(device)

# Train on diagnosis type 3 with early stopping enabled (period = 30).
glove_train_losses, glove_valid_losses, glove_train_accs, glove_valid_accs = train_epoch(
    N_EPOCHS, glove_model, glove_train_iterator, glove_optimizer, glove_criterion, 0,
    'glove_3', NTUHDataset.diagnosis_types[3], glove_valid_iterator,
    early_stop=True, period=30,
)



In [None]:
# Training vs. validation curves: loss on the first axes, F-measure on the second.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, glove_train_losses, glove_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, glove_train_accs, glove_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

# First pass: evaluate `glove_model` as it currently is in memory.
test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[3])

for f in test_f_scores:
    # Compare keys by value: `is` on strings only succeeds when both sides
    # happen to be the very same interned object.
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# Second pass with the extra 'glove_3_fscore' argument -- presumably the name of
# a saved checkpoint; confirm against `test`.
test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[3], 'glove_3_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### BERT

In [None]:
# Re-seed every RNG before the model is constructed.
torch.backends.cudnn.deterministic = True
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# CNN head on top of the shared `bert` encoder object.
bert_model = BERTCNNBaseline(N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                             BERT_PAD_IDX, bert, BERT_EOS_IDX)

# Only parameters that still require gradients are handed to Adam
# (presumably the encoder is frozen inside BERTCNNBaseline -- confirm).
bert_optimizer = optim.Adam([param for param in bert_model.parameters() if param.requires_grad])
bert_model = bert_model.to(device)
bert_criterion = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHT[3]).to(device)

# Train on diagnosis type 3 with early stopping enabled (period = 30).
bert_train_losses, bert_valid_losses, bert_train_accs, bert_valid_accs = train_epoch(
    N_EPOCHS, bert_model, bert_train_iterator, bert_optimizer, bert_criterion, 0,
    'bert_3', NTUHDataset.diagnosis_types[3], bert_valid_iterator,
    early_stop=True, period=30,
)

In [None]:
# Training vs. validation curves: loss on the first axes, F-score on the second.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, bert_train_losses, bert_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, bert_train_accs, bert_valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

# First pass: evaluate `bert_model` as it currently is in memory.
test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[3])

for f in test_f_scores:
    # Compare keys by value: `is` on strings only succeeds when both sides
    # happen to be the very same interned object.
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# Second pass with the extra 'bert_3_fscore' argument -- presumably the name of
# a saved checkpoint; confirm against `test`.
test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[3], 'bert_3_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[3]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

## Dementia

In [None]:
# Re-seed every RNG so this experiment starts from the same state on each run.
torch.backends.cudnn.deterministic = True
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# CNN baseline with its own (not pre-trained) embedding table; the unk / pad /
# sep rows are blanked in a single indexed assignment.
model = CNNBaseline(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
model.embedding.weight.data[[UNK_IDX, PAD_IDX, SEP_IDX]] = torch.zeros(EMBEDDING_DIM)

# Optimizer is built before the device move, loss uses the positive-class
# weight stored at POS_WEIGHT[4].
optimizer = optim.Adam(model.parameters())
model = model.to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHT[4]).to(device)

# Train on diagnosis type 4; 'rand_4' is the tag handed to `train_epoch`.
train_losses, valid_losses, train_accs, valid_accs = train_epoch(
    N_EPOCHS, model, train_iterator, optimizer, criterion, 0,
    'rand_4', NTUHDataset.diagnosis_types[4], valid_iterator,
)

In [None]:
# Training vs. validation curves: loss on the first axes, F-score on the second.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, train_losses, valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, train_accs, valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

# First pass: evaluate `model` as it currently is in memory.
test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[4])

for f in test_f_scores:
    # Compare keys by value: `is` on strings only succeeds when both sides
    # happen to be the very same interned object.
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# Second pass with the extra 'rand_4_fscore' argument -- presumably the name of
# a saved checkpoint; confirm against `test`.
test_f_scores, predicts = test(model, test_iterator, 0, NTUHDataset.diagnosis_types[4], 'rand_4_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### Word2vec

In [None]:
# Rebuild the word2vec data pipeline for this section: vectors, fields,
# train/valid/test splits, vocabularies and bucketed iterators.
vectors = Vectors(name='word2vec_skipgram_model.bin', cache=DATA_FOLDER)
WV_EMBEDDING_DIM = vectors.vectors.shape[1]

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Three text fields sharing the same tokenizer and settings.
WV_ALL_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
WV_BH_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)
WV_EP_TEXT = data.Field(tokenize = tokenize_and_cut, batch_first = True, lower = True)

# One float label field per diagnosis.
WV_MAJ_LABEL = data.LabelField(dtype = torch.float)
WV_SCH_LABEL = data.LabelField(dtype = torch.float)
WV_BIP_LABEL = data.LabelField(dtype = torch.float)
WV_MIN_LABEL = data.LabelField(dtype = torch.float)
WV_DEM_LABEL = data.LabelField(dtype = torch.float)

full_wv_train_data, wv_test_data = NTUHDataset.splits(WV_BH_TEXT, WV_EP_TEXT, WV_ALL_TEXT,
                                                      WV_MAJ_LABEL, WV_SCH_LABEL, WV_BIP_LABEL,
                                                      WV_MIN_LABEL, WV_DEM_LABEL)

# `random.seed` returns None, so the original `random_state = random.seed(SEED)`
# only worked through the callee's fallback. Seed first, then pass the state
# explicitly so the train/valid split is visibly tied to SEED.
random.seed(SEED)
wv_train_data, wv_valid_data = full_wv_train_data.split(random_state = random.getstate(),
                                                        split_ratio = TRAIN_RATIO)

# Vocabularies are built from the training portion only.
WV_ALL_TEXT.build_vocab(wv_train_data, max_size = MAX_VOCAB_SIZE)
WV_BH_TEXT.build_vocab(wv_train_data, max_size = MAX_VOCAB_SIZE)
WV_EP_TEXT.build_vocab(wv_train_data, max_size = MAX_VOCAB_SIZE)

WV_MAJ_LABEL.build_vocab(wv_train_data)
WV_SCH_LABEL.build_vocab(wv_train_data)
WV_BIP_LABEL.build_vocab(wv_train_data)
WV_MIN_LABEL.build_vocab(wv_train_data)
WV_DEM_LABEL.build_vocab(wv_train_data)

# Vocabulary size and special-token indices used when building the model.
# '[sep]' is lower-case because the fields are created with lower = True.
WV_ALL_INPUT_DIM = len(WV_ALL_TEXT.vocab)
WV_ALL_UNK_IDX = WV_ALL_TEXT.vocab.stoi[WV_ALL_TEXT.unk_token]
WV_ALL_PAD_IDX = WV_ALL_TEXT.vocab.stoi[WV_ALL_TEXT.pad_token]
WV_ALL_SEP_IDX = WV_ALL_TEXT.vocab.stoi['[sep]']
WV_BH_SEP_IDX = WV_BH_TEXT.vocab.stoi['[sep]']
WV_EP_SEP_IDX = WV_EP_TEXT.vocab.stoi['[sep]']

# Re-seed so iterator shuffling and model initialisation start from SEED.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

wv_train_iterator, wv_valid_iterator, wv_test_iterator = data.BucketIterator.splits(
    (wv_train_data, wv_valid_data, wv_test_data),
    batch_size = BATCH_SIZE,
    device = device)

wv_model = CNNBaseline(WV_ALL_INPUT_DIM, WV_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, WV_ALL_PAD_IDX)

# Overwrite the embedding row of every vocabulary token that also exists in the
# word2vec table; tokens without a vector keep their initial row.
with torch.no_grad():
    for s in WV_ALL_TEXT.vocab.stoi:
        if s in vectors.stoi:
            wv_model.embedding.weight[WV_ALL_TEXT.vocab.stoi[s]].copy_(vectors.vectors[vectors.stoi[s]])

# Blank the unk / sep / pad rows in a single indexed assignment.
wv_model.embedding.weight.data[[WV_ALL_UNK_IDX, WV_ALL_SEP_IDX, WV_ALL_PAD_IDX]] = torch.zeros(WV_EMBEDDING_DIM)

# Only parameters that still require gradients are handed to Adam.
wv_optimizer = optim.Adam([param for param in wv_model.parameters() if param.requires_grad])
wv_model = wv_model.to(device)
wv_criterion = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHT[4]).to(device)

# Train on diagnosis type 4 with early stopping enabled (period = 30).
wv_train_losses, wv_valid_losses, wv_train_accs, wv_valid_accs = train_epoch(
    N_EPOCHS, wv_model, wv_train_iterator, wv_optimizer, wv_criterion, 0,
    'wv_4', NTUHDataset.diagnosis_types[4], wv_valid_iterator,
    early_stop=True, period=30,
)

In [None]:
# Training vs. validation curves: loss on the first axes, F-measure on the second.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, wv_train_losses, wv_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, wv_train_accs, wv_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

# First pass: evaluate `wv_model` as it currently is in memory.
test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[4])

for f in test_f_scores:
    # Compare keys by value: `is` on strings only succeeds when both sides
    # happen to be the very same interned object.
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# Second pass with the extra 'wv_4_fscore' argument -- presumably the name of a
# saved checkpoint; confirm against `test`.
test_f_scores, predicts = test(wv_model, wv_test_iterator, 0, NTUHDataset.diagnosis_types[4], 'wv_4_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### Glove

In [None]:
# Re-seed every RNG before the model is constructed.
torch.backends.cudnn.deterministic = True
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

glove_model = CNNBaseline(GLOVE_INPUT_DIM, GLOVE_EMBEDDING_DIM, N_FILTERS, FILTER_SIZES,
                          OUTPUT_DIM, DROPOUT, GLOVE_PAD_IDX)

# Start from the vectors attached to the GloVe vocabulary, then blank the
# unk / pad / sep rows in a single indexed assignment.
glove_model.embedding.weight.data.copy_(GLOVE_ALL_TEXT.vocab.vectors)
glove_model.embedding.weight.data[[GLOVE_UNK_IDX, GLOVE_PAD_IDX, GLOVE_SEP_IDX]] = torch.zeros(GLOVE_EMBEDDING_DIM)

# Only parameters that still require gradients are handed to Adam.
glove_optimizer = optim.Adam([param for param in glove_model.parameters() if param.requires_grad])
glove_model = glove_model.to(device)
glove_criterion = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHT[4]).to(device)

# Train on diagnosis type 4 with early stopping enabled (period = 30).
glove_train_losses, glove_valid_losses, glove_train_accs, glove_valid_accs = train_epoch(
    N_EPOCHS, glove_model, glove_train_iterator, glove_optimizer, glove_criterion, 0,
    'glove_4', NTUHDataset.diagnosis_types[4], glove_valid_iterator,
    early_stop=True, period=30,
)


In [None]:
# Training vs. validation curves: loss on the first axes, F-measure on the second.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, glove_train_losses, glove_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, glove_train_accs, glove_valid_accs, 'Training/Validation F-Measure', {'label': 'Training F-Measure'}, {'label': 'Validation F-Measure'})

# First pass: evaluate `glove_model` as it currently is in memory.
test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[4])

for f in test_f_scores:
    # Compare keys by value: `is` on strings only succeeds when both sides
    # happen to be the very same interned object.
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        # Label with diagnosis type 4, the type actually evaluated above
        # (the original printed diagnosis_types[0] here).
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

print('#'*40)
# Second pass with the extra 'glove_4_fscore' argument -- presumably the name of
# a saved checkpoint; confirm against `test`.
test_f_scores, predicts = test(glove_model, glove_test_iterator, 0, NTUHDataset.diagnosis_types[4], 'glove_4_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

### BERT

In [None]:
# Re-seed every RNG before the model is constructed.
torch.backends.cudnn.deterministic = True
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# CNN head on top of the shared `bert` encoder object.
bert_model = BERTCNNBaseline(N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT,
                             BERT_PAD_IDX, bert, BERT_EOS_IDX)

# Only parameters that still require gradients are handed to Adam
# (presumably the encoder is frozen inside BERTCNNBaseline -- confirm).
bert_optimizer = optim.Adam([param for param in bert_model.parameters() if param.requires_grad])
bert_model = bert_model.to(device)
bert_criterion = nn.BCEWithLogitsLoss(pos_weight=POS_WEIGHT[4]).to(device)

# Train on diagnosis type 4 with early stopping enabled (period = 30).
bert_train_losses, bert_valid_losses, bert_train_accs, bert_valid_accs = train_epoch(
    N_EPOCHS, bert_model, bert_train_iterator, bert_optimizer, bert_criterion, 0,
    'bert_4', NTUHDataset.diagnosis_types[4], bert_valid_iterator,
    early_stop=True, period=30,
)

In [None]:
# Training vs. validation curves: loss on the first axes, F-score on the second.
fig, (ax1, ax2) = plt.subplots(2, figsize=(15,10))
analysis_plotter(fig, ax1, bert_train_losses, bert_valid_losses, 'Training/Validation Loss', {'label': 'Training Loss'}, {'label': 'Validation Loss'})
analysis_plotter(fig, ax2, bert_train_accs, bert_valid_accs, 'Training/Validation F-score', {'label': 'Training F-score'}, {'label': 'Validation F-score'})

# First pass: evaluate `bert_model` as it currently is in memory.
test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[4])

for f in test_f_scores:
    # Compare keys by value: `is` on strings only succeeds when both sides
    # happen to be the very same interned object.
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')

# Second pass with the extra 'bert_4_fscore' argument -- presumably the name of
# a saved checkpoint; confirm against `test`.
test_f_scores, predicts = test(bert_model, bert_test_iterator, 0, NTUHDataset.diagnosis_types[4], 'bert_4_fscore')

for f in test_f_scores:
    if f in (MICRO, MACRO):
        print(f'{f}-average:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')
    else:
        print(f'{NTUHDataset.diagnosis_types[4]}:\n\tprecision: {test_f_scores[f]["p"]:0.3f}\n\trecall: {test_f_scores[f]["r"]:0.3f}\n\tf-score: {test_f_scores[f]["f"]:0.3f}\n')