# Start

In [None]:
# Mount Google Drive at /content/drive so later cells can read data and write checkpoints there.
# This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch to the project folder on the mounted drive; later cells use relative paths
# such as 'data/train_path' and 'Model/...'.
%cd /content/drive/My\ Drive/NLP/FINAL
#%cd /content/drive/My\ Drive/New

/content/drive/My Drive/NLP/FINAL


# Model

## Preprocessing

In [None]:
import numpy as np
import torchtext
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import pickle
import string
from tqdm.notebook import tqdm
import time
import pandas as pd

In [None]:
class Params():
    """Sizes and hyper-parameters shared by the preprocessing, model and training cells."""

    def __init__(self):
        # Sizes of the four lookup tables: words, POS tags, dependency labels, entity tags.
        self.num_vocab, self.num_pos, self.num_dep, self.num_ent = 400001, 17, 44, 66

        # Embedding widths for the same four inputs, in the same order.
        self.dim_word, self.dim_pos, self.dim_dep, self.dim_ent = 200, 25, 25, 25

        # Hidden size of the recurrent layers.
        self.dim_hidden = 100

        # Number of relation scores produced per classifier output, and training mini-batch size.
        self.num_class, self.batch_size = 10, 20

        # Optimiser / scheduler settings (lr, StepLR gamma, StepLR step size, weight decay).
        self.learn_rate, self.learn_rate_decay = 1e-3, 0.92
        self.step_size, self.weight_decay = 50, 1e-5
        # Dropout probability and the weight of the entity-detection loss in the total loss.
        self.dropout, self.alpha = 0.0, 1
        # Number of epochs per run and the gradient-norm clipping threshold.
        self.total_epoch, self.max_norm = 5, 10

# Single shared instance; later cells read and overwrite its attributes.
params = Params()

In [None]:
# Load the 200-dimensional GloVe '6B' vectors through torchtext.
glove = torchtext.vocab.GloVe(name='6B', dim=200)
# Add an extra 'UKN' token with id params.num_vocab - 1; prepare_input maps every word that is
# not in the GloVe vocabulary to it.
# NOTE(review): assumes the GloVe table has exactly params.num_vocab - 1 entries so that the
# id assigned here equals the position appended to itos - confirm.
glove.itos.append('UKN')
glove.stoi['UKN'] = params.num_vocab - 1

In [None]:
# Part-of-speech tag -> integer id (17 entries, matching params.num_pos); used by prepare_input.
pos_dict = {'ADJ': 0, 'ADP': 1, 'ADV': 2, 'AUX': 3, 'CCONJ': 4, 'DET': 5, 'INTJ': 6, 'NOUN': 7, 'NUM': 8, 
            'PART': 9, 'PRON': 10, 'PROPN': 11, 'PUNCT': 12, 'SCONJ': 13, 'SYM': 14, 'VERB': 15, 'X': 16}

In [None]:
# Dependency-relation label -> integer id (44 entries, matching params.num_dep); used by
# prepare_input for both the full sentence labels and the labels along the two paths.
dep_dict = {'acl': 0, 'acl:relcl': 1, 'advcl': 2, 'advmod': 3, 'amod': 4, 'appos': 5, 'aux': 6, 'aux:pass': 7, 'case': 8, 
            'cc': 9, 'cc:preconj': 10, 'ccomp': 11, 'compound': 12, 'compound:prt': 13, 'conj': 14, 'cop': 15, 'csubj': 16, 
            'csubj:pass': 17, 'det': 18, 'det:predet': 19, 'discourse': 20, 'expl': 21, 'fixed': 22, 'flat': 23, 'goeswith': 24, 
            'iobj': 25, 'list': 26, 'mark': 27, 'nmod': 28, 'nmod:npmod': 29, 'nmod:poss': 30, 'nmod:tmod': 31, 'nsubj': 32, 
            'nsubj:pass': 33, 'nummod': 34, 'obj': 35, 'obl': 36, 'obl:npmod': 37, 'obl:tmod': 38, 'parataxis': 39, 'punct': 40, 
            'root': 41, 'vocative': 42, 'xcomp': 43}

In [None]:
# Entity tag -> integer id (66 entries, matching params.num_ent). Tags carry a B-/I-/L-/U- prefix
# plus the single 'O' tag. Ids follow the alphabetical order of the keys, which the class-weight
# cell relies on when it sorts tag counts by index.
ent_dict = {'B-CARDINAL': 0, 'B-DATE': 1, 'B-EVENT': 2, 'B-FAC': 3, 'B-GPE': 4, 'B-LAW': 5, 'B-LOC': 6, 'B-MONEY': 7, 'B-NORP': 8, 
            'B-ORG': 9, 'B-PERCENT': 10, 'B-PERSON': 11, 'B-PRODUCT': 12,'B-QUANTITY': 13, 'B-TIME': 14, 'B-WORK_OF_ART': 15, 
            'I-CARDINAL': 16, 'I-DATE': 17, 'I-EVENT': 18, 'I-FAC': 19, 'I-GPE': 20, 'I-LAW': 21, 'I-LOC': 22, 'I-MONEY': 23, 
            'I-NORP': 24, 'I-ORG': 25, 'I-PERCENT': 26,'I-PERSON': 27, 'I-PRODUCT': 28, 'I-QUANTITY': 29, 'I-TIME': 30, 'I-WORK_OF_ART': 31, 
            'L-CARDINAL': 32, 'L-DATE': 33, 'L-EVENT': 34, 'L-FAC': 35, 'L-GPE': 36, 'L-LAW': 37, 'L-LOC': 38, 'L-MONEY': 39, 'L-NORP': 40, 
            'L-ORG': 41, 'L-PERCENT': 42, 'L-PERSON': 43, 'L-PRODUCT': 44, 'L-QUANTITY': 45,'L-TIME': 46, 'L-WORK_OF_ART': 47, 'O': 48, 
            'U-CARDINAL': 49, 'U-DATE': 50, 'U-EVENT': 51, 'U-FAC': 52, 'U-GPE': 53, 'U-LANGUAGE': 54, 'U-LAW': 55, 'U-LOC': 56, 'U-MONEY': 57, 
            'U-NORP': 58, 'U-ORDINAL': 59, 'U-ORG': 60, 'U-PERSON': 61, 'U-PRODUCT': 62, 'U-QUANTITY': 63, 'U-TIME': 64, 'U-WORK_OF_ART': 65}

In [None]:
class Data(object):
    """Container of parallel per-example sequences with a simple sequential batch cursor.

    Every constructor argument is a sliceable sequence holding one entry per example; the
    number of examples (``size``) is taken from ``len(words_seq)``. ``offset`` is the index of
    the first example of the next batch returned by ``next_batch``.
    """

    # Constructor argument names in positional order; ``next_batch`` slices each of them.
    _FIELDS = ('words_seq', 'pos_seq', 'deps_seq', 'ents_seq',
               'indx_path1', 'indx_path2', 'dep_path1', 'dep_path2',
               'childs_path1', 'childs_path2', 'rels', 'rels1', 'rels2',
               'len_path1', 'len_path2', 'num_child_path1', 'num_child_path2',
               'words_seq_id', 'pos_seq_id', 'deps_seq_id',
               'dep_path1_id', 'dep_path2_id', 'ents_seq_id')

    def __init__(self, words_seq, pos_seq, deps_seq, ents_seq, indx_path1, indx_path2, dep_path1, dep_path2, childs_path1, childs_path2, rels, rels1, rels2, 
                 len_path1, len_path2, num_child_path1, num_child_path2, words_seq_id, pos_seq_id, deps_seq_id, dep_path1_id, dep_path2_id, ents_seq_id):
        self.words_seq = words_seq
        self.pos_seq = pos_seq
        self.deps_seq = deps_seq
        self.ents_seq = ents_seq
        self.indx_path1 = indx_path1
        self.indx_path2 = indx_path2
        self.dep_path1 = dep_path1
        self.dep_path2 = dep_path2
        self.childs_path1 = childs_path1
        self.childs_path2 = childs_path2
        self.rels = rels
        self.rels1 = rels1
        self.rels2 = rels2
        self.len_path1 = len_path1
        self.len_path2 = len_path2
        self.num_child_path1 = num_child_path1
        self.num_child_path2 = num_child_path2
        self.words_seq_id = words_seq_id
        self.pos_seq_id = pos_seq_id
        self.deps_seq_id = deps_seq_id
        self.dep_path1_id = dep_path1_id
        self.dep_path2_id = dep_path2_id
        self.ents_seq_id = ents_seq_id

        self.size = len(words_seq)
        self.offset = 0

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` consecutive examples as a new ``Data`` object.

        When fewer than ``batch_size`` examples remain, the cursor restarts at the first
        example, so the trailing partial batch of a pass is dropped and every pass yields the
        same batches. (The previous wrap-around set the cursor to
        ``offset + batch_size - size``, which skipped the head of the data on every pass after
        the first - the whole first batch when ``size`` is a multiple of ``batch_size``.)

        If ``batch_size`` exceeds ``size`` the returned batch simply holds all examples.
        """
        if self.offset + batch_size > self.size:
            self.offset = 0
        start, stop = self.offset, self.offset + batch_size
        # Apply the same window to every field so the sequences stay aligned.
        batch = Data(*(getattr(self, name)[start:stop] for name in self._FIELDS))
        self.offset = stop
        return batch

In [None]:
def prepare_input(file_path):
    """Load one pickled data split and turn it into a ``Data`` object with integer ids.

    The pickle must contain, in this order, 11 parallel lists: words, POS tags, dependency
    labels, entity tags, the node indices of path 1 and path 2, the dependency labels along
    both paths, the child lists of the nodes on both paths, and the relation label per example.

    Besides the raw lists, the returned ``Data`` carries the path lengths, the number of
    children per path node, id-encoded versions of the symbolic sequences, and the relation
    label split into two per-direction targets (``rels1`` / ``rels2``).

    Words that are not in the GloVe vocabulary are replaced by ``'UKN'`` in place.

    Raises ``KeyError`` if a POS / dependency / entity tag is missing from the lookup tables.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code - only load files you trust.
    with open(file_path, 'rb') as f:
        (words_seq, pos_seq, deps_seq, ents_seq, indx_path1, indx_path2,
         dep_path1, dep_path2, childs_path1, childs_path2, rels) = pickle.load(f)

    num_data = len(words_seq)

    def to_ids(seqs, table):
        """Map every symbol of every sequence through ``table``."""
        return [[table[item] for item in seqs[i]] for i in range(num_data)]

    # replace unknown words
    # Membership is tested on the stoi dict (hash lookup); scanning the itos list costs a
    # linear pass over the whole vocabulary for every token.
    for i in range(num_data):
        for j, word in enumerate(words_seq[i]):
            if word not in glove.stoi:
                words_seq[i][j] = 'UKN'

    # length of LCA paths
    len_path1 = [len(indx_path1[i]) for i in range(num_data)]
    len_path2 = [len(indx_path2[i]) for i in range(num_data)]

    # number of children of words in LCA paths
    num_child_path1 = [[len(childs) for childs in childs_path1[i]] for i in range(num_data)]
    num_child_path2 = [[len(childs) for childs in childs_path2[i]] for i in range(num_data)]

    # word -> id
    words_seq_id = to_ids(words_seq, glove.stoi)

    # pos -> id
    pos_seq_id = to_ids(pos_seq, pos_dict)

    # dep -> id
    deps_seq_id = to_ids(deps_seq, dep_dict)
    dep_path1_id = to_ids(dep_path1, dep_dict)
    dep_path2_id = to_ids(dep_path2, dep_dict)

    # ent -> id
    ents_seq_id = to_ids(ents_seq, ent_dict)

    # rels for v15+
    # An even label goes to rels1 and an odd label to rels2 (both as label // 2); the other
    # target gets 9, the index that eval() treats as "no relation" for that output.
    rels1 = []
    rels2 = []
    for i in range(num_data):
        rel = int(rels[i])
        if rel % 2 == 0:
            rels1.append(rel // 2)
            rels2.append(9)
        else:
            rels1.append(9)
            rels2.append(rel // 2)

    return Data(words_seq, pos_seq, deps_seq, ents_seq, indx_path1, indx_path2, dep_path1, dep_path2, childs_path1, childs_path2, rels, rels1, rels2,
                len_path1, len_path2, num_child_path1, num_child_path2, words_seq_id, pos_seq_id, deps_seq_id, dep_path1_id, dep_path2_id, ents_seq_id)

## BiLSTM+Tree-LSTM

In [None]:
class BiLSTM_TreeLSTM(nn.Module):
    """BiLSTM sequence layer + greedy entity tagger + LSTMs over two dependency paths.

    ``forward`` processes the examples of a batch one at a time and returns entity-tag scores
    per sentence plus two sets of relation scores: one for the paths in the given order and one
    with the roles of path 1 and path 2 swapped.

    All tensors used in ``forward`` are created on / moved to CUDA, so a GPU is required.
    """

    def __init__(self):
        super(BiLSTM_TreeLSTM, self).__init__()
        glove = torchtext.vocab.GloVe(name='6B', dim=200)
        glove.itos.append('UKN')
        glove.stoi['UKN'] = params.num_vocab - 1

        ## EMBEDDING LAYER ##
        # NOTE(review): .cuda() on an nn.Parameter returns a plain tensor, so UKN_embedding is
        # not registered with the module (not in parameters() / state_dict()) and the optimizer
        # never updates it; word_embedding is assembled once here. Left unchanged so existing
        # checkpoints keep loading.
        self.UKN_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(1, params.dim_word))).cuda()
        self.word_embedding = torch.cat([glove.vectors.cuda(), self.UKN_embedding], 0)
        self.pos_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(params.num_pos, params.dim_pos)))
        self.dep_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(params.num_dep, params.dim_dep)))
        self.ent_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(params.num_ent, params.dim_ent)))

        ## SEQUENCE LAYER ##
        self.seq_lstm = nn.LSTM(params.dim_word+params.dim_pos, params.dim_hidden, bidirectional=True)
        # Learned initial states of the BiLSTM (2 directions, batch of 1).
        self.seq_h0 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(2, 1, params.dim_hidden)))
        self.seq_c0 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(2, 1, params.dim_hidden)))

        ## ENTITY DETECTION ##
        self.dect_cell = nn.RNNCell(params.dim_hidden*2+params.dim_ent, params.dim_hidden, bias=True, nonlinearity='tanh')
        self.dect_ent0 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(1, params.dim_ent)))
        self.dect_Wy = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(params.dim_hidden, params.num_ent)))
        self.dect_by = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(1, params.num_ent)))

        ## DEPENDENCY LAYER ##
        # Leading index 0 is used by the bottom-up cell, 1 and 2 by the two chain LSTMs run
        # from the last path node back to the first. dep_Uit / dep_Uft / dep_Uot / dep_Uut are
        # not referenced in forward; they are kept so the state_dict keys stay the same.
        self.dep_Wi = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden*2+params.dim_dep+params.dim_ent, params.dim_hidden)))
        self.dep_Ui = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden, params.dim_hidden)))
        self.dep_Uit = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(1, params.dim_hidden, params.dim_hidden)))
        self.dep_bi = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, 1, params.dim_hidden)))

        self.dep_Wf = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden*2+params.dim_dep+params.dim_ent, params.dim_hidden)))
        self.dep_Uf = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden, params.dim_hidden)))
        self.dep_Uft = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden, params.dim_hidden)))
        self.dep_bf = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, 1, params.dim_hidden)))

        self.dep_Wo = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden*2+params.dim_dep+params.dim_ent, params.dim_hidden)))
        self.dep_Uo = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden, params.dim_hidden)))
        self.dep_Uot = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(1, params.dim_hidden, params.dim_hidden)))
        self.dep_bo = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, 1, params.dim_hidden)))

        self.dep_Wu = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden*2+params.dim_dep+params.dim_ent, params.dim_hidden)))
        self.dep_Uu = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, params.dim_hidden, params.dim_hidden)))
        self.dep_Uut = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(1, params.dim_hidden, params.dim_hidden)))
        self.dep_bu = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, 1, params.dim_hidden)))

        self.dep_c0 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, 1, params.dim_hidden)))
        self.dep_h0 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(3, 1, params.dim_hidden)))

        ## RELATION CLASSIFICATION ##
        # Input is the concatenation of five dim_hidden-wide vectors (see forward).
        self.rc = nn.Sequential(
            nn.Linear(params.dim_hidden*5, params.dim_hidden),
            nn.Tanh(),
            nn.Linear(params.dim_hidden, params.num_class)
        )

        # dropout layer
        self.dropout = nn.Dropout(p=params.dropout)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()

    def _detect_entities(self, seq_h, num_words):
        """Greedy left-to-right entity tagging; returns the stacked per-word tag scores.

        Each step sees the BiLSTM output of the word and the embedding of the previously
        predicted tag (``dect_ent0`` for the first word). ``dect_cell`` is called without a
        hidden state, so it starts from its default state at every word.
        """
        dect_ent = self.dect_ent0
        scores = []
        for j in range(num_words):
            dect_xt = torch.cat([seq_h[j], dect_ent], 1)
            dect_ht = self.dect_cell(dect_xt)
            dect_yt = torch.mm(dect_ht, self.dect_Wy) + self.dect_by
            scores.append(dect_yt)
            # feed the embedding of the arg-max tag to the next step
            dect_ent = torch.unsqueeze(self.ent_embedding[torch.argmax(dect_yt)], 0)
        if not scores:
            return torch.tensor([]).cuda()
        return torch.cat(scores, 0)

    def _child_sum_lstm(self, dep_x, indx, prev_indx1, prev_indx2, childs, dep_ct, dep_ht):
        """One bottom-up cell update for node ``indx`` (weights with leading index 0).

        ``dep_ct`` / ``dep_ht`` are lists of child states: entry 0 belongs to the child whose
        index equals ``prev_indx1``, entry 1 to the child equal to ``prev_indx2``. Children of
        ``indx`` that match neither index contribute nothing.
        """
        it = torch.mm(dep_x[indx], self.dep_Wi[0]) + self.dep_bi[0]
        ot = torch.mm(dep_x[indx], self.dep_Wo[0]) + self.dep_bo[0]
        ut = torch.mm(dep_x[indx], self.dep_Wu[0]) + self.dep_bu[0]

        for k in childs:
            if k==prev_indx1:
                it += torch.mm(dep_ht[0], self.dep_Ui[0])
                ot += torch.mm(dep_ht[0], self.dep_Uo[0])
                ut += torch.mm(dep_ht[0], self.dep_Uu[0])
            elif k==prev_indx2:
                it += torch.mm(dep_ht[1], self.dep_Ui[0])
                ot += torch.mm(dep_ht[1], self.dep_Uo[0])
                ut += torch.mm(dep_ht[1], self.dep_Uu[0])

        it = self.sigmoid(it)
        ot = self.sigmoid(ot)
        ut = self.tanh(ut)

        ct = torch.mul(it, ut)

        # one forget gate per on-path child
        for k in childs:
            ftk = torch.mm(dep_x[indx], self.dep_Wf[0]) + self.dep_bf[0]
            if k==prev_indx1:
                ftk += torch.mm(dep_ht[0], self.dep_Uf[0])
                ct += torch.mul(self.sigmoid(ftk), dep_ct[0])
            elif k==prev_indx2:
                # NOTE(review): this gate sums both child states while the branch above uses
                # only the first one - asymmetry kept from the original code.
                ftk += (torch.mm(dep_ht[0], self.dep_Uf[0]) + torch.mm(dep_ht[1], self.dep_Uf[0]))
                ct += torch.mul(self.sigmoid(ftk), dep_ct[1])

        ht = torch.mul(ot, self.tanh(ct))
        return ct, ht

    def _path_lstm(self, dep_x, path, length, k, ct, ht):
        """Run a plain LSTM over ``path[length-1] ... path[0]`` with weight set ``k``.

        ``ct`` / ``ht`` are the initial cell and hidden state; the final pair is returned.
        """
        for j in range(length-1, -1, -1):
            x = dep_x[path[j]]
            it = self.sigmoid(torch.mm(x, self.dep_Wi[k]) + torch.mm(ht, self.dep_Ui[k]) + self.dep_bi[k])
            ft = self.sigmoid(torch.mm(x, self.dep_Wf[k]) + torch.mm(ht, self.dep_Uf[k]) + self.dep_bf[k])
            ot = self.sigmoid(torch.mm(x, self.dep_Wo[k]) + torch.mm(ht, self.dep_Uo[k]) + self.dep_bo[k])
            ut = self.tanh(torch.mm(x, self.dep_Wu[k]) + torch.mm(ht, self.dep_Uu[k]) + self.dep_bu[k])
            ct = torch.mul(it, ut) + torch.mul(ft, ct)
            ht = torch.mul(ot, self.tanh(ct))
        return ct, ht

    def forward(self, Data):
        """Score entity tags and relations for every example in ``Data``.

        Parameters
        ----------
        Data : batch object as returned by ``Data.next_batch`` (the parameter name shadows
            the ``Data`` class inside this method).

        Returns
        -------
        dect_y : list with one tensor of entity-tag scores per example (one row per word)
        rc_y   : relation scores, one row per example, paths in the given order
        rc_y_  : relation scores with the roles of path 1 and path 2 swapped
        """
        # result of entity detection
        dect_y = []

        # results of the dependency layer, one row per example
        dep_rows = []
        dep_rows_ = []

        ## EMBEDDING LAYER ##
        word_embd = [F.embedding(torch.tensor(Data.words_seq_id[i]).cuda(), self.word_embedding) for i in range(Data.size)]
        pos_embd = [F.embedding(torch.tensor(Data.pos_seq_id[i]).cuda(), self.pos_embedding) for i in range(Data.size)]
        dep_embd = [F.embedding(torch.tensor(Data.deps_seq_id[i]).cuda(), self.dep_embedding) for i in range(Data.size)]
        ent_embd = [F.embedding(torch.tensor(Data.ents_seq_id[i]).cuda(), self.ent_embedding) for i in range(Data.size)]

        for i in range(Data.size):
            seq_x = torch.cat([word_embd[i], pos_embd[i]], 1)
            seq_x = torch.unsqueeze(seq_x, 1)

            ## SEQUENCE LAYER ##
            seq_h, (_, _) = self.seq_lstm(seq_x, (self.seq_h0, self.seq_c0))
            seq_h_ = torch.squeeze(seq_h, 1)

            ## ENTITY DETECTION ##
            dect_y.append(self._detect_entities(seq_h, len(Data.words_seq[i])))

            ## STACKING LAYER ##
            # The entity tags supplied with the data (not the tags predicted above) are embedded.
            dep_embd_ = self.dropout(dep_embd[i])
            ent_embd_ = self.dropout(ent_embd[i])
            dep_x = torch.unsqueeze(torch.cat([seq_h_, dep_embd_, ent_embd_], 1), 1)

            ## DEPENDENCY LAYER ##
            path1 = Data.indx_path1[i]
            path2 = Data.indx_path2[i]
            len1 = Data.len_path1[i]
            len2 = Data.len_path2[i]

            # bottom-up along each path, stopping before the last node
            # (for j == 0 the "previous" index is path[-1], exactly as in the original code)
            dep_ct1 = self.dep_c0[0]
            dep_ht1 = self.dep_h0[0]
            for j in range(len1-1):
                dep_ct1, dep_ht1 = self._child_sum_lstm(dep_x, path1[j], path1[j-1], -1,
                                                        Data.childs_path1[i][j], [dep_ct1], [dep_ht1])

            dep_ct2 = self.dep_c0[0]
            dep_ht2 = self.dep_h0[0]
            for j in range(len2-1):
                dep_ct2, dep_ht2 = self._child_sum_lstm(dep_x, path2[j], path2[j-1], -1,
                                                        Data.childs_path2[i][j], [dep_ct2], [dep_ht2])

            # the last node of path 1 merges the states of both paths
            prev_indx1 = -1 if len(path1)==1 else path1[-2]
            prev_indx2 = -1 if len(path2)==1 else path2[-2]

            dep_c_bu, dep_h_bu = self._child_sum_lstm(dep_x, path1[-1], prev_indx1, prev_indx2,
                                                      Data.childs_path1[i][-1], [dep_ct1, dep_ct2], [dep_ht1, dep_ht2])

            # sum of the two direction halves of the BiLSTM output at the first node of each path
            tmp1 = torch.unsqueeze(seq_h_[path1[0]][:params.dim_hidden] + seq_h_[path1[0]][params.dim_hidden:], 0)
            tmp2 = torch.unsqueeze(seq_h_[path2[0]][:params.dim_hidden] + seq_h_[path2[0]][params.dim_hidden:], 0)

            # top-down: weight set 1 over path 1, weight set 2 over path 2
            _, dep_ht3 = self._path_lstm(dep_x, path1, len1, 1, self.dep_c0[1], self.dep_h0[1])
            _, dep_ht4 = self._path_lstm(dep_x, path2, len2, 2, self.dep_c0[2], self.dep_h0[2])
            dep_rows.append(torch.cat([dep_h_bu, dep_ht3, dep_ht4, tmp1, tmp2], 1))

            # same two LSTMs with the paths swapped
            _, dep_ht5 = self._path_lstm(dep_x, path2, len2, 1, self.dep_c0[1], self.dep_h0[1])
            # Fixed: the initial cell state was taken from dep_h0[2] instead of dep_c0[2].
            _, dep_ht6 = self._path_lstm(dep_x, path1, len1, 2, self.dep_c0[2], self.dep_h0[2])
            dep_rows_.append(torch.cat([dep_h_bu, dep_ht5, dep_ht6, tmp2, tmp1], 1))

        dep_y = torch.cat(dep_rows, 0) if dep_rows else torch.tensor([]).cuda()
        dep_y_ = torch.cat(dep_rows_, 0) if dep_rows_ else torch.tensor([]).cuda()

        ## RELATION CLASSIFICATION ##
        rc_y = self.rc(dep_y)
        rc_y_ = self.rc(dep_y_)

        return dect_y, rc_y, rc_y_

## BiLSTM

In [None]:
class BiLSTM_TreeLSTM(nn.Module):
    """BiLSTM-only variant: the sequence LSTM reads word, POS, entity and dependency embeddings.

    NOTE(review): this class reuses the name ``BiLSTM_TreeLSTM``; running this cell rebinds the
    name and replaces the Tree-LSTM class defined earlier in the notebook.
    NOTE(review): ``forward`` is incomplete - see the notes inside it.
    """
    def __init__(self):
        super(BiLSTM_TreeLSTM, self).__init__()
        glove = torchtext.vocab.GloVe(name='6B', dim=200)
        glove.itos.append('UKN')
        glove.stoi['UKN'] = params.num_vocab - 1

        ## EMBEDDING LAYER ##
        # NOTE(review): .cuda() on an nn.Parameter returns a plain tensor, so UKN_embedding is
        # not registered as a module parameter.
        self.UKN_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(1, params.dim_word))).cuda()
        self.word_embedding = torch.cat([glove.vectors.cuda(), self.UKN_embedding], 0)
        self.pos_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(params.num_pos, params.dim_pos)))
        self.dep_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(params.num_dep, params.dim_dep)))
        self.ent_embedding = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(params.num_ent, params.dim_ent)))

        ## SEQUENCE LAYER ##
        self.seq_lstm = nn.LSTM(params.dim_word+params.dim_pos+params.dim_ent+params.dim_dep, params.dim_hidden, bidirectional=True)
        # Learned initial states of the BiLSTM (2 directions, batch of 1).
        self.seq_h0 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(2, 1, params.dim_hidden)))
        self.seq_c0 = nn.Parameter(nn.init.xavier_normal_(torch.Tensor(2, 1, params.dim_hidden)))

        ## RELATION CLASSIFICATION ##
        self.rc = nn.Sequential(
            nn.Linear(params.dim_hidden, params.dim_hidden),
            nn.Tanh(),
            nn.Linear(params.dim_hidden, params.num_class)
        )

        # dropout layer
        self.dropout = nn.Dropout(p=params.dropout)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()


    def forward(self, Data):
        ## EMBEDDING LAYER ##
        word_embd = [F.embedding(torch.tensor(Data.words_seq_id[i]).cuda(), self.word_embedding) for i in range(Data.size)]
        pos_embd = [F.embedding(torch.tensor(Data.pos_seq_id[i]).cuda(), self.pos_embedding) for i in range(Data.size)]
        dep_embd = [F.embedding(torch.tensor(Data.deps_seq_id[i]).cuda(), self.dep_embedding) for i in range(Data.size)]
        ent_embd = [F.embedding(torch.tensor(Data.ents_seq_id[i]).cuda(), self.ent_embedding) for i in range(Data.size)]


        for i in range(Data.size):
            seq_x = torch.cat([word_embd[i], pos_embd[i], ent_embd[i], dep_embd[i]], 1)
            seq_x = torch.unsqueeze(seq_x, 1)

            ## SEQUENCE LAYER ##
            seq_h, (_, _) = self.seq_lstm(seq_x, (self.seq_h0, self.seq_c0))
            seq_h_ = torch.squeeze(seq_h, 1)
            # Sum the two direction halves, then max-pool over the sequence dimension.
            h, _ = torch.max(seq_h[:, :, 0:params.dim_hidden]+seq_h[:, :, params.dim_hidden:], 0)
            # NOTE(review): F.dropout defaults to training=True, so this is applied in eval mode
            # too, and it uses a hard-coded 0.3 rather than self.dropout.
            # NOTE(review): h is overwritten on every iteration and never collected.
            h = torch.nn.functional.dropout(h, 0.3)

        

        ## RELATION CLASSIFICATION ##
        # NOTE(review): dep_y, dep_y_ and dect_y are never assigned in this method; unless they
        # exist as globals, the lines below raise NameError.
        rc_y = self.rc(dep_y)
        rc_y_ = self.rc(dep_y_)

        return dect_y, rc_y, rc_y_

## Other

In [None]:
# calculate weight
# Inverse-frequency class weights (1e3 / count) for relation classes and entity tags,
# counted over the train and test splits together.
rels = []
ents = []

# SECURITY NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('data/train_path', 'rb') as f:
    _, _, _, ents_seq1, _, _, _, _, _, _, rels1 = pickle.load(f)
rels.extend(rels1)
for ent in ents_seq1:
    ents.extend(ent)

with open('data/test_path', 'rb') as f:
    _, _, _, ents_seq2, _, _, _, _, _, _, rels2 = pickle.load(f)
rels.extend(rels2)
for ent in ents_seq2:
    ents.extend(ent)

# Collapse each relation label to label // 2; every loaded label is converted
# (previously a hard-coded count of 10717 labels).
rels = [int(rel) // 2 for rel in rels]

# Counts sorted by class id / tag name. This lines up with the model's class indices only if
# every relation class and every entity tag occurs at least once, and because the ids in
# ent_dict follow the alphabetical order of the tag names.
rel_freq = pd.Series(rels).value_counts().sort_index().to_list()
rel_weight = [1e3 / rel_freq[i] for i in range(params.num_class)]
ent_freq = pd.Series(ents).value_counts().sort_index().to_list()
ent_weight = [1e3 / ent_freq[i] for i in range(params.num_ent)]

In [None]:
# Class-weighted cross-entropy losses: criterion1 for entity tags, criterion2 for relations.
# The weight tensors live on the CPU, which is why train() and eval() move the model outputs
# to the CPU before calling them.
criterion1 = nn.CrossEntropyLoss(weight=torch.tensor(ent_weight, dtype=torch.float))
criterion2 = nn.CrossEntropyLoss(weight=torch.tensor(rel_weight, dtype=torch.float))

In [None]:
#model = BiLSTM_TreeLSTM()
#model = model.cuda()
#optimizer = optim.AdamW(model.parameters(), lr=params.learn_rate, weight_decay=params.weight_decay, amsgrad=False)
#scheduler = optim.lr_scheduler.StepLR(optimizer, params.step_size, params.learn_rate_decay)
#criterion1 = nn.CrossEntropyLoss(weight=torch.tensor(ent_weight, dtype=torch.float))
#criterion2 = nn.CrossEntropyLoss(weight=torch.tensor(rel_weight, dtype=torch.float))
#start_epoch = 1

In [None]:
# load model
#checkpoint = torch.load('Model/treelstm_v15.pth')
#model.load_state_dict(checkpoint['net'])
#model = model.cuda()
#optimizer.load_state_dict(checkpoint['optimizer'])
#start_epoch = checkpoint['epoch'] + 1

# Train

In [None]:
# Load and id-encode both splits; train() and eval() read these globals.
train_data = prepare_input('data/train_path')
test_data = prepare_input('data/test_path')

In [None]:
def eval(epoch, num_batches=90, batch_size=30):
    """Evaluate the global ``model`` on ``test_data`` and print a one-line summary.

    Parameters
    ----------
    epoch : int
        Only used in the printed line.
    num_batches, batch_size : int
        Number and size of the consecutive test batches to score (defaults keep the
        previously hard-coded 90 x 30 examples).

    The predicted label is 18 when both classifier outputs pick index 9; otherwise it is
    ``2*k`` or ``2*k + 1`` depending on which output has the larger best score among
    classes 0-8. Macro precision / recall / F1 average labels 0-4 and 6-17 (labels 5 and 18
    are left out, as in the original code).

    Note: this function shadows the built-in ``eval``; the name is kept for existing callers.
    """
    model.eval()
    loss_dect = 0
    loss_dep = 0
    # cond_mat[predicted, gold] counts
    cond_mat = torch.zeros(19, 19)

    # No gradients are needed for evaluation; this avoids building autograd graphs.
    with torch.no_grad():
        for _ in range(num_batches):
            batch_data = test_data.next_batch(batch_size)
            dect_y, rc_y, rc_y_= model(batch_data)

            for i in range(batch_data.size):
                # Fixed: this used to accumulate onto loss_dep instead of loss_dect.
                loss_dect = loss_dect + criterion1(dect_y[i].cpu(), torch.tensor(batch_data.ents_seq_id[i])).item()

            loss_dep = loss_dep + criterion2(rc_y.cpu(), torch.tensor(batch_data.rels1)).item() + criterion2(rc_y_.cpu(), torch.tensor(batch_data.rels2)).item()

            for i in range(batch_data.size):
                if torch.argmax(rc_y[i])==9 and torch.argmax(rc_y_[i])==9:
                    rel_pred = 18
                else:
                    rel1 = torch.max(rc_y[i][0:9])
                    rel2 = torch.max(rc_y_[i][0:9])
                    if rel1 > rel2:
                        rel_pred = 2*torch.argmax(rc_y[i][0:9]).item()
                    else:
                        rel_pred = 2*torch.argmax(rc_y_[i][0:9]).item() + 1

                cond_mat[rel_pred, batch_data.rels[i]] += 1

    TP = torch.diagonal(cond_mat)
    FP = torch.sum(cond_mat, 1) - TP
    FN = torch.sum(cond_mat, 0) - TP
    P = TP / (TP + FP + 1e-5)
    R = TP / (TP + FN + 1e-5)
    tmp = 2 / (1/P+1/R)
    P_ = torch.mean(torch.cat([P[0:5], P[6:18]], 0))
    R_ = torch.mean(torch.cat([R[0:5], R[6:18]], 0))
    f1 = torch.mean(torch.cat([tmp[0:5], tmp[6:18]], 0))

    # Accuracy in percent over the examples actually scored (2700 with the defaults).
    accuracy = 100 * torch.trace(cond_mat) / torch.sum(cond_mat)

    print("epoch: %d, accuracy:  %2.2f%%, loss_dect: %2.4f, loss_dep: %2.4f, loss_total: %2.4f, R: %2.2f%%, P: %2.2f%%, f1_macro: %2.2f%%" % 
          (epoch, accuracy, loss_dect/num_batches, loss_dep/num_batches, (loss_dect+loss_dep)/num_batches, R_*100, P_*100, f1*100))
    # short pause kept from the original code after printing
    time.sleep(0.1)

In [None]:
def train():
    """Run one optimisation step of the global ``model`` on the next training mini-batch.

    The loss is ``params.alpha`` times the summed per-sentence entity loss plus the two
    relation losses. Gradients are clipped to ``params.max_norm`` (L2 norm) before the
    optimizer step.

    NOTE(review): the learning-rate scheduler is not stepped here.
    """
    model.train()
    batch = train_data.next_batch(params.batch_size)

    optimizer.zero_grad()
    ent_scores, rel_scores1, rel_scores2 = model(batch)

    # entity-detection loss: one cross-entropy term per sentence, summed
    ent_loss = sum(
        criterion1(ent_scores[k].cpu(), torch.tensor(batch.ents_seq_id[k]))
        for k in range(batch.size)
    )

    # relation loss: one term per classifier output
    rel_loss_1 = criterion2(rel_scores1.cpu(), torch.tensor(batch.rels1))
    rel_loss_2 = criterion2(rel_scores2.cpu(), torch.tensor(batch.rels2))
    rel_loss = rel_loss_1 + rel_loss_2

    total_loss = ent_loss * params.alpha + rel_loss
    total_loss.backward()

    nn.utils.clip_grad_norm_(model.parameters(), max_norm=params.max_norm, norm_type=2)
    optimizer.step()

In [None]:
def begin(ver):
    """Train for ``params.total_epoch`` epochs and checkpoint after each epoch.

    Args:
        ver: String suffix inserted into the checkpoint filename; the state is
            written to ``Model/treelstm<ver>.pth`` and overwritten every epoch.

    Relies on the notebook-level globals ``start_epoch``, ``params``, ``model``,
    ``optimizer``, ``train`` and ``eval``. Prints the elapsed wall-clock time
    when all epochs have finished.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the documented
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    # Create the checkpoint directory up front so the first torch.save cannot
    # fail after a whole epoch of training.
    os.makedirs('Model', exist_ok=True)
    for epoch in range(start_epoch, start_epoch + params.total_epoch):
        # The context manager closes the progress bar on any exit, including a
        # KeyboardInterrupt, which then propagates to the caller unchanged.
        with tqdm(range(400)) as t:
            for step in t:
                train()
                # From the third epoch on, evaluate every 100 training steps.
                if epoch > 2 and (step + 1) % 100 == 0:
                    eval(epoch)
        state = {'net': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
        torch.save(state, 'Model/treelstm' + ver + '.pth')
    end = time.perf_counter()
    print("training time: ", end-start, " s")

# Begin

In [None]:
# Baseline ("default") hyper-parameters that the sweeps below vary one at a time.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.3,
    alpha=1,
    total_epoch=6,
    max_norm=10,
)

## dropout

In [None]:
# Dropout sweep: dropout = 0.0, trained for 5 epochs.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.0,  # value under test in this run
    alpha=1,
    total_epoch=5,
    max_norm=10,
)

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)
start_epoch = 1
begin('_p00')  # checkpoint file: Model/treelstm_p00.pth

 25%|██▌       | 100/400 [02:25<1:17:58, 15.60s/it]

epoch: 1, accuracy:  28.33%, loss_dect: 2.7800, loss_dep: 2.7713, loss_total: 5.5514, f1_macro: 14.61%


 50%|█████     | 200/400 [04:50<52:41, 15.81s/it]

epoch: 1, accuracy:  52.56%, loss_dect: 1.9183, loss_dep: 1.9104, loss_total: 3.8287, f1_macro: 48.99%


 75%|███████▌  | 300/400 [07:19<26:34, 15.95s/it]

epoch: 1, accuracy:  60.26%, loss_dect: 1.5459, loss_dep: 1.5373, loss_total: 3.0833, f1_macro: 58.77%


100%|██████████| 400/400 [09:45<00:00,  1.46s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  52.41%, loss_dect: 1.8267, loss_dep: 1.7991, loss_total: 3.6258, f1_macro: 52.03%


 25%|██▌       | 100/400 [02:26<1:19:04, 15.82s/it]

epoch: 2, accuracy:  65.78%, loss_dect: 1.3179, loss_dep: 1.3323, loss_total: 2.6502, f1_macro: 66.02%


 50%|█████     | 200/400 [04:52<52:45, 15.83s/it]

epoch: 2, accuracy:  67.81%, loss_dect: 1.2094, loss_dep: 1.2224, loss_total: 2.4318, f1_macro: 69.61%


 75%|███████▌  | 300/400 [07:20<26:36, 15.96s/it]

epoch: 2, accuracy:  69.52%, loss_dect: 1.1970, loss_dep: 1.2005, loss_total: 2.3975, f1_macro: 71.02%


100%|██████████| 400/400 [09:45<00:00,  1.46s/it]

epoch: 2, accuracy:  59.37%, loss_dect: 1.5738, loss_dep: 1.5937, loss_total: 3.1675, f1_macro: 61.37%



 25%|██▌       | 100/400 [02:25<1:18:03, 15.61s/it]

epoch: 3, accuracy:  70.48%, loss_dect: 1.1297, loss_dep: 1.1402, loss_total: 2.2699, f1_macro: 72.12%


 50%|█████     | 200/400 [04:53<55:32, 16.66s/it]

epoch: 3, accuracy:  71.00%, loss_dect: 1.1166, loss_dep: 1.1286, loss_total: 2.2451, f1_macro: 73.04%


 75%|███████▌  | 300/400 [07:22<26:46, 16.06s/it]

epoch: 3, accuracy:  71.48%, loss_dect: 1.1193, loss_dep: 1.1309, loss_total: 2.2502, f1_macro: 73.44%


100%|██████████| 400/400 [09:49<00:00,  1.47s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  62.67%, loss_dect: 1.5432, loss_dep: 1.5550, loss_total: 3.0982, f1_macro: 64.30%


 25%|██▌       | 100/400 [02:26<1:19:04, 15.82s/it]

epoch: 4, accuracy:  72.33%, loss_dect: 1.0986, loss_dep: 1.1047, loss_total: 2.2033, f1_macro: 74.17%


 50%|█████     | 200/400 [04:48<51:30, 15.45s/it]

epoch: 4, accuracy:  72.26%, loss_dect: 1.0963, loss_dep: 1.1073, loss_total: 2.2036, f1_macro: 73.69%


 75%|███████▌  | 300/400 [07:11<25:46, 15.46s/it]

epoch: 4, accuracy:  71.37%, loss_dect: 1.1626, loss_dep: 1.1799, loss_total: 2.3425, f1_macro: 72.52%


100%|██████████| 400/400 [09:33<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  65.44%, loss_dect: 1.4739, loss_dep: 1.4900, loss_total: 2.9638, f1_macro: 66.79%


 25%|██▌       | 100/400 [02:23<1:17:45, 15.55s/it]

epoch: 5, accuracy:  72.56%, loss_dect: 1.1282, loss_dep: 1.1210, loss_total: 2.2492, f1_macro: 75.00%


 50%|█████     | 200/400 [04:45<51:30, 15.45s/it]

epoch: 5, accuracy:  73.00%, loss_dect: 1.1329, loss_dep: 1.1432, loss_total: 2.2761, f1_macro: 74.25%


 75%|███████▌  | 300/400 [07:12<26:58, 16.19s/it]

epoch: 5, accuracy:  70.93%, loss_dect: 1.2763, loss_dep: 1.2943, loss_total: 2.5706, f1_macro: 72.23%


100%|██████████| 400/400 [09:41<00:00,  1.45s/it]

epoch: 5, accuracy:  68.07%, loss_dect: 1.3887, loss_dep: 1.4094, loss_total: 2.7980, f1_macro: 69.40%
training time:  2941.8748619999997  s





In [None]:
# Dropout sweep: dropout = 0.1, trained for 5 epochs.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.1,  # value under test in this run
    alpha=1,
    total_epoch=5,
    max_norm=10,
)

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)
start_epoch = 1
begin('_p01')  # checkpoint file: Model/treelstm_p01.pth

 25%|██▌       | 100/400 [02:27<1:18:09, 15.63s/it]

epoch: 1, accuracy:  31.07%, loss_dect: 2.7719, loss_dep: 2.8002, loss_total: 5.5721, f1_macro: 17.99%


 50%|█████     | 200/400 [04:50<50:57, 15.29s/it]

epoch: 1, accuracy:  54.15%, loss_dect: 1.8125, loss_dep: 1.8334, loss_total: 3.6459, f1_macro: 52.74%


 75%|███████▌  | 300/400 [07:11<25:19, 15.19s/it]

epoch: 1, accuracy:  59.78%, loss_dect: 1.5436, loss_dep: 1.5636, loss_total: 3.1071, f1_macro: 58.93%


100%|██████████| 400/400 [09:31<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  61.96%, loss_dect: 1.5660, loss_dep: 1.5831, loss_total: 3.1490, f1_macro: 63.12%


 25%|██▌       | 100/400 [02:20<1:16:38, 15.33s/it]

epoch: 2, accuracy:  65.44%, loss_dect: 1.2915, loss_dep: 1.2828, loss_total: 2.5743, f1_macro: 66.38%


 50%|█████     | 200/400 [04:39<50:18, 15.09s/it]

epoch: 2, accuracy:  66.37%, loss_dect: 1.2456, loss_dep: 1.2585, loss_total: 2.5041, f1_macro: 66.52%


 75%|███████▌  | 300/400 [06:59<25:09, 15.10s/it]

epoch: 2, accuracy:  68.33%, loss_dect: 1.1929, loss_dep: 1.2083, loss_total: 2.4012, f1_macro: 70.81%


100%|██████████| 400/400 [09:19<00:00,  1.40s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  68.93%, loss_dect: 1.2264, loss_dep: 1.2408, loss_total: 2.4672, f1_macro: 70.71%


 25%|██▌       | 100/400 [02:19<1:15:09, 15.03s/it]

epoch: 3, accuracy:  70.63%, loss_dect: 1.0856, loss_dep: 1.0917, loss_total: 2.1773, f1_macro: 72.47%


 50%|█████     | 200/400 [04:38<50:21, 15.11s/it]

epoch: 3, accuracy:  70.59%, loss_dect: 1.1027, loss_dep: 1.1119, loss_total: 2.2147, f1_macro: 72.33%


 75%|███████▌  | 300/400 [06:58<25:16, 15.16s/it]

epoch: 3, accuracy:  69.41%, loss_dect: 1.1775, loss_dep: 1.1877, loss_total: 2.3652, f1_macro: 72.30%


100%|██████████| 400/400 [09:18<00:00,  1.40s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  67.56%, loss_dect: 1.3747, loss_dep: 1.3206, loss_total: 2.6953, f1_macro: 68.00%


 25%|██▌       | 100/400 [02:19<1:15:08, 15.03s/it]

epoch: 4, accuracy:  71.74%, loss_dect: 1.0796, loss_dep: 1.0890, loss_total: 2.1686, f1_macro: 73.90%


 50%|█████     | 200/400 [04:39<50:45, 15.23s/it]

epoch: 4, accuracy:  71.30%, loss_dect: 1.0961, loss_dep: 1.1088, loss_total: 2.2050, f1_macro: 73.32%


 75%|███████▌  | 300/400 [06:59<25:07, 15.08s/it]

epoch: 4, accuracy:  69.67%, loss_dect: 1.2034, loss_dep: 1.2192, loss_total: 2.4225, f1_macro: 72.10%


100%|██████████| 400/400 [09:23<00:00,  1.41s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  65.81%, loss_dect: 1.5191, loss_dep: 1.5318, loss_total: 3.0510, f1_macro: 65.57%


 25%|██▌       | 100/400 [02:22<1:17:15, 15.45s/it]

epoch: 5, accuracy:  71.48%, loss_dect: 1.1212, loss_dep: 1.1337, loss_total: 2.2549, f1_macro: 72.97%


 50%|█████     | 200/400 [04:45<51:32, 15.46s/it]

epoch: 5, accuracy:  70.78%, loss_dect: 1.1867, loss_dep: 1.1948, loss_total: 2.3815, f1_macro: 72.85%


 75%|███████▌  | 300/400 [07:10<26:03, 15.64s/it]

epoch: 5, accuracy:  70.59%, loss_dect: 1.2512, loss_dep: 1.2611, loss_total: 2.5123, f1_macro: 72.19%


100%|██████████| 400/400 [09:32<00:00,  1.43s/it]

epoch: 5, accuracy:  66.70%, loss_dect: 1.5992, loss_dep: 1.5950, loss_total: 3.1942, f1_macro: 67.41%
training time:  2851.4188779999986  s





In [None]:
# Dropout sweep: dropout = 0.2, trained for 6 epochs.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.2,  # value under test in this run (the baseline cell uses 0.3)
    alpha=1,
    total_epoch=6,
    max_norm=10,
)

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)
start_epoch = 1
begin('_p02')  # checkpoint file: Model/treelstm_p02.pth

 25%|██▌       | 100/400 [02:22<1:16:23, 15.28s/it]

epoch: 1, accuracy:  36.56%, loss_dect: 2.7099, loss_dep: 2.7391, loss_total: 5.4489, f1_macro: 25.43%


 50%|█████     | 200/400 [04:44<51:25, 15.43s/it]

epoch: 1, accuracy:  51.00%, loss_dect: 1.8637, loss_dep: 1.8892, loss_total: 3.7529, f1_macro: 49.38%


 75%|███████▌  | 300/400 [07:09<26:09, 15.69s/it]

epoch: 1, accuracy:  58.70%, loss_dect: 1.5880, loss_dep: 1.6048, loss_total: 3.1928, f1_macro: 59.57%


100%|██████████| 400/400 [09:36<00:00,  1.44s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  60.85%, loss_dect: 1.5154, loss_dep: 1.5297, loss_total: 3.0451, f1_macro: 62.04%


 25%|██▌       | 100/400 [02:29<1:20:36, 16.12s/it]

epoch: 2, accuracy:  63.33%, loss_dect: 1.3941, loss_dep: 1.4049, loss_total: 2.7990, f1_macro: 63.62%


 50%|█████     | 200/400 [04:57<54:24, 16.32s/it]

epoch: 2, accuracy:  67.30%, loss_dect: 1.2684, loss_dep: 1.2780, loss_total: 2.5463, f1_macro: 67.69%


 75%|███████▌  | 300/400 [07:28<26:52, 16.13s/it]

epoch: 2, accuracy:  66.93%, loss_dect: 1.2603, loss_dep: 1.2732, loss_total: 2.5335, f1_macro: 68.91%


100%|██████████| 400/400 [09:55<00:00,  1.49s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  66.93%, loss_dect: 1.2660, loss_dep: 1.2783, loss_total: 2.5443, f1_macro: 68.76%


 25%|██▌       | 100/400 [02:25<1:18:50, 15.77s/it]

epoch: 3, accuracy:  69.19%, loss_dect: 1.1431, loss_dep: 1.1574, loss_total: 2.3005, f1_macro: 71.07%


 50%|█████     | 200/400 [04:49<52:29, 15.75s/it]

epoch: 3, accuracy:  70.85%, loss_dect: 1.1235, loss_dep: 1.1324, loss_total: 2.2559, f1_macro: 72.37%


 75%|███████▌  | 300/400 [07:17<26:32, 15.93s/it]

epoch: 3, accuracy:  68.96%, loss_dect: 1.1814, loss_dep: 1.1920, loss_total: 2.3734, f1_macro: 70.63%


100%|██████████| 400/400 [09:44<00:00,  1.46s/it]

epoch: 3, accuracy:  69.63%, loss_dect: 1.1927, loss_dep: 1.2060, loss_total: 2.3987, f1_macro: 71.12%



 25%|██▌       | 100/400 [02:27<1:19:47, 15.96s/it]

epoch: 4, accuracy:  70.85%, loss_dect: 1.0856, loss_dep: 1.1008, loss_total: 2.1864, f1_macro: 72.74%


 50%|█████     | 200/400 [04:54<53:14, 15.97s/it]

epoch: 4, accuracy:  72.44%, loss_dect: 1.1129, loss_dep: 1.1259, loss_total: 2.2388, f1_macro: 74.11%


 75%|███████▌  | 300/400 [07:23<26:45, 16.06s/it]

epoch: 4, accuracy:  70.81%, loss_dect: 1.1383, loss_dep: 1.1505, loss_total: 2.2889, f1_macro: 73.02%


100%|██████████| 400/400 [09:51<00:00,  1.48s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  70.81%, loss_dect: 1.2107, loss_dep: 1.2224, loss_total: 2.4330, f1_macro: 72.36%


 25%|██▌       | 100/400 [02:28<1:19:50, 15.97s/it]

epoch: 5, accuracy:  71.22%, loss_dect: 1.1277, loss_dep: 1.1328, loss_total: 2.2604, f1_macro: 72.64%


 50%|█████     | 200/400 [04:55<52:38, 15.79s/it]

epoch: 5, accuracy:  72.19%, loss_dect: 1.2000, loss_dep: 1.2090, loss_total: 2.4090, f1_macro: 74.93%


 75%|███████▌  | 300/400 [07:22<26:30, 15.90s/it]

epoch: 5, accuracy:  72.37%, loss_dect: 1.1644, loss_dep: 1.1607, loss_total: 2.3251, f1_macro: 74.37%


100%|██████████| 400/400 [09:49<00:00,  1.47s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 5, accuracy:  71.56%, loss_dect: 1.2956, loss_dep: 1.3063, loss_total: 2.6020, f1_macro: 72.75%


 25%|██▌       | 100/400 [02:27<1:19:31, 15.91s/it]

epoch: 6, accuracy:  70.67%, loss_dect: 1.2443, loss_dep: 1.1952, loss_total: 2.4395, f1_macro: 73.12%


 50%|█████     | 200/400 [04:53<52:50, 15.85s/it]

epoch: 6, accuracy:  70.74%, loss_dect: 1.3510, loss_dep: 1.3678, loss_total: 2.7188, f1_macro: 72.42%


 75%|███████▌  | 300/400 [07:21<26:24, 15.84s/it]

epoch: 6, accuracy:  70.33%, loss_dect: 1.3826, loss_dep: 1.3712, loss_total: 2.7539, f1_macro: 72.01%


100%|██████████| 400/400 [09:48<00:00,  1.47s/it]

epoch: 6, accuracy:  70.52%, loss_dect: 1.3638, loss_dep: 1.3856, loss_total: 2.7493, f1_macro: 71.27%
training time:  3556.619482  s





In [None]:
# Dropout sweep: dropout = 0.3 (same value as the baseline cell), trained for 6 epochs.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.3,  # value under test in this run
    alpha=1,
    total_epoch=6,
    max_norm=10,
)

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)
start_epoch = 1
begin('_p03')  # checkpoint file: Model/treelstm_p03.pth

 25%|██▌       | 100/400 [02:27<1:19:01, 15.81s/it]

epoch: 1, accuracy:  31.81%, loss_dect: 2.7439, loss_dep: 2.7697, loss_total: 5.5137, f1_macro: 18.65%


 50%|█████     | 200/400 [04:54<52:51, 15.86s/it]

epoch: 1, accuracy:  51.85%, loss_dect: 1.8573, loss_dep: 1.8778, loss_total: 3.7352, f1_macro: 49.65%


 75%|███████▌  | 300/400 [07:26<28:12, 16.93s/it]

epoch: 1, accuracy:  56.89%, loss_dect: 1.6477, loss_dep: 1.6642, loss_total: 3.3119, f1_macro: 54.45%


100%|██████████| 400/400 [10:04<00:00,  1.51s/it]

epoch: 1, accuracy:  57.81%, loss_dect: 1.5403, loss_dep: 1.5571, loss_total: 3.0975, f1_macro: 57.66%



 25%|██▌       | 100/400 [02:33<1:21:34, 16.31s/it]

epoch: 2, accuracy:  60.56%, loss_dect: 1.4222, loss_dep: 1.4373, loss_total: 2.8595, f1_macro: 62.93%


 50%|█████     | 200/400 [05:02<53:49, 16.15s/it]

epoch: 2, accuracy:  67.07%, loss_dect: 1.2318, loss_dep: 1.2422, loss_total: 2.4740, f1_macro: 66.37%


 75%|███████▌  | 300/400 [07:33<27:00, 16.21s/it]

epoch: 2, accuracy:  66.56%, loss_dect: 1.2414, loss_dep: 1.2463, loss_total: 2.4877, f1_macro: 67.57%


100%|██████████| 400/400 [10:02<00:00,  1.51s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  66.48%, loss_dect: 1.2268, loss_dep: 1.2361, loss_total: 2.4630, f1_macro: 67.94%


 25%|██▌       | 100/400 [02:28<1:20:53, 16.18s/it]

epoch: 3, accuracy:  68.63%, loss_dect: 1.1550, loss_dep: 1.1676, loss_total: 2.3226, f1_macro: 70.69%


 50%|█████     | 200/400 [05:00<54:33, 16.37s/it]

epoch: 3, accuracy:  70.44%, loss_dect: 1.1116, loss_dep: 1.1225, loss_total: 2.2341, f1_macro: 72.05%


 75%|███████▌  | 300/400 [07:30<27:06, 16.27s/it]

epoch: 3, accuracy:  70.00%, loss_dect: 1.1414, loss_dep: 1.1473, loss_total: 2.2887, f1_macro: 72.11%


100%|██████████| 400/400 [10:01<00:00,  1.50s/it]

epoch: 3, accuracy:  68.67%, loss_dect: 1.1823, loss_dep: 1.1912, loss_total: 2.3734, f1_macro: 70.62%



 25%|██▌       | 100/400 [02:29<1:20:46, 16.15s/it]

epoch: 4, accuracy:  69.63%, loss_dect: 1.1178, loss_dep: 1.1297, loss_total: 2.2475, f1_macro: 71.95%


 50%|█████     | 200/400 [04:58<54:00, 16.20s/it]

epoch: 4, accuracy:  71.44%, loss_dect: 1.1473, loss_dep: 1.1144, loss_total: 2.2617, f1_macro: 74.88%


 75%|███████▌  | 300/400 [07:26<26:24, 15.84s/it]

epoch: 4, accuracy:  71.33%, loss_dect: 1.1429, loss_dep: 1.1515, loss_total: 2.2944, f1_macro: 73.98%


100%|██████████| 400/400 [09:52<00:00,  1.48s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  69.74%, loss_dect: 1.1846, loss_dep: 1.1942, loss_total: 2.3788, f1_macro: 72.07%


 25%|██▌       | 100/400 [02:26<1:18:51, 15.77s/it]

epoch: 5, accuracy:  70.04%, loss_dect: 1.1979, loss_dep: 1.1916, loss_total: 2.3895, f1_macro: 72.23%


 50%|█████     | 200/400 [04:53<53:19, 16.00s/it]

epoch: 5, accuracy:  72.59%, loss_dect: 1.1421, loss_dep: 1.1488, loss_total: 2.2909, f1_macro: 75.49%


 75%|███████▌  | 300/400 [07:22<26:46, 16.07s/it]

epoch: 5, accuracy:  72.52%, loss_dect: 1.1380, loss_dep: 1.1436, loss_total: 2.2816, f1_macro: 74.48%


100%|██████████| 400/400 [09:51<00:00,  1.48s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 5, accuracy:  71.00%, loss_dect: 1.2421, loss_dep: 1.2510, loss_total: 2.4930, f1_macro: 72.88%


 25%|██▌       | 100/400 [02:30<1:20:58, 16.19s/it]

epoch: 6, accuracy:  70.00%, loss_dect: 1.2404, loss_dep: 1.2614, loss_total: 2.5018, f1_macro: 72.39%


 50%|█████     | 200/400 [04:59<54:16, 16.28s/it]

epoch: 6, accuracy:  72.00%, loss_dect: 1.1994, loss_dep: 1.2158, loss_total: 2.4152, f1_macro: 74.40%


 75%|███████▌  | 300/400 [07:30<26:57, 16.17s/it]

epoch: 6, accuracy:  72.15%, loss_dect: 1.3054, loss_dep: 1.2715, loss_total: 2.5770, f1_macro: 74.16%


100%|██████████| 400/400 [10:00<00:00,  1.50s/it]

epoch: 6, accuracy:  70.78%, loss_dect: 1.2972, loss_dep: 1.3131, loss_total: 2.6103, f1_macro: 73.27%
training time:  3624.424258000001  s





In [None]:
# Dropout sweep: dropout = 0.4, trained for 6 epochs.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.4,  # value under test in this run (the baseline cell uses 0.3)
    alpha=1,
    total_epoch=6,
    max_norm=10,
)

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)
start_epoch = 1
begin('_p04')  # checkpoint file: Model/treelstm_p04.pth

 25%|██▌       | 100/400 [02:30<1:20:46, 16.15s/it]

epoch: 1, accuracy:  30.44%, loss_dect: 2.6205, loss_dep: 2.6470, loss_total: 5.2675, f1_macro: 16.14%


 50%|█████     | 200/400 [04:59<54:05, 16.23s/it]

epoch: 1, accuracy:  55.00%, loss_dect: 1.7930, loss_dep: 1.7831, loss_total: 3.5760, f1_macro: 54.28%


 75%|███████▌  | 300/400 [07:30<27:08, 16.29s/it]

epoch: 1, accuracy:  57.56%, loss_dect: 1.5649, loss_dep: 1.5651, loss_total: 3.1300, f1_macro: 57.89%


100%|██████████| 400/400 [10:00<00:00,  1.50s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  56.93%, loss_dect: 1.5843, loss_dep: 1.5848, loss_total: 3.1691, f1_macro: 55.46%


 25%|██▌       | 100/400 [02:30<1:21:05, 16.22s/it]

epoch: 2, accuracy:  62.22%, loss_dect: 1.5022, loss_dep: 1.4806, loss_total: 2.9828, f1_macro: 63.10%


 50%|█████     | 200/400 [05:00<54:05, 16.23s/it]

epoch: 2, accuracy:  65.81%, loss_dect: 1.2828, loss_dep: 1.2741, loss_total: 2.5570, f1_macro: 66.97%


 75%|███████▌  | 300/400 [07:32<27:12, 16.33s/it]

epoch: 2, accuracy:  65.96%, loss_dect: 1.2887, loss_dep: 1.3021, loss_total: 2.5908, f1_macro: 66.61%


100%|██████████| 400/400 [10:01<00:00,  1.50s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  65.59%, loss_dect: 1.2461, loss_dep: 1.2670, loss_total: 2.5131, f1_macro: 66.63%


 25%|██▌       | 100/400 [02:30<1:21:22, 16.28s/it]

epoch: 3, accuracy:  66.04%, loss_dect: 1.2991, loss_dep: 1.3219, loss_total: 2.6210, f1_macro: 66.97%


 50%|█████     | 200/400 [05:02<56:06, 16.83s/it]

epoch: 3, accuracy:  69.74%, loss_dect: 1.1452, loss_dep: 1.1594, loss_total: 2.3047, f1_macro: 70.78%


 75%|███████▌  | 300/400 [07:43<29:06, 17.46s/it]

epoch: 3, accuracy:  68.89%, loss_dect: 1.2122, loss_dep: 1.2228, loss_total: 2.4350, f1_macro: 69.80%


100%|██████████| 400/400 [10:24<00:00,  1.56s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  67.93%, loss_dect: 1.2090, loss_dep: 1.2071, loss_total: 2.4161, f1_macro: 69.59%


 25%|██▌       | 100/400 [02:39<1:26:20, 17.27s/it]

epoch: 4, accuracy:  68.93%, loss_dect: 1.1981, loss_dep: 1.2096, loss_total: 2.4077, f1_macro: 70.33%


 50%|█████     | 200/400 [05:17<57:17, 17.19s/it]

epoch: 4, accuracy:  71.81%, loss_dect: 1.1180, loss_dep: 1.1288, loss_total: 2.2468, f1_macro: 73.49%


 75%|███████▌  | 300/400 [07:56<28:37, 17.17s/it]

epoch: 4, accuracy:  70.22%, loss_dect: 1.1756, loss_dep: 1.1901, loss_total: 2.3657, f1_macro: 72.13%


100%|██████████| 400/400 [10:29<00:00,  1.57s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  70.04%, loss_dect: 1.1852, loss_dep: 1.2041, loss_total: 2.3894, f1_macro: 71.67%


 25%|██▌       | 100/400 [02:26<1:21:21, 16.27s/it]

epoch: 5, accuracy:  70.56%, loss_dect: 1.1737, loss_dep: 1.1841, loss_total: 2.3578, f1_macro: 72.38%


 50%|█████     | 200/400 [04:57<54:12, 16.26s/it]

epoch: 5, accuracy:  72.15%, loss_dect: 1.1650, loss_dep: 1.1738, loss_total: 2.3388, f1_macro: 73.75%


 75%|███████▌  | 300/400 [07:29<27:12, 16.33s/it]

epoch: 5, accuracy:  70.78%, loss_dect: 1.2105, loss_dep: 1.2217, loss_total: 2.4323, f1_macro: 72.17%


100%|██████████| 400/400 [09:59<00:00,  1.50s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 5, accuracy:  68.70%, loss_dect: 1.3245, loss_dep: 1.3379, loss_total: 2.6625, f1_macro: 70.89%


 25%|██▌       | 100/400 [02:30<1:21:45, 16.35s/it]

epoch: 6, accuracy:  70.93%, loss_dect: 1.2379, loss_dep: 1.2558, loss_total: 2.4937, f1_macro: 72.25%


 50%|█████     | 200/400 [05:01<55:11, 16.56s/it]

epoch: 6, accuracy:  72.41%, loss_dect: 1.2116, loss_dep: 1.2336, loss_total: 2.4453, f1_macro: 74.54%


 75%|███████▌  | 300/400 [07:35<27:29, 16.49s/it]

epoch: 6, accuracy:  70.33%, loss_dect: 1.2888, loss_dep: 1.3131, loss_total: 2.6020, f1_macro: 71.66%


100%|██████████| 400/400 [10:06<00:00,  1.52s/it]

epoch: 6, accuracy:  68.44%, loss_dect: 1.4745, loss_dep: 1.4705, loss_total: 2.9450, f1_macro: 71.14%
training time:  3695.366894999999  s





In [None]:
# Dropout sweep: dropout = 0.5, trained for 6 epochs.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.5,  # value under test in this run (the baseline cell uses 0.3)
    alpha=1,
    total_epoch=6,
    max_norm=10,
)

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)
start_epoch = 1
begin('_p05')  # checkpoint file: Model/treelstm_p05.pth

 25%|██▌       | 100/400 [02:33<1:22:51, 16.57s/it]

epoch: 1, accuracy:  30.93%, loss_dect: 2.7029, loss_dep: 2.7314, loss_total: 5.4343, f1_macro: 17.06%


 50%|█████     | 200/400 [05:05<55:08, 16.54s/it]

epoch: 1, accuracy:  51.74%, loss_dect: 1.8595, loss_dep: 1.8804, loss_total: 3.7399, f1_macro: 49.78%


 75%|███████▌  | 300/400 [07:38<27:58, 16.79s/it]

epoch: 1, accuracy:  60.15%, loss_dect: 1.5897, loss_dep: 1.5852, loss_total: 3.1750, f1_macro: 60.55%


100%|██████████| 400/400 [10:10<00:00,  1.53s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  58.59%, loss_dect: 1.5802, loss_dep: 1.5689, loss_total: 3.1491, f1_macro: 58.09%


 25%|██▌       | 100/400 [02:33<1:23:00, 16.60s/it]

epoch: 2, accuracy:  62.63%, loss_dect: 1.3886, loss_dep: 1.4005, loss_total: 2.7891, f1_macro: 64.57%


 50%|█████     | 200/400 [05:05<55:13, 16.57s/it]

epoch: 2, accuracy:  66.30%, loss_dect: 1.2868, loss_dep: 1.3014, loss_total: 2.5882, f1_macro: 65.50%


 75%|███████▌  | 300/400 [07:39<27:42, 16.62s/it]

epoch: 2, accuracy:  67.44%, loss_dect: 1.2093, loss_dep: 1.2245, loss_total: 2.4338, f1_macro: 69.12%


100%|██████████| 400/400 [10:11<00:00,  1.53s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  68.04%, loss_dect: 1.1827, loss_dep: 1.1915, loss_total: 2.3742, f1_macro: 69.54%


 25%|██▌       | 100/400 [02:33<1:22:50, 16.57s/it]

epoch: 3, accuracy:  67.59%, loss_dect: 1.2215, loss_dep: 1.2323, loss_total: 2.4538, f1_macro: 68.97%


 50%|█████     | 200/400 [05:05<54:59, 16.50s/it]

epoch: 3, accuracy:  69.59%, loss_dect: 1.1548, loss_dep: 1.1649, loss_total: 2.3197, f1_macro: 68.97%


 75%|███████▌  | 300/400 [07:38<27:36, 16.56s/it]

epoch: 3, accuracy:  69.33%, loss_dect: 1.1291, loss_dep: 1.1374, loss_total: 2.2665, f1_macro: 71.20%


100%|██████████| 400/400 [10:10<00:00,  1.53s/it]

epoch: 3, accuracy:  69.41%, loss_dect: 1.1139, loss_dep: 1.1193, loss_total: 2.2332, f1_macro: 71.29%



 25%|██▌       | 100/400 [02:33<1:22:57, 16.59s/it]

epoch: 4, accuracy:  68.74%, loss_dect: 1.1976, loss_dep: 1.2067, loss_total: 2.4043, f1_macro: 70.84%


 50%|█████     | 200/400 [05:06<55:25, 16.63s/it]

epoch: 4, accuracy:  70.52%, loss_dect: 1.1541, loss_dep: 1.1711, loss_total: 2.3252, f1_macro: 71.03%


 75%|███████▌  | 300/400 [07:42<28:07, 16.87s/it]

epoch: 4, accuracy:  71.22%, loss_dect: 1.1203, loss_dep: 1.1340, loss_total: 2.2542, f1_macro: 73.10%


100%|██████████| 400/400 [10:15<00:00,  1.54s/it]

epoch: 4, accuracy:  70.78%, loss_dect: 1.1123, loss_dep: 1.1280, loss_total: 2.2403, f1_macro: 72.77%



 25%|██▌       | 100/400 [02:34<1:23:38, 16.73s/it]

epoch: 5, accuracy:  71.22%, loss_dect: 1.1881, loss_dep: 1.1709, loss_total: 2.3591, f1_macro: 73.13%


 50%|█████     | 200/400 [05:10<56:33, 16.97s/it]

epoch: 5, accuracy:  71.63%, loss_dect: 1.1631, loss_dep: 1.1709, loss_total: 2.3340, f1_macro: 72.98%


 75%|███████▌  | 300/400 [07:44<27:34, 16.54s/it]

epoch: 5, accuracy:  71.22%, loss_dect: 1.1490, loss_dep: 1.1608, loss_total: 2.3099, f1_macro: 73.55%


100%|██████████| 400/400 [10:16<00:00,  1.54s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 5, accuracy:  69.70%, loss_dect: 1.2193, loss_dep: 1.2301, loss_total: 2.4493, f1_macro: 72.20%


 25%|██▌       | 100/400 [02:33<1:22:33, 16.51s/it]

epoch: 6, accuracy:  72.93%, loss_dect: 1.1717, loss_dep: 1.1808, loss_total: 2.3525, f1_macro: 74.78%


 50%|█████     | 200/400 [05:05<55:27, 16.64s/it]

epoch: 6, accuracy:  70.93%, loss_dect: 1.2498, loss_dep: 1.2680, loss_total: 2.5178, f1_macro: 72.49%


 75%|███████▌  | 300/400 [07:39<27:33, 16.53s/it]

epoch: 6, accuracy:  71.15%, loss_dect: 1.2410, loss_dep: 1.2646, loss_total: 2.5056, f1_macro: 72.68%


100%|██████████| 400/400 [10:11<00:00,  1.53s/it]

epoch: 6, accuracy:  68.48%, loss_dect: 1.3821, loss_dep: 1.4078, loss_total: 2.7899, f1_macro: 69.49%
training time:  3710.949249000001  s





## alpha

In [None]:
# Alpha sweep: alpha = 0 (the baseline cell uses 1), trained for 5 epochs.
# train() computes loss = loss_dect * params.alpha + loss_dep, so with
# alpha = 0 only the loss_dep term contributes to the optimised loss.
params.learn_rate = 1e-3
params.learn_rate_decay = 0.92
params.step_size = 50
params.weight_decay = 1e-5
params.dropout = 0.3
params.alpha = 0 ## 1
params.total_epoch = 5
params.max_norm = 10

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM()
model = model.cuda()
optimizer = optim.AdamW(model.parameters(), lr=params.learn_rate, weight_decay=params.weight_decay, amsgrad=True)
scheduler = optim.lr_scheduler.StepLR(optimizer, params.step_size, params.learn_rate_decay)
start_epoch = 1
# Save under the alpha-sweep suffix. This cell previously passed '_p00', which
# overwrote Model/treelstm_p00.pth from the dropout = 0.0 run.
begin('_a00')

 25%|██▌       | 100/400 [02:35<1:23:11, 16.64s/it]

epoch: 1, accuracy:  52.85%, loss_dect: 1.9352, loss_dep: 1.9120, loss_total: 3.8472, f1_macro: 49.43%


 50%|█████     | 200/400 [05:05<53:32, 16.06s/it]

epoch: 1, accuracy:  64.15%, loss_dect: 1.3567, loss_dep: 1.3219, loss_total: 2.6786, f1_macro: 64.88%


 75%|███████▌  | 300/400 [07:31<26:13, 15.74s/it]

epoch: 1, accuracy:  69.81%, loss_dect: 1.1862, loss_dep: 1.1499, loss_total: 2.3361, f1_macro: 72.95%


100%|██████████| 400/400 [09:55<00:00,  1.49s/it]

epoch: 1, accuracy:  64.63%, loss_dect: 1.3829, loss_dep: 1.3487, loss_total: 2.7315, f1_macro: 67.55%



 25%|██▌       | 100/400 [02:24<1:17:33, 15.51s/it]

epoch: 2, accuracy:  72.67%, loss_dect: 1.0360, loss_dep: 1.0066, loss_total: 2.0426, f1_macro: 75.81%


 50%|█████     | 200/400 [04:48<51:19, 15.40s/it]

epoch: 2, accuracy:  74.37%, loss_dect: 1.0329, loss_dep: 0.9957, loss_total: 2.0286, f1_macro: 77.39%


 75%|███████▌  | 300/400 [07:12<26:03, 15.64s/it]

epoch: 2, accuracy:  73.93%, loss_dect: 1.0438, loss_dep: 1.0049, loss_total: 2.0487, f1_macro: 77.24%


100%|██████████| 400/400 [09:35<00:00,  1.44s/it]

epoch: 2, accuracy:  69.59%, loss_dect: 1.2221, loss_dep: 1.1848, loss_total: 2.4068, f1_macro: 73.06%



 25%|██▌       | 100/400 [02:23<1:17:37, 15.53s/it]

epoch: 3, accuracy:  74.37%, loss_dect: 1.0219, loss_dep: 0.9858, loss_total: 2.0078, f1_macro: 77.19%


 50%|█████     | 200/400 [04:46<51:16, 15.38s/it]

epoch: 3, accuracy:  75.41%, loss_dect: 1.0318, loss_dep: 0.9958, loss_total: 2.0276, f1_macro: 78.55%


 75%|███████▌  | 300/400 [07:10<25:46, 15.46s/it]

epoch: 3, accuracy:  74.59%, loss_dect: 1.0447, loss_dep: 1.0142, loss_total: 2.0589, f1_macro: 77.84%


100%|██████████| 400/400 [09:32<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  69.93%, loss_dect: 1.2735, loss_dep: 1.2395, loss_total: 2.5129, f1_macro: 72.60%


 25%|██▌       | 100/400 [02:22<1:17:01, 15.41s/it]

epoch: 4, accuracy:  73.67%, loss_dect: 1.0866, loss_dep: 1.0482, loss_total: 2.1347, f1_macro: 75.82%


 50%|█████     | 200/400 [04:45<51:15, 15.38s/it]

epoch: 4, accuracy:  73.89%, loss_dect: 1.1474, loss_dep: 1.1115, loss_total: 2.2589, f1_macro: 76.51%


 75%|███████▌  | 300/400 [07:08<25:30, 15.31s/it]

epoch: 4, accuracy:  74.70%, loss_dect: 1.1911, loss_dep: 1.1587, loss_total: 2.3498, f1_macro: 77.77%


100%|██████████| 400/400 [09:31<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  71.41%, loss_dect: 1.3076, loss_dep: 1.2747, loss_total: 2.5823, f1_macro: 73.07%


 25%|██▌       | 100/400 [02:22<1:17:01, 15.41s/it]

epoch: 5, accuracy:  74.00%, loss_dect: 1.2281, loss_dep: 1.1837, loss_total: 2.4118, f1_macro: 76.67%


 50%|█████     | 200/400 [04:44<51:18, 15.39s/it]

epoch: 5, accuracy:  73.33%, loss_dect: 1.3291, loss_dep: 1.2922, loss_total: 2.6213, f1_macro: 75.74%


 75%|███████▌  | 300/400 [07:07<25:27, 15.28s/it]

epoch: 5, accuracy:  74.15%, loss_dect: 1.3275, loss_dep: 1.3016, loss_total: 2.6291, f1_macro: 77.47%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]

epoch: 5, accuracy:  70.37%, loss_dect: 1.5491, loss_dep: 1.5246, loss_total: 3.0737, f1_macro: 73.34%
training time:  2911.1812610000015  s





In [None]:
# Alpha sweep: alpha = 0.1, trained for 5 epochs.
vars(params).update(
    learn_rate=1e-3,
    learn_rate_decay=0.92,
    step_size=50,
    weight_decay=1e-5,
    dropout=0.3,
    alpha=0.1,  # value under test in this run (the baseline cell uses 1)
    total_epoch=5,
    max_norm=10,
)

# New model, optimizer and scheduler instances for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)
start_epoch = 1
begin('_a01')  # checkpoint file: Model/treelstm_a01.pth

 25%|██▌       | 100/400 [02:22<1:16:47, 15.36s/it]

epoch: 1, accuracy:  39.93%, loss_dect: 2.2612, loss_dep: 2.2846, loss_total: 4.5458, f1_macro: 29.49%


 50%|█████     | 200/400 [04:44<51:17, 15.39s/it]

epoch: 1, accuracy:  59.67%, loss_dect: 1.5547, loss_dep: 1.5713, loss_total: 3.1261, f1_macro: 60.62%


 75%|███████▌  | 300/400 [07:07<25:39, 15.39s/it]

epoch: 1, accuracy:  67.07%, loss_dect: 1.2636, loss_dep: 1.2799, loss_total: 2.5436, f1_macro: 69.39%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  68.59%, loss_dect: 1.2659, loss_dep: 1.2802, loss_total: 2.5461, f1_macro: 70.35%


 25%|██▌       | 100/400 [02:22<1:17:01, 15.40s/it]

epoch: 2, accuracy:  70.56%, loss_dect: 1.1095, loss_dep: 1.0906, loss_total: 2.2002, f1_macro: 71.83%


 50%|█████     | 200/400 [04:44<51:27, 15.44s/it]

epoch: 2, accuracy:  71.33%, loss_dect: 1.0522, loss_dep: 1.0630, loss_total: 2.1152, f1_macro: 73.77%


 75%|███████▌  | 300/400 [07:07<25:32, 15.32s/it]

epoch: 2, accuracy:  71.07%, loss_dect: 1.0575, loss_dep: 1.0679, loss_total: 2.1254, f1_macro: 74.44%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  72.19%, loss_dect: 1.0630, loss_dep: 1.0752, loss_total: 2.1382, f1_macro: 74.36%


 25%|██▌       | 100/400 [02:23<1:17:16, 15.45s/it]

epoch: 3, accuracy:  73.22%, loss_dect: 1.0030, loss_dep: 1.0091, loss_total: 2.0122, f1_macro: 75.78%


 50%|█████     | 200/400 [04:45<51:37, 15.49s/it]

epoch: 3, accuracy:  73.44%, loss_dect: 1.0330, loss_dep: 1.0435, loss_total: 2.0765, f1_macro: 75.39%


 75%|███████▌  | 300/400 [07:08<25:44, 15.44s/it]

epoch: 3, accuracy:  73.26%, loss_dect: 1.0612, loss_dep: 1.0702, loss_total: 2.1313, f1_macro: 76.99%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  72.63%, loss_dect: 1.1473, loss_dep: 1.1083, loss_total: 2.2556, f1_macro: 74.42%


 25%|██▌       | 100/400 [02:22<1:17:12, 15.44s/it]

epoch: 4, accuracy:  74.37%, loss_dect: 0.9825, loss_dep: 0.9915, loss_total: 1.9740, f1_macro: 77.14%


 50%|█████     | 200/400 [04:44<51:31, 15.46s/it]

epoch: 4, accuracy:  73.41%, loss_dect: 1.0934, loss_dep: 1.1043, loss_total: 2.1977, f1_macro: 75.02%


 75%|███████▌  | 300/400 [07:07<25:34, 15.35s/it]

epoch: 4, accuracy:  72.44%, loss_dect: 1.1688, loss_dep: 1.1840, loss_total: 2.3529, f1_macro: 75.86%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]

epoch: 4, accuracy:  71.85%, loss_dect: 1.2023, loss_dep: 1.2137, loss_total: 2.4160, f1_macro: 73.61%



 25%|██▌       | 100/400 [02:23<1:16:56, 15.39s/it]

epoch: 5, accuracy:  73.41%, loss_dect: 1.1195, loss_dep: 1.1343, loss_total: 2.2538, f1_macro: 76.41%


 50%|█████     | 200/400 [04:45<51:18, 15.39s/it]

epoch: 5, accuracy:  73.52%, loss_dect: 1.1951, loss_dep: 1.2018, loss_total: 2.3969, f1_macro: 75.66%


 75%|███████▌  | 300/400 [07:07<25:41, 15.41s/it]

epoch: 5, accuracy:  72.52%, loss_dect: 1.2596, loss_dep: 1.2711, loss_total: 2.5308, f1_macro: 75.31%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]

epoch: 5, accuracy:  72.26%, loss_dect: 1.3262, loss_dep: 1.3222, loss_total: 2.6483, f1_macro: 74.20%
training time:  2875.316291000003  s





In [None]:
# Run '_a05': alpha is set to 0.5 (the Params default is 1).
params.alpha = 0.5

# Optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate, params.weight_decay = 1e-3, 1e-5
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.dropout, params.max_norm, params.total_epoch = 0.3, 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_a05')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

 25%|██▌       | 100/400 [02:22<1:16:38, 15.33s/it]

epoch: 1, accuracy:  39.30%, loss_dect: 2.5518, loss_dep: 2.5764, loss_total: 5.1282, f1_macro: 25.68%


 50%|█████     | 200/400 [04:43<51:11, 15.36s/it]

epoch: 1, accuracy:  53.59%, loss_dect: 1.7560, loss_dep: 1.7781, loss_total: 3.5341, f1_macro: 52.93%


 75%|███████▌  | 300/400 [07:05<25:30, 15.30s/it]

epoch: 1, accuracy:  61.63%, loss_dect: 1.4698, loss_dep: 1.4853, loss_total: 2.9551, f1_macro: 62.46%


100%|██████████| 400/400 [09:27<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  64.67%, loss_dect: 1.4397, loss_dep: 1.4491, loss_total: 2.8888, f1_macro: 64.67%


 25%|██▌       | 100/400 [02:22<1:16:42, 15.34s/it]

epoch: 2, accuracy:  67.19%, loss_dect: 1.2638, loss_dep: 1.2744, loss_total: 2.5382, f1_macro: 68.61%


 50%|█████     | 200/400 [04:44<51:39, 15.50s/it]

epoch: 2, accuracy:  68.26%, loss_dect: 1.2036, loss_dep: 1.2136, loss_total: 2.4172, f1_macro: 69.64%


 75%|███████▌  | 300/400 [07:07<25:36, 15.37s/it]

epoch: 2, accuracy:  66.44%, loss_dect: 1.2377, loss_dep: 1.2505, loss_total: 2.4882, f1_macro: 67.94%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  69.56%, loss_dect: 1.1964, loss_dep: 1.2064, loss_total: 2.4028, f1_macro: 71.23%


 25%|██▌       | 100/400 [02:22<1:16:26, 15.29s/it]

epoch: 3, accuracy:  70.78%, loss_dect: 1.0901, loss_dep: 1.0991, loss_total: 2.1892, f1_macro: 72.82%


 50%|█████     | 200/400 [04:43<51:07, 15.34s/it]

epoch: 3, accuracy:  70.41%, loss_dect: 1.1104, loss_dep: 1.1194, loss_total: 2.2298, f1_macro: 72.21%


 75%|███████▌  | 300/400 [07:06<25:34, 15.34s/it]

epoch: 3, accuracy:  70.26%, loss_dect: 1.1133, loss_dep: 1.1265, loss_total: 2.2398, f1_macro: 72.74%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  71.11%, loss_dect: 1.1349, loss_dep: 1.1458, loss_total: 2.2807, f1_macro: 72.43%


 25%|██▌       | 100/400 [02:23<1:16:59, 15.40s/it]

epoch: 4, accuracy:  71.59%, loss_dect: 1.0564, loss_dep: 1.0737, loss_total: 2.1301, f1_macro: 72.80%


 50%|█████     | 200/400 [04:44<51:04, 15.32s/it]

epoch: 4, accuracy:  72.81%, loss_dect: 1.0598, loss_dep: 1.0755, loss_total: 2.1353, f1_macro: 75.18%


 75%|███████▌  | 300/400 [07:07<25:45, 15.46s/it]

epoch: 4, accuracy:  72.30%, loss_dect: 1.0843, loss_dep: 1.0970, loss_total: 2.1813, f1_macro: 75.05%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  71.85%, loss_dect: 1.1624, loss_dep: 1.1724, loss_total: 2.3348, f1_macro: 72.95%


 25%|██▌       | 100/400 [02:22<1:16:44, 15.35s/it]

epoch: 5, accuracy:  71.89%, loss_dect: 1.1205, loss_dep: 1.1246, loss_total: 2.2451, f1_macro: 73.10%


 50%|█████     | 200/400 [04:43<50:52, 15.26s/it]

epoch: 5, accuracy:  73.74%, loss_dect: 1.1315, loss_dep: 1.1410, loss_total: 2.2724, f1_macro: 74.89%


 75%|███████▌  | 300/400 [07:05<25:34, 15.35s/it]

epoch: 5, accuracy:  73.44%, loss_dect: 1.1346, loss_dep: 1.1360, loss_total: 2.2707, f1_macro: 75.34%


100%|██████████| 400/400 [09:27<00:00,  1.42s/it]

epoch: 5, accuracy:  71.63%, loss_dect: 1.2264, loss_dep: 1.2355, loss_total: 2.4619, f1_macro: 73.08%
training time:  2867.0038550000027  s





In [None]:
# Run '_a10': alpha is set to 1, which equals the Params default.
params.alpha = 1

# Optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate, params.weight_decay = 1e-3, 1e-5
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.dropout, params.max_norm, params.total_epoch = 0.3, 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_a10')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

 25%|██▌       | 100/400 [02:21<1:16:33, 15.31s/it]

epoch: 1, accuracy:  37.41%, loss_dect: 2.5803, loss_dep: 2.5655, loss_total: 5.1458, f1_macro: 27.63%


 50%|█████     | 200/400 [04:42<50:39, 15.20s/it]

epoch: 1, accuracy:  55.19%, loss_dect: 1.7986, loss_dep: 1.8154, loss_total: 3.6141, f1_macro: 53.44%


 75%|███████▌  | 300/400 [07:04<25:31, 15.32s/it]

epoch: 1, accuracy:  60.33%, loss_dect: 1.5493, loss_dep: 1.5546, loss_total: 3.1039, f1_macro: 59.71%


100%|██████████| 400/400 [09:27<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  60.41%, loss_dect: 1.4675, loss_dep: 1.4820, loss_total: 2.9496, f1_macro: 60.14%


 25%|██▌       | 100/400 [02:21<1:16:20, 15.27s/it]

epoch: 2, accuracy:  60.81%, loss_dect: 1.4378, loss_dep: 1.4493, loss_total: 2.8872, f1_macro: 61.67%


 50%|█████     | 200/400 [04:43<51:21, 15.41s/it]

epoch: 2, accuracy:  66.11%, loss_dect: 1.2536, loss_dep: 1.2712, loss_total: 2.5247, f1_macro: 65.93%


 75%|███████▌  | 300/400 [07:06<25:33, 15.34s/it]

epoch: 2, accuracy:  67.15%, loss_dect: 1.2024, loss_dep: 1.2151, loss_total: 2.4175, f1_macro: 68.87%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  66.30%, loss_dect: 1.2478, loss_dep: 1.2602, loss_total: 2.5080, f1_macro: 68.33%


 25%|██▌       | 100/400 [02:21<1:16:33, 15.31s/it]

epoch: 3, accuracy:  67.63%, loss_dect: 1.1725, loss_dep: 1.1850, loss_total: 2.3575, f1_macro: 69.97%


 50%|█████     | 200/400 [04:43<51:12, 15.36s/it]

epoch: 3, accuracy:  70.07%, loss_dect: 1.1044, loss_dep: 1.1133, loss_total: 2.2177, f1_macro: 72.00%


 75%|███████▌  | 300/400 [07:06<25:36, 15.36s/it]

epoch: 3, accuracy:  70.89%, loss_dect: 1.0995, loss_dep: 1.1063, loss_total: 2.2058, f1_macro: 72.63%


100%|██████████| 400/400 [09:27<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  68.26%, loss_dect: 1.2258, loss_dep: 1.2411, loss_total: 2.4669, f1_macro: 70.11%


 25%|██▌       | 100/400 [02:22<1:16:39, 15.33s/it]

epoch: 4, accuracy:  69.00%, loss_dect: 1.1352, loss_dep: 1.1459, loss_total: 2.2810, f1_macro: 70.58%


 50%|█████     | 200/400 [04:43<51:10, 15.35s/it]

epoch: 4, accuracy:  71.52%, loss_dect: 1.0985, loss_dep: 1.1058, loss_total: 2.2043, f1_macro: 74.46%


 75%|███████▌  | 300/400 [07:05<25:27, 15.28s/it]

epoch: 4, accuracy:  70.89%, loss_dect: 1.1228, loss_dep: 1.1278, loss_total: 2.2506, f1_macro: 73.36%


100%|██████████| 400/400 [09:27<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  67.63%, loss_dect: 1.3004, loss_dep: 1.3154, loss_total: 2.6158, f1_macro: 70.15%


 25%|██▌       | 100/400 [02:22<1:16:47, 15.36s/it]

epoch: 5, accuracy:  70.89%, loss_dect: 1.1146, loss_dep: 1.1304, loss_total: 2.2450, f1_macro: 72.81%


 50%|█████     | 200/400 [04:43<51:09, 15.35s/it]

epoch: 5, accuracy:  71.67%, loss_dect: 1.1517, loss_dep: 1.1428, loss_total: 2.2945, f1_macro: 75.39%


 75%|███████▌  | 300/400 [07:06<25:41, 15.41s/it]

epoch: 5, accuracy:  70.81%, loss_dect: 1.1693, loss_dep: 1.1799, loss_total: 2.3493, f1_macro: 73.61%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]

epoch: 5, accuracy:  70.00%, loss_dect: 1.2893, loss_dep: 1.3028, loss_total: 2.5921, f1_macro: 71.92%
training time:  2864.4567289999977  s





In [None]:
# Build the training and test inputs with prepare_input (defined elsewhere in this notebook),
# one call per path under data/.
# NOTE(review): this reload sits in the middle of the hyper-parameter sweep cells; presumably it
# was re-run after a kernel restart and begin() reads these globals -- confirm.
train_data = prepare_input('data/train_path')
test_data = prepare_input('data/test_path')

In [None]:
# Run '_wd3': weight_decay is set to 1e-3 (the Params default is 1e-5).
params.weight_decay = 1e-3

# Remaining optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate = 1e-3
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.alpha, params.dropout = 1, 0.3
params.max_norm, params.total_epoch = 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_wd3')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

 25%|██▌       | 100/400 [02:22<1:16:27, 15.29s/it]

epoch: 1, accuracy:  31.52%, loss_dect: 2.6679, loss_dep: 2.6604, loss_total: 5.3283, f1_macro: 18.68%


 50%|█████     | 200/400 [04:44<51:09, 15.35s/it]

epoch: 1, accuracy:  52.41%, loss_dect: 1.8869, loss_dep: 1.8873, loss_total: 3.7742, f1_macro: 49.06%


 75%|███████▌  | 300/400 [07:06<25:35, 15.35s/it]

epoch: 1, accuracy:  60.44%, loss_dect: 1.5624, loss_dep: 1.5560, loss_total: 3.1183, f1_macro: 59.93%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  49.67%, loss_dect: 1.9149, loss_dep: 1.8754, loss_total: 3.7903, f1_macro: 52.67%


 25%|██▌       | 100/400 [02:22<1:16:14, 15.25s/it]

epoch: 2, accuracy:  62.48%, loss_dect: 1.3501, loss_dep: 1.3661, loss_total: 2.7161, f1_macro: 63.50%


 50%|█████     | 200/400 [04:44<50:54, 15.27s/it]

epoch: 2, accuracy:  65.89%, loss_dect: 1.2407, loss_dep: 1.2551, loss_total: 2.4958, f1_macro: 67.69%


 75%|███████▌  | 300/400 [07:07<25:37, 15.37s/it]

epoch: 2, accuracy:  68.33%, loss_dect: 1.1973, loss_dep: 1.2019, loss_total: 2.3992, f1_macro: 69.86%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  58.11%, loss_dect: 1.6379, loss_dep: 1.6555, loss_total: 3.2934, f1_macro: 60.73%


 25%|██▌       | 100/400 [02:22<1:16:19, 15.26s/it]

epoch: 3, accuracy:  67.85%, loss_dect: 1.2129, loss_dep: 1.2257, loss_total: 2.4387, f1_macro: 70.41%


 50%|█████     | 200/400 [04:44<50:52, 15.26s/it]

epoch: 3, accuracy:  70.15%, loss_dect: 1.0916, loss_dep: 1.1046, loss_total: 2.1962, f1_macro: 72.56%


 75%|███████▌  | 300/400 [07:06<25:33, 15.33s/it]

epoch: 3, accuracy:  70.85%, loss_dect: 1.1446, loss_dep: 1.1571, loss_total: 2.3017, f1_macro: 73.24%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  59.74%, loss_dect: 1.6554, loss_dep: 1.6656, loss_total: 3.3209, f1_macro: 61.65%


 25%|██▌       | 100/400 [02:22<1:16:41, 15.34s/it]

epoch: 4, accuracy:  69.48%, loss_dect: 1.1515, loss_dep: 1.1582, loss_total: 2.3097, f1_macro: 72.29%


 50%|█████     | 200/400 [04:44<50:58, 15.29s/it]

epoch: 4, accuracy:  71.26%, loss_dect: 1.0761, loss_dep: 1.0863, loss_total: 2.1624, f1_macro: 74.45%


 75%|███████▌  | 300/400 [07:07<25:35, 15.35s/it]

epoch: 4, accuracy:  70.52%, loss_dect: 1.1615, loss_dep: 1.1766, loss_total: 2.3381, f1_macro: 72.05%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  61.41%, loss_dect: 1.5912, loss_dep: 1.6057, loss_total: 3.1969, f1_macro: 63.38%


 25%|██▌       | 100/400 [02:22<1:16:52, 15.37s/it]

epoch: 5, accuracy:  70.93%, loss_dect: 1.1522, loss_dep: 1.1434, loss_total: 2.2957, f1_macro: 73.56%


 50%|█████     | 200/400 [04:44<50:52, 15.26s/it]

epoch: 5, accuracy:  71.67%, loss_dect: 1.1391, loss_dep: 1.1482, loss_total: 2.2873, f1_macro: 74.50%


 75%|███████▌  | 300/400 [07:07<25:28, 15.29s/it]

epoch: 5, accuracy:  70.33%, loss_dect: 1.2764, loss_dep: 1.2919, loss_total: 2.5683, f1_macro: 71.79%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]

epoch: 5, accuracy:  67.67%, loss_dect: 1.4052, loss_dep: 1.4200, loss_total: 2.8252, f1_macro: 68.91%
training time:  2872.6120779999983  s





In [None]:
# Run '_wd4': weight_decay is set to 1e-4 (the Params default is 1e-5).
params.weight_decay = 1e-4

# Five epochs: the output saved under this cell logs epochs 1-5, which a total_epoch of 4
# presumably could not have produced (confirm how begin() uses total_epoch).
params.total_epoch = 5

# Remaining optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate = 1e-3
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.alpha, params.dropout = 1, 0.3
params.max_norm = 10

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_wd4')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

 25%|██▌       | 100/400 [02:22<1:16:27, 15.29s/it]

epoch: 1, accuracy:  32.15%, loss_dect: 2.7020, loss_dep: 2.7306, loss_total: 5.4325, f1_macro: 18.99%


 50%|█████     | 200/400 [04:44<50:53, 15.27s/it]

epoch: 1, accuracy:  50.52%, loss_dect: 1.8680, loss_dep: 1.8889, loss_total: 3.7569, f1_macro: 50.37%


 75%|███████▌  | 300/400 [07:07<25:39, 15.39s/it]

epoch: 1, accuracy:  58.85%, loss_dect: 1.5531, loss_dep: 1.5748, loss_total: 3.1279, f1_macro: 58.62%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  58.78%, loss_dect: 1.6290, loss_dep: 1.6482, loss_total: 3.2772, f1_macro: 58.54%


 25%|██▌       | 100/400 [02:22<1:17:20, 15.47s/it]

epoch: 2, accuracy:  63.96%, loss_dect: 1.3602, loss_dep: 1.3515, loss_total: 2.7116, f1_macro: 66.05%


 50%|█████     | 200/400 [04:44<51:15, 15.38s/it]

epoch: 2, accuracy:  66.30%, loss_dect: 1.2835, loss_dep: 1.2983, loss_total: 2.5819, f1_macro: 68.10%


 75%|███████▌  | 300/400 [07:07<25:29, 15.30s/it]

epoch: 2, accuracy:  66.78%, loss_dect: 1.2403, loss_dep: 1.2565, loss_total: 2.4968, f1_macro: 69.28%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  66.22%, loss_dect: 1.3270, loss_dep: 1.3431, loss_total: 2.6701, f1_macro: 68.31%


 25%|██▌       | 100/400 [02:22<1:17:01, 15.41s/it]

epoch: 3, accuracy:  68.63%, loss_dect: 1.1397, loss_dep: 1.1445, loss_total: 2.2842, f1_macro: 70.77%


 50%|█████     | 200/400 [04:44<51:04, 15.32s/it]

epoch: 3, accuracy:  70.26%, loss_dect: 1.1422, loss_dep: 1.1533, loss_total: 2.2955, f1_macro: 72.40%


 75%|███████▌  | 300/400 [07:07<25:26, 15.27s/it]

epoch: 3, accuracy:  69.26%, loss_dect: 1.1800, loss_dep: 1.1887, loss_total: 2.3687, f1_macro: 72.19%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]

epoch: 3, accuracy:  64.85%, loss_dect: 1.5035, loss_dep: 1.4602, loss_total: 2.9637, f1_macro: 65.69%



 25%|██▌       | 100/400 [02:22<1:16:32, 15.31s/it]

epoch: 4, accuracy:  70.19%, loss_dect: 1.1122, loss_dep: 1.1218, loss_total: 2.2340, f1_macro: 72.57%


 50%|█████     | 200/400 [04:44<51:05, 15.33s/it]

epoch: 4, accuracy:  71.59%, loss_dect: 1.1468, loss_dep: 1.1554, loss_total: 2.3023, f1_macro: 74.08%


 75%|███████▌  | 300/400 [07:06<25:26, 15.26s/it]

epoch: 4, accuracy:  68.19%, loss_dect: 1.2503, loss_dep: 1.2658, loss_total: 2.5162, f1_macro: 71.20%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  65.41%, loss_dect: 1.4945, loss_dep: 1.5046, loss_total: 2.9991, f1_macro: 66.01%


 25%|██▌       | 100/400 [02:23<1:17:14, 15.45s/it]

epoch: 5, accuracy:  70.81%, loss_dect: 1.1413, loss_dep: 1.1528, loss_total: 2.2941, f1_macro: 72.76%


 50%|█████     | 200/400 [04:45<51:36, 15.48s/it]

epoch: 5, accuracy:  71.37%, loss_dect: 1.2087, loss_dep: 1.2174, loss_total: 2.4261, f1_macro: 74.13%


 75%|███████▌  | 300/400 [07:07<25:27, 15.27s/it]

epoch: 5, accuracy:  70.19%, loss_dect: 1.2254, loss_dep: 1.2365, loss_total: 2.4618, f1_macro: 73.20%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]

epoch: 5, accuracy:  66.41%, loss_dect: 1.5127, loss_dep: 1.5066, loss_total: 3.0193, f1_macro: 69.25%
training time:  2873.9719200000036  s





In [None]:
# Run '_wd5': weight_decay is set to 1e-5, which equals the Params default.
params.weight_decay = 1e-5

# Remaining optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate = 1e-3
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.alpha, params.dropout = 1, 0.3
params.max_norm, params.total_epoch = 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_wd5')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

 25%|██▌       | 100/400 [02:22<1:16:19, 15.27s/it]

epoch: 1, accuracy:  35.26%, loss_dect: 2.7187, loss_dep: 2.7469, loss_total: 5.4656, f1_macro: 24.67%


 50%|█████     | 200/400 [04:44<51:15, 15.38s/it]

epoch: 1, accuracy:  53.07%, loss_dect: 1.8943, loss_dep: 1.9182, loss_total: 3.8125, f1_macro: 52.59%


 75%|███████▌  | 300/400 [07:07<25:40, 15.40s/it]

epoch: 1, accuracy:  59.07%, loss_dect: 1.5653, loss_dep: 1.5824, loss_total: 3.1477, f1_macro: 60.71%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  60.07%, loss_dect: 1.5810, loss_dep: 1.5939, loss_total: 3.1749, f1_macro: 61.24%


 25%|██▌       | 100/400 [02:23<1:17:01, 15.41s/it]

epoch: 2, accuracy:  64.52%, loss_dect: 1.3503, loss_dep: 1.3606, loss_total: 2.7109, f1_macro: 67.01%


 50%|█████     | 200/400 [04:45<51:35, 15.48s/it]

epoch: 2, accuracy:  66.48%, loss_dect: 1.2879, loss_dep: 1.2976, loss_total: 2.5854, f1_macro: 68.57%


 75%|███████▌  | 300/400 [07:09<25:47, 15.47s/it]

epoch: 2, accuracy:  66.04%, loss_dect: 1.2436, loss_dep: 1.2572, loss_total: 2.5008, f1_macro: 68.78%


100%|██████████| 400/400 [09:31<00:00,  1.43s/it]

epoch: 2, accuracy:  67.44%, loss_dect: 1.2769, loss_dep: 1.2880, loss_total: 2.5649, f1_macro: 69.50%



 25%|██▌       | 100/400 [02:23<1:17:23, 15.48s/it]

epoch: 3, accuracy:  69.37%, loss_dect: 1.1348, loss_dep: 1.1455, loss_total: 2.2803, f1_macro: 72.20%


 50%|█████     | 200/400 [04:46<51:22, 15.41s/it]

epoch: 3, accuracy:  69.56%, loss_dect: 1.1486, loss_dep: 1.1575, loss_total: 2.3061, f1_macro: 71.81%


 75%|███████▌  | 300/400 [07:09<25:46, 15.47s/it]

epoch: 3, accuracy:  68.81%, loss_dect: 1.1683, loss_dep: 1.1797, loss_total: 2.3481, f1_macro: 72.09%


100%|██████████| 400/400 [09:32<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  70.22%, loss_dect: 1.1978, loss_dep: 1.2062, loss_total: 2.4040, f1_macro: 72.77%


 25%|██▌       | 100/400 [02:22<1:16:40, 15.33s/it]

epoch: 4, accuracy:  70.15%, loss_dect: 1.0864, loss_dep: 1.0978, loss_total: 2.1841, f1_macro: 72.55%


 50%|█████     | 200/400 [04:45<51:11, 15.36s/it]

epoch: 4, accuracy:  71.63%, loss_dect: 1.1137, loss_dep: 1.1285, loss_total: 2.2423, f1_macro: 73.42%


 75%|███████▌  | 300/400 [07:08<25:43, 15.43s/it]

epoch: 4, accuracy:  69.59%, loss_dect: 1.1500, loss_dep: 1.1637, loss_total: 2.3136, f1_macro: 72.46%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  70.74%, loss_dect: 1.2341, loss_dep: 1.2461, loss_total: 2.4802, f1_macro: 72.63%


 25%|██▌       | 100/400 [02:22<1:16:23, 15.28s/it]

epoch: 5, accuracy:  69.22%, loss_dect: 1.1680, loss_dep: 1.1670, loss_total: 2.3350, f1_macro: 70.72%


 50%|█████     | 200/400 [04:44<51:11, 15.36s/it]

epoch: 5, accuracy:  72.04%, loss_dect: 1.1319, loss_dep: 1.1388, loss_total: 2.2707, f1_macro: 73.54%


 75%|███████▌  | 300/400 [07:07<25:42, 15.42s/it]

epoch: 5, accuracy:  71.48%, loss_dect: 1.2113, loss_dep: 1.2099, loss_total: 2.4212, f1_macro: 73.53%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]

epoch: 5, accuracy:  70.85%, loss_dect: 1.3245, loss_dep: 1.3384, loss_total: 2.6628, f1_macro: 72.14%
training time:  2880.5008420000013  s





In [None]:
# Run '_wd6': weight_decay is set to 1e-6 (the Params default is 1e-5).
params.weight_decay = 1e-6

# Remaining optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate = 1e-3
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.alpha, params.dropout = 1, 0.3
params.max_norm, params.total_epoch = 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_wd6')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

 25%|██▌       | 100/400 [02:22<1:16:31, 15.31s/it]

epoch: 1, accuracy:  31.15%, loss_dect: 2.7891, loss_dep: 2.7703, loss_total: 5.5594, f1_macro: 17.19%


 50%|█████     | 200/400 [04:43<51:15, 15.38s/it]

epoch: 1, accuracy:  54.56%, loss_dect: 1.7572, loss_dep: 1.7741, loss_total: 3.5313, f1_macro: 51.13%


 75%|███████▌  | 300/400 [07:06<25:34, 15.35s/it]

epoch: 1, accuracy:  59.81%, loss_dect: 1.5244, loss_dep: 1.5324, loss_total: 3.0568, f1_macro: 58.21%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 1, accuracy:  60.11%, loss_dect: 1.4780, loss_dep: 1.4955, loss_total: 2.9735, f1_macro: 59.99%


 25%|██▌       | 100/400 [02:23<1:16:46, 15.36s/it]

epoch: 2, accuracy:  60.93%, loss_dect: 1.4060, loss_dep: 1.4184, loss_total: 2.8244, f1_macro: 61.70%


 50%|█████     | 200/400 [04:44<51:09, 15.35s/it]

epoch: 2, accuracy:  67.89%, loss_dect: 1.1917, loss_dep: 1.2062, loss_total: 2.3979, f1_macro: 68.88%


 75%|███████▌  | 300/400 [07:08<25:33, 15.34s/it]

epoch: 2, accuracy:  69.15%, loss_dect: 1.1493, loss_dep: 1.1618, loss_total: 2.3111, f1_macro: 70.43%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 2, accuracy:  67.15%, loss_dect: 1.2006, loss_dep: 1.2127, loss_total: 2.4132, f1_macro: 69.26%


 25%|██▌       | 100/400 [02:23<1:16:50, 15.37s/it]

epoch: 3, accuracy:  69.22%, loss_dect: 1.1386, loss_dep: 1.1521, loss_total: 2.2907, f1_macro: 69.65%


 50%|█████     | 200/400 [04:44<51:02, 15.31s/it]

epoch: 3, accuracy:  71.30%, loss_dect: 1.1008, loss_dep: 1.1093, loss_total: 2.2101, f1_macro: 72.13%


 75%|███████▌  | 300/400 [07:08<25:42, 15.43s/it]

epoch: 3, accuracy:  71.19%, loss_dect: 1.0735, loss_dep: 1.0764, loss_total: 2.1498, f1_macro: 72.94%


100%|██████████| 400/400 [09:30<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  69.30%, loss_dect: 1.1493, loss_dep: 1.1624, loss_total: 2.3117, f1_macro: 71.08%


 25%|██▌       | 100/400 [02:23<1:17:33, 15.51s/it]

epoch: 4, accuracy:  69.30%, loss_dect: 1.1140, loss_dep: 1.1269, loss_total: 2.2409, f1_macro: 70.87%


 50%|█████     | 200/400 [04:45<51:36, 15.48s/it]

epoch: 4, accuracy:  72.15%, loss_dect: 1.0731, loss_dep: 1.0837, loss_total: 2.1568, f1_macro: 74.39%


 75%|███████▌  | 300/400 [07:08<25:42, 15.43s/it]

epoch: 4, accuracy:  71.74%, loss_dect: 1.0834, loss_dep: 1.0895, loss_total: 2.1730, f1_macro: 73.79%


100%|██████████| 400/400 [09:31<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  70.11%, loss_dect: 1.1870, loss_dep: 1.1988, loss_total: 2.3858, f1_macro: 71.57%


 25%|██▌       | 100/400 [02:22<1:16:43, 15.35s/it]

epoch: 5, accuracy:  69.41%, loss_dect: 1.1773, loss_dep: 1.1907, loss_total: 2.3680, f1_macro: 71.84%


 50%|█████     | 200/400 [04:44<51:03, 15.32s/it]

epoch: 5, accuracy:  72.22%, loss_dect: 1.1379, loss_dep: 1.1118, loss_total: 2.2496, f1_macro: 74.76%


 75%|███████▌  | 300/400 [07:06<25:31, 15.32s/it]

epoch: 5, accuracy:  71.93%, loss_dect: 1.1296, loss_dep: 1.1397, loss_total: 2.2693, f1_macro: 73.94%


100%|██████████| 400/400 [09:29<00:00,  1.42s/it]

epoch: 5, accuracy:  69.78%, loss_dect: 1.2589, loss_dep: 1.2690, loss_total: 2.5278, f1_macro: 72.24%
training time:  2876.4146379999947  s





## CNN

In [None]:
# Run '_cnn'.
# NOTE(review): this cell always instantiates BiLSTM_TreeLSTM(); if the run is meant to use a
# different architecture, that must come from whichever class definition was executed last --
# confirm before re-running.

# Optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate, params.weight_decay = 1e-3, 1e-5
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.alpha, params.dropout = 0.1, 0.3
params.max_norm, params.total_epoch = 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_cnn')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

100%|██████████| 400/400 [06:10<00:00,  1.08it/s]
100%|██████████| 400/400 [06:06<00:00,  1.09it/s]
 25%|██▌       | 100/400 [02:16<1:13:33, 14.71s/it]

epoch: 3, accuracy:  70.63%, loss_dect: 1.1551, loss_dep: 1.1571, loss_total: 2.3122, R: 72.95%, P: 75.06%, f1_macro: 73.25%


 50%|█████     | 200/400 [04:33<49:22, 14.81s/it]

epoch: 3, accuracy:  70.33%, loss_dect: 1.2380, loss_dep: 1.2386, loss_total: 2.4766, R: 70.39%, P: 77.02%, f1_macro: 72.87%


 75%|███████▌  | 300/400 [06:51<24:31, 14.71s/it]

epoch: 3, accuracy:  70.85%, loss_dect: 1.2797, loss_dep: 1.2832, loss_total: 2.5629, R: 73.60%, P: 72.93%, f1_macro: 71.90%


100%|██████████| 400/400 [09:07<00:00,  1.37s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  65.48%, loss_dect: 1.5385, loss_dep: 1.4965, loss_total: 3.0350, R: 63.53%, P: 76.46%, f1_macro: 68.23%


 25%|██▌       | 100/400 [02:17<1:13:27, 14.69s/it]

epoch: 4, accuracy:  70.63%, loss_dect: 1.2566, loss_dep: 1.2725, loss_total: 2.5290, R: 71.86%, P: 75.07%, f1_macro: 72.94%


 50%|█████     | 200/400 [04:33<48:59, 14.70s/it]

epoch: 4, accuracy:  69.81%, loss_dect: 1.3697, loss_dep: 1.3826, loss_total: 2.7523, R: 68.22%, P: 76.21%, f1_macro: 71.66%


 75%|███████▌  | 300/400 [06:51<24:32, 14.72s/it]

epoch: 4, accuracy:  70.00%, loss_dect: 1.4342, loss_dep: 1.4481, loss_total: 2.8823, R: 70.94%, P: 73.53%, f1_macro: 70.84%


100%|██████████| 400/400 [09:08<00:00,  1.37s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  63.85%, loss_dect: 1.7934, loss_dep: 1.8075, loss_total: 3.6009, R: 60.81%, P: 76.33%, f1_macro: 65.98%


 25%|██▌       | 100/400 [02:17<1:13:59, 14.80s/it]

epoch: 5, accuracy:  69.74%, loss_dect: 1.4441, loss_dep: 1.4556, loss_total: 2.8997, R: 70.45%, P: 72.32%, f1_macro: 70.63%


 50%|█████     | 200/400 [04:34<49:19, 14.80s/it]

epoch: 5, accuracy:  69.07%, loss_dect: 1.5380, loss_dep: 1.5585, loss_total: 3.0966, R: 68.38%, P: 75.68%, f1_macro: 71.55%


 75%|███████▌  | 300/400 [06:51<24:39, 14.80s/it]

epoch: 5, accuracy:  70.56%, loss_dect: 1.5219, loss_dep: 1.5434, loss_total: 3.0653, R: 70.62%, P: 75.40%, f1_macro: 72.26%


100%|██████████| 400/400 [09:08<00:00,  1.37s/it]

epoch: 5, accuracy:  64.37%, loss_dect: 1.9417, loss_dep: 1.9608, loss_total: 3.9025, R: 61.43%, P: 76.07%, f1_macro: 65.63%
training time:  2408.2018250000037  s





In [None]:
# Run '_cnn2'.
# NOTE(review): this cell always instantiates BiLSTM_TreeLSTM(); if the run is meant to use a
# different architecture, that must come from whichever class definition was executed last --
# confirm before re-running.

# Optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate, params.weight_decay = 1e-3, 1e-5
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.alpha, params.dropout = 0.1, 0.3
params.max_norm, params.total_epoch = 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_cnn2')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

100%|██████████| 400/400 [06:06<00:00,  1.09it/s]
100%|██████████| 400/400 [06:08<00:00,  1.09it/s]
 25%|██▌       | 100/400 [02:18<1:14:26, 14.89s/it]

epoch: 3, accuracy:  68.33%, loss_dect: 1.2038, loss_dep: 1.2080, loss_total: 2.4117, R: 70.93%, P: 72.16%, f1_macro: 70.69%


 50%|█████     | 200/400 [04:36<49:18, 14.79s/it]

epoch: 3, accuracy:  68.67%, loss_dect: 1.2887, loss_dep: 1.2979, loss_total: 2.5866, R: 70.18%, P: 73.07%, f1_macro: 71.07%


 75%|███████▌  | 300/400 [06:55<24:50, 14.91s/it]

epoch: 3, accuracy:  68.30%, loss_dect: 1.3261, loss_dep: 1.3298, loss_total: 2.6559, R: 68.61%, P: 74.24%, f1_macro: 69.28%


100%|██████████| 400/400 [09:13<00:00,  1.38s/it]

epoch: 3, accuracy:  64.26%, loss_dect: 1.5857, loss_dep: 1.5580, loss_total: 3.1437, R: 63.52%, P: 73.09%, f1_macro: 67.04%



 25%|██▌       | 100/400 [02:19<1:14:21, 14.87s/it]

epoch: 4, accuracy:  67.96%, loss_dect: 1.2802, loss_dep: 1.2957, loss_total: 2.5759, R: 70.88%, P: 72.78%, f1_macro: 71.31%


 50%|█████     | 200/400 [04:37<49:35, 14.88s/it]

epoch: 4, accuracy:  68.85%, loss_dect: 1.3948, loss_dep: 1.4092, loss_total: 2.8041, R: 69.85%, P: 74.30%, f1_macro: 71.15%


 75%|███████▌  | 300/400 [06:57<24:50, 14.90s/it]

epoch: 4, accuracy:  67.89%, loss_dect: 1.4176, loss_dep: 1.4291, loss_total: 2.8468, R: 69.86%, P: 72.28%, f1_macro: 69.25%


100%|██████████| 400/400 [09:14<00:00,  1.39s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  63.85%, loss_dect: 1.7055, loss_dep: 1.7241, loss_total: 3.4297, R: 62.54%, P: 71.29%, f1_macro: 65.23%


 25%|██▌       | 100/400 [02:18<1:14:17, 14.86s/it]

epoch: 5, accuracy:  66.04%, loss_dect: 1.5476, loss_dep: 1.5592, loss_total: 3.1068, R: 66.25%, P: 70.87%, f1_macro: 67.43%


 50%|█████     | 200/400 [04:35<49:12, 14.76s/it]

epoch: 5, accuracy:  68.44%, loss_dect: 1.5101, loss_dep: 1.5287, loss_total: 3.0388, R: 67.73%, P: 74.63%, f1_macro: 70.52%


 75%|███████▌  | 300/400 [06:53<24:30, 14.70s/it]

epoch: 5, accuracy:  67.30%, loss_dect: 1.5528, loss_dep: 1.5730, loss_total: 3.1258, R: 68.19%, P: 70.64%, f1_macro: 67.57%


100%|██████████| 400/400 [09:11<00:00,  1.38s/it]

epoch: 5, accuracy:  59.41%, loss_dect: 2.1654, loss_dep: 2.1814, loss_total: 4.3468, R: 54.63%, P: 72.51%, f1_macro: 58.35%
training time:  2420.511019999998  s





## SP-Tree

In [None]:
# Run '_sp'.
# NOTE(review): this cell always instantiates BiLSTM_TreeLSTM(); if the run is meant to use a
# different architecture, that must come from whichever class definition was executed last --
# confirm before re-running.

# Optimizer and LR-schedule settings; consumed by the AdamW / StepLR constructors below.
params.learn_rate, params.weight_decay = 1e-3, 1e-5
params.step_size, params.learn_rate_decay = 50, 0.92

# Not read in this cell; presumably picked up by the model and the training loop -- confirm there.
params.alpha, params.dropout = 0.1, 0.3
params.max_norm, params.total_epoch = 10, 5

# Build a new model (moved to the GPU), optimizer and scheduler for this run.
model = BiLSTM_TreeLSTM().cuda()
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)
# StepLR multiplies the learning rate by `gamma` once every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

start_epoch = 1  # presumably the epoch counter begin() starts from -- confirm
begin('_sp')  # defined elsewhere; NOTE(review): the argument looks like a run tag -- confirm

100%|██████████| 400/400 [04:31<00:00,  1.47it/s]
100%|██████████| 400/400 [04:24<00:00,  1.51it/s]
 25%|██▌       | 100/400 [01:41<58:04, 11.61s/it]

epoch: 3, accuracy:  74.11%, loss_dect: 0.9820, loss_dep: 0.9782, loss_total: 1.9601, R: 78.44%, P: 77.61%, f1_macro: 77.57%


 50%|█████     | 200/400 [03:22<38:23, 11.52s/it]

epoch: 3, accuracy:  75.07%, loss_dect: 0.9804, loss_dep: 0.9841, loss_total: 1.9645, R: 77.46%, P: 79.50%, f1_macro: 78.16%


 75%|███████▌  | 300/400 [05:04<19:19, 11.60s/it]

epoch: 3, accuracy:  73.67%, loss_dect: 1.0375, loss_dep: 1.0389, loss_total: 2.0764, R: 77.70%, P: 77.61%, f1_macro: 76.84%


100%|██████████| 400/400 [06:46<00:00,  1.02s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  71.96%, loss_dect: 1.1903, loss_dep: 1.1450, loss_total: 2.3353, R: 72.59%, P: 78.76%, f1_macro: 74.81%


 25%|██▌       | 100/400 [01:41<57:41, 11.54s/it]

epoch: 4, accuracy:  74.44%, loss_dect: 1.0035, loss_dep: 1.0186, loss_total: 2.0220, R: 78.95%, P: 76.56%, f1_macro: 77.11%


 50%|█████     | 200/400 [03:21<38:27, 11.54s/it]

epoch: 4, accuracy:  75.04%, loss_dect: 1.0057, loss_dep: 1.0133, loss_total: 2.0190, R: 76.28%, P: 79.78%, f1_macro: 77.79%


 75%|███████▌  | 300/400 [05:02<19:13, 11.54s/it]

epoch: 4, accuracy:  72.89%, loss_dect: 1.1204, loss_dep: 1.1257, loss_total: 2.2460, R: 77.76%, P: 75.93%, f1_macro: 75.84%


100%|██████████| 400/400 [06:44<00:00,  1.01s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  72.37%, loss_dect: 1.2165, loss_dep: 1.2229, loss_total: 2.4393, R: 71.69%, P: 78.51%, f1_macro: 74.08%


 25%|██▌       | 100/400 [01:40<57:37, 11.53s/it]

epoch: 5, accuracy:  73.59%, loss_dect: 1.1283, loss_dep: 1.1375, loss_total: 2.2658, R: 75.91%, P: 78.47%, f1_macro: 76.46%


 50%|█████     | 200/400 [03:22<38:43, 11.62s/it]

epoch: 5, accuracy:  74.37%, loss_dect: 1.1549, loss_dep: 1.1688, loss_total: 2.3237, R: 74.93%, P: 79.59%, f1_macro: 76.85%


 75%|███████▌  | 300/400 [05:03<19:07, 11.48s/it]

epoch: 5, accuracy:  72.74%, loss_dect: 1.2139, loss_dep: 1.2325, loss_total: 2.4465, R: 76.13%, P: 77.53%, f1_macro: 75.93%


100%|██████████| 400/400 [06:45<00:00,  1.01s/it]

epoch: 5, accuracy:  72.81%, loss_dect: 1.3114, loss_dep: 1.3268, loss_total: 2.6382, R: 72.34%, P: 78.49%, f1_macro: 74.67%
training time:  1778.8898530000006  s





## Baseline

In [None]:
# --- Baseline run ----------------------------------------------------------
# Override the defaults stored on the shared `params` object for this run.
# These are set before the model is built, so anything the model reads from
# `params` at construction time sees the values below.

# Optimiser / LR-schedule settings.
params.learn_rate, params.weight_decay = 1e-3, 1e-5
params.step_size, params.learn_rate_decay = 50, 0.92
params.max_norm = 10

# Regularisation, loss weighting and run length.
# NOTE(review): how `alpha` and `max_norm` are consumed is defined outside
# this cell — confirm against the training code.
params.dropout, params.alpha = 0.3, 0.1
params.total_epoch = 5

# Fresh model, moved to the GPU in the same expression.
# NOTE(review): this is the same model class as the SP-Tree run; the only
# visible difference between the two runs is the suffix passed to begin().
model = BiLSTM_TreeLSTM().cuda()

# AdamW with the AMSGrad variant enabled.
optimizer = optim.AdamW(
    model.parameters(),
    lr=params.learn_rate,
    weight_decay=params.weight_decay,
    amsgrad=True,
)

# Multiply the learning rate by `learn_rate_decay` every `step_size`
# calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.step_size,
    gamma=params.learn_rate_decay,
)

# Presumably read as globals by begin() — TODO confirm.
start_epoch = 1
begin('_base')

100%|██████████| 400/400 [06:33<00:00,  1.02it/s]
100%|██████████| 400/400 [06:24<00:00,  1.04it/s]
 25%|██▌       | 100/400 [02:23<1:17:52, 15.58s/it]

epoch: 3, accuracy:  73.81%, loss_dect: 1.0226, loss_dep: 1.0243, loss_total: 2.0468, R: 77.29%, P: 76.38%, f1_macro: 76.31%


 50%|█████     | 200/400 [04:46<51:37, 15.49s/it]

epoch: 3, accuracy:  74.78%, loss_dect: 0.9950, loss_dep: 1.0029, loss_total: 1.9980, R: 76.56%, P: 79.63%, f1_macro: 77.56%


 75%|███████▌  | 300/400 [07:10<25:50, 15.50s/it]

epoch: 3, accuracy:  73.52%, loss_dect: 1.0710, loss_dep: 1.0687, loss_total: 2.1398, R: 77.07%, P: 77.27%, f1_macro: 76.28%


100%|██████████| 400/400 [09:33<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 3, accuracy:  68.48%, loss_dect: 1.3270, loss_dep: 1.2687, loss_total: 2.5956, R: 66.31%, P: 79.02%, f1_macro: 71.19%


 25%|██▌       | 100/400 [02:22<1:16:53, 15.38s/it]

epoch: 4, accuracy:  74.96%, loss_dect: 1.0406, loss_dep: 1.0544, loss_total: 2.0950, R: 78.42%, P: 76.68%, f1_macro: 77.01%


 50%|█████     | 200/400 [04:45<51:23, 15.42s/it]

epoch: 4, accuracy:  75.26%, loss_dect: 1.0480, loss_dep: 1.0573, loss_total: 2.1052, R: 75.38%, P: 80.51%, f1_macro: 77.30%


 75%|███████▌  | 300/400 [07:08<25:44, 15.44s/it]

epoch: 4, accuracy:  73.15%, loss_dect: 1.1414, loss_dep: 1.1511, loss_total: 2.2925, R: 75.91%, P: 76.54%, f1_macro: 75.20%


100%|██████████| 400/400 [09:31<00:00,  1.43s/it]
  0%|          | 0/400 [00:00<?, ?it/s]

epoch: 4, accuracy:  70.04%, loss_dect: 1.2558, loss_dep: 1.2651, loss_total: 2.5209, R: 69.75%, P: 76.65%, f1_macro: 71.79%


 25%|██▌       | 100/400 [02:22<1:16:49, 15.37s/it]

epoch: 5, accuracy:  74.41%, loss_dect: 1.1072, loss_dep: 1.1183, loss_total: 2.2256, R: 76.25%, P: 77.57%, f1_macro: 76.53%


 50%|█████     | 200/400 [04:44<51:02, 15.31s/it]

epoch: 5, accuracy:  74.93%, loss_dect: 1.1412, loss_dep: 1.1545, loss_total: 2.2956, R: 76.21%, P: 79.78%, f1_macro: 77.45%


 75%|███████▌  | 300/400 [07:06<25:32, 15.33s/it]

epoch: 5, accuracy:  73.44%, loss_dect: 1.2181, loss_dep: 1.2340, loss_total: 2.4522, R: 76.90%, P: 75.69%, f1_macro: 75.52%


100%|██████████| 400/400 [09:28<00:00,  1.42s/it]

epoch: 5, accuracy:  71.15%, loss_dect: 1.3384, loss_dep: 1.3514, loss_total: 2.6898, R: 71.62%, P: 77.07%, f1_macro: 72.59%
training time:  2517.7467939999988  s



