In [1]:
!git clone https://github.com/hawalurahman/PWCN

Cloning into 'PWCN'...
remote: Enumerating objects: 201, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 201 (delta 0), reused 1 (delta 0), pack-reused 197[K
Receiving objects: 100% (201/201), 87.44 MiB | 17.80 MiB/s, done.
Resolving deltas: 100% (92/92), done.
Updating files: 100% (71/71), done.


In [2]:
%cd /content/PWCN

/content/PWCN


In [3]:
class Tokenizer(object):
    def __init__(self, word2idx=None):
        if word2idx is None:
            self.word2idx = {}
            self.idx2word = {}
            self.idx = 0
            self.word2idx['<pad>'] = self.idx
            self.idx2word[self.idx] = '<pad>'
            self.idx += 1
            self.word2idx['<unk>'] = self.idx
            self.idx2word[self.idx] = '<unk>'
            self.idx += 1
        else:
            self.word2idx = word2idx
            self.idx2word = {v:k for k,v in word2idx.items()}

    def fit_on_text(self, text):
        text = text.lower()
        words = text.split()
        for word in words:
            if word not in self.word2idx:
                self.word2idx[word] = self.idx
                self.idx2word[self.idx] = word
                self.idx += 1

    def text_to_sequence(self, text):
        text = text.lower()
        words = text.split()
        unknownidx = 1
        sequence = [self.word2idx[w] if w in self.word2idx else unknownidx for w in words]
        # print(len(sequence))
        if len(sequence) == 0:
            sequence = [0]
        return sequence

def read_data(fname):
        tokenizer = Tokenizer()
        fin = open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
        lines = fin.readlines()
        fin.close()
        fin = open(fname+'.dist', 'r', encoding='utf-8', newline='\n', errors='ignore')
        dist_lines = fin.readlines()
        fin.close()

        all_data = []
        cnt = 0
        for i in range(0, len(lines), 3):
            text_left, _, text_right = [s.lower().strip() for s in lines[i].partition("$T$")]
            aspect = lines[i + 1].lower().strip()
            polarity = lines[i + 2].strip()
            original_text = str(text_left) + " " + str(aspect) + " " + str(text_right)

            text_indices = tokenizer.text_to_sequence(text_left + " " + aspect + " " + text_right)
            aspect_indices = tokenizer.text_to_sequence(aspect)
            left_indices = tokenizer.text_to_sequence(text_left)

            polarity = int(polarity)+1
            dependency_dist = [float(d) for d in dist_lines[cnt*2+1].split()]

            cnt += 1

            data = {
                'text' : (text_left, _, text_right),
                'full_text' : original_text,
                'aspect' : aspect,
                'text_indices': text_indices,
                'aspect_indices': aspect_indices,
                'left_indices': left_indices,
                'polarity': polarity,
                'dependency_dist': dependency_dist
            }

            all_data.append(data)
        return all_data

## Read Data

In [4]:
train_data = read_data('/content/PWCN/datasets/semeval14/Restaurants_Train.xml.seg')
test_data = read_data('/content/PWCN/datasets/semeval14/Restaurants_Test_Gold.xml.seg')

## BERT Embedding

In [4]:
# -*- coding: utf-8 -*-

import os
import pickle
import random
import numpy as np
import torch
from transformers import BertTokenizer, BertModel

from tqdm import tqdm


def load_word_vec(path, word2idx=None, embed_dim=300):
    fin = open(path, 'r', encoding='utf-8', newline='\n', errors='ignore')
    word_vec = {}
    for line in fin:
        tokens = line.rstrip().split()
        word, vec = ' '.join(tokens[:-embed_dim]), tokens[-embed_dim:]
        if word in word2idx.keys():
            word_vec[word] = np.asarray(vec, dtype='float32')
    return word_vec


def build_embedding_matrix(word2idx, embed_dim, type):
    embedding_matrix_file_name = '{0}_{1}_embedding_matrix.pkl'.format(str(embed_dim), type)
    if os.path.exists(embedding_matrix_file_name):
        print('loading embedding_matrix:', embedding_matrix_file_name)
        embedding_matrix = pickle.load(open(embedding_matrix_file_name, 'rb'))
    else:
        print('loading word vectors ...')
        embedding_matrix = np.zeros((len(word2idx), embed_dim))  # idx 0 and 1 are all-zeros
        embedding_matrix[1, :] = np.random.uniform(-1/np.sqrt(embed_dim), 1/np.sqrt(embed_dim), (1, embed_dim))
        fname = './glove/glove.840B.300d.txt'
        word_vec = load_word_vec(fname, word2idx=word2idx, embed_dim=embed_dim)
        print('building embedding_matrix:', embedding_matrix_file_name)
        for word, i in word2idx.items():
            vec = word_vec.get(word)
            if vec is not None:
                # words not found in embedding index will be all-zeros.
                embedding_matrix[i] = vec
        pickle.dump(embedding_matrix, open(embedding_matrix_file_name, 'wb'))
    return embedding_matrix


def build_embedding_matrix_bert(data_raw, embed_dim, type):
    embedding_matrix_file_name = '{0}_{1}_embedding_matrix.pkl'.format(str(embed_dim), type)
    if os.path.exists(embedding_matrix_file_name):
        print('loading embedding_matrix:', embedding_matrix_file_name)
        embedding_matrix = pickle.load(open(embedding_matrix_file_name, 'rb'))
    else:
        print('loading word vectors ...')
        # embedding_matrix = np.zeros((len(data_raw), embed_dim))  # idx 0 and 1 are all-zeros
        # embedding_matrix[1, :] = np.random.uniform(-1/np.sqrt(embed_dim), 1/np.sqrt(embed_dim), (1, embed_dim))

        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        model = BertModel.from_pretrained("bert-base-uncased")

        all_sentence_token_embedding = []

        for each in data_raw:
            text = " ".join([str(x) for x in each['full_text']])

            inputs = tokenizer.encode_plus(text, return_tensors="pt")
            outputs = model(**inputs)

            last_hidden_states = outputs[0]

            pickle.dump(last_hidden_states, open(embedding_matrix_file_name, 'wb'))
    embedding_matrix = pickle.load(open(embedding_matrix_file_name, 'rb'))
    return embedding_matrix

class Tokenizer(object):
    def __init__(self, word2idx=None):
        if word2idx is None:
            self.word2idx = {}
            self.idx2word = {}
            self.idx = 0
            self.word2idx['<pad>'] = self.idx
            self.idx2word[self.idx] = '<pad>'
            self.idx += 1
            self.word2idx['<unk>'] = self.idx
            self.idx2word[self.idx] = '<unk>'
            self.idx += 1
        else:
            self.word2idx = word2idx
            self.idx2word = {v:k for k,v in word2idx.items()}

    def fit_on_text(self, text):
        text = text.lower()
        words = text.split()
        for word in words:
            if word not in self.word2idx:
                self.word2idx[word] = self.idx
                self.idx2word[self.idx] = word
                self.idx += 1

    def text_to_sequence(self, text):
        text = text.lower()
        words = text.split()
        unknownidx = 1
        sequence = [self.word2idx[w] if w in self.word2idx else unknownidx for w in words]
        # print(len(sequence))
        if len(sequence) == 0:
            sequence = [0]
        return sequence


class ABSADataset(object):
    def __init__(self, data):
        self.data = data

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)


class ABSADatesetReader:
    @staticmethod
    def __read_text__(fnames):
        text = ''
        for fname in fnames:
            fin = open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
            lines = fin.readlines()
            fin.close()
            for i in range(0, 100, 3):
                text_left, _, text_right = [s.lower().strip() for s in lines[i].partition("$T$")]
                aspect = lines[i + 1].lower().strip()
                text_raw = text_left + " " + aspect + " " + text_right
                text += text_raw + " "
        return text


    @staticmethod
    def __read_data__(fname, tokenizer, name):
        fin = open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
        lines = fin.readlines()
        fin.close()
        fin = open(fname+'.dist', 'r', encoding='utf-8', newline='\n', errors='ignore')
        dist_lines = fin.readlines()
        fin.close()

        all_data = []
        cnt = 0

        for i in tqdm(range(0, len(lines), 3)):
            text_left, _, text_right = [s.lower().strip() for s in lines[i].partition("$T$")]
            aspect = lines[i + 1].lower().strip()
            polarity = lines[i + 2].strip()

            text_indices = tokenizer.text_to_sequence(text_left + " " + aspect + " " + text_right)
            aspect_indices = tokenizer.text_to_sequence(aspect)
            left_indices = tokenizer.text_to_sequence(text_left)
            polarity = int(polarity)+1
            dependency_dist = [float(d) for d in dist_lines[cnt*2+1].split()]
            full_text = text_left+" "+aspect+" "+text_right

            cnt += 1

            tokenizer_bert = BertTokenizer.from_pretrained("bert-base-uncased")
            model = BertModel.from_pretrained("bert-base-uncased")

            model.to('cuda')

            inputs = tokenizer_bert.encode_plus(full_text, return_tensors="pt")

            inputs.to('cuda')

            outputs = model(**inputs)

            last_hidden_states = outputs[0]

            data = {
                'text' : (text_left, _, text_right),
                'full_text' : full_text,
                'text_indices': text_indices,
                'aspect_indices': aspect_indices,
                'left_indices': left_indices,
                'polarity': polarity,
                'dependency_dist': dependency_dist,
                'bert_embedding' : last_hidden_states
            }

            if i == 0:
                with open('dataset_data'+name+'.pkl', 'wb') as f:
                    pickle.dump(data, f)
            else:
                with open('dataset_data'+name+'.pkl', 'ab') as f:
                    pickle.dump(data, f)

        with open('dataset_data'+name+'.pkl', 'rb') as f:
            data = []
            while True:
                try:
                    data.append(pickle.load(f))
                except EOFError:
                    break

        return data

    def __init__(self, dataset='laptop', embed_dim=768):
        print("preparing {0} dataset...".format(dataset))
        fname = {
            'restaurant': {
                'train': '/content/PWCN/datasets/semeval14/Restaurants_Train.xml.seg',
                'test': '/content/PWCN/datasets/semeval14/Restaurants_Test_Gold.xml.seg'
            },
            'laptop': {
                'train': './datasets/semeval14/Laptops_Train.xml.seg',
                'test': './datasets/semeval14/Laptops_Test_Gold.xml.seg'
            },
            'restaurant16': {
                'train': './datasets/semeval16/restaurant_2016_training_coba_coba.xml.seg',
                'test': './datasets/semeval16/restaurant_2016_testing_gold_coba_coba.xml.seg'
            },
        }


        tokenizer = Tokenizer()
        # # data_raw = list(ABSADataset(ABSADatesetReader.__read_data__(fname[dataset]['train'], tokenizer)))

        # self.train_data = ABSADataset(ABSADatesetReader.__read_data__(fname[dataset]['train'], tokenizer))
        # self.test_data = ABSADataset(ABSADatesetReader.__read_data__(fname[dataset]['test'], tokenizer))

        text = ABSADatesetReader.__read_text__([fname[dataset]['train'], fname[dataset]['test']])

        if os.path.exists(dataset+'_word2idx.pkl'):
            print("loading {0} tokenizer...".format(dataset))
            with open(dataset+'_word2idx.pkl', 'rb') as f:
                word2idx = pickle.load(f)
                tokenizer = Tokenizer(word2idx=word2idx)
        else:
            tokenizer = Tokenizer()
            tokenizer.fit_on_text(text)
            with open(dataset+'_word2idx.pkl', 'wb') as f:
                pickle.dump(tokenizer.word2idx, f)

        self.embedding_matrix = build_embedding_matrix(tokenizer.word2idx, embed_dim, dataset)
        # self.embedding_matrix = build_embedding_matrix_bert(data_raw, 768, dataset)

        # test_data_pickle = '/content/PWCN/dataset_datatest_data.pkl'
        # train_data_pickle = '/content/PWCN/dataset_datatrain_data.pkl'

        # if os.path.exists(test_data_pickle):
        #     print("loading {0} bert embedding...".format(dataset))

        #     with open(test_data_pickle, 'rb') as f:
        #         data = []
        #         while True:
        #             try:
        #                 data.append(pickle.load(f))
        #             except EOFError:
        #                 break
        #         data = data.to(torch.device('cuda'))
        #     self.test_data = data

        #     with open(train_data_pickle, 'rb') as f:
        #         data = []
        #         while True:
        #             try:
        #                 data.append(pickle.load(f))
        #             except EOFError:
        #                 break
        #         data = data.to(torch.device('cuda'))
        #     self.train_data = data

        # else:
        #     tokenizer = Tokenizer()
        #     tokenizer.fit_on_text(text)
        #     with open(dataset+'_word2idx.pkl', 'wb') as f:
        #         pickle.dump(tokenizer.word2idx, f)


        #     self.train_data = ABSADataset(ABSADatesetReader.__read_data__(fname[dataset]['train'], tokenizer, 'train_data'))
        #     self.test_data = ABSADataset(ABSADatesetReader.__read_data__(fname[dataset]['test'], tokenizer, 'test_data'))

        self.train_data = ABSADataset(ABSADatesetReader.__read_data__(fname[dataset]['train'], tokenizer, 'train_data'))
        self.test_data = ABSADataset(ABSADatesetReader.__read_data__(fname[dataset]['test'], tokenizer, 'test_data'))

In [None]:
absa_dataset = ABSADatesetReader(dataset='restaurant', embed_dim=768)


preparing restaurant dataset...
loading restaurant tokenizer...
loading embedding_matrix: 768_restaurant_embedding_matrix.pkl


  0%|          | 0/3608 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

  8%|▊         | 289/3608 [07:14<1:21:31,  1.47s/it]

In [7]:
type(absa_dataset.embedding_matrix)

torch.Tensor

In [9]:
# -*- coding: utf-8 -*-

from layers.dynamic_rnn import DynamicLSTM
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class DependencyProximity(nn.Module):
    def __init__(self, opt):
        self.opt = opt
        super(DependencyProximity, self).__init__()

    def forward(self, x, aspect_double_idx, text_len, aspect_len, dependency_dist):
        batch_size, seq_len = x.shape[0], x.shape[1]
        # print(type(aspect_double_idx), type(text_len), type(aspect_len), type(dependency_dist), type(batch_size), type(seq_len))
        weight = self.weight_matrix(aspect_double_idx, text_len, aspect_len, dependency_dist, batch_size, seq_len).to(torch.device('cpu'))
        x = weight.unsqueeze(2)*x
        return x

    def weight_matrix(self, aspect_double_idx, text_len, aspect_len, dependency_dist, batch_size, seq_len):
        aspect_double_idx = aspect_double_idx.cpu().numpy()
        text_len = text_len.cpu().numpy()
        aspect_len = aspect_len.cpu().numpy()
        dependency_dist = dependency_dist.cpu().numpy()
        weight = [[] for i in range(batch_size)]
        for i in range(batch_size):
            context_len = text_len[i] - aspect_len[i]
            for j in range(aspect_double_idx[i,0]):
                weight[i].append(1-dependency_dist[i,j]/context_len)
            for j in range(aspect_double_idx[i,0], aspect_double_idx[i,1]+1):
                weight[i].append(0)
            for j in range(aspect_double_idx[i,1]+1, text_len[i]):
                weight[i].append(1-dependency_dist[i,j]/context_len)
            for j in range(text_len[i], seq_len):
                weight[i].append(0)
        return torch.tensor(weight)

class PWCN_DEP_modified(nn.Module):
    def __init__(self, opt):
        super(PWCN_DEP_modified, self).__init__()
        self.opt = opt
        # self.embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        # self.embed_dropout = nn.Dropout(0.3)
        self.txt_lstm = DynamicLSTM(768, 768, num_layers=1, batch_first=True, bidirectional=True)
        self.proximity = DependencyProximity(opt)
        self.convs3 = nn.Conv1d(2*768, 2*768, 3, padding=1).double()
        self.fc = nn.Linear(2*768, 3).double()

    def forward(self, inputs):
        text_indices, aspect_indices, left_indices, dependency_dist, bert_embedding = inputs
        # print(bert_embedding.shape)

        txt_len = torch.sum(text_indices != 0, dim=1)
        asp_len = torch.sum(aspect_indices != 0, dim=1)
        left_len = torch.sum(left_indices != 0, dim=-1)
        aspect_double_idx = torch.cat([left_len.unsqueeze(-1), (left_len+asp_len-1).unsqueeze(-1)], dim=-1)
        txt_out = bert_embedding
        # print(txt_out.shape, txt_len.shape)
        txt_out, (_, _) = self.txt_lstm(txt_out, txt_len)
        z = F.relu(self.convs3(
            self.proximity(txt_out, aspect_double_idx, txt_len, asp_len, dependency_dist).transpose(1, 2)))  # [(N,Co,L), ...]*len(Ks)
        z = F.max_pool1d(z, z.size(2)).squeeze(2)
        out = self.fc(z)
        return out


In [10]:
# -*- coding: utf-8 -*-

import math
import random
import torch
from torch.nn.utils.rnn import pad_sequence

class BucketIterator(object):
    def __init__(self, data, batch_size, shuffle=True, sort=True):
        self.shuffle = shuffle
        self.sort = sort
        self.batches = self.sort_and_pad(data, batch_size)
        self.batch_len = len(self.batches)

    def sort_and_pad(self, data, batch_size):
        num_batch = int(math.ceil(len(data) / batch_size))
        if self.sort:
            sorted_data = sorted(data, key=lambda x: len(x['text_indices']))
        else:
            sorted_data = data
        batches = []
        for i in range(num_batch):
            batches.append(self.pad_data(sorted_data[i*batch_size : (i+1)*batch_size]))
        return batches

    @staticmethod
    def pad_data(batch_data):
        batch_text_indices = []
        batch_aspect_indices = []
        batch_left_indices = []
        batch_polarity = []
        batch_dependency_dist = []
        batch_bert_embedding = []
        max_len = max([len(t['text_indices']) for t in batch_data])
        for item in batch_data:
            # print(item['text'])
            text_indices, aspect_indices, left_indices, polarity, dependency_dist, bert_embedding = \
            item['text_indices'], item['aspect_indices'], item['left_indices'],\
            item['polarity'], item['dependency_dist'], item['bert_embedding']

            # print(bert_embedding.shape)

            text_padding = [0] * (max_len - len(text_indices))
            aspect_padding = [0] * (max_len - len(aspect_indices))
            left_padding = [0] * (max_len - len(left_indices))
            dependency_dist_padding = [0] * (max_len - len(dependency_dist))
            batch_text_indices.append(text_indices + text_padding)
            batch_aspect_indices.append(aspect_indices + aspect_padding)
            batch_left_indices.append(left_indices + left_padding)
            batch_polarity.append(polarity)
            batch_dependency_dist.append(dependency_dist + dependency_dist_padding)
            batch_bert_embedding.append(bert_embedding)


        # Find the maximum sequence length
        max_sequence_length = max(tensor.size(1) for tensor in batch_bert_embedding)

        # Pad or truncate tensors to have the same sequence length
        padded_tensors = [
            torch.nn.functional.pad(tensor, (0, 0, 0, max_sequence_length - tensor.size(1)), value=0)
            if tensor.size(1) < max_sequence_length
            else tensor[:, :max_sequence_length, :]
            for tensor in batch_bert_embedding
        ]

        # Use pad_sequence to concatenate them along dimension 1
        padded_sequences = pad_sequence(padded_tensors, batch_first=True)
        reshaped_padded_sequences = padded_sequences.squeeze(1)

        # Verify the shapes
        # print(reshaped_padded_sequences.shape)


        return { \
                'text_indices': torch.tensor(batch_text_indices), \
                'aspect_indices': torch.tensor(batch_aspect_indices), \
                'left_indices': torch.tensor(batch_left_indices), \
                'polarity': torch.tensor(batch_polarity), \
                'dependency_dist': torch.tensor(batch_dependency_dist), \
                'bert_embedding': reshaped_padded_sequences,
                }

    def __iter__(self):
        if self.shuffle:
            random.shuffle(self.batches)
        for idx in range(self.batch_len):
            yield self.batches[idx]


In [11]:
import os
import math
import argparse
import random
import numpy
import torch
import torch.nn as nn
from sklearn import metrics
from models import LSTM, BiLSTMAttn, AOA, PWCN_POS

PWCN_DEP = PWCN_DEP_modified


class Instructor:
    def __init__(self, opt):
        self.opt = opt
        # absa_dataset = ABSADatesetReader(dataset=opt.dataset, embed_dim=opt.embed_dim)
        # print(absa_dataset.train_data, opt.dataset, opt.embed_dim, opt.batch_size)
        self.train_data_loader = BucketIterator(absa_dataset.train_data, 128, True)
        self.test_data_loader = BucketIterator(absa_dataset.test_data, 128, False)

        # self.model = opt.model_class(absa_dataset.embedding_matrix, opt).to(opt.device)
        self.model = opt.model_class(absa_dataset.embedding_matrix).to(opt.device)
        self._print_args()
        self.global_f1 = 0.

    def _print_args(self):
        n_trainable_params, n_nontrainable_params = 0, 0
        for p in self.model.parameters():
            n_params = torch.prod(torch.tensor(p.shape)).item()
            if p.requires_grad:
                n_trainable_params += n_params
            else:
                n_nontrainable_params += n_params
        print('>> n_trainable_params: {0}, n_nontrainable_params: {1}'.format(n_trainable_params, n_nontrainable_params))
        print('>> training arguments:')
        for arg in vars(self.opt):
            print('>>> {0}: {1}'.format(arg, getattr(self.opt, arg)))

    def _reset_params(self):
        for p in self.model.parameters():
            if p.requires_grad:
                if len(p.shape) > 1:
                    self.opt.initializer(p)
                else:
                    stdv = 1. / math.sqrt(p.shape[0])
                    torch.nn.init.uniform_(p, a=-stdv, b=stdv)

    def _train(self, criterion, optimizer):
        max_test_acc = 0
        max_f1 = 0
        global_step = 0
        continue_not_increase = 0
        for epoch in range(self.opt.num_epoch):
            print('>' * 100)
            print('epoch: ', epoch)
            n_correct, n_total = 0, 0
            increase_flag = False
            for i_batch, sample_batched in enumerate(self.train_data_loader):
                global_step += 1

                # switch model to training mode, clear gradient accumulators
                self.model.train()
                optimizer.zero_grad()

                inputs = [sample_batched[col].to(self.opt.device) for col in self.opt.inputs_cols]
                targets = sample_batched['polarity'].to(self.opt.device)

                # print(inputs)

                outputs = self.model(inputs)
                loss = criterion(outputs, targets)
                # torch.autograd.set_detect_anomaly(True)
                # loss.backward(retain_graph=True)
                loss.backward()
                optimizer.step()

                if global_step % self.opt.log_step == 0:
                    n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
                    n_total += len(outputs)
                    train_acc = n_correct / n_total

                    test_acc, f1 = self._evaluate_acc_f1()
                    if test_acc > max_test_acc:
                        max_test_acc = test_acc
                    if f1 > max_f1:
                        increase_flag = True
                        max_f1 = f1
                        if self.opt.save and f1 > self.global_f1:
                            self.global_f1 = f1
                            if not os.path.exists('state_dict'):
                                os.mkdir('state_dict')
                            torch.save(self.model.state_dict(), 'state_dict/'+self.opt.model_name+'_'+self.opt.dataset+'.pkl')
                            print('>> best model saved.')
                    print('loss: {:.4f}, acc: {:.4f}, test_acc: {:.4f}, f1: {:.4f}'.format(loss.item(), train_acc, test_acc, f1))
            if increase_flag == False:
                continue_not_increase += 1
                if continue_not_increase >= 5:
                    print('early stop.')
                    break
            else:
                continue_not_increase = 0
        return max_test_acc, max_f1

    def _evaluate_acc_f1(self):
        # switch model to evaluation mode
        self.model.eval()
        n_test_correct, n_test_total = 0, 0
        t_targets_all, t_outputs_all = None, None
        with torch.no_grad():
            for t_batch, t_sample_batched in enumerate(self.test_data_loader):
                t_inputs = [t_sample_batched[col].to(opt.device) for col in self.opt.inputs_cols]
                t_targets = t_sample_batched['polarity'].to(opt.device)
                t_outputs = self.model(t_inputs)

                n_test_correct += (torch.argmax(t_outputs, -1) == t_targets).sum().item()
                n_test_total += len(t_outputs)

                if t_targets_all is None:
                    t_targets_all = t_targets
                    t_outputs_all = t_outputs
                else:
                    t_targets_all = torch.cat((t_targets_all, t_targets), dim=0)
                    t_outputs_all = torch.cat((t_outputs_all, t_outputs), dim=0)

        test_acc = n_test_correct / n_test_total
        f1 = metrics.f1_score(t_targets_all.cpu(), torch.argmax(t_outputs_all, -1).cpu(), labels=[0, 1, 2], average='macro')
        return test_acc, f1

    def run(self, repeats=3):
        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss()

        max_test_acc_avg = 0
        max_f1_avg = 0
        for i in range(repeats):
            print('repeat: ', i)
            self._reset_params()
            _params = filter(lambda p: p.requires_grad, self.model.parameters())
            optimizer = self.opt.optimizer(_params, lr=self.opt.learning_rate, weight_decay=self.opt.l2reg)
            max_test_acc, max_f1 = self._train(criterion, optimizer)
            print('max_test_acc: {0}     max_f1: {1}'.format(max_test_acc, max_f1))
            max_test_acc_avg += max_test_acc
            max_f1_avg += max_f1
            print('#' * 100)
        print("max_test_acc_avg:", max_test_acc_avg / repeats)
        print("max_f1_avg:", max_f1_avg / repeats)

In [12]:
class Options:
    def __init__(self):
        self.model_name = 'pwcn_dep'
        self.dataset = 'restaurant'
        self.optimizer = 'adam'
        self.initializer = 'xavier_uniform_'
        self.learning_rate = 0.005
        self.dropout = 0
        self.l2reg = 0.00001
        self.num_epoch = 10
        self.batch_size = 128
        self.log_step = 5
        self.embed_dim = 768
        self.hidden_dim = 768
        self.polarities_dim = 3
        self.save = True
        self.seed = 776
        self.device = 'cpu'

opt = Options()
print(type(opt))

model_classes = {
    'lstm': LSTM,
    'bilstmattn': BiLSTMAttn,
    'aoa': AOA,
    'pwcn_pos': PWCN_POS,
    'pwcn_dep': PWCN_DEP_modified,
}
input_colses = {
    'lstm': ['text_indices'],
    'bilstmattn': ['text_indices', 'aspect_indices'],
    'aoa': ['text_indices', 'aspect_indices'],
    'pwcn_pos': ['text_indices', 'aspect_indices', 'left_indices'],
    'pwcn_dep': ['text_indices', 'aspect_indices', 'left_indices', 'dependency_dist', 'bert_embedding'],
}
initializers = {
    'xavier_uniform_': torch.nn.init.xavier_uniform_,
    'xavier_normal_': torch.nn.init.xavier_normal_,
    'orthogonal_': torch.nn.init.orthogonal_,
}
optimizers = {
    'adadelta': torch.optim.Adadelta,  # default lr=1.0
    'adagrad': torch.optim.Adagrad,  # default lr=0.01
    'adam': torch.optim.Adam,  # default lr=0.001
    'adamax': torch.optim.Adamax,  # default lr=0.002
    'asgd': torch.optim.ASGD,  # default lr=0.01
    'rmsprop': torch.optim.RMSprop,  # default lr=0.01
    'sgd': torch.optim.SGD,
}

opt.model_class = model_classes[opt.model_name]
opt.inputs_cols = input_colses[opt.model_name]
opt.initializer = initializers[opt.initializer]
opt.optimizer = optimizers[opt.optimizer]
opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') \
if opt.device is None else torch.device(opt.device)

if opt.seed is not None:
    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

ins = Instructor(opt)
ins.run()


<class '__main__.Options'>
>> n_trainable_params: 16533507, n_nontrainable_params: 0
>> training arguments:
>>> model_name: pwcn_dep
>>> dataset: restaurant
>>> optimizer: <class 'torch.optim.adam.Adam'>
>>> initializer: <function xavier_uniform_ at 0x7a5380dced40>
>>> learning_rate: 0.005
>>> dropout: 0
>>> l2reg: 1e-05
>>> num_epoch: 10
>>> batch_size: 128
>>> log_step: 5
>>> embed_dim: 768
>>> hidden_dim: 768
>>> polarities_dim: 3
>>> save: True
>>> seed: 776
>>> device: cpu
>>> model_class: <class '__main__.PWCN_DEP_modified'>
>>> inputs_cols: ['text_indices', 'aspect_indices', 'left_indices', 'dependency_dist', 'bert_embedding']
repeat:  0
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
epoch:  0
>> best model saved.
loss: 2.3927, acc: 0.2344, test_acc: 0.3339, f1: 0.2368


KeyboardInterrupt: ignored