In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import json
from collections import Counter
import collections
import pickle
from termcolor import colored

import os
import time
import datetime

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as data_utils

from torchtext.vocab import Vocab, Vectors


from sklearn.metrics import accuracy_score, matthews_corrcoef, f1_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
###############################################################################
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

print(">>>>>>>>>>> CUDA available :", torch.cuda.is_available())
device = 'cuda' if torch.cuda.is_available() else 'cpu'
###############################################################################

>>>>>>>>>>> CUDA available : True


In [51]:
# # GPU 사용 가능 -> True, GPU 사용 불가 -> False
# print(torch.cuda.is_available())
# tensor = torch.rand(3, 3).cuda()
# print(tensor)

## Config 구성

In [3]:
import easydict

args = easydict.EasyDict({
    'data_path' : "/home/hyuns6100/Mental-Heatlh-Care/data/dailydialog_conv35seq_splits.json", # 데이터 경로
    'result_path': "/home/hyuns6100/Mental-Heatlh-Care/Result/",
    'result_text_path': "/home/hyuns6100/Mental-Heatlh-Care/Result/result_text/",
    'best_result_path': "/home/hyuns6100/Mental-Heatlh-Care/Result/best_results.pkl",
    'wv_path': "/home/hyuns6100/Mental-Heatlh-Care/data/", 
    'word_vector':"wiki-news-300d-1M.vec",
    
    'lr': 1e-3,
    'batch_size':32,
    'train_epochs':1000,
    'n_classes': 7,
    'n_train_class': 7, 
	'n_val_class': 7, 
	'n_test_class': 7, 
    'labels': [1, 2, 3, 4, 5, 6],
 
    'cnn_num_filters': 100,
    'cnn_filter_sizes': [3,4,5],
    'context_size': 35,
    'maxtokens': 30,
    'mlp_hidden': [300,300],
    'dropout': 0.1,
    
    'seed':330,
    'patience_metric': 'f1_micro',
    'finetune_ebd': False,
    'patience': 30,
    'save': True,
    'authors':False,
    'convmode': 'seq',
    'embedding': 'cnn',
    'classifier': 'mlp'
    })

In [4]:
def set_seed(seed):
    """
        Setting random seeds
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)

## 데이터 토큰화 (이미 처리됨)


In [5]:
# def creaDailyDialogSeq():
#     	print(colored('CREATING DAILYDIALOG UNIFIED PREPROCESSED FILE FROM data/ijcnlp_dailydialog/  first make sure the per split dailydialog json files are there. Otherwise, please download dailydialog and run the formatting script as follows:', 'yellow'))
# 	print("""cd data/ijcnlp_dailydialog
# 	python3 parser_gg.py -i data/ijcnlp_dailydialog/train -o data/train 
# 	python3 parser_gg.py -i data/ijcnlp_dailydialog/validation -o data/validation
# 	python3 parser_gg.py -i data/ijcnlp_dailydialog/test -o data/test""")
# 	splits = {'train':'data/ijcnlp_dailydialog/train/dailydialog_train.json', 'test':'data/ijcnlp_dailydialog/test/dailydialog_test.json', 'val': 'data/ijcnlp_dailydialog/validation/dailydialog_validation.json'}
# 	splits_emotionflows_fp = { k: os.path.join( os.path.dirname(v), 'dailydialog_{}_emotionflow.json'.format(k) ) for k,v in splits.items() }
# 	txt2l = { 'no emotion': 0, 'anger': 1, 'disgust': 2, 'fear': 3, 'happiness': 4, 'sadness': 5, 'surprise': 6 }
# 	l2txt = { v:k for k, v in txt2l.items() } 
# 	emotionSet = list(set(list(txt2l.keys())))

# 	emotionFlows = {'train':[],'test':[],'val':[]}
# 	def getEmotionFlows(row, split):
# 		row = json.loads(row)
# 		emotionFlow = [ l2txt[r['label']] for r in row ]
# 		emotionFlows[split].append(emotionFlow)

# 	for k,v in tqdm(splits.items()):
# 		for row in tqdm( open(v, 'r').read().split('\n'), desc=colored(v, 'cyan') ) :
# 			getEmotionFlows(row, k)

# 	seq_lengths = np.array([ len(x) for x in emotionFlows['train'] ] )
# 	seq_size = np.percentile( seq_lengths, 90)
# 	# seq_size = max(seq_lengths)
# 	print(colored('seq_size', 'yellow'), seq_size, '75%:', np.percentile(seq_lengths, 75), 'max:', max(seq_lengths))

# 	def tokenize_seq(chat):
# 		tokenizer = TweetTokenizer()
# 		text =  [ ' '.join( list( map(lambda x: x.lower(), tokenizer.tokenize(m)) ) ) for m in chat['text'] ]
# 		chat['text'] = text
# 		return chat

# 	def _trimpad(size, row, pad=True, trim=True):
# 		''' trim and padding (with <pad>) '''
# 		if len(row['texts']) > size:
# 			row['texts'] = row['texts'][int(-size):]
# 			row['labels'] = row['labels'][int(-size):]
# 		else:
# 			row['texts'] = [ ['<pad>' for j in range(5)] for k in range( int(size) - len(row['texts'])  ) ]   +   row['texts']
# 			row['labels'] = [ 0 for k in range( int(size) - len(row['labels'])  ) ]   +   row['labels']
		
# 		assert len(row['texts']) == size
# 		assert len(row['labels']) == size
# 		return row

# 	dataSplits = {'train':[], 'test': [], 'val':[]}
# 	for split in dataSplits.keys():
# 		for i, data in tqdm( enumerate( [ json.loads(line) for line in open(splits[split], 'r').read().split('\n') ]  ), desc=colored('formatting sequence '+split, 'cyan'), total=len(emotionFlows[split]) ):
# 			# data = [ tokenize_seq(chat) for chat in tqdm(data, total=len(data), desc='tokenizing') ]
# 			data = [ tokenize_seq(chat) for chat in data ]
# 			entry = {'texts': [ x['text'] for x in data], 'labels': [ x['label'] for x in data], 'split': split}
# 			entry = _trimpad(seq_size, entry)
# 			dataSplits[split].append(entry)
	
	
# 	labels_train = Counter([ label for line in dataSplits['train'] for label in line['labels'] ])
# 	labels_val = Counter([ label for line in dataSplits['val'] for label in line['labels'] ])
# 	labels_test = Counter([ label for line in dataSplits['test'] for label in line['labels'] ])
# 	print(colored('labels balance', 'yellow'), labels_train, len(labels_train.keys()), labels_val, len(labels_val.keys()), labels_test, len(labels_test.keys()))

# 	records = dataSplits['train'] + dataSplits['val'] + dataSplits['test']
# 	jsonLines = [json.dumps(line) for line  in tqdm(records)]
# 	with open('data/dailydialog_conv{}seq_splits.json'.format(str(int(seq_size))), 'w') as f: f.write('\n'.join(jsonLines))
# 	print( colored('data/dailydialog_conv%sseq_splits.json created!' % (str(int(seq_size))), 'green') )
# 	print(colored('You can now run the labelling tasks.', 'green'))

## DataLoader 구성

In [5]:
def _get_dailydialog_seq_classes():
    '''
        seq consider no emotion label
        @return list of classes associated with each split
    '''
    label_dict = { 
        'no emotion': 0, 
        'anger': 1, 
        'disgust': 2,
        'fear': 3,
        'happiness': 4,
        'sadness': 5,
        'surprise': 6
    }

    train_classes = [0, 1, 2, 3, 4, 5, 6]
    val_classes = [0, 1, 2, 3, 4, 5, 6]
    test_classes = [0, 1, 2, 3, 4, 5, 6]

    return train_classes, val_classes, test_classes

def _load_json_seq(path, args):
    '''
        load data file
        @param path: str, path to the data file
        @return data: list of examples
    '''
    label = {}
    text_len = []
    with open(path, 'r', errors='ignore') as f:
        data = []
        for i, line in enumerate(f):
            row = json.loads(line)

            # count the number of examples per label
            for l in row['labels']:
                if int(l) not in label: label[int(l)] = 1
                else: label[int(l)] += 1

            item = {
                'id': i+1,
                'label': [int(r) for r in row['labels'] ],
                # 'text': [ r[:args['maxtokens']] for r in row['texts'] ]  # 30 # 50 # 80 truncate the text to 500 tokens
                'text': [ r[-args['maxtokens']:] for r in row['texts'] ]  # 30 # 50 # 80 truncate the text to the last tokens
            }

            if args.authors:
                item.update({'authors': [ int(a) for a in row['authors'] ]})

            if 'split' in row: item['split'] = row['split']

            text_len.append(len(row['texts']))

            data.append(item)

        #tprint('Class balance (load_json_seq):')

        print(label)

        print('Avg len: {}'.format(sum(text_len) / (len(text_len))))
        print('Max len: {}'.format(max(text_len)))

        return data
    
def _read_words(data, convmode=None):
    '''
        Count the occurrences of all words
        @param convmode: str, None for non conversational scope, 'naive' for classic or naive approach, 'conv' for conversation depth into account (one additional dim and nested values)
        @param data: list of examples
        @return words: list of words (with duplicates)
    '''
    words = []
    if convmode is None:
        for example in data:
            words += example['text']
    else:
        for example in data:
            for m in example['text']: 
                words += m     
    
    return words

def _meta_split_by_field(all_data, train_classes, val_classes, test_classes, seqmode=False):
    '''
        Split the dataset according to the specified train_classes, val_classes
        and test_classes
        Consider a 'split' field for the different train test val sets

        seqmode is a special mode to ensure sequences of labels to be taken into account

        @param all_data: list of examples (dictionaries)
        @param train_classes: list of int
        @param val_classes: list of int
        @param test_classes: list of int
        @param seqmode: bool 

        @return train_data: list of examples
        @return val_data: list of examples
        @return test_data: list of examples
    '''
    train_data, val_data, test_data = [], [], []

    if seqmode:
        for example in all_data:
            if example['split'] == 'train' and len(set(example['label']) & set(train_classes)) > 0: train_data.append(example)
            if example['split'] == 'val' and len(set(example['label']) & set(val_classes)) > 0: val_data.append(example)
            if example['split'] == 'test' and len(set(example['label']) & set(test_classes)) > 0: test_data.append(example)
    else: 
        for example in all_data:
            if example['split'] == 'train' and example['label'] in train_classes: train_data.append(example)
            if example['split'] == 'val' and example['label'] in val_classes: val_data.append(example)
            if example['split'] == 'test' and example['label'] in test_classes: test_data.append(example)

    return train_data, val_data, test_data

def _del_by_idx(array_list, idx, axis):
    '''
        Delete the specified index for each array in the array_lists

        @params: array_list: list of np arrays
        @params: idx: list of int
        @params: axis: int

        @return: res: tuple of pruned np arrays
    '''
    if type(array_list) is not list:
        array_list = [array_list]

    # modified to perform operations in place
    for i, array in enumerate(array_list):
        array_list[i] = np.delete(array, idx, axis)

    if len(array_list) == 1:
        return array_list[0]
    else:
        return array_list
    
    
def _data_to_nparray(data, vocab, args):
    '''
        Convert the data into a dictionary of np arrays for speed.
    '''
    doc_label = np.array([x['label'] for x in data], dtype=np.int64)

    raw = np.array([e['text'] for e in data], dtype=object)

    # compute the max text length
    text_len = np.array([len(m) for e in data for m in e['text']])
    max_text_len = max(text_len)
    seq_len = np.array(  [len(e['text']) for e in data]  )
    max_seq_len =  max(seq_len)
    ids = np.array([e['id'] for e in data])

    # initialize the big numpy array by <pad>
    text = vocab.stoi['<pad>'] * np.ones([len(data), max_seq_len, max_text_len], dtype=np.int64)
    
    del_idx = []
    # convert each token to its corresponding id
    for i in tqdm(range(len(data)), desc='converting tokens to ids'): # 모든 대화를 돌면서 한 대화 뭉텅이씩 처리
        for idx_x, x in enumerate(data[i]['text']): # 대화 내 모든 문장을 돌면서 한 문장씩 처리
                for idx_message, message in enumerate(x): # 각 문장 내 모든 토큰을 돌면서 각 토큰별 vocab을 이용하여 id로 변환
                        if message in vocab.stoi:
                                text[i, idx_x, idx_message] = vocab.stoi[message]
                        else:
                                text[i, idx_x, idx_message] = vocab.stoi['<unk>']
            # try:
            #     for idx_message, message in enumerate(x):
            #         text[i, idx_x, :len(message)] = [
            #                             vocab.stoi[token] if token in vocab.stoi else vocab.stoi['<unk>'] 
            #                             for token in message
            #                             ]
            # except Exception as e:
            #     print(e)
            #     print(x, idx_x)
            #     exit()

        # filter out document with only unk and pad
        if np.max(text[i]) < 2:
            del_idx.append(i)

    vocab_size = vocab.vectors.size()[0]
    
    print("del_idx: ", del_idx) # 빈 리스트 반환됨
    
    ## Curation for padding (string instead of list of list)
    raw = [ ["<pad>" if m == ["<pad>", "<pad>", "<pad>", "<pad>", "<pad>"] else m for m in c ] for c in raw ]

    if args.authors:
        # trim and pad authors (should have been done in dtaa creation but left here for comparison purposes)
        authors = list()
        for x in data:
            a = len(x['authors'])
            if a < args.context_size: 
                authors.append(x['authors'] + [0 for i in range(18-a)])
            elif a > args.context_size:
                authors.append( x['authors'][int(-args.context_size):] )
            else:
                authors.append(x['authors'])
        authors = np.array(authors, dtype=np.int64)

        ids, text_len, text, doc_label, raw, authors = _del_by_idx(
                [ids, text_len, text, doc_label, raw, authors], del_idx, 0)
        new_data = {
            'ids': ids,
            'text': text,
            'text_len': text_len,
            'label': doc_label,
            'raw': raw,
            'authors': authors,
            'vocab_size': vocab_size,
        }
        
    else: ## authors = False
        #ids, text_len, text, doc_label, raw = _del_by_idx( [ids, text_len, text, doc_label, raw], del_idx, 0)
        new_data = {
            'ids': ids,
            'text': text,
            'text_len': text_len,
            'label': doc_label,
            'raw': raw,
            'vocab_size': vocab_size,
        }
    return new_data
    

In [6]:
class dailydialog_DataLoader:
    def __init__(self, args):
        self.args = args
        
    def load_dataset(self):
        train_classes, val_classes, test_classes = _get_dailydialog_seq_classes()
        assert(len(train_classes) == args.n_train_class)
        assert(len(val_classes) == args.n_val_class)
        assert(len(test_classes) == args.n_test_class)
        
        all_data = _load_json_seq(self.args.data_path, self.args)
        
        # Loading word vector
        path = os.path.join(self.args.wv_path, self.args.word_vector)
        if not os.path.exists(path):
            # Download the word vector and save it locally:
            print('Downloading word vectors')
            import urllib.request
            urllib.request.urlretrieve(
                'https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.en.vec',
                path)
        
        vectors = Vectors(args.word_vector, cache=args.wv_path)
        min_freq = 2
        vocab = Vocab(collections.Counter(_read_words(all_data, convmode=args.convmode)), vectors=vectors,
                  specials=['<pad>', '<unk>'], min_freq=min_freq)
        
         # print word embedding statistics
        wv_size = vocab.vectors.size()
        print('Total num. of words: {}, word vector dimension: {}'.format(
            wv_size[0],
            wv_size[1]))
        
        num_oov = wv_size[0] - torch.nonzero(
            torch.sum(torch.abs(vocab.vectors), dim=1)).size()[0]
        print(('Num. of out-of-vocabulary words'
              '(they are initialized to zeros): {}').format( num_oov))
        
        # Split into meta-train, meta-val, meta-test data (or just splits)
        train_data, val_data, test_data = _meta_split_by_field(all_data, train_classes ,val_classes, test_classes, seqmode=True)
        trainset = Counter([l for d in train_data for l in d['label']])
        valset = Counter([l for d in val_data for l in d['label']])
        testset = Counter([l for d in test_data for l in d['label']])
        print(colored('check sets splits', 'yellow'), trainset, len(list(trainset.keys())), valset, len(list(valset.keys())),  testset, len(list(testset.keys())))
        
        # Convert everything into np array for fast data loading
        train_data = _data_to_nparray(train_data, vocab, args)
        val_data = _data_to_nparray(val_data, vocab, args)
        test_data = _data_to_nparray(test_data, vocab, args)

        train_data['is_train'] = True
        
        #return converted_train_data, converted_val_data, converted_test_data, train_data, val_data, test_data, vocab
        return train_data, val_data, test_data, vocab

In [7]:
set_seed(args.seed)
loader = dailydialog_DataLoader(args)
#converted_train_data, converted_val_data, converted_test_data, train_data, val_data, test_data, vocab = loader.load_dataset()
train_data, val_data, test_data, vocab = loader.load_dataset()

{0: 441723, 4: 12885, 6: 1823, 3: 174, 2: 353, 5: 1150, 1: 1022}
Avg len: 35.0
Max len: 35
Total num. of words: 13967, word vector dimension: 300
Num. of out-of-vocabulary words(they are initialized to zeros): 1947
[33mcheck sets splits[0m Counter({0: 374103, 4: 11182, 6: 1600, 5: 969, 1: 827, 2: 303, 3: 146}) 7 Counter({0: 34039, 4: 684, 6: 107, 5: 79, 1: 77, 3: 11, 2: 3}) 7 Counter({0: 33581, 4: 1019, 1: 118, 6: 116, 5: 102, 2: 47, 3: 17}) 7


converting tokens to ids: 100%|██████████| 11118/11118 [00:01<00:00, 8203.64it/s]


del_idx:  []


converting tokens to ids: 100%|██████████| 1000/1000 [00:00<00:00, 9460.76it/s]


del_idx:  []


converting tokens to ids: 100%|██████████| 1000/1000 [00:00<00:00, 9086.17it/s]

del_idx:  []





In [100]:
# import pickle

# # 파일로 저장
# with open('/home/hyuns6100/Mental-Heatlh-Care/onnx/vocab.pkl', 'wb') as f:
#     pickle.dump(vocab, f)
    
# # 파일 불러오기
# with open('/home/hyuns6100/Mental-Heatlh-Care/onnx/vocab.pkl', 'rb') as f:
#     vocab = pickle.load(f)

In [8]:
from copy import copy

copy_test_data = test_data.copy()

In [59]:
copy_test_data['text'].shape

(1000, 35, 30)

In [60]:
# ## 토큰화 예시 확인
# data = train_data
# text_len = np.array([len(m) for e in data for m in e['text']])
# max_text_len = max(text_len)
# seq_len = np.array(  [len(e['text']) for e in data]  )
# max_seq_len =  max(seq_len)
# ids = np.array([e['id'] for e in data])


In [61]:
# text = vocab.stoi['<pad>'] * np.ones([len(data), max_seq_len, max_text_len], dtype=np.int64)

In [62]:
# for i in tqdm(range(len(data)), desc='converting tokens to ids'): # 모든 대화를 돌면서 한 대화 뭉텅이씩 처리
#         for idx_x, x in enumerate(data[i]['text']): # 대화 내 모든 문장을 돌면서 한 문장씩 처리
#                 for idx_message, message in enumerate(x): # 각 문장 내 모든 토큰을 돌면서 각 토큰별 vocab을 이용하여 id로 변환
#                         print(message)
#                         if message in vocab.stoi:
#                                 text[i, idx_x, idx_message] = vocab.stoi[message]
#                         else:
#                                 text[i, idx_x, idx_message] = vocab.stoi['<unk>']
#                 break

In [63]:
# text.shape # i번째 대화 뭉텅이, i번째 문장, i번째 토큰

In [64]:
# doc_label = np.array([x['label'] for x in data], dtype=np.int64)

# raw = np.array([e['text'] for e in data], dtype=object)

In [65]:
# doc_label.shape

In [66]:
# doc_label[0]

In [67]:
# text

In [68]:
# text_len = np.array([len(m) for e in data for m in e['text']])
# max_text_len = max(text_len)
# seq_len = np.array(  [len(e['text']) for e in data]  )
# max_seq_len =  max(seq_len)
# ids = np.array([e['id'] for e in data])

# # initialize the big numpy array by <pad>
# text = vocab.stoi['<pad>'] * np.ones([len(data), max_seq_len, max_text_len], dtype=np.int64)

# del_idx = []
# # convert each token to its corresponding id
# for i in tqdm(range(len(data)), desc='converting tokens to ids'):
#     for idx_x, x in enumerate(data[i]['text']):
#         for idx_message, message in enumerate(x):
#                 text[i, idx_x, :len(message)] = [
#                                     vocab.stoi[token] if token in vocab.stoi else vocab.stoi['<unk>'] 
#                                     for token in message
#                                     ]

In [9]:
class SupervisedDataset(Dataset):
    def __init__(self, data, args):
        '''
            data : dict_keys(['ids', 'text', 'text_len', 'label', 'raw', 'vocab_size', 'is_train']) 'authors'
        '''
        # self.berttokenizer = berttokenizer
        # if self.berttokenizer:
        #     self.tokenizer = AutoTokenizer.from_pretrained(os.path.join(args.pretrained_bert))
        self.args = args
        self.ids = data['ids']
        self.text = data['text']
        self.text_len = data['text_len']
        self.label = data['label']
        self.raw = data['raw']
        #self.authors = data['authors']
        self.vocab_size = data['vocab_size']
        self.train = False
        if 'is_train' in data:
            self.is_train = data['is_train']
            self.train = True

    def __len__(self):
        return len(self.ids)
    
    def __getitem__(self, idx):
        
        item = {
            'ids': self.ids[idx], 
            'text': self.text[idx], 
            'text_len': self.text_len[idx], 
            # 'label': np.expand_dims(self.label[idx],0),  # .expand_dims(x, axis=0) unsqueeze(0) for seq labelling (bert)
            'label': self.label[idx],
            # 'raw': self.raw[idx].tolist(), 
            'vocab_size': self.vocab_size,
            #'authors': self.authors[idx]
        }

       
        if self.train: item.update({'is_train': self.is_train})
        
        return item

In [10]:
train_loader = data_utils.DataLoader(SupervisedDataset(train_data, args), batch_size=args.batch_size, num_workers=2, shuffle=False)
val_loader = data_utils.DataLoader(SupervisedDataset(val_data, args), batch_size=args.batch_size, num_workers=2, shuffle=False)
test_loader = data_utils.DataLoader(SupervisedDataset(test_data, args), batch_size=args.batch_size, num_workers=2, shuffle=False)

## Embedding

In [11]:
class WORDEBD(nn.Module):
    '''
        An embedding layer that maps the token id into its corresponding word
        embeddings. The word embeddings are kept as fixed once initialized.
    '''
    def __init__(self, vocab, finetune_ebd):#, specific_vocab_size=None):
        super(WORDEBD, self).__init__()

        self.vocab_size, self.embedding_dim = vocab.vectors.size()
        # if specific_vocab_size != None: self.vocab_size = specific_vocab_size
        self.embedding_layer = nn.Embedding(
                self.vocab_size, self.embedding_dim)
        self.embedding_layer.weight.data = vocab.vectors

        self.finetune_ebd = finetune_ebd

        if self.finetune_ebd:
            self.embedding_layer.weight.requires_grad = True
        else:
            self.embedding_layer.weight.requires_grad = False

    def forward(self, data, weights=None):
        '''
            @param text: batch_size * max_text_len
            @return output: batch_size * max_text_len * embedding_dim
        '''
        if (weights is None): #or (self.finetune_ebd == False):
            return self.embedding_layer(data['text'])

        else:
            return F.embedding(data['text'],
                               weights['ebd.embedding_layer.weight'])

class CNNseq(nn.Module):
    '''
        An aggregation method that encodes every document through different
        convolution filters (followed by max-over-time pooling).
    '''
    def __init__(self, ebd, args):
        super(CNNseq, self).__init__()
        self.args = args

        self.ebd = ebd # pre-trained FastText로 initialization된 token representation => WORDEBD => nn.Embedding layer로 매핑된 것

        self.input_dim = self.ebd.embedding_dim

        # Convolution
        self.convs = nn.ModuleList([nn.Conv1d(
                    in_channels=self.input_dim,
                    out_channels=args.cnn_num_filters,
                    kernel_size=K) for K in args.cnn_filter_sizes])
        
        self.relu = nn.ReLU()

        self.ebd_dim = args.cnn_num_filters * len(args.cnn_filter_sizes)

    def _conv_max_pool(self, x, conv_filter=None, weights=None):
        '''
        Compute sentence level convolution
        Input:
            x:      batch_size, max_doc_len, embedding_dim
        Output:     batch_size, num_filters_total
        '''
        assert(len(x.size()) == 3) # [batch_size==max_sentences, max_tokens, embedding_dim]

        x = x.permute(0, 2, 1)  # batch_size, embedding_dim, doc_len
        x = x.contiguous()

        # Apply the 1d conv. Resulting dimension is
        # [batch_size, num_filters, doc_len-filter_size+1] * len(filter_size)
        assert(not ((conv_filter is None) and (weights is None)))
        if conv_filter is not None:
            x = [conv(x) for conv in conv_filter]

        # elif weights is not None:
        #     x = [F.conv1d(x, weight=weights['convs.{}.weight'.format(i)],
        #                 bias=weights['convs.{}.bias'.format(i)])
        #         for i in range(len(self.args.cnn_filter_sizes))]

        ## max pool over time. Resulting dimension is
        ## [batch_size, num_filters] * len(filter_size)
        #x = [F.max_pool1d(sub_x, sub_x.size(2)).squeeze(2) for sub_x in x]
        
        ## nn.MaxPool1d로 다시 생성
        max_pooled_outputs = []
        for sub_x in x:
            pool_size = sub_x.size(2)
            max_pool = nn.MaxPool1d(pool_size) # output shape: [batch_size, num_filters, 1]
            pooled = max_pool(sub_x).squeeze(2) # output shape: [batch_size, num_filters]
            max_pooled_outputs.append(pooled)
        
        # concatenate along all filters. Resulting dimension is
        # output: [batch_size, num_filters_total]
        x = torch.cat(max_pooled_outputs, 1) # output shape: [batch_size, num_filters*3]
        #x = torch.cat(x, 1)
        x = self.relu(x) #F.relu(x)
        return x

    def forward(self, data, weights=None):
        '''
            @param data dictionary
                @key text: batch_size * max_text_len
            @param weights placeholder used for maml

            @return output: batch_size * embedding_dim
        '''

        device = data['text'].device
        
        # Apply the word embedding, result:  batch_size, doc_len, embedding_dim
        
        ebd = self.ebd(data, weights) # ouptut: [batch_size, max_sentences, max_tokens, embedding_dim]

        # apply 1d conv + max pool, result:  batch_size, num_filters_total        
        ref = tuple(data['text'].size())
        shape = (ref[0], ref[1], ( len(self.args.cnn_filter_sizes) * self.args.cnn_num_filters))
        output = torch.randn(shape).to(device)
        
        if weights is None:
            for i in range(ebd.size(0)): # 각 배치에 대해
                out = self._conv_max_pool(ebd[i], conv_filter=self.convs) # 각 문장에 대해 처리 => (35, 300)
                output[i] = out
        
        else:
            for i in range(ebd.size(0)):
                out = self._conv_max_pool(ebd[i], weights=weights)
                output[i] = out
        
        return output
    
def get_embedding(vocab, args):
    ebd = WORDEBD(vocab, args.finetune_ebd)
    model = CNNseq(ebd, args)
    model.to(device)
    return model

In [13]:
# # output = out_XS.view(-1, args.n_classes)  # new shape: [32*35, 7]
# # target = YS.view(-1)  # new shape: [32*35]

# output = torch.rand((32*35, 7))
# target = test_data['label'][:32].view()

# output.shape, target.shape

In [14]:
# for batch in test_loader:
#     YS = batch['label']
#     break

In [53]:
# YS = batch['label']
# target = YS.view(-1)
# target.shape

torch.Size([1120])

In [54]:
# torch.mean((torch.argmax(output, dim=1) == target).float()).item()

0.15267856419086456

In [64]:
# torch.mean((torch.argmax(output, dim=1) == target).float())

tensor(0.1527)

In [66]:
# (torch.argmax(output, dim=1) == target).float().sum() / len(output)

tensor(0.1527)

In [12]:
class distLinear(nn.Module):
    def __init__(self, indim, outdim):
        super(distLinear, self).__init__()
        self.L = nn.Linear(indim, outdim, bias = False)
        # split the weight update component to direction and norm
        # WeightNorm.apply(self.L, 'weight', dim=0)

        # a fixed scale factor to scale the output of cos value
        # into a reasonably large input for softmax
        self.scale_factor = 10

    def forward(self, x):

        x_norm = torch.norm(x, p=2, dim =1).unsqueeze(1).expand_as(x)
        x_normalized = x.div(x_norm + 0.00001)
        # L_norm = torch.norm(self.L.weight.data, p=2, dim=1).unsqueeze(1).expand_as(self.L.weight.data)

        # self.L.weight.data = self.L.weight.data.div(L_norm + 0.00001)

        cos_dist = self.L(x_normalized)  # matrix product by forward function
        scores = self.scale_factor * (cos_dist)

        return scores
    
class BASE(nn.Module):
    '''
        BASE model
    '''
    def __init__(self, args):
        super(BASE, self).__init__()
        self.args = args

        # cached tensor for speed
        # self.I_way = nn.Parameter(torch.eye(self.args.way, dtype=torch.float),
        #                           requires_grad=False)
    
    @staticmethod
    def compute_acc(pred, true, dim=1, nomax=False):
        '''
            Compute the accuracy.
            @param pred: batch_size * num_classes
            @param true: batch_size
        '''
        if nomax: return torch.mean((pred == true).float()).item()
        else: return torch.mean((torch.argmax(pred, dim=dim) == true).float()).item()
        
    @staticmethod
    def compute_f1(y_pred, true, dim=1, nomax=False,  labels=None, average='weighted'):
        '''
            Compute the weighted f1 score.
            @param pred: batch_size * num_classes
            @param true: batch_size
        '''
        if not nomax: _, y_pred = torch.max(y_pred, dim)

        f1 = f1_score(true.cpu().detach().numpy(), y_pred.cpu().detach().numpy(), average=average, labels=labels)

        return f1

    @staticmethod
    def compute_mcc(y_pred, true, dim=1, nomax=False):
        '''
            Compute the matthews correlation coeficient.
            @param pred: batch_size * num_classes
            @param true: batch_size
        '''
        if not nomax: _, y_pred = torch.max(y_pred, dim)

        mcc = matthews_corrcoef(true.cpu().detach().numpy(), y_pred.cpu().detach().numpy())

        return mcc
    
    @staticmethod
    def compute_f1_micro_noneutral(y_pred, true, dim=1, nomax=False, labels=None):
        
        if not nomax: _, y_pred = torch.max(y_pred, dim)

        f1 = f1_score(true.cpu().detach().numpy(), y_pred.cpu().detach().numpy(), average='micro', labels=labels)

        return f1

class MLPseq(BASE):
    def __init__(self, ebd_dim, args, top_layer=None):
        super(MLPseq, self).__init__(args)

        self.args = args
        self.ebd_dim = ebd_dim

        self.mlp = self._init_mlp(ebd_dim, self.args.mlp_hidden, self.args.dropout)
        self.out = self.get_top_layer(self.args, self.args.n_classes)
        #self.top_layer = top_layer
        self.dropout = nn.Dropout(self.args.dropout)

    @staticmethod
    def get_top_layer(args, n_classes):
        '''
            Creates final layer of desired type
            @return final classification layer
        '''
        return nn.Linear(args.mlp_hidden[-1], n_classes)

        
    def _init_mlp(self, in_d, hidden_ds, drop_rate):
        modules = []

        for d in hidden_ds[:-1]:
            modules.extend([
                nn.Dropout(drop_rate),
                nn.Linear(in_d, d),
                nn.ReLU()])
            in_d = d

        modules.extend([
            nn.Dropout(drop_rate),
            nn.Linear(in_d, hidden_ds[-1])])

        return nn.Sequential(*modules)
    
    def forward(self, XS, YS=None, XQ=None, YQ=None, weights=None, return_preds=False):
        '''
            if y is specified, return loss and accuracy
            otherwise, return the transformed x

            @param: XS: batch_size * input_dim
            @param: YS: batch_size (optional)

            @return: XS: batch_size * output_dim
        '''

        # normal training procedure, train stage only use query
        # if weights is None:
        #     XS = self.mlp(XS)
        # else:
        #     # find weight and bias keys for the mlp module
        #     w_keys, b_keys = [], []
        #     for key in weights.keys():
        #         if key[:4] == 'mlp.':
        #             if key[-6:] == 'weight':
        #                 w_keys.append(key)
        #             else:
        #                 b_keys.append(key)

        #     for i in range(len(w_keys)-1):
        #         #XS = F.dropout(XS, self.args.dropout, training=self.training)
        #         XS = self.dropout(XS)
        #         XS = F.linear(XS, weights[w_keys[i]], weights[b_keys[i]])
        #         XS = F.relu(XS)

        #     XS = F.dropout(XS, self.args.dropout, training=self.training)
        #     XS = F.linear(XS, weights[w_keys[-1]], weights[b_keys[-1]])

        XS = self.mlp(XS)
        XS = self.out(XS) # output: [batch, max_sentence, n_class]
        
        # if self.top_layer is not None:
        #     XS = self.top_layer(XS)

        # # normal training procedure, compute loss/acc
        # if YS is not None:
        #     # if self.args.taskmode == 'episodic':
        #     #     ## useful for episodes, ignored for full supervised
        #     #     _, YS = torch.unique(YS, sorted=True, return_inverse=True)
        #     loss = F.cross_entropy(XS, YS)
        #     acc = BASE.compute_acc(XS, YS)
        #     f1 = BASE.compute_f1(XS, YS)
        #     mcc = BASE.compute_mcc(XS, YS)

        #     if return_preds:
        #         _, y_pred = torch.max(XS, dim=1)
        #         return acc, loss, f1, mcc, y_pred, YS
        #     else:
        #         return acc, loss, f1, mcc

        # else:
        #     return XS
        
        return XS

def get_classifier(emb_dim, args):
    model = MLPseq(emb_dim, args)
    model.to(device)
    return model

In [13]:
model = {}
model["ebd"] = get_embedding(vocab, args)
model['ebd'].train()
model["clf"] = get_classifier(model["ebd"].ebd_dim, args)

In [14]:
model["ebd"], model["clf"]

(CNNseq(
   (ebd): WORDEBD(
     (embedding_layer): Embedding(13967, 300)
   )
   (convs): ModuleList(
     (0): Conv1d(300, 100, kernel_size=(3,), stride=(1,))
     (1): Conv1d(300, 100, kernel_size=(4,), stride=(1,))
     (2): Conv1d(300, 100, kernel_size=(5,), stride=(1,))
   )
   (relu): ReLU()
 ),
 MLPseq(
   (mlp): Sequential(
     (0): Dropout(p=0.1, inplace=False)
     (1): Linear(in_features=300, out_features=300, bias=True)
     (2): ReLU()
     (3): Dropout(p=0.1, inplace=False)
     (4): Linear(in_features=300, out_features=300, bias=True)
   )
   (out): Linear(in_features=300, out_features=7, bias=True)
   (dropout): Dropout(p=0.1, inplace=False)
 ))

## Train

In [15]:
import itertools

def grad_param(model, keys):
    '''
        Return a generator that generates learnable parameters in
        model[key] for key in keys.
    '''
    if len(keys) == 1:
        return filter(lambda p: p.requires_grad,
                model[keys[0]].parameters())
    else:
        return filter(lambda p: p.requires_grad,
                itertools.chain.from_iterable(
                    model[key].parameters() for key in keys))

def get_norm(model):
    '''
        Compute norm of the gradients
    '''
    total_norm = 0

    for p in model.parameters():
        if p.grad is not None:
            p_norm = p.grad.data.norm()
            total_norm += p_norm.item() ** 2

    total_norm = total_norm ** 0.5

    return total_norm

In [16]:
def train(train_data, val_data, model, args, loader=None):
    '''
        Train the model
        Use val_data to do early stopping
    '''

    
    out_dir = args.result_path

    
    best_acc = 0
    best_score = 0
    sub_cycle = 0
    best_path = None

    # opt = torch.optim.Adam(grad_param(model, ['ebd', 'clf']), lr=args.lr)
    opt = torch.optim.Adam(grad_param(model, ['ebd', 'clf']), lr=args.lr, betas=(0.9, 0.98), eps=pow(10, -9)) # CESTa optimizer parameters


    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, 'max', patience=2, factor=0.1, verbose=True) # args.patience//2

    print("{}, Start training supervised".format(datetime.datetime.now().strftime('%02y/%02m/%02d %H:%M:%S')), flush=True)

    for ep in range(args.train_epochs):

        grad = {'clf': [], 'ebd': []}
        
        # train on training set
        for batch in tqdm(loader, ncols=80, leave=False, desc=colored('Training on train', 'yellow')):
            train_one(batch, model, opt, args, grad)

        # Evaluate validation accuracy
        # cur_acc, cur_std, cur_f1, cur_f1_std, cur_mcc, cur_mcc_std, cur_f1_micro, cur_f1_micro_std = test(val_data, model, args, False)
        cur_acc, cur_std, cur_f1, cur_f1_std, cur_mcc, cur_mcc_std, cur_f1_micro, cur_f1_micro_std = test(val_data, model, args, False, loader=val_loader)
        print(cur_acc, cur_std, cur_f1, cur_f1_std, cur_mcc, cur_mcc_std)
        
        print(("{}, {:s} {:2d}, {:s} {:s}{:>7.4f} ± {:>6.4f}, {:s}{:>7.4f} ± {:>6.4f}, {:s}{:>7.4f} ± {:>6.4f}, {:s}{:>7.4f} ± {:>6.4f},"
               "{:s} {:s}{:>7.4f}, {:s}{:>7.4f}").format(
               datetime.datetime.now().strftime('%02y/%02m/%02d %H:%M:%S'),
               "ep", ep,
               colored("val  ", "cyan"),
               colored("acc:", "blue"), cur_acc, cur_std,
               colored("f1:", "blue"), cur_f1, cur_f1_std,
               colored("mcc:", "blue"), cur_mcc, cur_mcc_std,
               colored("f1 micro:", "blue"), cur_f1_micro, cur_f1_micro_std,
               colored("train stats", "cyan"),
               colored("ebd_grad:", "blue"), np.mean(np.array(grad['ebd'])),
               colored("clf_grad:", "blue"), np.mean(np.array(grad['clf'])),
               ), flush=True)
        scores = {'acc':cur_acc, 'f1': cur_f1, 'mcc': cur_mcc, 'f1_micro': cur_f1_micro}

        # Update the current best model if val acc is better
        # if cur_acc > best_acc:
        #     best_acc = cur_acc
        if scores[args.patience_metric] > best_score:
            best_score = scores[args.patience_metric]
            best_path = os.path.join(out_dir, str(ep))

            print( colored( "{}, Attempt to save cur best model to {}".format(
                datetime.datetime.now().strftime('%02y/%02m/%02d %H:%M:%S'),
                best_path) , 'magenta' ))

            while True:
                try:
                    torch.save(model['ebd'].state_dict(), best_path + '.ebd')
                    torch.save(model['clf'].state_dict(), best_path + '.clf')
                    break
                except (FileNotFoundError):
                    continue
            
            # save current model
            print( colored( "{}, Saved cur best model to {}".format(
                datetime.datetime.now().strftime('%02y/%02m/%02d %H:%M:%S'),
                best_path) , 'magenta' ))

            sub_cycle = 0
        else:
            sub_cycle += 1

        #if args.scheduler: scheduler.step(cur_acc)

        # Break if the val acc hasn't improved in the past patience epochs
        if sub_cycle == args.patience:
            break

    print("{}, End of training. Restore the best weights".format(
            datetime.datetime.now().strftime('%02y/%02m/%02d %H:%M:%S')),
            flush=True)

    # restore the best saved model
    while True:
        try:
            model['ebd'].load_state_dict(torch.load(best_path + '.ebd'))
            model['clf'].load_state_dict(torch.load(best_path + '.clf'))
            break
        except (FileNotFoundError):
            continue
    

    if args.save:
        # save the current model
        # out_dir = os.path.abspath(os.path.join(
        #                               os.path.curdir,
        #                               "saved-runs",
        #                               str(int(time.time() * 1e7))))
        out_dir = args.result_text_path
        
        # if args.result_path != '':
        #     dir_path = os.path.split(args.result_path)[0]
        #     out_dir = os.path.abspath(os.path.join(
        #                               os.path.curdir, dir_path) )

        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        best_path = os.path.join(out_dir, 'best')

        print(colored("{}, Save best model to {}".format(
            datetime.datetime.now().strftime('%02y/%02m/%02d %H:%M:%S'),
            best_path), "green"), flush=True)

        torch.save(model['ebd'].state_dict(), best_path + '.ebd')
        torch.save(model['clf'].state_dict(), best_path + '.clf')

        with open(best_path + '_args.txt', 'w') as f:
            for attr, value in sorted(args.__dict__.items()):
                f.write("{}={}\n".format(attr, value))

    return


def train_one(batch, model, opt, args, grad):
    '''
        Train the model on one sampled task.
    '''
    model['ebd'].train()
    model['clf'].train()
    opt.zero_grad()
    
    batch['text'] = batch['text'].to(device)
    batch['label'] = batch['label'].to(device)

    XS = model['ebd'](batch)
    YS = batch['label']

    # if args.classifier == "cesta":
    #     acc, loss, f1, mcc, f1_micro = model['clf'](XS, YS, None, None, authors=batch['authors'])
    # else:
    # Apply the classifier (need to be MLP classifier)
    #acc, loss, f1, mcc = model['clf'](XS, YS, None, None)

    out_XS = model['clf'](XS, None, None, None)
    
    output = out_XS.view(-1, args.n_classes)  # new shape: [32*35, 7]

    # Flatten the target
    target = YS.view(-1)  # new shape: [32*35]

    loss = F.cross_entropy(output, target)
    # acc = BASE.compute_acc(out_XS, YS)
    # f1 = BASE.compute_f1(out_XS, YS)
    # mcc = BASE.compute_mcc(out_XS, YS)
    
    if loss is not None:
        loss.backward()

    if torch.isnan(loss):
        return

    # if args.clip_grad is not None:
    #     nn.utils.clip_grad_value_(grad_param(model, ['ebd', 'clf']), args.clip_grad)
    #     # nn.utils.clip_grad_norm_(grad_param(model, ['ebd', 'clf']), args.clip_grad) #0.5

    grad['clf'].append(get_norm(model['clf']))
    grad['ebd'].append(get_norm(model['ebd']))

    opt.step()


def test(test_data, model, args, verbose=True, target='val', loader=None):
    '''
        Evaluate the model on a bag of sampled tasks. Return the mean accuracy, 
        the weighted f1 score and the matthew correlation coeficient and their
        associated std. (ensure the model used is modified to return the values)
    '''
    model['ebd'].eval()
    model['clf'].eval()

    acc, f1, mcc, f1_micro, trues, preds = [], [], [], [], [], []
    # if loader is None:
    #     loader = DataLoader(SupervisedDataset(test_data, args), batch_size=args.batch_size, num_workers=2, shuffle=False)

    for batch in tqdm(loader, desc=colored('Testing regular on %s' % (target), 'yellow'), total=loader.__len__()):
        #res_acc, res_f1, res_mcc, res_f1_micro, res_pred, res_true = test_one(batch, model, args, out=(target=='test'))
        res_acc, res_f1, res_mcc, res_f1_micro = test_one(batch, model, args, out=(target=='test'))
        acc.append(res_acc)
        f1.append(res_f1)
        mcc.append(res_mcc)
        f1_micro.append(res_f1_micro)
        # trues.extend(res_true.cpu().detach().tolist())
        # preds.extend(res_pred.cpu().detach().tolist())

    acc, f1, mcc, f1_micro = np.array(acc), np.array(f1), np.array(mcc), np.array(f1_micro)

    # if target == 'test' and args.dataset == 'ouitchat_seq':
    #     target_names = ['no emotion', 'anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']
    #     labels = [0, 1, 2, 3, 4, 5, 6]
    #     print(confusion_matrix(np.array(trues), np.array(preds), labels=labels))
    #     print(classification_report(np.array(trues), np.array(preds), labels=labels, target_names=target_names ) )

    if verbose:
        print("{}, {:s} {:>7.4f} ({:s} {:>7.4f}), {:s} {:>7.4f} ({:s} {:>7.4f}), {:s} {:>7.4f} ({:s} {:>7.4f}), {:s} {:>7.4f} ({:s} {:>7.4f})".format(
                datetime.datetime.now().strftime('%02y/%02m/%02d %H:%M:%S'),
                colored("acc mean", "blue"),
                np.mean(acc),
                colored("std", "blue"),
                np.std(acc),
                colored("f1 mean", "blue"),
                np.mean(f1),
                colored("std", "blue"),
                np.std(f1),
                colored("mcc mean", "blue"),
                np.mean(mcc),
                colored("std", "blue"),
                np.std(mcc),
                colored("f1 micro mean", "blue"),
                np.mean(f1_micro),
                colored("std", "blue"),
                np.std(f1_micro),
                ), flush=True)

        # latex table
        print("{:s} & {:s} & {:>7.4f} \\tiny $\\pm {:>7.4f}$ & {:>7.4f} \\tiny $\\pm {:>7.4f}$ & {:>7.4f} \\tiny $\\pm {:>7.4f}$ & {:>7.4f} \\tiny $\\pm {:>7.4f}$".format(
                args.embedding.replace('_', '\\_'),
                args.classifier.replace('_', '\\_'),
                np.mean(acc),
                np.std(acc),
                np.mean(f1),
                np.std(f1),
                np.mean(mcc),
                np.std(mcc),
                np.mean(f1_micro),
                np.std(f1_micro),
                ), flush=True)
    # if args.classifier == 'cesta':
    #     return np.mean(acc), np.std(acc), np.mean(f1), np.std(f1), np.mean(mcc), np.std(mcc), np.mean(f1_micro), np.std(f1_micro)
    return np.mean(acc), np.std(acc), np.mean(f1), np.std(f1), np.mean(mcc), np.std(mcc), np.mean(f1_micro), np.std(f1_micro)


def test_one(batch, model, args, out=False):
    '''
        Evaluate the model on one sampled task. Return the accuracy.
    '''

    batch['text'] = batch['text'].to(device)
    batch['label'] = batch['label'].to(device)

    # Embedding the document
    XS = model['ebd'](batch)
    YS = batch['label']

    # # Apply the classifier
    # if args.dump and out:
    #     acc, loss, f1, mcc = model['clf'](XS, YS=YS, out=out, XS_ids=batch['ids'])
    # elif out and args.classifier != 'cesta':
    #     acc, loss, f1, mcc, y_pred, y_true = model['clf'](XS, YS=YS, return_preds=True)
    #     return acc, f1, mcc, y_pred, y_true
    # else:
    #     if args.classifier == 'cesta': 
    #         acc, loss, f1, mcc, f1_micro, y_pred, y_true = model['clf'](XS, YS=YS, authors=batch['authors'], return_preds=True)
    #         return acc, f1, mcc, f1_micro, y_pred, y_true
    #     else: acc, loss, f1, mcc = model['clf'](XS, YS=YS)

    out_XS = model['clf'](XS, YS=None)

    output = out_XS.view(-1, args.n_classes)  # new shape: [32*35, 7]
    target = YS.view(-1)  # new shape: [32*35]

    #loss = F.cross_entropy(output, YS)
    acc = BASE.compute_acc(output, target)
    f1 = BASE.compute_f1(output, target)
    mcc = BASE.compute_mcc(output, target)
    micro_f1_noneutral = BASE.compute_f1_micro_noneutral(output, target, labels=args['labels'])
    
    return acc, f1, mcc, micro_f1_noneutral
    
    # if out : #and args.classifier != 'cesta':
    #     acc, loss, f1, mcc, y_pred, y_true = model['clf'](XS, YS=YS, return_preds=True)
    #     return acc, f1, mcc, y_pred, y_true
    # else:
    #     # if args.classifier == 'cesta': 
    #     #     acc, loss, f1, mcc, f1_micro, y_pred, y_true = model['clf'](XS, YS=YS, authors=batch['authors'], return_preds=True)
    #     #     return acc, f1, mcc, f1_micro, y_pred, y_true
    #     #else: acc, loss, f1, mcc = model['clf'](XS, YS=YS)
    #     acc, loss, f1, mcc = model['clf'](XS, YS=YS)

    # return acc, f1, mcc


In [24]:
# ## 예시
# model['ebd'].train()
# model['clf'].train()

# batch_ = train_loader.dataset[0:2]
# batch_['text'] = torch.tensor(batch_['text']).to(device)
# batch_['label'] = torch.tensor(batch_['label']).to(device)

# batch_

In [25]:
# wordebd = WORDEBD(vocab, args.finetune_ebd).to(device)
# batch_['text'] = batch_['text'].to(device)
# batch_['label'] = batch_['label'].to(device)
# ebd_xs_ = wordebd(batch_)
# ebd_xs_.shape

In [26]:
# cnnebd = CNNseq(wordebd, args).to(device)
# batch_['text'] = batch_['text'].to(device)
# batch_['label'] = batch_['label'].to(device)
# cnn_xs_ = cnnebd(batch_)
# cnn_xs_.shape

In [27]:
# xs_ = model['ebd'](batch_)
# ys_ = batch_['label']

In [28]:
# xs_.size(), ys_.size()

In [29]:
# out_xs = model['clf'](xs_, None, None, None)
# out_xs.shape

In [30]:
# output = out_xs.view(-1, 7)  # new shape: [32*35, 7]

# # Flatten the target
# target = ys_.view(-1)  # new shape: [32*35]

# # Compute the loss
# loss = F.cross_entropy(output, target)

# loss

In [17]:
train(train_data, val_data, model, args, loader=train_loader)

23/12/06 18:25:51, Start training supervised


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 18.30it/s]8.21it/s][0m

0.9769531320780516 0.0053124940998657 0.9729722728874199 0.007060956560666433 0.4852064679405341 0.09166620261637358
23/12/06 18:26:16, ep  0, [36mval  [0m [34macc:[0m 0.9770 ± 0.0053, [34mf1:[0m 0.9730 ± 0.0071, [34mmcc:[0m 0.4852 ± 0.0917, [34mf1 micro:[0m 0.4680 ± 0.1017,[36mtrain stats[0m [34mebd_grad:[0m 0.0900, [34mclf_grad:[0m 0.1744





[35m23/12/06 18:26:16, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/0[0m
[35m23/12/06 18:26:16, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/0[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.79it/s]6.96it/s][0m

0.9776506740599871 0.005170125110784994 0.9737174860078077 0.0067637961989397526 0.493303103038907 0.10839100813585993
23/12/06 18:26:39, ep  1, [36mval  [0m [34macc:[0m 0.9777 ± 0.0052, [34mf1:[0m 0.9737 ± 0.0068, [34mmcc:[0m 0.4933 ± 0.1084, [34mf1 micro:[0m 0.4697 ± 0.1165,[36mtrain stats[0m [34mebd_grad:[0m 0.0755, [34mclf_grad:[0m 0.1037





[35m23/12/06 18:26:39, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/1[0m
[35m23/12/06 18:26:39, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/1[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.43it/s]7.04it/s][0m

0.977706478908658 0.005317403492100663 0.9733068175284254 0.007062193021633683 0.4846569071239556 0.1006669404375958
23/12/06 18:27:02, ep  2, [36mval  [0m [34macc:[0m 0.9777 ± 0.0053, [34mf1:[0m 0.9733 ± 0.0071, [34mmcc:[0m 0.4847 ± 0.1007, [34mf1 micro:[0m 0.4583 ± 0.1078,[36mtrain stats[0m [34mebd_grad:[0m 0.0886, [34mclf_grad:[0m 0.1004



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 18.03it/s]6.82it/s][0m

0.9782645106315613 0.004939091942789327 0.9744043597971612 0.006449420981850155 0.505822348437678 0.0930139863458389
23/12/06 18:27:24, ep  3, [36mval  [0m [34macc:[0m 0.9783 ± 0.0049, [34mf1:[0m 0.9744 ± 0.0064, [34mmcc:[0m 0.5058 ± 0.0930, [34mf1 micro:[0m 0.4753 ± 0.1018,[36mtrain stats[0m [34mebd_grad:[0m 0.1023, [34mclf_grad:[0m 0.0997





[35m23/12/06 18:27:24, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/3[0m
[35m23/12/06 18:27:24, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/3[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.38it/s]7.42it/s][0m

0.9773995522409678 0.005377743159541637 0.9737095186650961 0.006792284979471801 0.48471915087422685 0.08984527676240218
23/12/06 18:27:46, ep  4, [36mval  [0m [34macc:[0m 0.9774 ± 0.0054, [34mf1:[0m 0.9737 ± 0.0068, [34mmcc:[0m 0.4847 ± 0.0898, [34mf1 micro:[0m 0.4461 ± 0.1002,[36mtrain stats[0m [34mebd_grad:[0m 0.1149, [34mclf_grad:[0m 0.0998



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.87it/s]6.88it/s][0m

0.9775669667869806 0.005161757301840296 0.9748568012757683 0.00634802017282843 0.507801296949032 0.09642820857799296
23/12/06 18:28:09, ep  5, [36mval  [0m [34macc:[0m 0.9776 ± 0.0052, [34mf1:[0m 0.9749 ± 0.0063, [34mmcc:[0m 0.5078 ± 0.0964, [34mf1 micro:[0m 0.4784 ± 0.1038,[36mtrain stats[0m [34mebd_grad:[0m 0.1279, [34mclf_grad:[0m 0.0997





[35m23/12/06 18:28:09, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/5[0m
[35m23/12/06 18:28:09, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/5[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.71it/s]7.64it/s][0m

0.9773995634168386 0.006570383200879331 0.9729551166226778 0.00860186803512064 0.47519437354974675 0.09075064762394232
23/12/06 18:28:30, ep  6, [36mval  [0m [34macc:[0m 0.9774 ± 0.0066, [34mf1:[0m 0.9730 ± 0.0086, [34mmcc:[0m 0.4752 ± 0.0908, [34mf1 micro:[0m 0.4325 ± 0.1013,[36mtrain stats[0m [34mebd_grad:[0m 0.1415, [34mclf_grad:[0m 0.1017



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.35it/s]6.69it/s][0m

0.9779296945780516 0.0063966916839733835 0.973965467784162 0.008228119696992244 0.49336349394641543 0.09798029397413488
23/12/06 18:28:54, ep  7, [36mval  [0m [34macc:[0m 0.9779 ± 0.0064, [34mf1:[0m 0.9740 ± 0.0082, [34mmcc:[0m 0.4934 ± 0.0980, [34mf1 micro:[0m 0.4570 ± 0.1094,[36mtrain stats[0m [34mebd_grad:[0m 0.1508, [34mclf_grad:[0m 0.1066



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.00it/s]7.85it/s][0m

0.977399555966258 0.0062557353879579515 0.973985575855568 0.007686374240762557 0.49506861100825234 0.10291729203907361
23/12/06 18:29:16, ep  8, [36mval  [0m [34macc:[0m 0.9774 ± 0.0063, [34mf1:[0m 0.9740 ± 0.0077, [34mmcc:[0m 0.4951 ± 0.1029, [34mf1 micro:[0m 0.4688 ± 0.1129,[36mtrain stats[0m [34mebd_grad:[0m 0.1572, [34mclf_grad:[0m 0.1089



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 16.48it/s]6.99it/s][0m

0.9776506815105677 0.006701956699265997 0.9739437503646753 0.00841793817946136 0.4956505994889814 0.10624296712866835
23/12/06 18:29:39, ep  9, [36mval  [0m [34macc:[0m 0.9777 ± 0.0067, [34mf1:[0m 0.9739 ± 0.0084, [34mmcc:[0m 0.4957 ± 0.1062, [34mf1 micro:[0m 0.4662 ± 0.1146,[36mtrain stats[0m [34mebd_grad:[0m 0.1638, [34mclf_grad:[0m 0.1128



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 18.27it/s]7.04it/s][0m

0.9776227753609419 0.006092315641565041 0.9747218957292946 0.007701162929675861 0.5105214415346788 0.09506578666910709
23/12/06 18:30:02, ep 10, [36mval  [0m [34macc:[0m 0.9776 ± 0.0061, [34mf1:[0m 0.9747 ± 0.0077, [34mmcc:[0m 0.5105 ± 0.0951, [34mf1 micro:[0m 0.4848 ± 0.1039,[36mtrain stats[0m [34mebd_grad:[0m 0.1623, [34mclf_grad:[0m 0.1081





[35m23/12/06 18:30:02, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/10[0m
[35m23/12/06 18:30:02, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/10[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.54it/s]7.02it/s][0m

0.9772042408585548 0.0065382543585169916 0.9743336596223788 0.008305386632777427 0.5031150722330291 0.09502359162897324
23/12/06 18:30:25, ep 11, [36mval  [0m [34macc:[0m 0.9772 ± 0.0065, [34mf1:[0m 0.9743 ± 0.0083, [34mmcc:[0m 0.5031 ± 0.0950, [34mf1 micro:[0m 0.4743 ± 0.1055,[36mtrain stats[0m [34mebd_grad:[0m 0.1617, [34mclf_grad:[0m 0.1091



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.93it/s]7.07it/s][0m

0.9761718772351742 0.006608192894195396 0.9747384425411063 0.007666926452982785 0.5135870944008513 0.1010375938840236
23/12/06 18:30:47, ep 12, [36mval  [0m [34macc:[0m 0.9762 ± 0.0066, [34mf1:[0m 0.9747 ± 0.0077, [34mmcc:[0m 0.5136 ± 0.1010, [34mf1 micro:[0m 0.4975 ± 0.1023,[36mtrain stats[0m [34mebd_grad:[0m 0.1591, [34mclf_grad:[0m 0.1018





[35m23/12/06 18:30:47, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/12[0m
[35m23/12/06 18:30:47, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/12[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.25it/s]7.18it/s][0m

0.9763671923428774 0.0067814008392737294 0.9739937521318889 0.008018936561508087 0.49359904017387596 0.09163766003382175
23/12/06 18:31:11, ep 13, [36mval  [0m [34macc:[0m 0.9764 ± 0.0068, [34mf1:[0m 0.9740 ± 0.0080, [34mmcc:[0m 0.4936 ± 0.0916, [34mf1 micro:[0m 0.4718 ± 0.0948,[36mtrain stats[0m [34mebd_grad:[0m 0.1697, [34mclf_grad:[0m 0.1128



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.76it/s]5.76it/s][0m

0.9756975509226322 0.006192879530811608 0.9743980286287799 0.007169164923078567 0.509153506855511 0.09152496732606163
23/12/06 18:31:34, ep 14, [36mval  [0m [34macc:[0m 0.9757 ± 0.0062, [34mf1:[0m 0.9744 ± 0.0072, [34mmcc:[0m 0.5092 ± 0.0915, [34mf1 micro:[0m 0.4893 ± 0.0965,[36mtrain stats[0m [34mebd_grad:[0m 0.1688, [34mclf_grad:[0m 0.1219



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 17.29it/s]9.47it/s][0m

0.9756696466356516 0.00617380015822628 0.9745722945758711 0.007007133334605766 0.5119974716549991 0.07685955024565447
23/12/06 18:31:57, ep 15, [36mval  [0m [34macc:[0m 0.9757 ± 0.0062, [34mf1:[0m 0.9746 ± 0.0070, [34mmcc:[0m 0.5120 ± 0.0769, [34mf1 micro:[0m 0.4871 ± 0.0803,[36mtrain stats[0m [34mebd_grad:[0m 0.1690, [34mclf_grad:[0m 0.1313



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.29it/s]7.80it/s][0m

0.9756975453346968 0.006429712715748347 0.9753374108104597 0.006911502438781174 0.5442374246225116 0.09108255810328089
23/12/06 18:32:20, ep 16, [36mval  [0m [34macc:[0m 0.9757 ± 0.0064, [34mf1:[0m 0.9753 ± 0.0069, [34mmcc:[0m 0.5442 ± 0.0911, [34mf1 micro:[0m 0.5256 ± 0.0994,[36mtrain stats[0m [34mebd_grad:[0m 0.1606, [34mclf_grad:[0m 0.1288





[35m23/12/06 18:32:20, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/16[0m
[35m23/12/06 18:32:20, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/16[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.29it/s]6.18it/s][0m

0.9741908553987741 0.0057620361352376205 0.9743206682907524 0.0060139958865119405 0.529816524306306 0.08254728785832569
23/12/06 18:32:44, ep 17, [36mval  [0m [34macc:[0m 0.9742 ± 0.0058, [34mf1:[0m 0.9743 ± 0.0060, [34mmcc:[0m 0.5298 ± 0.0825, [34mf1 micro:[0m 0.5095 ± 0.0905,[36mtrain stats[0m [34mebd_grad:[0m 0.1551, [34mclf_grad:[0m 0.1211



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.19it/s]6.65it/s][0m

0.9731026794761419 0.006646955252707948 0.973880122580267 0.006753947151760082 0.5344976156313386 0.0919047234635493
23/12/06 18:33:07, ep 18, [36mval  [0m [34macc:[0m 0.9731 ± 0.0066, [34mf1:[0m 0.9739 ± 0.0068, [34mmcc:[0m 0.5345 ± 0.0919, [34mf1 micro:[0m 0.5142 ± 0.1045,[36mtrain stats[0m [34mebd_grad:[0m 0.1470, [34mclf_grad:[0m 0.1094



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.77it/s]8.73it/s][0m

0.9748046901077032 0.005438989355992289 0.9753136068375543 0.005752718823685731 0.5556165717241273 0.09656801254374908
23/12/06 18:33:30, ep 19, [36mval  [0m [34macc:[0m 0.9748 ± 0.0054, [34mf1:[0m 0.9753 ± 0.0058, [34mmcc:[0m 0.5556 ± 0.0966, [34mf1 micro:[0m 0.5389 ± 0.1058,[36mtrain stats[0m [34mebd_grad:[0m 0.1455, [34mclf_grad:[0m 0.1041





[35m23/12/06 18:33:30, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/19[0m
[35m23/12/06 18:33:30, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/19[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.63it/s]7.08it/s][0m

0.9750837124884129 0.006978293767178298 0.9751199864363483 0.007442924845762097 0.555868423194328 0.11091076598598433
23/12/06 18:33:53, ep 20, [36mval  [0m [34macc:[0m 0.9751 ± 0.0070, [34mf1:[0m 0.9751 ± 0.0074, [34mmcc:[0m 0.5559 ± 0.1109, [34mf1 micro:[0m 0.5410 ± 0.1199,[36mtrain stats[0m [34mebd_grad:[0m 0.1443, [34mclf_grad:[0m 0.1027





[35m23/12/06 18:33:53, Attempt to save cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/20[0m
[35m23/12/06 18:33:53, Saved cur best model to /home/hyuns6100/Mental-Heatlh-Care/Result/20[0m


[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 18.56it/s]6.91it/s][0m

0.9746093768626451 0.005508188097351017 0.9748158258218429 0.0060825738994552585 0.5430431229934503 0.07870469978764531
23/12/06 18:34:16, ep 21, [36mval  [0m [34macc:[0m 0.9746 ± 0.0055, [34mf1:[0m 0.9748 ± 0.0061, [34mmcc:[0m 0.5430 ± 0.0787, [34mf1 micro:[0m 0.5235 ± 0.0891,[36mtrain stats[0m [34mebd_grad:[0m 0.1444, [34mclf_grad:[0m 0.0978



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.01it/s]7.80it/s][0m

0.9762276802212 0.005257396522818682 0.9750673467051068 0.00639069284578478 0.5301149414028219 0.09922262407395413
23/12/06 18:34:40, ep 22, [36mval  [0m [34macc:[0m 0.9762 ± 0.0053, [34mf1:[0m 0.9751 ± 0.0064, [34mmcc:[0m 0.5301 ± 0.0992, [34mf1 micro:[0m 0.5157 ± 0.1068,[36mtrain stats[0m [34mebd_grad:[0m 0.1407, [34mclf_grad:[0m 0.0953



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.09it/s]4.95it/s][0m

0.9746930859982967 0.005029069433947089 0.9746497885036904 0.0056912694867659215 0.5390246045879209 0.10073673890021506
23/12/06 18:35:03, ep 23, [36mval  [0m [34macc:[0m 0.9747 ± 0.0050, [34mf1:[0m 0.9746 ± 0.0057, [34mmcc:[0m 0.5390 ± 0.1007, [34mf1 micro:[0m 0.5246 ± 0.1093,[36mtrain stats[0m [34mebd_grad:[0m 0.1433, [34mclf_grad:[0m 0.1000



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.06it/s]4.81it/s][0m

0.9760323688387871 0.0063532236174185875 0.9751145114505748 0.007203486936075662 0.5373399247378805 0.10007625878147375
23/12/06 18:35:27, ep 24, [36mval  [0m [34macc:[0m 0.9760 ± 0.0064, [34mf1:[0m 0.9751 ± 0.0072, [34mmcc:[0m 0.5373 ± 0.1001, [34mf1 micro:[0m 0.5188 ± 0.1109,[36mtrain stats[0m [34mebd_grad:[0m 0.1424, [34mclf_grad:[0m 0.0979



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.57it/s]7.30it/s][0m

0.9755859449505806 0.0057846936153213594 0.9748577269135225 0.006792035997139825 0.5335551566305137 0.09044981228650588
23/12/06 18:35:50, ep 25, [36mval  [0m [34macc:[0m 0.9756 ± 0.0058, [34mf1:[0m 0.9749 ± 0.0068, [34mmcc:[0m 0.5336 ± 0.0904, [34mf1 micro:[0m 0.5233 ± 0.0974,[36mtrain stats[0m [34mebd_grad:[0m 0.1382, [34mclf_grad:[0m 0.0923



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 18.79it/s]7.12it/s][0m

0.9763113874942064 0.005575829470871002 0.9758840884033482 0.00606917669216701 0.5482557445011781 0.10467543848351427
23/12/06 18:36:14, ep 26, [36mval  [0m [34macc:[0m 0.9763 ± 0.0056, [34mf1:[0m 0.9759 ± 0.0061, [34mmcc:[0m 0.5483 ± 0.1047, [34mf1 micro:[0m 0.5323 ± 0.1087,[36mtrain stats[0m [34mebd_grad:[0m 0.1381, [34mclf_grad:[0m 0.0952



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.00it/s]5.39it/s][0m

0.9754464346915483 0.005182230438994357 0.9754448154192177 0.005868707595749511 0.5544827539520831 0.09872154668204611
23/12/06 18:36:37, ep 27, [36mval  [0m [34macc:[0m 0.9754 ± 0.0052, [34mf1:[0m 0.9754 ± 0.0059, [34mmcc:[0m 0.5545 ± 0.0987, [34mf1 micro:[0m 0.5398 ± 0.1084,[36mtrain stats[0m [34mebd_grad:[0m 0.1345, [34mclf_grad:[0m 0.0918



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.55it/s]6.84it/s][0m

0.9744698666036129 0.005610901040937477 0.9748850076340039 0.005972139936102184 0.5502738610717739 0.08548392070428114
23/12/06 18:37:00, ep 28, [36mval  [0m [34macc:[0m 0.9745 ± 0.0056, [34mf1:[0m 0.9749 ± 0.0060, [34mmcc:[0m 0.5503 ± 0.0855, [34mf1 micro:[0m 0.5303 ± 0.0966,[36mtrain stats[0m [34mebd_grad:[0m 0.1403, [34mclf_grad:[0m 0.0982



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 16.83it/s]5.95it/s][0m

0.9755859430879354 0.005776079852190839 0.9745241933692568 0.006840088170856422 0.5211108995249822 0.09948229166855717
23/12/06 18:37:23, ep 29, [36mval  [0m [34macc:[0m 0.9756 ± 0.0058, [34mf1:[0m 0.9745 ± 0.0068, [34mmcc:[0m 0.5211 ± 0.0995, [34mf1 micro:[0m 0.5065 ± 0.1107,[36mtrain stats[0m [34mebd_grad:[0m 0.1366, [34mclf_grad:[0m 0.0984



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 16.86it/s]6.74it/s][0m

0.9734654016792774 0.0055095271856676785 0.9738121693238232 0.006199318639912829 0.5329809063239597 0.08275367612876919
23/12/06 18:37:47, ep 30, [36mval  [0m [34macc:[0m 0.9735 ± 0.0055, [34mf1:[0m 0.9738 ± 0.0062, [34mmcc:[0m 0.5330 ± 0.0828, [34mf1 micro:[0m 0.5151 ± 0.0911,[36mtrain stats[0m [34mebd_grad:[0m 0.1320, [34mclf_grad:[0m 0.0916



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.12it/s]7.84it/s][0m

0.9729073699563742 0.0068056900286164014 0.9728431486749618 0.007734928704607927 0.5109303627292878 0.0949874441170185
23/12/06 18:38:09, ep 31, [36mval  [0m [34macc:[0m 0.9729 ± 0.0068, [34mf1:[0m 0.9728 ± 0.0077, [34mmcc:[0m 0.5109 ± 0.0950, [34mf1 micro:[0m 0.4965 ± 0.1001,[36mtrain stats[0m [34mebd_grad:[0m 0.1356, [34mclf_grad:[0m 0.0971



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.21it/s]8.72it/s][0m

0.9767578113824129 0.005666671880597118 0.9754575998421774 0.006751572446996948 0.5312787656571556 0.08607286787973752
23/12/06 18:38:31, ep 32, [36mval  [0m [34macc:[0m 0.9768 ± 0.0057, [34mf1:[0m 0.9755 ± 0.0068, [34mmcc:[0m 0.5313 ± 0.0861, [34mf1 micro:[0m 0.5111 ± 0.0955,[36mtrain stats[0m [34mebd_grad:[0m 0.1303, [34mclf_grad:[0m 0.0971



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.90it/s]7.95it/s][0m

0.976283485069871 0.005490067523459321 0.9752808567044919 0.006506435242742771 0.5289561861014835 0.07928053241550868
23/12/06 18:38:54, ep 33, [36mval  [0m [34macc:[0m 0.9763 ± 0.0055, [34mf1:[0m 0.9753 ± 0.0065, [34mmcc:[0m 0.5290 ± 0.0793, [34mf1 micro:[0m 0.5124 ± 0.0875,[36mtrain stats[0m [34mebd_grad:[0m 0.1326, [34mclf_grad:[0m 0.1012



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.61it/s]7.71it/s][0m

0.9756417442113161 0.005795990535352611 0.9747563752238537 0.006883163015858237 0.5174425135152636 0.0820362852919902
23/12/06 18:39:16, ep 34, [36mval  [0m [34macc:[0m 0.9756 ± 0.0058, [34mf1:[0m 0.9748 ± 0.0069, [34mmcc:[0m 0.5174 ± 0.0820, [34mf1 micro:[0m 0.4912 ± 0.0928,[36mtrain stats[0m [34mebd_grad:[0m 0.1289, [34mclf_grad:[0m 0.0971



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.55it/s]7.69it/s][0m

0.9766183104366064 0.006642036100523427 0.9738588783007525 0.008148733315871869 0.4903623938641161 0.07978604383744642
23/12/06 18:39:38, ep 35, [36mval  [0m [34macc:[0m 0.9766 ± 0.0066, [34mf1:[0m 0.9739 ± 0.0081, [34mmcc:[0m 0.4904 ± 0.0798, [34mf1 micro:[0m 0.4548 ± 0.0861,[36mtrain stats[0m [34mebd_grad:[0m 0.1293, [34mclf_grad:[0m 0.0977



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 23.42it/s]7.62it/s][0m

0.9775948710739613 0.005890326339787389 0.9751002691447206 0.007366846411222162 0.5162757860117612 0.08898311624040531
23/12/06 18:40:00, ep 36, [36mval  [0m [34macc:[0m 0.9776 ± 0.0059, [34mf1:[0m 0.9751 ± 0.0074, [34mmcc:[0m 0.5163 ± 0.0890, [34mf1 micro:[0m 0.4910 ± 0.0975,[36mtrain stats[0m [34mebd_grad:[0m 0.1282, [34mclf_grad:[0m 0.0974



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.72it/s]7.98it/s][0m

0.9761718790978193 0.005936993256153652 0.9742882751864121 0.007270123461611761 0.498204995997757 0.0864751444626322
23/12/06 18:40:22, ep 37, [36mval  [0m [34macc:[0m 0.9762 ± 0.0059, [34mf1:[0m 0.9743 ± 0.0073, [34mmcc:[0m 0.4982 ± 0.0865, [34mf1 micro:[0m 0.4760 ± 0.0989,[36mtrain stats[0m [34mebd_grad:[0m 0.1283, [34mclf_grad:[0m 0.1059



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.99it/s]6.72it/s][0m

0.9755580369383097 0.006256975192994532 0.9735975635046148 0.0074341860792987 0.4838903637337089 0.10137756508557653
23/12/06 18:40:45, ep 38, [36mval  [0m [34macc:[0m 0.9756 ± 0.0063, [34mf1:[0m 0.9736 ± 0.0074, [34mmcc:[0m 0.4839 ± 0.1014, [34mf1 micro:[0m 0.4624 ± 0.1103,[36mtrain stats[0m [34mebd_grad:[0m 0.1276, [34mclf_grad:[0m 0.1086



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.63it/s]8.49it/s][0m

0.9771205428987741 0.005676631027626605 0.9753447853545362 0.006877808760017291 0.5264766890313131 0.07955383484016762
23/12/06 18:41:07, ep 39, [36mval  [0m [34macc:[0m 0.9771 ± 0.0057, [34mf1:[0m 0.9753 ± 0.0069, [34mmcc:[0m 0.5265 ± 0.0796, [34mf1 micro:[0m 0.5076 ± 0.0863,[36mtrain stats[0m [34mebd_grad:[0m 0.1212, [34mclf_grad:[0m 0.1007



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.07it/s]7.57it/s][0m

0.9764229878783226 0.006273314098769167 0.9744503013846995 0.0076483319502669645 0.5114783594066977 0.0860808804202352
23/12/06 18:41:29, ep 40, [36mval  [0m [34macc:[0m 0.9764 ± 0.0063, [34mf1:[0m 0.9745 ± 0.0076, [34mmcc:[0m 0.5115 ± 0.0861, [34mf1 micro:[0m 0.4932 ± 0.0945,[36mtrain stats[0m [34mebd_grad:[0m 0.1160, [34mclf_grad:[0m 0.0931



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 17.15it/s]6.78it/s][0m

0.9765345957130194 0.005825999899047127 0.9744920307749925 0.006961920275051609 0.5052030376278522 0.0872983667552736
23/12/06 18:41:52, ep 41, [36mval  [0m [34macc:[0m 0.9765 ± 0.0058, [34mf1:[0m 0.9745 ± 0.0070, [34mmcc:[0m 0.5052 ± 0.0873, [34mf1 micro:[0m 0.4860 ± 0.0917,[36mtrain stats[0m [34mebd_grad:[0m 0.1177, [34mclf_grad:[0m 0.0969



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 23.27it/s]7.47it/s][0m

0.9759765677154064 0.006551100689700259 0.9737830101572678 0.00790782612817721 0.4901986344019371 0.08458719681717364
23/12/06 18:42:14, ep 42, [36mval  [0m [34macc:[0m 0.9760 ± 0.0066, [34mf1:[0m 0.9738 ± 0.0079, [34mmcc:[0m 0.4902 ± 0.0846, [34mf1 micro:[0m 0.4704 ± 0.0930,[36mtrain stats[0m [34mebd_grad:[0m 0.1158, [34mclf_grad:[0m 0.0944



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 22.38it/s]6.90it/s][0m

0.9758928623050451 0.006141444351919954 0.9742559105430537 0.007322479085612812 0.5048093097581048 0.08724101852869341
23/12/06 18:42:36, ep 43, [36mval  [0m [34macc:[0m 0.9759 ± 0.0061, [34mf1:[0m 0.9743 ± 0.0073, [34mmcc:[0m 0.5048 ± 0.0872, [34mf1 micro:[0m 0.4818 ± 0.0963,[36mtrain stats[0m [34mebd_grad:[0m 0.1241, [34mclf_grad:[0m 0.1023



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 18.21it/s]5.62it/s][0m

0.9762834832072258 0.006269653080216177 0.9747850741671298 0.00729745162880773 0.5250822169062805 0.07828861183193497
23/12/06 18:42:59, ep 44, [36mval  [0m [34macc:[0m 0.9763 ± 0.0063, [34mf1:[0m 0.9748 ± 0.0073, [34mmcc:[0m 0.5251 ± 0.0783, [34mf1 micro:[0m 0.5079 ± 0.0810,[36mtrain stats[0m [34mebd_grad:[0m 0.1193, [34mclf_grad:[0m 0.0974



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.28it/s]7.01it/s][0m

0.9761160705238581 0.006344942093911509 0.9745684577143958 0.007423712515041549 0.5117290758990223 0.07915807402113076
23/12/06 18:43:22, ep 45, [36mval  [0m [34macc:[0m 0.9761 ± 0.0063, [34mf1:[0m 0.9746 ± 0.0074, [34mmcc:[0m 0.5117 ± 0.0792, [34mf1 micro:[0m 0.4976 ± 0.0854,[36mtrain stats[0m [34mebd_grad:[0m 0.1148, [34mclf_grad:[0m 0.0940



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 20.31it/s]6.70it/s][0m

0.9750000052154064 0.00626989764906004 0.9746914929942638 0.006676024353273723 0.5301817924235681 0.07984802733968373
23/12/06 18:43:45, ep 46, [36mval  [0m [34macc:[0m 0.9750 ± 0.0063, [34mf1:[0m 0.9747 ± 0.0067, [34mmcc:[0m 0.5302 ± 0.0798, [34mf1 micro:[0m 0.5130 ± 0.0870,[36mtrain stats[0m [34mebd_grad:[0m 0.1235, [34mclf_grad:[0m 0.0978



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 18.14it/s]7.51it/s][0m

0.9753627274185419 0.005162286617750854 0.9741630672326571 0.005989965843704185 0.5038489280093315 0.07439019039183095
23/12/06 18:44:08, ep 47, [36mval  [0m [34macc:[0m 0.9754 ± 0.0052, [34mf1:[0m 0.9742 ± 0.0060, [34mmcc:[0m 0.5038 ± 0.0744, [34mf1 micro:[0m 0.4880 ± 0.0812,[36mtrain stats[0m [34mebd_grad:[0m 0.1182, [34mclf_grad:[0m 0.0979



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.31it/s]6.52it/s][0m

0.9761160761117935 0.006169775011174002 0.9746053096873467 0.0072470378336012406 0.5134200106921656 0.06608746590540779
23/12/06 18:44:31, ep 48, [36mval  [0m [34macc:[0m 0.9761 ± 0.0062, [34mf1:[0m 0.9746 ± 0.0072, [34mmcc:[0m 0.5134 ± 0.0661, [34mf1 micro:[0m 0.4933 ± 0.0735,[36mtrain stats[0m [34mebd_grad:[0m 0.1177, [34mclf_grad:[0m 0.0985



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.52it/s]6.94it/s][0m

0.9763671960681677 0.006469304519178751 0.9736552840324121 0.008010762498960175 0.48989775739468316 0.09411799069483878
23/12/06 18:44:53, ep 49, [36mval  [0m [34macc:[0m 0.9764 ± 0.0065, [34mf1:[0m 0.9737 ± 0.0080, [34mmcc:[0m 0.4899 ± 0.0941, [34mf1 micro:[0m 0.4669 ± 0.1028,[36mtrain stats[0m [34mebd_grad:[0m 0.1173, [34mclf_grad:[0m 0.1000



[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 19.42it/s]7.49it/s][0m

0.9752232171595097 0.007118401651123753 0.9733954375035826 0.00828162161878278 0.48814614895031555 0.079989059508086
23/12/06 18:45:16, ep 50, [36mval  [0m [34macc:[0m 0.9752 ± 0.0071, [34mf1:[0m 0.9734 ± 0.0083, [34mmcc:[0m 0.4881 ± 0.0800, [34mf1 micro:[0m 0.4667 ± 0.0894,[36mtrain stats[0m [34mebd_grad:[0m 0.1176, [34mclf_grad:[0m 0.1038
23/12/06 18:45:16, End of training. Restore the best weights





[32m23/12/06 18:45:16, Save best model to /home/hyuns6100/Mental-Heatlh-Care/Result/result_text/best[0m


In [18]:
val_acc, val_std, _, _, _, _, val_f1_micro, val_f1_micro_std = test(val_data, model, args, verbose=True, target='val', loader=val_loader)

[33mTesting regular on val[0m: 100%|██████████| 32/32 [00:01<00:00, 21.40it/s]

23/12/06 18:45:31, [34macc mean[0m  0.9751 ([34mstd[0m  0.0070), [34mf1 mean[0m  0.9751 ([34mstd[0m  0.0074), [34mmcc mean[0m  0.5559 ([34mstd[0m  0.1109), [34mf1 micro mean[0m  0.5410 ([34mstd[0m  0.1199)
cnn & mlp &  0.9751 \tiny $\pm  0.0070$ &  0.9751 \tiny $\pm  0.0074$ &  0.5559 \tiny $\pm  0.1109$ &  0.5410 \tiny $\pm  0.1199$





## Test

In [19]:
print( colored('test_data', 'green') )

test_acc, test_std, _, _, _, _, test_f1_micro, test_f1_micro_std, = test(test_data, model, args, target='test', loader=test_loader)

[32mtest_data[0m


[33mTesting regular on test[0m: 100%|██████████| 32/32 [00:01<00:00, 23.15it/s]

23/12/06 18:45:45, [34macc mean[0m  0.9637 ([34mstd[0m  0.0112), [34mf1 mean[0m  0.9613 ([34mstd[0m  0.0132), [34mmcc mean[0m  0.4940 ([34mstd[0m  0.0956), [34mf1 micro mean[0m  0.4837 ([34mstd[0m  0.0988)
cnn & mlp &  0.9637 \tiny $\pm  0.0112$ &  0.9613 \tiny $\pm  0.0132$ &  0.4940 \tiny $\pm  0.0956$ &  0.4837 \tiny $\pm  0.0988$





In [20]:
if args.best_result_path:
    directory = args.best_result_path[:args.best_result_path.rfind("/")]
    if not os.path.exists(directory):
        os.makedirs(directory)

    result = {
        "test_acc": test_acc,
        "test_std": test_std,
        "val_acc": val_acc,
        "val_std": val_std,
        "test_f1_micro": test_f1_micro,
        "test_f1_micro_std": test_f1_micro_std,
        "val_f1_micro": val_f1_micro,
        "val_f1_micro_std": val_f1_micro_std
    }

    for attr, value in sorted(args.__dict__.items()):
        result[attr] = value

    with open(args.best_result_path, "wb") as f:
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)

In [21]:
print(result)

{'test_acc': 0.9636718761175871, 'test_std': 0.011181758962263125, 'val_acc': 0.9750837124884129, 'val_std': 0.006978293767178298, 'test_f1_micro': 0.4836933431523283, 'test_f1_micro_std': 0.09875537229629391, 'val_f1_micro': 0.5409813460077257, 'val_f1_micro_std': 0.11992616569879222, 'authors': False, 'batch_size': 32, 'best_result_path': '/home/hyuns6100/Mental-Heatlh-Care/Result/best_results.pkl', 'classifier': 'mlp', 'cnn_filter_sizes': [3, 4, 5], 'cnn_num_filters': 100, 'context_size': 35, 'convmode': 'seq', 'data_path': '/home/hyuns6100/Mental-Heatlh-Care/data/dailydialog_conv35seq_splits.json', 'dropout': 0.1, 'embedding': 'cnn', 'finetune_ebd': False, 'labels': [1, 2, 3, 4, 5, 6], 'lr': 0.001, 'maxtokens': 30, 'mlp_hidden': [300, 300], 'n_classes': 7, 'n_test_class': 7, 'n_train_class': 7, 'n_val_class': 7, 'patience': 30, 'patience_metric': 'f1_micro', 'result_path': '/home/hyuns6100/Mental-Heatlh-Care/Result/', 'result_text_path': '/home/hyuns6100/Mental-Heatlh-Care/Result/r

In [None]:
# def eval_torch(audio_path,checkpoint_path):
#     torch_model = resnet34().cuda()
#     load_model = torch.load(checkpoint_path)
#     torch_model.load_state_dict(load_model)
#     torch_model.eval()
    

#     paths = glob.glob(audio_path+"/*")
#     count = 0
#     with torch.no_grad():
#         count = 0
#         for input_path in tqdm(paths):
            
#             label_ = int(input_path.split('-')[2]) - 1
#             input_data = preprocess_audio(input_path)
#             input_data = np.transpose(input_data, axes=[2, 0, 1])
#             input_data = np.expand_dims(input_data, axis=0)
#             input_data = torch.FloatTensor(input_data).cuda()
#             outputs = torch_model(input_data)
                
#             pred = torch.argmax(outputs.cpu())

#             if label_ == pred:
#                 count += 1
            
#     print("=======================")
#     print(">> Pytorch result")
#     print(f"Acc: {count/len(paths)}")
#     print("=======================")


## PyTorch to ONNX

### onnx 변환 전 성능 확인

In [57]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#best_path = "/home/hyuns6100/Mental-Heatlh-Care/Result/"
best_path = "/home/hyuns6100/Mental-Heatlh-Care/Result/result_text/best"

# 모델 state path
ebd_model_path = best_path + '.ebd'
clf_model_path = best_path + '.clf'

ebd_model_params = torch.load(ebd_model_path, map_location=device)
clf_model_params = torch.load(clf_model_path, map_location=device)

# 모델 정의 및 불러온 파라미터 설정
try_model = {}
wordebd = WORDEBD(vocab, finetune_ebd=False)
ebd = CNNseq(wordebd, args).to(device)
try_model['ebd'] = ebd

clf = MLPseq(try_model["ebd"].ebd_dim, args).to(device)
try_model['clf'] = clf

try_model['ebd'].load_state_dict(ebd_model_params)
try_model['clf'].load_state_dict(clf_model_params)

try_model['ebd'].eval()
try_model['clf'].eval()

MLPseq(
  (mlp): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
  )
  (out): Linear(in_features=300, out_features=7, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [30]:
test_acc, test_std, _, _, _, _, test_f1_micro, test_f1_micro_std, = test(test_data, try_model, args, target='test', loader=test_loader)

[33mTesting regular on test[0m: 100%|██████████| 32/32 [00:01<00:00, 25.58it/s]

23/12/06 18:55:06, [34macc mean[0m  0.9637 ([34mstd[0m  0.0112), [34mf1 mean[0m  0.9613 ([34mstd[0m  0.0132), [34mmcc mean[0m  0.4940 ([34mstd[0m  0.0956), [34mf1 micro mean[0m  0.4837 ([34mstd[0m  0.0988)
cnn & mlp &  0.9637 \tiny $\pm  0.0112$ &  0.9613 \tiny $\pm  0.0132$ &  0.4940 \tiny $\pm  0.0956$ &  0.4837 \tiny $\pm  0.0988$





In [24]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# #best_path = "/home/hyuns6100/Mental-Heatlh-Care/Result/"
# best_path = "/home/hyuns6100/Mental-Heatlh-Care/Result/result_text/best"

# # 모델 state path
# ebd_model_path = best_path + '.ebd'
# clf_model_path = best_path + '.clf'

# ebd_model_params = torch.load(ebd_model_path, map_location=device)
# clf_model_params = torch.load(clf_model_path, map_location=device)

# # 모델 정의 및 불러온 파라미터 설정
# model = {}
# wordebd = WORDEBD(vocab, finetune_ebd=False)
# ebd = CNNseq(wordebd, args).to(device)
# model['ebd'] = ebd

# clf = MLPseq(model["ebd"].ebd_dim, args).to(device)
# model['clf'] = clf

# model['ebd'].load_state_dict(ebd_model_params)
# model['clf'].load_state_dict(clf_model_params)

# model['ebd'].eval()
# model['clf'].eval()

# calibration data 
batch = test_loader.dataset[0]
batch['text'] = torch.tensor(batch['text']).unsqueeze(0).to(device)

XS = try_model['ebd'](batch)
# XS_np = XS.detach().cpu().numpy()

# # save to .npy
# np.save('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_calib_1', XS_np)

# convert to onnx
torch.onnx.export(try_model['clf'], XS, "/home/hyuns6100/Mental-Heatlh-Care/onnx/emo_clf_onnx.onxx")

In [25]:
import numpy as np

test_np_data = np.load('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_data.npy')
test_np_label = np.load('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_label.npy')

batch = torch.tensor(test_np_data).to(device)
batch_label = torch.tensor(test_np_label).to(device)

In [29]:
# test_one 함수
s = time.time()
out_XS = try_model['clf'](batch, YS=None)
print(time.time() - s)

output = out_XS.view(-1, args.n_classes)  # new shape: [32*35, 7] (batch_size=32)
target = batch_label.view(-1)

acc = BASE.compute_acc(output, target)
f1 = BASE.compute_f1(output, target)
mcc = BASE.compute_mcc(output, target)
micro_f1_noneutral = BASE.compute_f1_micro_noneutral(output, target, labels=args['labels'])

acc, f1, mcc, micro_f1_noneutral

0.1465756893157959


(0.9594571590423584, 0.9396051476148122, 0.0, 0.0)

In [56]:
(torch.argmax(output, dim=1) == target).float().sum() / len(output)

tensor(0.9595, device='cuda:0')

## Save to npy 

In [35]:
try_model['ebd'].eval()
try_model['clf'].eval()

MLPseq(
  (mlp): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
  )
  (out): Linear(in_features=300, out_features=7, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [37]:
batch = test_loader.dataset[:]
batch['text'] = torch.tensor(batch['text']).to(device)

YS = torch.tensor(batch['label']).to(device)
XS = try_model['ebd'](batch)
XS_np = XS.detach().cpu().numpy()
YS_np = YS.detach().cpu().numpy()

# save to .npy
np.save('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_data', XS_np)
np.save('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_label', YS_np)

# 성능 확인 마지막!! 

In [75]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#best_path = "/home/hyuns6100/Mental-Heatlh-Care/Result/"
best_path = "/home/hyuns6100/Mental-Heatlh-Care/Result/result_text/best"

# 모델 state path
ebd_model_path = best_path + '.ebd'
clf_model_path = best_path + '.clf'

ebd_model_params = torch.load(ebd_model_path, map_location=device)
clf_model_params = torch.load(clf_model_path, map_location=device)

# 모델 정의 및 불러온 파라미터 설정
try_model = {}
wordebd = WORDEBD(vocab, finetune_ebd=False)
ebd = CNNseq(wordebd, args).to(device)
try_model['ebd'] = ebd

clf = MLPseq(try_model["ebd"].ebd_dim, args).to(device)
try_model['clf'] = clf

try_model['ebd'].load_state_dict(ebd_model_params)
try_model['clf'].load_state_dict(clf_model_params)

try_model['ebd'].eval()
try_model['clf'].eval()

MLPseq(
  (mlp): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
  )
  (out): Linear(in_features=300, out_features=7, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [76]:
# try_model: best 모델 파라미터 load 
# XS: test_loader 에 저장된 모든 데이터 (1000개) 를 try_model로 임베딩한 input 데이터


# 임베딩 변환 후 & npy 저장 전
#### 데이터 생성
batch = test_loader.dataset[:]
batch['text'] = torch.tensor(batch['text']).to(device)

YS = torch.tensor(batch['label']).to(device) 
XS = try_model['ebd'](batch)


#### model 평가
out_XS = try_model['clf'](XS, YS=None)

output = out_XS.view(-1, args.n_classes)  # new shape: [32*35, 7]
target = YS.view(-1)

acc = BASE.compute_acc(output, target)
f1 = BASE.compute_f1(output, target)
mcc = BASE.compute_mcc(output, target)
micro_f1_noneutral = BASE.compute_f1_micro_noneutral(output, target, labels=args['labels'])

acc, f1, mcc, micro_f1_noneutral

(0.9634857177734375,
 0.9612830286849585,
 0.4899312517676465,
 0.4828393135725429)

In [77]:
# 임베딩 변환 후 & npy 저장 및 다시 load

test_np_data = np.load('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_data.npy')
test_np_label = np.load('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_label.npy')

test = torch.tensor(test_np_data).to(device)
label = torch.tensor(test_np_label).to(device)

#### model 평가
import time
s = time.time()
out_XS = try_model['clf'](test, YS=None)
print(time.time() - s)


output = out_XS.view(-1, args.n_classes)  # new shape: [32*35, 7]
#label = YS.view(-1)
label = label.view(-1)

acc = BASE.compute_acc(output, label)
f1 = BASE.compute_f1(output, label)
mcc = BASE.compute_mcc(output, label)
micro_f1_noneutral = BASE.compute_f1_micro_noneutral(output, label, labels=args['labels'])

acc, f1, mcc, micro_f1_noneutral

0.0007719993591308594


(0.9634857177734375,
 0.9612830286849585,
 0.4899312517676465,
 0.4828393135725429)

## onnx -> pytorch

In [72]:
# import onnx

# model = onnx.load("/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_calib_1.onxx")

In [79]:
import onnx
from onnx2pytorch import ConvertModel
import torch
# import time

# # Load the ONNX model
# onnx_model = onnx.load("/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_calib_1.onxx")

# # Convert to PyTorch model
# pytorch_model = ConvertModel(onnx_model)

ImportError: cannot import name '_LazyBatchNorm' from 'torch.nn.modules.batchnorm' (/home/hyuns6100/anaconda3/envs/MentalHealth/lib/python3.8/site-packages/torch/nn/modules/batchnorm.py)

In [90]:
import numpy as np

test_np_data = np.load('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_data.npy')
test_np_label = np.load('/home/hyuns6100/Mental-Heatlh-Care/onnx/emotion_test_label.npy')

# test = torch.tensor(test_np_data).to(device)
# label = torch.tensor(test_np_label).to(device)



In [84]:
# %pip install onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.16.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting flatbuffers (from onnxruntime)
  Downloading flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
Downloading onnxruntime-1.16.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.4/6.4 MB[0m [31m77.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading flatbuffers-23.5.26-py2.

In [92]:
import onnx, onnxruntime

onnx_path = "/home/hyuns6100/Mental-Heatlh-Care/onnx/emo_clf_onnx.onxx"
onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)

providers = ["TensorrtExecutionProvider", "CUDAExecutionProvider"]
session = onnxruntime.InferenceSession(onnx_path, providers=providers)




In [97]:
for td in test_np_data:
    outputs = session.run(None, {"input.1": td})

InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Invalid rank for input: input.1 Got: 2 Expected: 3 Please fix either the inputs or the model.