# Modules and Global Variables

In [1]:
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
)

import torch, copy, os
from module.score import evaluation_f1
from module.load_json import *
from module.maps import *
from module.inference import *

In [2]:
# Report the runtime environment: PyTorch build, CUDA availability and the
# number of GPUs visible to this process.
NGPU = torch.cuda.device_count()
cuda_ready = torch.cuda.is_available()

print(f'torch.__version__: {torch.__version__}')
print(f'torch.cuda.is_available(): {cuda_ready}')
print(f'NGPU: {NGPU}')

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if cuda_ready:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

torch.__version__: 1.7.1
torch.cuda.is_available(): True
NGPU: 4


# Paths and Modes

In [3]:
# Run mode.
#   False -> predict on the test split and write the predictions to RESULT_SAVE_NAME.
#   True  -> predict on the dev split and report F1 instead of saving.
EVAL_MODE = False

# Input datasets (JSON Lines).
TEST_DATA_PATH = 'dataset/nikluge-sa-2022-test.jsonl'
EVAL_DATA_PATH = 'dataset/nikluge-sa-2022-dev.jsonl'

# Fine-tuned checkpoints for the two classifiers. Adjacent string literals are
# concatenated by the parser, so each value is a single path.
ACD_CHECKPOINT = (
    'training_results/uncleaned_v13_maxlen_256/acd/'
    'klue_roberta_base_mlm_fine_tuned_uncleaned_v13_maxlen_256/'
    'checkpoint-23440'
)
ASC_CHECKPOINT = (
    'training_results/uncleaned_v18_maxlen_256_asc_b/asc/'
    'klue_roberta_base_mlm_fine_tuned_uncleaned_v18_maxlen_256_asc_b/'
    'checkpoint-3000'
)

# File name of the saved predictions (only written when EVAL_MODE is False).
RESULT_SAVE_NAME = (
    'klue_roberta_base_mlm_fine_tuned_uncleaned_v18_maxlen_256'
    '_b_restricted_to_top_7.json'
)

In [4]:
# Choose the inference input. In evaluation mode the dev split replaces the
# test split; TEST_DATA_PATH is rebound so later cells report the file that
# was actually used.
if EVAL_MODE:
    TEST_DATA_PATH = EVAL_DATA_PATH

# Load the selected JSON Lines file and echo its path as a visible banner.
test_data = jsonlload(TEST_DATA_PATH)
print('>>>>> >>>>> >>>>> ', TEST_DATA_PATH, ' <<<<< <<<<< <<<<<', '\n', sep='')

>>>>> >>>>> >>>>> dataset/nikluge-sa-2022-test.jsonl <<<<< <<<<< <<<<<



# Load Model and Tokenizer

In [5]:
def _load_classifier(checkpoint):
    """Load the sequence-classification model and its tokenizer from one checkpoint.

    Returns a ``(model, tokenizer)`` tuple; the model is loaded first.
    """
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer


# One model/tokenizer pair per checkpoint: ACD first, then ASC.
acd_model, acd_tokenizer = _load_classifier(ACD_CHECKPOINT)

asc_model, asc_tokenizer = _load_classifier(ASC_CHECKPOINT)

# Inference

In [6]:
# Run inference with the ACD and ASC models on a deep copy of the input, so
# `test_data` itself is not handed to the inference routine.
# NOTE(review): `inference_m` was previously called here with the same
# arguments and appears to be an alternative entry point — confirm before swapping.
pred_data = inference_b(acd_tokenizer, asc_tokenizer, acd_model, asc_model, copy.deepcopy(test_data))

if not EVAL_MODE:
    # Outside evaluation mode, write the predictions to disk and read them
    # back, so `pred_data` holds exactly what was saved.
    save_path = './'
    result_path = os.path.join(save_path, RESULT_SAVE_NAME)

    jsondump(pred_data, result_path)
    pred_data = jsonload(result_path)

# Cell output: input and prediction counts, shown side by side for comparison.
len(test_data), len(pred_data)

['본품#품질', '제품 전체#일반', '본품#일반', '제품 전체#품질', '제품 전체#디자인', '본품#편의성', '제품 전체#편의성']


100%|██████████| 2127/2127 [11:08<00:00,  3.18it/s]


(2127, 2127)

# Evaluation

In [7]:
# Scoring only runs in evaluation mode, when the dev split was loaded as
# `test_data`.
if EVAL_MODE:
    # Echo the run configuration next to the scores so the output is self-describing.
    print('ACD_CHECKPOINT: ', ACD_CHECKPOINT)
    print('ASC_CHECKPOINT: ', ASC_CHECKPOINT)
    print('INFERENCE DATA: ', TEST_DATA_PATH)

    print('EVAL_MODE :', EVAL_MODE)

    result = evaluation_f1(test_data, pred_data)
    # Only the first two (key, value) entries of the result mapping are
    # reported; build the item list once instead of once per print.
    scores = list(result.items())
    print(scores[0])
    print(scores[1])