In [1]:
import os

# These variables are assigned before `import torch` so they are already in
# place when CUDA is first initialised.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # Enumerate GPUs in PCI bus order
os.environ["CUDA_VISIBLE_DEVICES"] = "2"  # Set the GPUs to use

import torch

# torch.cuda.current_device() raises when no CUDA device is usable, whereas the
# rest of the notebook is written to fall back to the CPU, so only query it
# when CUDA is actually available.
if torch.cuda.is_available():
    print('Current cuda device:', torch.cuda.current_device())
else:
    print('Current cuda device: none (CUDA is not available)')
print('Count of using GPUs:', torch.cuda.device_count())

Current cuda device: 0
Count of using GPUs: 1


In [2]:
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
)

import torch, copy, os
from module.score import evaluation_f1
from module.load_json import *
from module.maps import *
from module.args import print_torch_info
from module.inference import *

# Prefer the GPU when one is visible to this process; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Print the torch / CUDA environment summary (helper imported from module.args).
print_torch_info()

torch.__version__: 1.7.1
torch.cuda.is_available(): True
NGPU: 1


# Paths and Modes

In [3]:
# False: run on the test split and write prediction files.
# True:  run on the dev split and report evaluation_f1 scores instead.
EVAL_MODE = False

# The backbone and run tags are shared by the result id and both checkpoint
# paths, so they are spelled out once and the other names are derived from them.
BACKBONE_TAG = 'monologg_koelectra_base_v3_discriminator'
RUN_TAG = 'uncleaned_v21_run_1'

# Prefix of every prediction file written by the inference cell.
RESULT_ID = f'{BACKBONE_TAG}_{RUN_TAG}'

# Fine-tuned checkpoints for the two pipeline stages (ACD and ASC).
ACD_CHECKPOINT = f'training_results/{RUN_TAG}/acd_{RESULT_ID}/checkpoint-12500'
ASC_CHECKPOINT = f'training_results/{RUN_TAG}/asc_{RESULT_ID}/checkpoint-1600'

# Input splits: the test split is used by default, the dev split in EVAL_MODE.
TEST_DATA_PATH = 'dataset/nikluge-sa-2022-test.jsonl'
EVAL_DATA_PATH = 'dataset/nikluge-sa-2022-dev.jsonl'

In [4]:
# In evaluation mode the dev split replaces the test split as inference input.
if EVAL_MODE:
    TEST_DATA_PATH = EVAL_DATA_PATH

# Load the selected split and echo which file is being used.
test_data = jsonlload(TEST_DATA_PATH)
print(f'>>>>> >>>>> >>>>> {TEST_DATA_PATH} <<<<< <<<<< <<<<<\n')

>>>>> >>>>> >>>>> dataset/nikluge-sa-2022-test.jsonl <<<<< <<<<< <<<<<



# Load Models and Tokenizers

In [5]:
def _load_model_and_tokenizer(checkpoint):
    """Load the sequence-classification model and its tokenizer from one checkpoint path.

    Returns:
        A ``(model, tokenizer)`` tuple, both loaded from ``checkpoint``.
    """
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer


# One model/tokenizer pair per pipeline stage, each from its own checkpoint.
acd_model, acd_tokenizer = _load_model_and_tokenizer(ACD_CHECKPOINT)
asc_model, asc_tokenizer = _load_model_and_tokenizer(ASC_CHECKPOINT)

# Inference

In [6]:
# Directory that prediction files are written to when EVAL_MODE is off.
save_path = './'

# Set to True to run inference for every candidate set in
# `entity_property_pairs`. The default (False) stops after the first set,
# which is what the previous unconditional `break` at the end of the loop did.
RUN_ALL_PAIR_SETS = False

for entity_property_pair in entity_property_pairs:
    # The output file name records how many entity#property labels this set has.
    num_of_ep_pairs = len(entity_property_pair)
    RESULT_SAVE_NAME = f'{RESULT_ID}_num_of_ep_pairs_{num_of_ep_pairs}.json'

    # Inference receives a deep copy so `test_data` itself is left untouched
    # for the length check and the evaluation below.
    # `inference_m` is the alternative entry point and takes the same arguments.
    pred_data = inference_b(
        acd_tokenizer, asc_tokenizer, acd_model, asc_model,
        copy.deepcopy(test_data), entity_property_pair,
    )

    if not EVAL_MODE:
        # Write the predictions, then read them back so the remaining code
        # works with exactly what was stored on disk.
        result_path = os.path.join(save_path, RESULT_SAVE_NAME)
        jsondump(pred_data, result_path)
        pred_data = jsonload(result_path)

    print(RESULT_SAVE_NAME)
    print(len(test_data), len(pred_data))

    if EVAL_MODE:
        print('ACD_CHECKPOINT: ', ACD_CHECKPOINT)
        print('ASC_CHECKPOINT: ', ASC_CHECKPOINT)
        print('INFERENCE DATA: ', TEST_DATA_PATH)

        print('EVAL_MODE :', EVAL_MODE)

        # Only the first two entries of the score mapping are reported.
        result = evaluation_f1(test_data, pred_data)
        scores = list(result.items())
        print(scores[0])
        print(scores[1])

    if not RUN_ALL_PAIR_SETS:
        break

['본품#가격', '본품#다양성', '본품#디자인', '본품#인지도', '본품#일반', '본품#편의성', '본품#품질', '브랜드#가격', '브랜드#디자인', '브랜드#인지도', '브랜드#일반', '브랜드#품질', '제품 전체#가격', '제품 전체#다양성', '제품 전체#디자인', '제품 전체#인지도', '제품 전체#일반', '제품 전체#편의성', '제품 전체#품질', '패키지/구성품#가격', '패키지/구성품#다양성', '패키지/구성품#디자인', '패키지/구성품#일반', '패키지/구성품#편의성', '패키지/구성품#품질']


 79%|███████▉  | 1686/2127 [13:41<03:49,  1.92it/s]

패키지/구성품#가격 found.
corrected as 패키지/ 구성품#가격


 81%|████████  | 1722/2127 [13:58<03:11,  2.11it/s]

패키지/구성품#가격 found.
corrected as 패키지/ 구성품#가격


100%|██████████| 2127/2127 [17:13<00:00,  2.06it/s]


monologg_koelectra_base_v3_discriminator_uncleaned_v21_run_1_num_of_ep_pairs_25.json
2127 2127
['본품#품질', '제품 전체#일반', '본품#일반', '제품 전체#품질']


100%|██████████| 2127/2127 [03:34<00:00,  9.90it/s]


monologg_koelectra_base_v3_discriminator_uncleaned_v21_run_1_num_of_ep_pairs_4.json
2127 2127
['본품#품질', '제품 전체#일반', '본품#일반', '제품 전체#품질', '제품 전체#디자인']


100%|██████████| 2127/2127 [04:20<00:00,  8.17it/s]


monologg_koelectra_base_v3_discriminator_uncleaned_v21_run_1_num_of_ep_pairs_5.json
2127 2127
['본품#품질', '제품 전체#일반', '본품#일반', '제품 전체#품질', '제품 전체#디자인', '본품#편의성', '제품 전체#편의성']


100%|██████████| 2127/2127 [05:44<00:00,  6.17it/s]


monologg_koelectra_base_v3_discriminator_uncleaned_v21_run_1_num_of_ep_pairs_7.json
2127 2127
