In [3]:
import os
import json
import numpy as np
from rouge_score import rouge_scorer, scoring

In [5]:
from datasets import load_dataset

# Fetch the "ljp" configuration of the KLAID corpus from the Hugging Face Hub.
dataset = load_dataset("lawcompany/KLAID", name="ljp")

  from .autonotebook import tqdm as notebook_tqdm
Generating train split: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 161192/161192 [00:00<00:00, 362030.26 examples/s]


In [6]:
# 90/5/5 split: hold out 10% of the shuffled corpus, then halve the hold-out
# into validation and test. Every step is seeded so the split is reproducible.
full_dataset = dataset["train"]
shuffled_dataset = full_dataset.shuffle(seed=42)

train_testvalid = shuffled_dataset.train_test_split(seed=42, test_size=0.1)
test_valid = train_testvalid["test"].train_test_split(seed=42, test_size=0.5)

train_dataset, valid_dataset, test_dataset = (
    train_testvalid["train"],
    test_valid["train"],
    test_valid["test"],
)

# Sanity check: the three splits together must account for every example.
assert sum(map(len, (train_dataset, valid_dataset, test_dataset))) == len(full_dataset)


In [7]:
# Report the total number of examples before splitting.
print(f"Number of data: {len(full_dataset)}")

Number of data: 161192


In [8]:
# Directory and file names for the curated JSONL splits.
DATA_DIR = "./curated-data"
# exist_ok=True avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(DATA_DIR, exist_ok=True)
TRAIN_DS = os.path.join(DATA_DIR, "law-kr-train.jsonl")
VAL_DS = os.path.join(DATA_DIR, "law-kr-val.jsonl")
TEST_DS = os.path.join(DATA_DIR, "law-kr-test.jsonl")


In [9]:

def create_dataset(
    dataset,
    output_file,
    prompt="다음 법률 사실에 관련된 법률 조항을 알려주세요:\n\n",
    ensure_ascii=True,
):
    """Write prompt/answer pairs as JSON lines.

    Args:
        dataset: Iterable of mappings, each with a ``'fact'`` and a
            ``'laws_service'`` entry.
        output_file: Destination path; the file is overwritten.
        prompt: Instruction text prepended to every ``'fact'``. Defaults to the
            Korean instruction this notebook has always used.
        ensure_ascii: Forwarded to ``json.dumps``. The default (``True``) escapes
            non-ASCII characters as ``\\uXXXX``, which is the original on-disk
            format; pass ``False`` to store readable UTF-8 text instead.

    Each output line is one JSON object with the keys ``"input"``
    (``prompt + fact``) and ``"output"`` (``laws_service``).
    """
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for data in dataset:
            new_data = {
                "input": prompt + data['fact'],
                "output": data['laws_service'],
            }
            # One JSON object per line (JSON Lines).
            outfile.write(json.dumps(new_data, ensure_ascii=ensure_ascii) + '\n')

# Write each split to its JSONL file: train, validation, then test.
for split_rows, split_path in (
    (train_dataset, TRAIN_DS),
    (valid_dataset, VAL_DS),
    (test_dataset, TEST_DS),
):
    create_dataset(split_rows, split_path)

In [10]:
import time
# Preview the first training record. The encoding is explicit so the read does
# not depend on the platform default and matches the UTF-8 writer.
with open(TRAIN_DS, 'r', encoding='utf-8') as infile:
    first_line = next(infile, None)  # None when the file is empty
if first_line is not None:
    data = json.loads(first_line)
    print(data)

{'input': '다음 법률 사실에 관련된 법률 조항을 알려주세요:\n\n피고인은 2019. 6. 29. 03:35경 김제시 B 앞 도로에서부터 같은 시 C에 있는 ‘D’ 중식당 주차장 앞 도로에 이르기까지 약 50m 구간에서 혈중알코올농도 0.067%의 술에 취한 상태로 E ‘봉고’ 화물차를 운전하였다. 이로써 피고인은 음주운전금지규정을 2회 이상 위반하였다.', 'output': '도로교통법 제148조의2 제1항,도로교통법 제44조 제1항'}


In [11]:
import time
# Preview the first test record. The encoding is explicit so the read does
# not depend on the platform default and matches the UTF-8 writer.
with open(TEST_DS, 'r', encoding='utf-8') as infile:
    first_line = next(infile, None)  # None when the file is empty
if first_line is not None:
    data = json.loads(first_line)
    print(data)

{'input': '다음 법률 사실에 관련된 법률 조항을 알려주세요:\n\n1. 주거침입 피고인은 2018. 7. 12. 18:30경 대구 동구 B건물 C호 피해자 D의 주거지인 원룸에 탑차와 사다리를 이용하여 원룸 창문을 열고 들어 가 피해자의 주거에 침입하였다. 2. 절도 피고인은 위\n 1.항의 일시 및 장소에서 위 피해자의 주민등록증과 운전면허증을 가져가 절취하였다.', 'output': '형법 제319조 제1항,형법 제329조'}


Download the Llama 3.1 8B Instruct `.nemo` checkpoint

```python
!/usr/local/ngc-cli/ngc registry model download-version "nvidia/nemo/llama-3_1-8b-instruct-nemo:1.0" 
```

In [None]:
%%bash

# LoRA fine-tuning of the Llama 3.1 8B Instruct checkpoint on the curated
# Korean law dataset, using NeMo's megatron_gpt_finetuning.py on a single GPU.

# Base model checkpoint.
MODEL="llama-3-8b-instruct-nemo_v1.0/llama3_1_8b_instruct.nemo"

# Training and validation splits, written as Hydra lists (hence the brackets).
TRAIN_DS="[./curated-data/law-kr-train.jsonl]"
VALID_DS="[./curated-data/law-kr-val.jsonl]"

# PEFT scheme and model-parallel sizes.
SCHEME="lora"
TP_SIZE=1
PP_SIZE=1

# WARNING: this removes every earlier run stored under ./results, not only
# the experiment directory defined below.
rm -rf results
OUTPUT_DIR="./results/Meta-llama3.1-8B-Instruct-Kr-law-Lora"

# Every override is quoted: values such as [..] are glob patterns to bash and
# must reach Hydra unmodified.
torchrun --nproc_per_node=1 \
/opt/NeMo/examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
    "exp_manager.exp_dir=${OUTPUT_DIR}" \
    "exp_manager.explicit_log_dir=${OUTPUT_DIR}" \
    trainer.devices=1 \
    trainer.num_nodes=1 \
    trainer.precision=bf16-mixed \
    trainer.val_check_interval=20 \
    trainer.max_steps=500 \
    model.megatron_amp_O2=True \
    ++model.mcore_gpt=True \
    "model.tensor_model_parallel_size=${TP_SIZE}" \
    "model.pipeline_model_parallel_size=${PP_SIZE}" \
    model.micro_batch_size=1 \
    model.global_batch_size=8 \
    "model.restore_from_path=${MODEL}" \
    model.data.train_ds.num_workers=0 \
    model.data.validation_ds.num_workers=0 \
    "model.data.train_ds.file_names=${TRAIN_DS}" \
    "model.data.train_ds.concat_sampling_probabilities=[1.0]" \
    "model.data.validation_ds.file_names=${VALID_DS}" \
    "model.peft.peft_scheme=${SCHEME}"

In [16]:
# Build a smaller test split from the first 128 lines of the full test file.
!head -n 128 ./curated-data/law-kr-test.jsonl > ./curated-data/law-kr-test-n128.jsonl

In [18]:
%%bash
# Generate predictions with the base model plus the trained LoRA adapter and
# write inputs, predictions and labels to a JSONL file.

# Base model checkpoint.
MODEL="llama-3-8b-instruct-nemo_v1.0/llama3_1_8b_instruct.nemo"

TEST_DS="[./curated-data/law-kr-test-n128.jsonl]" # Smaller test split
# TEST_DS="[./curated-data/law-kr-test.jsonl]" # Full test set
TEST_NAMES="[law]"

TP_SIZE=1
PP_SIZE=1

# This is where your LoRA checkpoint was saved
PATH_TO_TRAINED_MODEL="./results/Meta-llama3.1-8B-Instruct-Kr-law-Lora/checkpoints/megatron_gpt_peft_lora_tuning.nemo"

# The generation run will save the generated outputs over the test dataset in a file prefixed like so
OUTPUT_PREFIX="law_kr_lora"

# Every override is quoted: values such as [..] are glob patterns to bash and
# must reach Hydra unmodified.
# prompt_template: inside double quotes bash keeps the backslashes, so the
# script receives \{input\}\ \{output\} literally.
# NOTE(review): presumably Hydra's override parser then unescapes this to
# "{input} {output}" - confirm against the Hydra override grammar.
torchrun --nproc_per_node=1 \
 /opt/NeMo/examples/nlp/language_modeling/tuning/megatron_gpt_generate.py \
    "model.restore_from_path=${MODEL}" \
    "model.peft.restore_from_path=${PATH_TO_TRAINED_MODEL}" \
    trainer.devices=1 \
    trainer.num_nodes=1 \
    "model.data.test_ds.file_names=${TEST_DS}" \
    "model.data.test_ds.names=${TEST_NAMES}" \
    model.data.test_ds.global_batch_size=8 \
    model.data.test_ds.micro_batch_size=1 \
    model.data.test_ds.tokens_to_generate=50 \
    "model.tensor_model_parallel_size=${TP_SIZE}" \
    "model.pipeline_model_parallel_size=${PP_SIZE}" \
    inference.greedy=True \
    "model.data.test_ds.output_file_path_prefix=${OUTPUT_PREFIX}" \
    model.data.test_ds.write_predictions_to_file=True \
    model.data.test_ds.add_bos=False \
    model.data.test_ds.add_eos=True \
    model.data.test_ds.add_sep=False \
    model.data.test_ds.label_key="output" \
    model.data.test_ds.prompt_template="\{input\}\ \{output\}"

    See https://hydra.cc/docs/1.2/upgrades/1.1_to_1.2/changes_to_job_working_dir/ for more information.
      ret = run_job(
    


[NeMo I 2024-11-13 21:43:05 megatron_gpt_generate:125] 
    
    ************** Experiment configuration ***********
[NeMo I 2024-11-13 21:43:05 megatron_gpt_generate:126] 
    name: megatron_gpt_peft_${model.peft.peft_scheme}_tuning
    trainer:
      devices: 1
      accelerator: gpu
      num_nodes: 1
      precision: 16
      logger: false
      enable_checkpointing: false
      use_distributed_sampler: false
      max_epochs: 9999
      max_steps: 20000
      log_every_n_steps: 10
      val_check_interval: 200
      gradient_clip_val: 1.0
    exp_manager:
      explicit_log_dir: null
      exp_dir: null
      name: ${name}
      create_wandb_logger: false
      wandb_logger_kwargs:
        project: null
        name: null
      resume_if_exists: true
      resume_ignore_no_checkpoint: true
      create_checkpoint_callback: true
      checkpoint_callback_params:
        monitor: validation_${model.data.test_ds.metric.name}
        save_top_k: 1
        mode: max
        save_nemo_o

[NeMo W 2024-11-13 21:43:05 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/_graveyard/precision.py:49: The `MixedPrecisionPlugin` is deprecated. Use `pytorch_lightning.plugins.precision.MixedPrecision` instead.
    
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: context_parallel_size in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: expert_model_parallel_size in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: moe_extended_tp in its cfg. Add this key to cfg or config_mapping to make to make it 

[NeMo I 2024-11-13 21:43:26 megatron_init:263] Rank 0 has data parallel group : [0]
[NeMo I 2024-11-13 21:43:26 megatron_init:269] Rank 0 has combined group of data parallel and context parallel : [0]
[NeMo I 2024-11-13 21:43:26 megatron_init:274] All data parallel group ranks with context parallel combined: [[0]]
[NeMo I 2024-11-13 21:43:26 megatron_init:277] Ranks 0 has data parallel rank: 0
[NeMo I 2024-11-13 21:43:26 megatron_init:285] Rank 0 has context parallel group: [0]
[NeMo I 2024-11-13 21:43:26 megatron_init:288] All context parallel group ranks: [[0]]
[NeMo I 2024-11-13 21:43:26 megatron_init:289] Ranks 0 has context parallel rank: 0
[NeMo I 2024-11-13 21:43:26 megatron_init:296] Rank 0 has model parallel group: [0]
[NeMo I 2024-11-13 21:43:26 megatron_init:297] All model parallel group ranks: [[0]]
[NeMo I 2024-11-13 21:43:26 megatron_init:306] Rank 0 has tensor model parallel group: [0]
[NeMo I 2024-11-13 21:43:26 megatron_init:310] All tensor model parallel group ranks: 

[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: context_parallel_size in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: expert_model_parallel_size in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: moe_extended_tp in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: finalize_model_grads_func in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: use_te_rng_t

[NeMo I 2024-11-13 21:43:26 tokenizer_utils:178] Getting HuggingFace AutoTokenizer with pretrained_model_name: meta-llama/Meta-Llama-3-8B


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


[NeMo I 2024-11-13 21:43:26 megatron_base_model:584] Padded vocab_size: 128256, original vocab_size: 128256, dummy tokens: 0.


[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: context_parallel_size in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: expert_model_parallel_size in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: moe_extended_tp in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: finalize_model_grads_func in its cfg. Add this key to cfg or config_mapping to make to make it configurable.
[NeMo W 2024-11-13 21:43:26 megatron_base_model:1158] The model: MegatronGPTSFTModel() does not have field.name: use_te_rng_t

[NeMo I 2024-11-13 21:43:52 dist_ckpt_io:95] Using ('zarr', 1) dist-ckpt save strategy.
Loading distributed checkpoint with TensorStoreLoadShardedStrategy
Loading distributed checkpoint directly on the GPU
[NeMo I 2024-11-13 21:44:57 nlp_overrides:1180] Model MegatronGPTSFTModel was successfully restored from /workspace/llama-3-8b-instruct-nemo_v1.0/llama3_1_8b_instruct.nemo.
[NeMo I 2024-11-13 21:44:57 nlp_adapter_mixins:203] Before adding PEFT params:
      | Name  | Type     | Params | Mode 
    -------------------------------------------
    0 | model | GPTModel | 8.0 B  | train
    -------------------------------------------
    0         Trainable params
    8.0 B     Non-trainable params
    8.0 B     Total params
    32,121.045Total estimated model params size (MB)
[NeMo I 2024-11-13 21:45:00 nlp_adapter_mixins:208] After adding PEFT params:
      | Name  | Type     | Params | Mode 
    -------------------------------------------
    0 | model | GPTModel | 8.0 B  | train
    --

[NeMo W 2024-11-13 21:45:00 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/configuration_validator.py:161: You have overridden `MegatronGPTSFTModel.configure_sharded_model` which is deprecated. Please override the `configure_model` hook instead. Instantiation with the newer hook will be created on the device right away and have the right data type depending on the precision setting in the Trainer.
    
[NeMo W 2024-11-13 21:45:00 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/configuration_validator.py:143: You are using the `dataloader_iter` step flavor. If you consume the iterator more than once per step, the `batch_idx` argument in any hook that takes it will not match with the batch index of the last batch consumed. This might have unforeseen effects on callbacks or code that expects to get the correct index. This will also not work well with gradient accumulation. This feature is very experimental and subjec

[NeMo I 2024-11-13 21:45:00 megatron_gpt_sft_model:803] Building GPT SFT test datasets.
[NeMo I 2024-11-13 21:45:00 text_memmap_dataset:116] Building data files
[NeMo I 2024-11-13 21:45:00 text_memmap_dataset:525] Processing 1 data files using 46 workers


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

[NeMo I 2024-11-13 21:45:02 text_memmap_dataset:495] Building indexing for fn = ./curated-data/law-kr-test-n128.jsonl
[NeMo I 2024-11-13 21:45:02 text_memmap_dataset:507] Saving idx file = ./curated-data/law-kr-test-n128.jsonl.idx.npy
[NeMo I 2024-11-13 21:45:02 text_memmap_dataset:509] Saving metadata file = ./curated-data/law-kr-test-n128.jsonl.idx.info
[NeMo I 2024-11-13 21:45:02 text_memmap_dataset:535] Time building 1 / 1 mem-mapped files: 0:00:01.804854
[NeMo I 2024-11-13 21:45:02 text_memmap_dataset:525] Processing 1 data files using 46 workers


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

[NeMo I 2024-11-13 21:45:04 text_memmap_dataset:535] Time building 0 / 1 mem-mapped files: 0:00:01.893652
[NeMo I 2024-11-13 21:45:04 text_memmap_dataset:158] Loading data files
[NeMo I 2024-11-13 21:45:04 text_memmap_dataset:249] Loading ./curated-data/law-kr-test-n128.jsonl
[NeMo I 2024-11-13 21:45:04 text_memmap_dataset:161] Time loading 1 mem-mapped files: 0:00:00.004577
[NeMo I 2024-11-13 21:45:04 text_memmap_dataset:165] Computing global indices
[NeMo I 2024-11-13 21:45:04 megatron_gpt_sft_model:806] Length of test dataset: 128
[NeMo I 2024-11-13 21:45:04 megatron_gpt_sft_model:829] Building dataloader with consumed samples: 0


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
[NeMo W 2024-11-13 21:45:04 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=91` in the `DataLoader` to improve performance.
    
[NeMo W 2024-11-13 21:45:04 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/utilities.py:149: Found `dataloader_iter` argument in the `test_step`. Note that the support for this signature is experimental and the behavior is subject to change.
    
    
      input_info_tensor = torch.cuda.FloatTensor(input_info)
    
      string_tensor = torch.as_tensor(
    


Testing DataLoader 0: 100%|██████████| 16/16 [09:44<00:00,  0.03it/s][NeMo I 2024-11-13 21:54:48 megatron_gpt_sft_model:561] Total deduplicated inference data size: 128 to 128
[NeMo I 2024-11-13 21:54:48 megatron_gpt_sft_model:712] Predictions saved to law_kr_lora_test_law_inputs_preds_labels.jsonl


[NeMo W 2024-11-13 21:54:48 megatron_gpt_sft_model:652] No training data found, reconfiguring microbatches based on validation batch sizes.
[NeMo W 2024-11-13 21:54:48 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:439: It is recommended to use `self.log('val_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
    
[NeMo W 2024-11-13 21:54:48 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:439: It is recommended to use `self.log('test_loss_law', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
    
[NeMo W 2024-11-13 21:54:48 nemo_logging:349] /usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:439: It is recommended to use `self.log('test_loss', ..., sync_

Testing DataLoader 0: 100%|██████████| 16/16 [09:44<00:00,  0.03it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃[1m [0m[1m       Test metric       [0m[1m [0m┃[1m [0m[1m      DataLoader 0       [0m[1m [0m┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│[36m [0m[36m        test_loss        [0m[36m [0m│[35m [0m[35m   0.05774552375078201   [0m[35m [0m│
│[36m [0m[36m      test_loss_law      [0m[36m [0m│[35m [0m[35m   0.05774552375078201   [0m[35m [0m│
│[36m [0m[36m        val_loss         [0m[36m [0m│[35m [0m[35m   0.05774552375078201   [0m[35m [0m│
└───────────────────────────┴───────────────────────────┘


In [19]:
# List the working directory; the predictions file from the generation run should appear here.
!ls

00_NIMs.ipynb
00_NeMo_finetuning.ipynb
00_NeMo_finetuning.zip
01_NVIDIAAISolutions소개.pdf
01_RAG.ipynb
02_Advacned_RAG.ipynb
02_H100TensorCore를활용한학습가속화.pdf
03_분산학습을통한학습가속화.pdf
04_프로그램병목개선을위한GPU프로파일링활용.pdf
05_GenerativeAI학습플랫폼NVIDIANeMo.pdf
06_LLM추론최적화를위한TensorRT-LLM.pdf
07_NVIDIAAIEnterprise및NIM활용.pdf
Dockerfile
Dockerfile.addpackages
Dockerfile.custompytorch
NIMs.zip
NeMo
NeMo_finetuning.ipynb
TRT-LLM-AICA.zip
curated-data
law_kr_lora_test_law_inputs_preds_labels.jsonl
llama-3-8b-instruct-nemo_v1.0
llama3-lora-nemofw.ipynb
llama3_1_8b_instruct.nemo
ngc-cli
ngc-cli.md5
ngccli_linux.zip
pubmedqa
results
workspace


In [20]:
# Predictions file written by the generation run (inputs, predictions, labels).
Pred_label_path = "law_kr_lora_test_law_inputs_preds_labels.jsonl"

# Decode and print every record, one JSON object per line.
with open(Pred_label_path, encoding='utf-8') as infile:
    for data in map(json.loads, infile):
        print(data)

{'input': '다음 법률 사실에 관련된 법률 조항을 알려주세요:\n\n1. 주거침입 피고인은 2018. 7. 12. 18:30경 대구 동구 B건물 C호 피해자 D의 주거지인 원룸에 탑차와 사다리를 이용하여 원룸 창문을 열고 들어 가 피해자의 주거에 침입하였다. 2. 절도 피고인은 위\n 1.항의 일시 및 장소에서 위 피해자의 주민등록증과 운전면허증을 가져가 절취하였다.', 'pred': ' 형법 제319조 제1항,형법 제329조', 'label': ' 형법 제319조 제1항,형법 제329조'}
{'input': '다음 법률 사실에 관련된 법률 조항을 알려주세요:\n\n피고인은 2022. 3. 30. 02:09경 경남 김해시 B모텔 앞 도로에서부터 같은 시 C에 있는 D 앞 도로에 이르기까지 약 1.5km 구간에서 혈중알코올농도 0.124%의 술에 취한 상태로 (차량번호 1 생략) 피아트 승용차를 운전하였다.', 'pred': ' 도로교통법 제148조의2 제3항 제2호,도로교통법 제44조 제1항', 'label': ' 도로교통법 제148조의2 제3항 제2호,도로교통법 제44조 제1항'}
{'input': '다음 법률 사실에 관련된 법률 조항을 알려주세요:\n\n피고인은 2019. 6. 21. 23:20경 김해시 B 앞 도로부터 C에 있는 D 부근 도로에 이르기까지 약 200미터 구간에서 혈중알콜농도 0.121%의 술에 취한 상태로 E 그랜저 승용차를 운전하였다.', 'pred': ' 도로교통법 제148조의2 제3항 제2호,도로교통법 제44조 제1항', 'label': ' 도로교통법 제148조의2 제1항,도로교통법 제44조 제1항'}
{'input': '다음 법률 사실에 관련된 법률 조항을 알려주세요:\n\n피고인은 서울 강동구 B 3층에서 ‘C’라는 상호의 업소를 운영하는 자이다. 피고인은 2014. 10. 29.부터 같은 달 30.까지 위 ‘C’ 업소에서 성매매 여성 D을 고용한 후, 그곳을 찾은 남자 손님들로부터 8만 원에서 12만 원의 대금을 받고 그

In [21]:
def compute_rouge(input_file: str, tokenizer=None) -> dict:
    """Score generated predictions against reference labels with ROUGE.

    Args:
        input_file: Path to a JSON-lines file in which every record has a
            ``'pred'`` (model output) and a ``'label'`` (reference) string.
        tokenizer: Optional object exposing ``tokenize(text)``; when given it is
            forwarded to ``rouge_scorer.RougeScorer``. ``None`` (the default)
            keeps the library's built-in tokenizer with stemming enabled.

    Returns:
        Mapping from ROUGE variant (``rouge1``, ``rouge2``, ``rougeL``,
        ``rougeLsum``) to the bootstrap mid-point F-measure, scaled to 0-100
        and rounded to four decimals. The scores and whitespace-token length
        statistics are also printed.

    NOTE(review): rouge_score's built-in tokenizer appears to keep only
    ``[a-z0-9]`` tokens, which would drop Hangul and leave only the digits and
    Latin letters of each citation to be compared. Confirm against the
    installed rouge_score version and pass a Korean-aware ``tokenizer`` if so.
    """
    ROUGE_KEYS = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
    # Forward `tokenizer` only when supplied so the default call is unchanged.
    scorer_kwargs = {"use_stemmer": True}
    if tokenizer is not None:
        scorer_kwargs["tokenizer"] = tokenizer
    scorer = rouge_scorer.RougeScorer(ROUGE_KEYS, **scorer_kwargs)
    aggregator = scoring.BootstrapAggregator()

    # The context manager closes the handle; blank lines are skipped so a
    # trailing newline cannot break json.loads.
    with open(input_file, encoding="utf-8") as infile:
        records = [json.loads(raw) for raw in infile if raw.strip()]

    num_response_words = []
    num_ref_words = []
    for record in records:
        response = record['pred']
        answer = record['label']
        # RougeScorer.score expects (target, prediction): the reference goes first.
        aggregator.add_scores(scorer.score(answer, response))
        num_response_words.append(len(response.split()))
        num_ref_words.append(len(answer.split()))

    result = aggregator.aggregate()
    rouge_scores = {k: round(v.mid.fmeasure * 100, 4) for k, v in result.items()}
    print(rouge_scores)
    print(f"Average and stddev of response length: {np.mean(num_response_words):.2f}, {np.std(num_response_words):.2f}")
    print(f"Average and stddev of ref length: {np.mean(num_ref_words):.2f}, {np.std(num_ref_words):.2f}")

    return rouge_scores

In [22]:
# Score the predictions file written by the generation run.
compute_rouge(Pred_label_path)

{'rouge1': 90.3895, 'rouge2': 74.0378, 'rougeL': 89.7584, 'rougeLsum': 89.7422}
Average and stddev of response length: 4.20, 1.71
Average and stddev of ref length: 4.36, 1.91


{'rouge1': 90.3895, 'rouge2': 74.0378, 'rougeL': 89.7584, 'rougeLsum': 89.7422}