## 기본적인 함수 선언 & 모델 다운로드

In [1]:
# Google Colab에 필요한 패키지 설치
!apt-get install -y mecab libmecab-dev mecab-ipadic-utf8
!pip install mecab-python3 rouge-score

import MeCab
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
import json
import requests
import random
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# MeCab 태거 초기화 (기본 사전 사용)
tagger = MeCab.Tagger()

# GitHub의 JSONL 파일 URL
url = "https://raw.githubusercontent.com/beefed-up-geek/HCLT-KACL2024/main/Taeyoon_notebooks/240830_final_data.jsonl"

# JSONL 파일 다운로드
response = requests.get(url)
lines = response.text.strip().split('\n')

# Hugging Face API 토큰 설정
huggingface_token = "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"

# 모델 및 토크나이저 로드
model = AutoModelForCausalLM.from_pretrained(
    "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    use_auth_token=huggingface_token
)
tokenizer = AutoTokenizer.from_pretrained(
    "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
    use_auth_token=huggingface_token
)

# ROUGE 점수 계산기 초기화
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
smoothing_function = SmoothingFunction().method1

# 형태소 분석 함수
def morphological_analysis(sentence):
    tokenized = []
    parsed = tagger.parse(sentence).split("\n")
    for mor in parsed:
        if "\t" in mor:
            token = mor.split("\t")[0]
            tokenized.append(token)
    return tokenized

#인공지능의 마지막 대답만 추출하는 함수
def extract_last_response(input_text):
    start_index = input_text.rfind('[|assistant|]')
    if start_index != -1:
        return input_text[start_index + len('[|assistant|]'): len(input_text)-len("[|endofturn|]")].strip()
    return input_text
import json
import random
import requests


# 인공지능과 대화하는 함수
def chat_with_ai(user_inputs, print_all=False):
    messages = [
        {"role": "system", "content": "You are EXAONE model from LG AI Research, a helpful assistant."}
    ]

    for user_input in user_inputs:
        if user_input == "":
            break

        # 사용자 입력 추가
        messages.append({"role": "user", "content": user_input})

        # 대화 템플릿 적용 및 토큰화
        input_ids = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        )

        # 모델을 사용해 응답 생성
        output = model.generate(
            input_ids.to("cuda"),
            eos_token_id=tokenizer.eos_token_id,
            max_new_tokens=512
        )

        # 인공지능 응답 추출
        ai_response = tokenizer.decode(output[0])
        ai_response = extract_last_response(ai_response)

        # 인공지능 응답을 대화에 추가
        messages.append({"role": "assistant", "content": ai_response})

    # 전체 대화 내역 출력 여부
    if print_all:
        for message in messages:
            role = message["role"].capitalize()
            print(f"{role}: {message['content']}\n")

    # 마지막 응답 반환
    return messages[-1]['content']


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libmecab-dev is already the newest version (0.996-14build9).
mecab-ipadic-utf8 is already the newest version (2.7.0-20070801+main-3).
mecab is already the newest version (0.996-14build9).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=41f24b5744d1a43e07feaa337ec2300fc489aa2a30f594e59906c617409a156d
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

configuration_exaone.py:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct:
- configuration_exaone.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_exaone.py:   0%|          | 0.00/81.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct:
- modeling_exaone.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/23.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/7 [00:00<?, ?it/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00007-of-00007.safetensors:   0%|          | 0.00/1.68G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/70.7k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.93M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.22M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/563 [00:00<?, ?B/s]

## 여기서 점수 측정용 데이터 n개 랜덤하게 추출
만약 데이터를 새로 추출하고 싶거나, 추출하는 데이터의 갯수를 바꾸고 싶으면 <br>**n을 수정하고 다시 실행**

In [2]:
n=10 # 추출할 데이터의 갯수

# JSONL 파일 다운로드
response = requests.get(url)
lines = response.text.strip().split('\n')

# 무작위로 10개의 데이터를 샘플링
sampled_data = random.sample(lines, n)

# 평가 실행
data_pairs = []

for line in sampled_data:
    data = json.loads(line)
    input_data = data['input']
    output_data = data['output']

    # input을 딕셔너리 형태의 문자열로 변환
    input_str = json.dumps(input_data, ensure_ascii=False)

    data_pairs.append({
        "id": data['id'],
        "input": input_str,
        "output": output_data
    })

## 여기서 프롬프트 엔지니어링

In [3]:
def create_user_inputs(input_data):
    user_inputs = [
        f"{input_data}표에서 highlighted_cells들이 무엇을 나타내고 있는지 알려줘.",  # 여기부터 5개의 문자열들을 프롬프트 엔지니어링
        "highlighted_cells들이 의미하는 바를 한 문장으로 설명해줘.",
        "table_title을 사용해서 결론을 내려줘.",
        "",  # 빈 문자열로 대화를 종료
        "추가 입력"
    ]
    return user_inputs


## 바로 점수 측정

In [7]:
def evaluate_and_print_scores(data_pairs):
    total_bleu = 0
    total_rouge1 = 0
    total_rougeL = 0
    count = len(data_pairs)
    print("데이터 ID                      | ROUGE-1 | ROUGE-L | BLEU | 인공지능 대답")
    print("-------------------------------+---------+---------+------+---------------")
    for data in data_pairs:
        input_data = data['input']
        output_data = data['output']
        ai_response = chat_with_ai(create_user_inputs(input_data), print_all=False)
        ai_response_tokens = morphological_analysis(ai_response)

        best_bleu = 0
        best_rouge1 = 0
        best_rougeL = 0

        for ref in output_data:
            ref_tokens = morphological_analysis(ref)

            # BLEU 점수 계산
            bleu_score = sentence_bleu([ref_tokens], ai_response_tokens, smoothing_function=smoothing_function)
            # ROUGE 점수 계산
            rouge_scores = scorer.score(" ".join(ai_response_tokens), " ".join(ref_tokens))
            rouge1_score = rouge_scores['rouge1'].fmeasure
            rougeL_score = rouge_scores['rougeL'].fmeasure

            # 가장 높은 점수로 갱신
            if bleu_score > best_bleu:
                best_bleu = bleu_score
            if rouge1_score > best_rouge1:
                best_rouge1 = rouge1_score
            if rougeL_score > best_rougeL:
                best_rougeL = rougeL_score

        total_bleu += best_bleu
        total_rouge1 += best_rouge1
        total_rougeL += best_rougeL

        # 결과 출력
        print(f"{data['id']} | {best_rouge1:.4f} |  {best_rougeL:.4f} | {best_bleu:.4f} | {ai_response[:40]}...")

    # 평균 점수 계산 및 출력
    avg_bleu = total_bleu / count
    avg_rouge1 = total_rouge1 / count
    avg_rougeL = total_rougeL / count

    print("\n======================================================")
    print(f"Average ROUGE-1: {avg_rouge1:.4f}")
    print(f"Average ROUGE-L: {avg_rougeL:.4f}")
    print(f"Average BLEU: {avg_bleu:.4f}")

evaluate_and_print_scores(data_pairs)

데이터 ID                      | ROUGE-1 | ROUGE-L | BLEU | 인공지능 대답
-------------------------------+---------+---------+------+---------------
nikluge-gtps-2023-train-003652 | 0.0000 |  0.0000 | 0.0079 | 공공장소 소란 만족도라는 표 제목을 고려할 때, `highlighted_...
nikluge-gtps-2023-train-006372 | 0.4000 |  0.4000 | 0.0172 | 주어진 table_title "공동주택의 금연구역 지정 현황"을 고려하여...

Average ROUGE-1: 0.2000
Average ROUGE-L: 0.2000
Average BLEU: 0.0125
