## 0. 모델, 토크나이저 다운로드 & 필요한 함수 선언

In [None]:
# Google Colab에 필요한 패키지 설치
!pip install konlpy rouge-score
!apt-get install -y openjdk-11-jdk
from google.colab import drive
from rouge_score import rouge_scorer
import json
import requests
import random
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from collections import Counter
from konlpy.tag import Kkma
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Initialize the KoNLPy Kkma morphological analyzer once at module level so
# that every call to morphological_analysis reuses the same instance.
kkma = Kkma()

# Morphological analysis helper
def morphological_analysis(sentence):
    """Split *sentence* into morphemes with the shared Kkma analyzer.

    Args:
        sentence: Text to analyze.

    Returns:
        Whatever ``kkma.morphs`` returns for the sentence (its morpheme
        tokens).
    """
    morphemes = kkma.morphs(sentence)
    return morphemes

# BLEU score calculation
def calculate_bleu(reference_tokens, candidate_tokens):
    """Return the sentence-level BLEU score of a candidate against one reference.

    Args:
        reference_tokens: Token list of the reference sentence.
        candidate_tokens: Token list of the candidate sentence.

    Returns:
        The value produced by NLTK's ``sentence_bleu`` using ``method1``
        smoothing.
    """
    # sentence_bleu expects a list of references; only one is supplied here.
    references = [reference_tokens]
    smoother = SmoothingFunction().method1
    return sentence_bleu(references, candidate_tokens, smoothing_function=smoother)

# ROUGE-1 score calculation
def calculate_rouge_1(reference_tokens, candidate_tokens):
    """Compute unigram-overlap (ROUGE-1) precision, recall and F1.

    Args:
        reference_tokens: Token list of the reference text.
        candidate_tokens: Token list of the candidate text.

    Returns:
        A ``(precision, recall, f1_score)`` tuple. Each component is 0.0
        when its denominator would be zero.
    """
    # Multiset intersection keeps, per token, the smaller of the two counts.
    shared = Counter(reference_tokens) & Counter(candidate_tokens)
    overlap = sum(shared.values())

    candidate_size = len(candidate_tokens)
    reference_size = len(reference_tokens)

    precision = 0.0 if candidate_size <= 0 else overlap / candidate_size
    recall = 0.0 if reference_size <= 0 else overlap / reference_size

    total = precision + recall
    if total > 0:
        f1_score = 2 * precision * recall / total
    else:
        f1_score = 0.0

    return precision, recall, f1_score

# ROUGE-L score calculation
def calculate_rouge_l(reference_tokens, candidate_tokens):
    """Compute ROUGE-L precision, recall and F1 from the longest common subsequence.

    Args:
        reference_tokens: Token sequence of the reference text.
        candidate_tokens: Token sequence of the candidate text.

    Returns:
        A ``(precision, recall, f1_score)`` tuple. Each component is 0.0
        when its denominator would be zero.
    """
    def lcs(X, Y):
        """Return the length of the longest common subsequence of X and Y."""
        rows, cols = len(X), len(Y)
        # table[i][j] holds the LCS length of X[:i] and Y[:j]; row 0 and
        # column 0 stay at 0 (empty prefix).
        table = [[0] * (cols + 1) for _ in range(rows + 1)]
        for i, x_item in enumerate(X, start=1):
            for j, y_item in enumerate(Y, start=1):
                if x_item == y_item:
                    table[i][j] = table[i - 1][j - 1] + 1
                else:
                    table[i][j] = max(table[i - 1][j], table[i][j - 1])
        return table[rows][cols]

    common = lcs(reference_tokens, candidate_tokens)
    candidate_size = len(candidate_tokens)
    reference_size = len(reference_tokens)

    precision = common / candidate_size if candidate_size > 0 else 0.0
    recall = common / reference_size if reference_size > 0 else 0.0

    total = precision + recall
    if total > 0:
        f1_score = 2 * precision * recall / total
    else:
        f1_score = 0.0

    return precision, recall, f1_score

# SECURITY: never commit an access token to a notebook. The token is read from
# the HF_TOKEN environment variable instead; when it is unset, None is passed
# and the Hub falls back to any cached login or anonymous access.
# NOTE(review): the token that was previously hard-coded here should be
# revoked in the Hugging Face account settings, since it has been published.
import os

huggingface_token = os.environ.get("HF_TOKEN")

# Load the model and tokenizer.
# `token` replaces the deprecated `use_auth_token` keyword.
model = AutoModelForCausalLM.from_pretrained(
    "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    token=huggingface_token
)
tokenizer = AutoTokenizer.from_pretrained(
    "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
    token=huggingface_token
)

#=========================================================================
# Initialize the ROUGE scorer and the BLEU smoothing function
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
smoothing_function = SmoothingFunction().method1

# URL of the JSONL file on GitHub
url = "https://raw.githubusercontent.com/beefed-up-geek/HCLT-KACL2024/main/Taeyoon_notebooks/240830_final_data.jsonl"

# Download the JSONL file.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page (e.g. 404) from being silently
# treated as dataset lines.
response = requests.get(url, timeout=30)
response.raise_for_status()
lines = response.text.strip().split('\n')

# Extract only the assistant's final reply from decoded model output
def extract_last_response(input_text):
    """Return the text of the last assistant turn in *input_text*.

    The reply is everything after the last ``[|assistant|]`` marker. A
    trailing ``[|endofturn|]`` marker is removed only when it is actually
    present, so a reply that was cut off before the end-of-turn token (for
    example by the generation length limit) is not truncated.

    Args:
        input_text: Decoded conversation text, including special-token markers.

    Returns:
        The stripped reply text, or *input_text* unchanged when it contains
        no ``[|assistant|]`` marker.
    """
    assistant_tag = '[|assistant|]'
    end_tag = '[|endofturn|]'

    start_index = input_text.rfind(assistant_tag)
    if start_index == -1:
        return input_text

    # Strip first so whitespace after the end marker does not hide it.
    reply = input_text[start_index + len(assistant_tag):].strip()
    if reply.endswith(end_tag):
        reply = reply[:-len(end_tag)].strip()
    return reply

# Multi-turn chat helper
def chat_with_ai(user_inputs, print_all=False):
    """Run a multi-turn conversation with the model and return the last message.

    Each non-empty entry of *user_inputs* is sent as a user turn, and the
    model's reply is appended to the history before the next turn.
    Processing stops at the first empty string.

    Args:
        user_inputs: Iterable of user messages, in order. An empty string
            ends the conversation early.
        print_all: When True, print every message of the conversation
            (including the system prompt) after the last turn.

    Returns:
        The content of the last message in the history. This is the final
        assistant reply, or the system prompt if no user turn was processed.
    """
    messages = [
        {"role": "system", "content": "You are EXAONE model from LG AI Research, a helpful assistant."}
    ]

    for user_input in user_inputs:
        # An empty string is the caller's "no more turns" sentinel.
        if user_input == "":
            break

        # Add the user turn to the history
        messages.append({"role": "user", "content": user_input})

        # Apply the chat template and tokenize the whole history
        input_ids = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        )

        # Generate the reply. The inputs are moved to the model's own device
        # rather than a hard-coded "cuda", so this follows whatever placement
        # device_map="auto" chose.
        output = model.generate(
            input_ids.to(model.device),
            eos_token_id=tokenizer.eos_token_id,
            max_new_tokens=512
        )

        # The decoded output contains the prompt as well; keep only the
        # final assistant reply.
        ai_response = tokenizer.decode(output[0])
        ai_response = extract_last_response(ai_response)

        # Add the assistant reply to the history
        messages.append({"role": "assistant", "content": ai_response})

    # Optionally print the whole conversation
    if print_all:
        for message in messages:
            role = message["role"].capitalize()
            print(f"{role}: {message['content']}\n")

    # Return the last message
    return messages[-1]['content']

Collecting konlpy
  Downloading konlpy-0.6.0-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting JPype1>=0.7.0 (from konlpy)
  Downloading JPype1-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Downloading konlpy-0.6.0-py2.py3-none-any.whl (19.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.4/19.4 MB[0m [31m85.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading JPype1-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (488 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m488.6/488.6 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=93fc0579977505598bb96d5f4d13dac241f7f

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

configuration_exaone.py:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct:
- configuration_exaone.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_exaone.py:   0%|          | 0.00/81.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct:
- modeling_exaone.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/23.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/7 [00:00<?, ?it/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00007-of-00007.safetensors:   0%|          | 0.00/1.68G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/70.7k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.93M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.22M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/563 [00:00<?, ?B/s]

## 1. 조건문 코드 예제

In [None]:
# Few-shot prompt asking the model to answer 1 for a fruit and 0 otherwise.
# The instruction sentence must agree with the examples below it (fruit -> 1,
# non-fruit -> 0), and the real-input line must use the same <사용자> tag as
# the examples.
prompt = '''만약 사용자가 입력한 음식이 과일이면 1을 아니면 0을 출력해줘. 다른건 말하지말고 1과 0만 출력해줘.
아래는 사용자와의 대화 예시야
  <사용자> 자동차
  <AI> 0
  <사용자> 사과
  <AI> 1
  <사용자> 쌀밥
  <AI> 0
  <사용자> 딸기
  <AI> 1


아래는 실제 사용자 입력이야
<사용자> {}
<AI>'''.format("망고")
print(prompt)
# The empty strings after the prompt end the conversation after one turn.
print(chat_with_ai([prompt,"","","",""]))

만약 사용자가 입력한 음식이 과일이면 0을 아니면 1을 출력해줘. 다른건 말하지말고 1과 0만 출력해줘. 
아래는 사용자와의 대화 예시야
  <사용자> 자동차
  <AI> 0
  <사용자> 사과
  <AI> 1
  <사용자> 쌀밥
  <AI> 0
  <사용자> 딸기
  <AI> 1


아래는 실제 사용자 입력이야
<사용장> 망고
<AI>
1


## 2. 위와 같은 원리로 과일 탐지 함수를 만든 것

In [None]:
def is_fruit(fruit_name):
  """Ask the model whether *fruit_name* is a fruit.

  A few-shot prompt tells the model to answer "1" for a fruit and "0"
  otherwise. The instruction sentence agrees with the examples (fruit -> 1),
  and the real-input line uses the same <사용자> tag as the examples.

  Args:
      fruit_name: Name of the item to classify.

  Returns:
      True when the first character of the model's reply is "1", else False.
  """
  prompt = '''만약 사용자가 입력한 음식이 과일이면 1을 아니면 0을 출력해줘. 다른건 말하지말고 1과 0만 출력해줘.
  아래는 사용자와의 대화 예시야
  <사용자> 자동차
  <AI> 0
  <사용자> 사과
  <AI> 1
  <사용자> 쌀밥
  <AI> 0
  <사용자> 딸기
  <AI> 1

  아래는 실제 사용자 입력이야
  <사용자> {}
  <AI>'''.format(fruit_name)
  # Only the first character is inspected, so extra text after the digit is
  # ignored.
  result = chat_with_ai([prompt,"","","",""])[:1]
  return result == "1"

# Spot-check the classifier on one fruit and two non-fruits.
for question, item in (
    ("망고는 과일일까?: ", "망고"),
    ("단팥빵은 과일일까?: ", "단팥빵"),
    ("자동차는 과일일까?: ", "자동차"),
):
    print(question, is_fruit(item))

망고는 과일일까?:  True
단팥빵은 과일일까?:  False
자동차는 과일일까?:  False
