# 맞춤법 검사기

240618

- [ref](https://github.com/FareedKhan-dev/AI-Spell-Checker/blob/main/streamlit_app.py)

In [1]:
## import
# API 키를 환경변수로 관리하기 위한 설정 파일
from dotenv import load_dotenv
import os

# API 키 정보 로드
load_dotenv()

print('API KEY(OPENAI): %s...'%os.environ.get("OPENAI_API_KEY")[:-30])
print('API KEY(GEMINI): %s...'%os.environ.get("GOOGLE_API_KEY")[:-30])
print('API KEY(CLAUDE): %s...'%os.environ.get("ANTHROPIC_API_KEY")[:-30])

## 화면 비율 조정
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

API KEY(OPENAI): sk-proj-d36MFnJ08dMAz6YqCP...
API KEY(GEMINI): AIzaSyC_3...
API KEY(CLAUDE): sk-ant-api03-fJhfiyWgdLkbw65N5vfvPNzEkQv6EZf9Lew6BMCJm2uQ7DOOsZwfsNZ9VGKQBF2DJ...


  from IPython.core.display import display, HTML


In [2]:
# import
import os
import re
import time
import random
random.seed(0)
import pandas as pd
from IPython.display import Markdown, display
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI  # for gemini
from langchain_anthropic import ChatAnthropic  # for claude
from pprint import pprint
import ast

In [3]:
## CONFIG
config_verbose = False
# config_model_name="gpt-3.5-turbo"  # 12s, gpt-3.5-turbo-0125	$0.50 / 1M tokens	$1.50 / 1M tokens
config_model_name="gpt-4-turbo-preview"  # 30s, gpt-4-0125-preview	$10.00 / 1M tokens	$30.00 / 1M tokens
# config_model_name="gpt-4o-2024-05-13"  # 30s, gpt-4-0125-preview	$10.00 / 1M tokens	$30.00 / 1M tokens
# config_model_name="gemini-pro"  # 8s, GOOGLE GEMINI 모델
# config_model_name="claude-3-opus-20240229"  # 46s, claude 3 모델

# config_temperature=0.0  # 다양한 댓글을 생성하기 위해 큰 숫자를 줄까?
config_temperature=0.3  # 다양한 댓글을 생성하기 위해 큰 숫자를 줄까?
# config_temperature=0.6  # 다양한 댓글을 생성하기 위해 큰 숫자를 줄까?
# config_temperature=0.9  # 다양한 댓글을 생성하기 위해 큰 숫자를 줄까?


## Model Setting ###################################################
# setting model
if config_model_name=="gemini-pro":
    llm_model = ChatGoogleGenerativeAI(model=config_model_name,
                                                          temperature=config_temperature)
elif config_model_name=="claude-3-opus-20240229":
    llm_model = ChatAnthropic(model_name=config_model_name,
                                                  temperature=config_temperature)
else:
    llm_model = ChatOpenAI(model_name=config_model_name,
                                               temperature=config_temperature)


prompt_template_spell_check = '''\
You are a spell checker.

Input: {user_input}

Find the misspellings in the input sentence and output them in dictionary format.
Just output the dictionary without any further explanation.
'''

# setting pre prompt
_prompt_template_spell_check = PromptTemplate.from_template(prompt_template_spell_check)

# make LCEL
chain_spell_checker = _prompt_template_spell_check | llm_model | StrOutputParser()


In [4]:
# # run
# input_sentence = '아버지과 방에 들어가신다'

# time_start = time.time()

# if config_verbose:
#     print(prompt_template_spell_check.format(user_input=input_sentence))


# # 입력 파라미터 예시
# input_params = {
#     "user_input": input_sentence,
# }

# # # api 콜
# result = chain_spell_checker.invoke(input_params)
# json_string = result.replace('```json\n', '').replace('\n```', '')  # 불필요한 부분 제거
# result_dictionary = json.loads(json_string)  # JSON 문자열을 Python dictionary로 변환

# sentence_output = input_sentence
# for key in list(result_dictionary.keys()):
#     value = result_dictionary[key]
#     sentence_output = sentence_output.replace(key, f'<s><span style="background-color:red">{key}</span></s>→<span style="background-color:lightgreen">{value}</span>')

# print(f'## Number of Misspelling: {len(result_dictionary)} ##')
# display(Markdown(sentence_output))
# pprint(result_dictionary)

# time_end = time.time()
# print(f"Elapsed time: {time_end - time_start:.1f} seconds.")

In [5]:
# run
def run_llm_misspelling(input_sentence: str) -> tuple:
    time_start = time.time()

    if config_verbose:
        print(prompt_template_spell_check.format(user_input=input_sentence))

    # 입력 파라미터 예시
    input_params = {
        "user_input": input_sentence,
    }

    # # api 콜
    result = chain_spell_checker.invoke(input_params)
    json_string = result.replace('```json\n', '').replace('\n```', '')  # 불필요한 부분 제거
    result_dictionary = json.loads(json_string)  # JSON 문자열을 Python dictionary로 변환

    sentence_output = input_sentence
    sentence_output_corrected = input_sentence
    for key in list(result_dictionary.keys()):
        value = result_dictionary[key]
        sentence_output = sentence_output.replace(key, f'<s><span style="background-color:red">{key}</span></s>→<span style="background-color:lightgreen">{value}</span>')
        sentence_output_corrected = sentence_output_corrected.replace(key, value)
        
    print(f'## Number of Misspelling: {len(result_dictionary)} ##')
    display(Markdown(sentence_output))
    pprint(result_dictionary)

    elapsed_time = time_start - time_start
    print(f"Elapsed time: {elapsed_time:.1f} seconds.")
    
    return result_dictionary, sentence_output, sentence_output_corrected, elapsed_time

# input_sentence = '아버지가방에들어가신다'
# input_sentence = '뭐라 씨부리 쌋노 이 쌔끼가 말이야 참말로 다가'
input_sentence = '막네동생이 뒤땅을 까다까 하다 말고 구시렁됐다.'
# input_sentence = 'What the fuck are you doingg?? boi?'

result_dictionary, sentence_output, sentence_output_corrected, elapsed_time = run_llm_misspelling(input_sentence=input_sentence)

## Number of Misspelling: 4 ##


<s><span style="background-color:red">막네동생이</span></s>→<span style="background-color:lightgreen">막내동생이</span> <s><span style="background-color:red">뒤땅을</span></s>→<span style="background-color:lightgreen">뒷담을</span> <s><span style="background-color:red">까다까</span></s>→<span style="background-color:lightgreen">까닥</span> 하다 말고 <s><span style="background-color:red">구시렁됐다</span></s>→<span style="background-color:lightgreen">구시렁거렸다</span>.

{'구시렁됐다': '구시렁거렸다', '까다까': '까닥', '뒤땅을': '뒷담을', '막네동생이': '막내동생이'}
Elapsed time: 0.0 seconds.


In [6]:
# run gradio
import gradio as gr
import json
from IPython.display import Markdown, display
from pprint import pprint
import time

interface = gr.Interface(
    fn=run_llm_misspelling,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence here...", value="막네동생이 뒤땅을 까다까 하다 말고 구시렁됐다."),
    outputs=[
        gr.JSON(label="Misspelling Dictionary"),
        gr.HTML(label="Annotated Sentence"),
        gr.Textbox(label="Corrected Sentence"),
        gr.Textbox(label="Elapsed Time")
    ],
    title="Spell Checker",
    description="Enter a sentence to check for misspellings and get corrections.",
    examples=['뭐라 씨부리 쌋노 이 쌔끼가 말이야 참말로 다가',
              '막네동생이 뒤땅을 까다까 하다 말고 구시렁됐다.',
              'What the fuck are you doingg?? boi?'],
)

interface.launch(debug=True)


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


IMPORTANT: You are using gradio version 4.21.0, however version 4.29.0 is available, please upgrade.
--------
## Number of Misspelling: 4 ##


<s><span style="background-color:red">막네동생이</span></s>→<span style="background-color:lightgreen">막내동생이</span> <s><span style="background-color:red">뒤땅을</span></s>→<span style="background-color:lightgreen">뒷담을</span> <s><span style="background-color:red">까다까</span></s>→<span style="background-color:lightgreen">까닥</span> 하다 말고 <s><span style="background-color:red">구시렁됐다</span></s>→<span style="background-color:lightgreen">구시렁거렸다</span>.

{'구시렁됐다': '구시렁거렸다', '까다까': '까닥', '뒤땅을': '뒷담을', '막네동생이': '막내동생이'}
Elapsed time: 0.0 seconds.
## Number of Misspelling: 4 ##


<s><span style="background-color:red">막네동생이</span></s>→<span style="background-color:lightgreen">막내동생이</span> <s><span style="background-color:red">뒤땅을</span></s>→<span style="background-color:lightgreen">뒷담을</span> <s><span style="background-color:red">까다까</span></s>→<span style="background-color:lightgreen">까댔다가</span> 하다 말고 <s><span style="background-color:red">구시렁됐다</span></s>→<span style="background-color:lightgreen">구시렁거렸다</span>.

{'구시렁됐다': '구시렁거렸다', '까다까': '까댔다가', '뒤땅을': '뒷담을', '막네동생이': '막내동생이'}
Elapsed time: 0.0 seconds.
## Number of Misspelling: 4 ##


<s><span style="background-color:red">뭐라</span></s>→<span style="background-color:lightgreen">뭐라고</span> <s><span style="background-color:red">씨부리</span></s>→<span style="background-color:lightgreen">시부리</span> <s><span style="background-color:red">쌋노</span></s>→<span style="background-color:lightgreen">쏘았노</span> 이 <s><span style="background-color:red">쌔끼가</span></s>→<span style="background-color:lightgreen">새끼가</span> 말이야 참말로 다가

{'뭐라': '뭐라고', '쌋노': '쏘았노', '쌔끼가': '새끼가', '씨부리': '시부리'}
Elapsed time: 0.0 seconds.
Keyboard interruption in main thread... closing server.


