In [1]:
# 환경설정

!pip3 install transformers
!pip3 install torch
!pip3 install konlpy
!pip3 install jamo
!pip3 install sentence-transformers
!pip3 install flask
!pip3 install flask-ngrok

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 5.3 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 35.2 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 35.3 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 44.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 6.4 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting 

In [2]:
# 시험데이터셋 수집

!wget https://raw.githubusercontent.com/aifactory-team/hanryubank/main/warehouse.zip
!unzip -q warehouse.zip

--2021-11-23 14:57:52--  https://raw.githubusercontent.com/aifactory-team/hanryubank/main/warehouse.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15349 (15K) [application/zip]
Saving to: ‘warehouse.zip’


2021-11-23 14:57:53 (55.5 MB/s) - ‘warehouse.zip’ saved [15349/15349]



In [3]:
# Module setup: imports, pretrained models, and helpers shared by the cells below

from transformers import ElectraTokenizer, ElectraForQuestionAnswering, pipeline
from sentence_transformers import SentenceTransformer, util
import numpy as np
import pandas as pd

# Question-answering pipeline built from the pretrained KoELECTRA checkpoint named below
tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-small-v2-distilled-korquad-384")
model = ElectraForQuestionAnswering.from_pretrained("monologg/koelectra-small-v2-distilled-korquad-384")
qa = pipeline("question-answering", tokenizer=tokenizer, model=model)
# Sentence-embedding model used to compare a user query with the stored questions
qs = SentenceTransformer('bespin-global/klue-korsts-roberta-base-sentence-embedding') # qs: question similarity

# Okt tagger; modify_answer uses it to part-of-speech tag the answer text
import konlpy
okt = konlpy.tag.Okt()

# h2j / j2hcj are used by modify_answer to inspect the last jamo of the answer
from jamo import h2j, j2hcj

# Hangul vowel jamo; modify_answer checks the answer's final jamo against this list
vowels = ['ㅣ', 'ㅔ', 'ㅐ', 'ㅏ', 'ㅜ', 'ㅗ', 'ㅓ', 'ㅡ', 'ㅟ', 'ㅚ', 'ㅑ', 'ㅕ', 'ㅛ', 'ㅠ', 'ㅒ', 'ㅖ', 'ㅘ', 'ㅝ', 'ㅙ', 'ㅞ', 'ㅢ']

Downloading:   0%|          | 0.00/249k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/472 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/52.3M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.11k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/744 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/443M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/156 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/495k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/587 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/248k [00:00<?, ?B/s]



In [4]:
# Find the stored question most similar to the input query and return it
# together with the context passage stored alongside it

def get_context_and_question(query:str, ts:float):
    """Return the context passage and stored question that best match ``query``.

    Args:
        query: User question text.
        ts: Similarity threshold supplied by callers. It is accepted for
            interface compatibility but is not applied: the best-scoring
            row is always returned, whatever its score.

    Returns:
        A ``(context, question)`` tuple taken from the row of
        ``dataset_test.csv`` whose "Q1 (질문1)" entry has the highest cosine
        similarity to ``query``. On ties the earliest row wins.

    Raises:
        ValueError: If the selected slice of the dataset has no rows.
    """
    # Load the question/context table (first 24 rows, first 8 columns only)
    df = pd.read_csv('dataset_test.csv')
    df = df.iloc[:24, :8]

    db_questions = df['Q1 (질문1)'].tolist()
    if not db_questions:
        raise ValueError("dataset_test.csv contains no questions to match against")

    # Encode the query once, and the stored questions in a single batch,
    # instead of re-encoding the query for every stored question.
    query_embedding = qs.encode(query, convert_to_tensor=True)
    question_embeddings = qs.encode(db_questions, convert_to_tensor=True)

    # Row 0 of the score matrix holds query-vs-each-stored-question similarity
    scores = util.pytorch_cos_sim(query_embedding, question_embeddings)[0]
    best_position = int(scores.argmax())

    # best_position is a position, not an index label, so look it up with iloc
    best_row = df.iloc[best_position]
    context = best_row["Context (지문)"]
    question = best_row["Q1 (질문1)"]

    return context, question

In [5]:
# Run the question-answering pipeline on a context passage and a question

def get_answer_with_context(context, question):
    """Ask the ``qa`` pipeline for the answer to ``question`` within ``context``.

    Returns:
        A tuple ``(answer, start, end, score)`` read from the "answer",
        "start", "end" and "score" entries of the pipeline result.
    """
    prediction = qa({"question": question, "context": context})
    return (
        prediction["answer"],
        prediction["start"],
        prediction["end"],
        prediction["score"],
    )

In [6]:
# Rewrite the ending of an answer for the service reply

def modify_answer(before_answer):
    """Strip particles and endings from an answer and append "(이)라네".

    The text is tagged with ``okt.pos(..., stem=True)``; tokens tagged
    'Josa', 'Eomi' or 'Punctuation' are dropped and the rest are joined
    without spaces.

    Args:
        before_answer: Answer text to rewrite.

    Returns:
        The joined tokens followed by '라네' when the final jamo of the last
        token is in ``vowels``, otherwise followed by '이라네'. If no token
        survives the filter (for example an empty or punctuation-only
        answer), ``before_answer`` is returned unchanged.
    """
    kept_tokens = [
        token
        for token, tag in okt.pos(before_answer, stem=True)
        if tag not in ('Josa', 'Eomi', 'Punctuation')
    ]

    # Nothing left to attach an ending to; avoid indexing an empty list
    if not kept_tokens:
        return before_answer

    # Decompose the last token into jamo and look at its final one
    last_jamo = j2hcj(h2j(kept_tokens[-1]))[-1]
    ending = '라네' if last_jamo in vowels else '이라네'

    return ''.join(kept_tokens) + ending

In [7]:
# Answer a query end to end and print each stage

def print_answer(query):
    """Print the matched question, the raw prediction and the service reply for ``query``."""
    # Look up the closest stored question and its context passage
    matched_context, matched_question = get_context_and_question(query, 0.5)

    # Extract the answer span from that passage
    prediction = get_answer_with_context(matched_context, matched_question)
    raw_answer = prediction[0]

    # Report
    print("query: " + query)
    print("question: " + matched_question)
    print("predict: {0} ({1},{2},{3})".format(*prediction))
    print("service: " + modify_answer(raw_answer))

In [8]:
# Sample queries: several phrasings of each question, run in order
demo_queries = [
    '훈민정음을 창제한 이유',
    '훈민정음을 왜 창제했지?',
    '훈민정음을 만든 이유는?',
    '세종대왕이 훈민정음을 창제한 이유는?',
    '세종대왕이 훈민정음을 왜 창제했을까?',
    '세종대왕은 왜 훈민정음을 창제했지?',

    '조세에 관하여 무엇을 시행했는가',
    '세종대왕이 조세와 관련해서 뭘 시행했지?',
    '조세랑 관련해서 뭘 했지?',
]

for demo_query in demo_queries:
    print_answer(demo_query)

query: 훈민정음을 창제한 이유
question: 훈민정음을 창제한 이유
predict: 애민 정신을 (320,326,0.6614944338798523)
service: 애민정신이라네
query: 훈민정음을 왜 창제했지?
question: 훈민정음을 창제한 이유
predict: 애민 정신을 (320,326,0.6614944338798523)
service: 애민정신이라네
query: 훈민정음을 만든 이유는?
question: 훈민정음을 창제한 이유
predict: 애민 정신을 (320,326,0.6614944338798523)
service: 애민정신이라네
query: 세종대왕이 훈민정음을 창제한 이유는?
question: 훈민정음을 창제한 이유
predict: 애민 정신을 (320,326,0.6614944338798523)
service: 애민정신이라네
query: 세종대왕이 훈민정음을 왜 창제했을까?
question: 훈민정음을 창제한 이유
predict: 애민 정신을 (320,326,0.6614944338798523)
service: 애민정신이라네
query: 세종대왕은 왜 훈민정음을 창제했지?
question: 훈민정음을 창제한 이유
predict: 애민 정신을 (320,326,0.6614944338798523)
service: 애민정신이라네
query: 조세에 관하여 무엇을 시행했는가
question: 조세에 관하여 무엇을 시행했는가
predict: 여론 조사를 (30,36,0.9997721314430237)
service: 여론조사라네
query: 세종대왕이 조세와 관련해서 뭘 시행했지?
question: 조세에 관하여 무엇을 시행했는가
predict: 여론 조사를 (30,36,0.9997721314430237)
service: 여론조사라네
query: 조세랑 관련해서 뭘 했지?
question: 조세에 관하여 무엇을 시행했는가
predict: 여론 조사를 (30,36,0.9997721314430237)
service: 여론조사라네


In [9]:
from flask import *
from flask_ngrok import run_with_ngrok
import flask

# Flask application object that the route decorators below register against
app = Flask(__name__)

@app.route('/')
@app.route('/index')
def index():
    """Serve the page rendered from the ``index.html`` template."""
    page = flask.render_template('index.html')
    return page

@app.route('/hello')
def hello():
    """Return a fixed greeting string."""
    greeting = 'Hello World'
    return greeting

@app.route('/answer', methods=['GET', 'POST'])
def answer():
    """Answer the query posted from the form and render ``answer.html``.

    A POST reads the ``query`` form field, finds the closest stored question
    and its context passage, extracts the answer and rewrites its ending,
    then passes every intermediate value to the ``answer.html`` template.

    Any other method (the route also accepts GET) has no query to answer,
    so the client is redirected to the index page instead of reaching the
    template with unbound variables.
    """
    if request.method != 'POST':
        # NOTE(review): presumably index.html hosts the query form - confirm
        return flask.redirect(flask.url_for('index'))

    query = str(request.form['query'])

    # Closest stored question and its context passage
    context, question = get_context_and_question(query, 0.5)

    # Answer span extracted from that passage
    db_answer, start, end, score = get_answer_with_context(context, question)

    service_answer = modify_answer(db_answer)

    return flask.render_template('answer.html', service_answer = service_answer, db_answer = db_answer, score=score, start=start, end=end, query=query, question=question, context=context)


In [10]:
# Wrap the app with flask-ngrok, then start the Flask server
run_with_ngrok(app)
app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
Exception in thread _colab_inspector_thread:
Traceback (most recent call last):
  File "/usr/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.7/dist-packages/google/colab/_debugpy.py", line 59, in inspector_thread
    _variable_inspector.run(shell, time)
  File "/usr/local/lib/python3.7/dist-packages/google/colab/_variable_inspector.py", line 28, in run
    globals().clear()
TypeError: 'module' object is not callable



 * Running on http://359f-34-66-5-239.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 14:59:37] "[37mGET / HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 14:59:37] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 14:59:49] "[37mPOST /answer HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 15:00:01] "[37mGET /index HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 15:00:25] "[37mPOST /answer HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 15:00:33] "[37mGET /index HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 15:01:03] "[37mPOST /answer HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 15:01:31] "[37mPOST /answer HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Nov/2021 15:02:22] "[37mGET /index HTTP/1.1[0m" 200 -
