In [1]:
# !pip install gluonnlp pandas tqdm   
# !pip install mxnet
# !pip install sentencepiece==0.1.91
# !pip install transformers==4.8.2
# !pip install flask-ngrok

# !pip install 'git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf'
# !pip install git+https://git@github.com/SKTBrain/KoBERT.git@master

In [1]:
from flask_ngrok import run_with_ngrok
from flask import Flask, request, render_template
import torch
import pickle
import numpy as np

In [2]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm, tqdm_notebook
import pandas as pd

#transformers
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup
from transformers import BertModel

# Device used for inference. The original note here read "when using a GPU",
# but the notebook pins everything to the CPU.
device = torch.device('cpu')

In [3]:
from kobert.pytorch_kobert import get_pytorch_kobert_model
from kobert_tokenizer import KoBERTTokenizer
# Tokenizer is loaded via from_pretrained('skt/kobert-base-v1'); the BERT
# encoder and its vocabulary are returned together by the kobert helper.
# NOTE(review): the tokenizer and the vocab come from two different packages —
# presumably they share the same subword vocabulary; confirm.
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel, vocab = get_pytorch_kobert_model() 

using cached model. /Users/krc/year_dream/Project5/.cache/kobert_v1.zip
using cached model. /Users/krc/year_dream/Project5/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [4]:
class BERTDataset(Dataset):
    """Dataset pairing BERT-transformed sentences with integer labels.

    Every row of ``dataset`` is indexed with ``sent_idx`` to obtain the text
    and with ``label_idx`` to obtain the label. Texts are run through
    ``nlp.data.BERTSentenceTransform`` once, at construction time; labels are
    converted to ``np.int32``.

    Args:
        dataset: Iterable of indexable rows.
        sent_idx: Position of the sentence inside each row.
        label_idx: Position of the label inside each row.
        bert_tokenizer: Tokenizer handed to the sentence transform.
        vocab: Vocabulary handed to the sentence transform.
        max_len: Forwarded as ``max_seq_length``.
        pad: Forwarded as ``pad``.
        pair: Forwarded as ``pair``.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, vocab, max_len,
                 pad, pair):
        sentence_transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer,
            max_seq_length=max_len,
            vocab=vocab,
            pad=pad,
            pair=pair,
        )

        # Two separate passes over `dataset`: first the sentences, then the labels.
        self.sentences = []
        for row in dataset:
            self.sentences.append(sentence_transform([row[sent_idx]]))

        self.labels = []
        for row in dataset:
            self.labels.append(np.int32(row[label_idx]))

    def __getitem__(self, i):
        """Return the transformed sentence tuple with its label appended."""
        features = self.sentences[i]
        label = self.labels[i]
        return features + (label,)

    def __len__(self):
        """Return the number of stored labels."""
        return len(self.labels)

In [5]:
class BERTClassifier(nn.Module):
    """Linear classification head on top of a BERT encoder's pooled output.

    Args:
        bert: Encoder module. It is called with ``input_ids``,
            ``token_type_ids``, ``attention_mask`` and ``return_dict=False``
            and must return a pair whose second element is the pooled output.
        hidden_size: Width of the pooled output fed to the linear layer.
        num_classes: Number of output classes (adjust to the task).
        dr_rate: Dropout probability applied to the pooled output. ``None``
            or ``0`` disables dropout.
        params: Unused; kept so existing callers keep working.
    """

    def __init__(self,
                 bert,
                 hidden_size=768,
                 num_classes=4,   # adjust the number of classes here
                 dr_rate=None,
                 params=None):
        super().__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size, num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Build a float mask marking the valid prefix of every row.

        Row ``i`` of the result has ones in its first ``valid_length[i]``
        positions and zeros elsewhere; the result has the shape of
        ``token_ids``.
        """
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Return the classifier output for a batch.

        Args:
            token_ids: Token id tensor passed to the encoder as ``input_ids``.
            valid_length: Per-row count of valid tokens, used for the mask.
            segment_ids: Segment ids, cast to long and passed as
                ``token_type_ids``.
        """
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(
            input_ids=token_ids,
            token_type_ids=segment_ids.long(),
            attention_mask=attention_mask.to(token_ids.device),
            return_dict=False,
        )
        # `out` used to be bound only when dropout was enabled, so the default
        # dr_rate=None crashed with UnboundLocalError. Fall back to the pooled
        # output when there is no dropout layer.
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

In [7]:
# Rebuild the classifier around the pretrained encoder and place it on `device`,
# the same device predict() moves its input tensors to.
model = BERTClassifier(bertmodel,  dr_rate=0.5).to(device)

# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
model.load_state_dict(torch.load('./model_002.pt', map_location = device) )
# Switch to evaluation mode (disables dropout) for inference.
model.eval()

BERTClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(8002, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True

In [8]:
# Hyperparameters. Only max_len and batch_size are read by the code visible in
# this notebook (both inside predict()); the remaining values look like
# leftovers from training — confirm before removing.
max_len = 128  # forwarded to BERTDataset as the maximum sequence length
batch_size = 32  # DataLoader batch size used by predict()
warmup_ratio = 0.1
num_epochs = 100
max_grad_norm = 1
log_interval = 200
learning_rate =  5e-5

In [9]:
# Bare tokenize callable; predict() passes it to BERTDataset as the tokenizer argument.
tok=tokenizer.tokenize

In [10]:
# Class index -> rating label, in the order used by the original if/elif chain.
_RATING_LABELS = ("청소년 관람불가", "15세 관람가", "12세 관람가", "전체 관람가")


def predict(predict_sentence):
    """Return the predicted rating label for a single sentence.

    The sentence is wrapped in a one-row dataset, run through the module-level
    ``model``, and the index of the largest output value is mapped to its
    rating label.

    Args:
        predict_sentence: Text to classify.

    Returns:
        One of the four rating label strings in ``_RATING_LABELS``.

    Raises:
        IndexError: If the winning class index has no label.
    """
    # BERTDataset expects (sentence, label) rows; '0' is a placeholder label
    # that is never used for the prediction.
    rows = [[predict_sentence, '0']]
    dataset = BERTDataset(rows, 0, 1, tok, vocab, max_len, True, False)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=0)

    model.eval()

    # Exactly one row was queued, so the loader yields a single batch.
    token_ids, valid_length, segment_ids, _ = next(iter(loader))
    token_ids = token_ids.long().to(device)
    segment_ids = segment_ids.long().to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(token_ids, valid_length, segment_ids)

    best_class = int(np.argmax(logits[0].cpu().numpy()))
    return _RATING_LABELS[best_class]

In [11]:
import socket

# NOTE(review): HOST and PORT are not referenced by any code visible in this
# notebook (the recorded output shows Flask serving on 127.0.0.1:5000) —
# confirm whether they are still needed.
HOST = '127.0.0.1'  # Standard loopback interface address (localhost)
PORT = 65432  

In [15]:
# app = Flask(__name__, template_folder='templates')

# run_with_ngrok(app)

# model = BERTClassifier(bertmodel,  dr_rate=0.5).to(device)
# model.load_state_dict(torch.load( './model/model_002.pt' , map_location=device))
# model.eval()

# @app.route('/')
# def home():
#     return render_template('index.html')

# @app.route('/getprediction' , methods = ['POST'])
# def getprediction():
#     input = request.form.values()
#     answer = predict(input)
#     return render_template('index.html' , output = 'Predicted Movie rate is : {}'.format(answer))

# app.run()

In [16]:
# Flask application; templates are looked up in the 'templates' folder.
app = Flask(__name__, template_folder='templates')

# NOTE(review): the traceback recorded under this cell shows the flask_ngrok
# start_ngrok thread raising from subprocess.Popen while launching ngrok —
# confirm the ngrok executable is available in this environment.
run_with_ngrok(app)

@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the input form and, on POST, the predicted rating.

    A POST whose 'text' field is missing or blank is answered with the plain
    form, the same as a GET, instead of being passed to predict().

    Returns:
        The rendered 'index.html' template, with ``rating`` set when a
        prediction was made.
    """
    if request.method == 'POST':
        data = request.form.get('text')
        # form.get() returns None when the field is absent; keep missing or
        # whitespace-only input out of the model pipeline.
        if data and data.strip():
            return render_template('index.html', rating=predict(data))

    return render_template('index.html')


# Start the development server. The recorded output shows the app being served
# as '__main__', so this guard is satisfied when the cell runs in the notebook.
if __name__ == '__main__':
	app.run()

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
Exception in thread Thread-27:
Traceback (most recent call last):
  File "/Users/krc/opt/anaconda3/envs/dl/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/Users/krc/opt/anaconda3/envs/dl/lib/python3.8/threading.py", line 1254, in run
    self.function(*self.args, **self.kwargs)
  File "/Users/krc/opt/anaconda3/envs/dl/lib/python3.8/site-packages/flask_ngrok.py", line 70, in start_ngrok
    ngrok_address = _run_ngrok()
  File "/Users/krc/opt/anaconda3/envs/dl/lib/python3.8/site-packages/flask_ngrok.py", line 31, in _run_ngrok
    ngrok = subprocess.Popen([executable, 'http', '5000'])
  File "/Users/krc/opt/anaconda3/envs/dl/lib/python3.8/subprocess.py", line 858, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "/Users/krc/opt/anaconda3/envs/dl/lib/python3.8/subprocess.py", line 1704, in _execute_child
    raise child_exception_type(errno_num, err_msg, err_fi