In [None]:
!pip install dill
!pip install minio
!pip install transformers

Collecting minio
  Using cached minio-7.1.12-py3-none-any.whl (76 kB)
Installing collected packages: minio
Successfully installed minio-7.1.12
Collecting transformers
  Using cached transformers-4.23.1-py3-none-any.whl (5.3 MB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Using cached tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
Collecting huggingface-hub<1.0,>=0.10.0
  Using cached huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
Collecting regex!=2019.12.17
  Using cached regex-2022.9.13-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (757 kB)
Installing collected packages: tokenizers, regex, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 regex-2022.9.13 tokenizers-0.13.1 transformers-4.23.1


In [None]:
import dill
from minio import Minio
import io

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import datetime as pydatetime
import numpy as np

from transformers import BertModel

#transformers
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [None]:
class BERTDataset(Dataset):
    """Dataset that pairs BERT-transformed sentences with integer labels.

    Each record of ``dataset`` is indexed with ``sent_idx`` to obtain the raw
    sentence and with ``label_idx`` to obtain its label.  Every sentence is
    transformed eagerly when the dataset is constructed.

    NOTE(review): ``nlp`` is not imported in the visible part of this
    notebook; it presumably refers to ``gluonnlp`` -- confirm before
    instantiating this class.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, vocab,
                 max_len, pad, pair):
        """Transform all sentences and convert all labels to ``np.int32``.

        Args:
            dataset: Iterable of indexable records; it is iterated twice.
            sent_idx: Index of the sentence inside a record.
            label_idx: Index of the label inside a record.
            bert_tokenizer: Tokenizer handed to ``BERTSentenceTransform``.
            vocab: Vocabulary handed to ``BERTSentenceTransform``.
            max_len: Forwarded as ``max_seq_length``.
            pad: Forwarded as ``pad``.
            pair: Forwarded as ``pair``.
        """
        to_bert_input = nlp.data.BERTSentenceTransform(
            bert_tokenizer,
            max_seq_length=max_len,
            vocab=vocab,
            pad=pad,
            pair=pair,
        )

        # First pass: each sentence is wrapped in a one-element list before
        # being handed to the transform.
        encoded = []
        for record in dataset:
            encoded.append(to_bert_input([record[sent_idx]]))
        self.sentences = encoded

        # Second pass: labels are stored as 32-bit integers.
        targets = []
        for record in dataset:
            targets.append(np.int32(record[label_idx]))
        self.labels = targets

    def __getitem__(self, i):
        """Return the transformed sentence at ``i`` with its label appended."""
        features = self.sentences[i]
        label = self.labels[i]
        return features + (label,)

    def __len__(self):
        """Return the number of stored labels."""
        return len(self.labels)

class BERTClassifier(nn.Module):
    """Linear classification head on top of a BERT encoder's pooled output.

    The wrapped ``bert`` module must return a 2-tuple whose second element is
    the pooled representation (e.g. a ``BertModel`` loaded with
    ``return_dict=False``).
    """

    def __init__(self,
                 bert,
                 hidden_size=768,
                 num_classes=2,   # adjust to the number of target classes
                 dr_rate=None,
                 params=None):
        """Build the classifier head.

        Args:
            bert: Encoder module called in ``forward``.
            hidden_size: Input width of the linear classifier.
            num_classes: Output width of the linear classifier.
            dr_rate: Dropout probability applied to the pooled output; when
                falsy (``None`` or ``0``) no dropout layer is created.
            params: Unused; kept so existing callers keep working.
        """
        super().__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size, num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Return a float mask shaped like ``token_ids``.

        Row ``i`` holds 1.0 in its first ``valid_length[i]`` positions and 0.0
        elsewhere.  ``token_ids`` is presumably ``(batch, seq_len)`` --
        TODO confirm against the caller.
        """
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Encode the batch and return the classifier output for the pooled vector."""
        # The mask is built with zeros_like(token_ids), so it already lives on
        # the same device as token_ids and is already float.
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids=token_ids,
                              token_type_ids=segment_ids.long(),
                              attention_mask=attention_mask)

        # Without a dropout rate the pooled output goes straight to the
        # classifier.  Previously `out` was only assigned inside the dropout
        # branch, so the default dr_rate=None raised UnboundLocalError.
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

def get_kobert_model(model_path, vocab_file, ctx="cpu"):
    """Load a pretrained BERT encoder together with its SentencePiece vocabulary.

    Args:
        model_path: Location passed to ``BertModel.from_pretrained``.
        vocab_file: SentencePiece file passed to
            ``BERTVocab.from_sentencepiece``.
        ctx: Device string given to ``torch.device`` (default ``"cpu"``).

    Returns:
        A ``(model, vocab)`` tuple; the model has been moved to ``ctx`` and
        switched to evaluation mode.

    NOTE(review): ``nlp`` is not imported in the visible part of this
    notebook; it presumably refers to ``gluonnlp`` -- confirm before calling.
    """
    # return_dict=False makes the encoder return plain tuples.
    encoder = BertModel.from_pretrained(model_path, return_dict=False)

    target_device = torch.device(ctx)
    encoder.to(target_device)
    encoder.eval()

    vocabulary = nlp.vocab.BERTVocab.from_sentencepiece(
        vocab_file, padding_token='[PAD]')

    return encoder, vocabulary

In [None]:
def open_pkl_model(path):
    """Load and return the object stored in the dill pickle at ``path``.

    NOTE(review): unpickling can execute arbitrary code embedded in the file;
    only use this on pickle files from a trusted source.
    """
    with open(path, 'rb') as pkl_file:
        return dill.load(pkl_file)
    
def save_model(buffer, end_point, port, access_key, secret_key, model_info,
               bucket_name='models', secure=False):
    """Upload the entire contents of ``buffer`` to a MinIO bucket.

    Args:
        buffer: Seekable binary stream holding the serialized model.
        end_point: Host of the MinIO server.
        port: Port of the MinIO server; joined to ``end_point`` with ``:``.
        access_key: MinIO access key.
        secret_key: MinIO secret key.
        model_info: Mapping whose ``'file_name'`` entry is the object name.
        bucket_name: Destination bucket (default ``'models'``).
        secure: Forwarded to the ``Minio`` client's ``secure`` argument
            (default ``False``).

    Raises:
        Any exception raised by the MinIO client propagates unchanged.
    """
    print("[save hook] | 모델을 저장합니다.")

    minioClient = Minio(f'{end_point}:{port}',
                        access_key=access_key,
                        secret_key=secret_key,
                        secure=secure)

    # Measure the payload by seeking to the end rather than trusting the
    # current position: a caller that already rewound the buffer would
    # otherwise report a length of 0.
    buffer.seek(0, io.SEEK_END)
    buffer_len = buffer.tell()
    buffer.seek(0)

    minioClient.put_object(bucket_name, model_info['file_name'],
                           data=buffer, length=buffer_len)

    print("[save hook] | 모델이 저장되었습니다.")

In [None]:
import os

# Identifier of the pickled model to export; it is part of both file names.
timestamp = 1640757257
model = open_pkl_model(f"./models/model-{timestamp}.pkl")

# Serialize only the weights (state_dict) into an in-memory buffer.
buffer = io.BytesIO()
torch.save(model.state_dict(), buffer)

# Connection settings are read from the environment so that real credentials
# never have to be typed into (and saved with) the notebook.  The fallbacks
# are the placeholder values this cell used before.
save_model(
    buffer,
    os.environ.get("MINIO_ENDPOINT", "ip"),
    int(os.environ.get("MINIO_PORT", "9000")),
    os.environ.get("MINIO_ACCESS_KEY", "admin"),
    os.environ.get("MINIO_SECRET_KEY", "pass"),
    {"file_name": f'kobert/model-{timestamp}.pt'},
)

[save hook] | 모델을 저장합니다.
[save hook] | 모델이 저장되었습니다.
