In [1]:
import os
import re
import sys
import pickle

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.backend_bases import RendererBase
from scipy import signal
from scipy.io import wavfile
from PIL import Image
from transformers import BertModel, BertTokenizer
import torch
from scipy.fftpack import fft
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter

import random
import torch.optim as optim
from transformers import ElectraModel, ElectraTokenizer

AUDIO_SR = 16000
WAV_DIR = '/data/speechdb/aihub_emotion/wav/'

In [2]:
from tqdm import tqdm

In [3]:
def audio2spectrogram(filepath):
    #fig = plt.figure(figsize=(5,5))
    samplerate, test_sound  = wavfile.read(filepath, mmap=True)
    
    
    #print('samplerate',samplerate)
    _, spectrogram = log_specgram(test_sound, samplerate)
    #print(spectrogram.shape)
    #print(type(spectrogram))
    #plt.imshow(spectrogram.T, aspect='auto', origin='lower')
    return spectrogram
    
def audio2wave(filepath):
    fig = plt.figure(figsize=(5,5))
    samplerate, test_sound  = wavfile.read(filepath, mmap=True)
    plt.plot(test_sound)
    
# def log_specgram(audio, sample_rate, window_size=40, step_size=20, eps=1e-10):
def log_specgram(audio, sample_rate, window_size=40, step_size=20, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    #print('noverlap',noverlap)
    #print('nperseg',nperseg)
    freqs, _, spec = signal.spectrogram(audio,
                                    fs=sample_rate,
                                    window='hann',
                                    nperseg=nperseg,
                                    noverlap=noverlap,
                                    detrend=False)
    return freqs, np.log(spec.T.astype(np.float32) + eps)

In [4]:
f1 = '/data/speechdb/aihub_emotion/wav/5e25902f305bcf3ad153a6a9.wav'
f2 = '/data/speechdb/aihub_emotion/wav/5e378eaec8c25f16cd1453c4.wav'

In [5]:
# samples, sample_rate = librosa.load(os.path.join(TRAIN_PATH, label, fname), sr=AUDIO_SR)
# def log_specgram(wav, sr, eps=1e-8):
#     '''
#     로그 스펙트로그램 변환
#     '''
#     D = librosa.stft(wav, n_fft=240, hop_length=60, win_length=240, window='hamming')
#     spect, phase = librosa.magphase(D)
#     return np.log(spect + eps)

# #def _mel_specgram(wav, sr, eps=1e-8):
# #    '''
# #    멜 스펙트로그램 변환 (오래걸림)
# #    '''
# #    M = librosa.feature.melspectrogram(wav, sr=sr, n_mels=121, hop_length=60, fmin=20, fmax=4000)
# #    melgram = librosa.logamplitude(M, ref_power=1.0)
# #    return melgram

# def mel_specgram(wav, sr, eps=1e-8):
#     '''
#     멜 스펙트로그램 변환
#     '''
#     mel = librosa.feature.melspectrogram(wav, sr=sr, n_mels=121, hop_length=600, fmin=10, fmax=8000).astype(np.float32)
#     melgram, phase = librosa.magphase(mel)
#     melgram= np.log(melgram + eps)
#     return melgram

In [4]:
N_CHANNELS = 3
def get_3d_spec(Sxx_in, moments=None):
    if moments is not None:
        (base_mean, base_std, delta_mean, delta_std,
             delta2_mean, delta2_std) = moments
    else:
        base_mean, delta_mean, delta2_mean = (0, 0, 0)
        base_std, delta_std, delta2_std = (1, 1, 1)
    h, w = Sxx_in.shape
    right1 = np.concatenate([Sxx_in[:, 0].reshape((h, -1)), Sxx_in], axis=1)[:, :-1]
    delta = (Sxx_in - right1)[:, 1:]
    delta_pad = delta[:, 0].reshape((h, -1))
    delta = np.concatenate([delta_pad, delta], axis=1)
    right2 = np.concatenate([delta[:, 0].reshape((h, -1)), delta], axis=1)[:, :-1]
    delta2 = (delta - right2)[:, 1:]
    delta2_pad = delta2[:, 0].reshape((h, -1))
    delta2 = np.concatenate([delta2_pad, delta2], axis=1)
    base = (Sxx_in - base_mean) / base_std
    delta = (delta - delta_mean) / delta_std
    delta2 = (delta2 - delta2_mean) / delta2_std
    stacked = [arr.reshape((h, w, 1)) for arr in (base, delta, delta2)]
    return np.concatenate(stacked, axis=2)

In [5]:
df_train = pd.read_csv('./data/SER_train.tsv', sep='\t')
df_valid = pd.read_csv('./data/SER_valid.tsv', sep='\t')

In [6]:
label2id = {l:i for i, l in enumerate(df_train['label'].unique())}
id2label = {i:l for l, i in label2id.items()}

In [7]:
label2id

{'FEAR': 0,
 'DISGUST': 1,
 'NEUTRAL': 2,
 'ANGRY': 3,
 'SADNESS': 4,
 'HAPPINESS': 5,
 'SURPRISE': 6}

In [8]:
audio_wav_dict = pickle.load(open('../2108_speech_emotion/data/audio_vectors_sr16k.pkl', 'rb'))

In [9]:
def log_specgram(wav, sr=16000, eps=1e-8):
    '''
    로그 스펙트로그램 변환
    '''
    D = librosa.stft(wav, n_fft=240, hop_length=120, win_length=240, window='hamming')
    spect, phase = librosa.magphase(D)
    return np.log(spect + eps)

In [17]:
len(audio_wav_dict)

0

In [11]:
# %%time
# ### Validation dataset
# features_valid = []
# for _, row in tqdm(df_valid.iterrows()):
#     wav = audio_wav_dict[row['wav_id']]
#     spector = log_specgram(wav)
#     spector=get_3d_spec(spector)
#     input_tensor=torch.tensor(np.transpose(spector,(2,1,0))) # (channel, time, freq)
#     input_batch = input_tensor.unsqueeze(0)
#     features_valid.append({
#      'wav_id': row['wav_id'],
#      'text': row['발화문'],
#      'sprectrome':input_batch,
#      'label': label2id[row['label']]
#     })
#     del audio_wav_dict[row['wav_id']]
# pickle.dump(features_valid, open('./data/features_valid.pkl', 'wb'), protocol=4)
# del features_valid

13198it [02:45, 79.87it/s]


CPU times: user 4min 34s, sys: 18.9 s, total: 4min 52s
Wall time: 3min 30s


In [12]:
# %%time
# ### Train dataset
# features_train = []
# for _, row in tqdm(df_train.iterrows()):
#     wav = audio_wav_dict[row['wav_id']]
#     spector = log_specgram(wav)
#     spector=get_3d_spec(spector)
#     input_tensor=torch.tensor(np.transpose(spector,(2,1,0))) # (channel, time, freq)
#     input_batch = input_tensor.unsqueeze(0)
#     features_train.append({
#      'wav_id': row['wav_id'],
#      'text': row['발화문'],
#      'sprectrome':input_batch,
#      'label': label2id[row['label']]
#     })
#     del audio_wav_dict[row['wav_id']]
# pickle.dump(features_train, open('./data/features_train.pkl', 'wb'), protocol=4)

30793it [08:20, 61.53it/s]


CPU times: user 13min 34s, sys: 40.6 s, total: 14min 14s
Wall time: 9min 44s


In [15]:
# X, y = pickle.load(open(data_file, 'rb'))

In [16]:
# batch_encoding = tokenizer.batch_encode_plus(
#         [(example.text_a, example.text_b) for example in examples],
#         max_length=max_length,
#         padding="max_length",
#         add_special_tokens=True,
#         truncation=True,
#     )

## 1) Audio

In [18]:
import torch
import torch.nn as nn
#from .utils import load_state_dict_from_url
from torch.hub import load_state_dict_from_url

# __all__ = ['AlexNet', 'alexnet']


model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.num_classes=num_classes
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((12, 12))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        print('features',x.shape)
        
        #x = self.avgpool(x)
        #print('avgpool',x.shape)
        #x = torch.flatten(x, 1)
        #print('flatten',x.shape)
        #x = self.classifier(x)
        return x
    
def alexnet(pretrained=False, progress=True, **kwargs):
    model = AlexNet(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model


class ModifiedAlexNet(nn.Module):
    def __init__(self, num_classes=7):
        super(ModifiedAlexNet, self).__init__()
        self.num_classes=num_classes
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        #print('features',x.shape)
        x=torch.flatten(x, start_dim=2)#a1,a2,a3......al{a of dim c} 
        x=torch.sum(x, dim=2)#a1*alpha1+a2*alpha2+.......+al*alphal
        #print(x.shape)
        x=self.classifier(x)
        #print('classifier',x)
        #x=self.softmax(x)
        #print('softmax',x)
        #x = self.avgpool(x)
        #print('avgpool',x.shape)
        #x = torch.flatten(x, 1)
        #print('flatten',x.shape)
        #x = self.classifier(x)
        return x
    
def modifiedAlexNet(pretrained=False, progress=True, **kwargs):
    model_modified = ModifiedAlexNet(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                              progress=progress)
        model_modified.load_state_dict(state_dict)
    return model_modified

In [19]:
original_model = alexnet(pretrained=True)
original_dict = original_model.state_dict()
modifiedAlexNet = modifiedAlexNet(pretrained=False)
modified_model_dict = modifiedAlexNet.state_dict()
pretrained_modified_model_dict = {k: v for k, v in original_dict.items() if k in modified_model_dict}

## 1) Text

In [22]:
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from transformers import ElectraForSequenceClassification
from transformers import BertModel
# model = BertModel.from_pretrained("monologg/kobert")

In [23]:
kobert = BertForSequenceClassification.from_pretrained(
    "monologg/kobert", 
    num_labels = 7,   
    output_attentions = False,
    output_hidden_states = False, 
)
print(model)

# koelectra = ElectraForSequenceClassification.from_pretrained(
#     "monologg/koelectra-base-v3-discriminator", 
#     num_labels = 7,   
#     output_attentions = False,
#     output_hidden_states = False, 
# )
# # print(model)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(8002, 768, padding_idx=1)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )

In [29]:
## Define Hook to extract the intermediate OutPut
# hook : https://hongl.tistory.com/157 
outputs_text= []
def hook_text(module, input, output):
    outputs_text.clear()
    outputs_text.append(output)
    return None

outputs_audio= []
def hook_audio(module, input, output):
    outputs_audio.clear()
    outputs_audio.append(output)
    return None

In [30]:
class CombinedAudioTextModel(nn.Module):
    def __init__(self, num_classes, tokenizer, text_model, audio_model):
        super(CombinedAudioTextModel, self).__init__()
        self.num_classes=num_classes
#         self.tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
        self.tokenizer = tokenizer
#         self.tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

#         self.text_model=torch.load('/content/drive/My Drive/savedModel/model_text.pt')
#         self.audio_model=torch.load('/content/drive/My Drive/savedModel/model_audio_old_optimizer.pt')
        self.text_model = text_model
        self.audio_model = audio_model
        
    
#         self.text_model.electra.register_forward_hook(hook_text)
        self.text_model.bert.pooler.register_forward_hook(hook_text)
        self.audio_model.features.register_forward_hook(hook_audio)

        for param in self.text_model.parameters():
            param.requires_grad = False
        for param in self.audio_model.parameters():
            param.requires_grad = False

        self.dropout = nn.Dropout(.5)
        self.linear = nn.Linear(1024, num_classes)

        self.softmax = nn.Softmax(dim=1)

    def forward(self,text,audio):
        self.text_model(text)
        self.audio_model(audio)
        audio_embed=outputs_audio[0]
        text_embed=outputs_text[0]
        audio_embed=torch.flatten(audio_embed, start_dim=2)#a1,a2,a3......al{a of dim c} 
        audio_embed=torch.sum(audio_embed, dim=2)
        concat_embded=torch.cat((text_embed,audio_embed),1)
        x=self.dropout(concat_embded)
        x=self.linear(x)
        return x

In [31]:
tokenizer = BertTokenizer.from_pretrained('monologg/kobert')

model=CombinedAudioTextModel(num_classes=7, tokenizer=tokenizer, text_model=kobert, audio_model=modifiedAlexNet)
model.to('cuda')

CombinedAudioTextModel(
  (text_model): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(8002, 768, padding_idx=1)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=

In [48]:
# del model

# Train

In [80]:
from tokenization_kobert import KoBertTokenizer
tokenizer = KoBertTokenizer.from_pretrained('monologg/kobert')

In [48]:
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
writer = SummaryWriter(log_dir='./log/')

In [49]:
total_steps = 1
NUM_EPOCHS=20
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
# tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
# tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

In [50]:
model.train()
model.to('cuda')

CombinedAudioTextModel(
  (text_model): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(8002, 768, padding_idx=1)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=

In [51]:
# len(train_list)

In [52]:
# features_train

In [53]:
%%time
for epoch in range(NUM_EPOCHS):
    random.shuffle(features_train)
    for every_trainlist in tqdm(features_train):
        label1=every_trainlist['label']
        text=every_trainlist['text']
        label1=torch.tensor([label1])
        sprectrome=every_trainlist['sprectrome']
        if(sprectrome.shape[2]>65):
            optimizer.zero_grad()
            input_ids = torch.tensor(tokenizer.encode(text, add_special_tokens=True)).unsqueeze(0) 
            sprectrome = sprectrome.to('cuda')
            label1=label1.to('cuda')
            input_ids=input_ids.to('cuda')
            output = model(input_ids,sprectrome)
            loss = criterion(output, label1)
            loss.backward()
            optimizer.step()
            _, preds = torch.max(output, 1)
            accuracy = torch.sum(preds == label1)
            if total_steps % 100 == 0:
                with torch.no_grad():
                    _, preds = torch.max(output, 1)
                    accuracy = torch.sum(preds == label1)
                    writer.add_scalar('loss', loss.item(), total_steps)
                    writer.add_scalar('accuracy', accuracy.item(), total_steps)                     
            total_steps+=1
    lr_scheduler.step()

100%|██████████| 30793/30793 [13:32<00:00, 37.92it/s]
 35%|███▌      | 10814/30793 [05:33<10:16, 32.40it/s]


KeyboardInterrupt: 

In [36]:
# TypeError: expected Tensor as element 0 in argument 0, but got BaseModelOutputWithPastAndCrossAttentions

In [None]:
detected call of lr_scheduler.step before optimizer.step

In [38]:
accuracy

tensor(0, device='cuda:0')

In [39]:
_, preds = torch.max(output, 1)

In [47]:
features_train

[{'wav_id': '5f83ec249e04b149046cc789',
  'text': '코로나 땜에 밖에 나가지도 못하고 집에서 일만 하고 있어.',
  'sprectrome': tensor([[[[-1.8421e+01, -1.8421e+01, -1.8421e+01,  ..., -1.8421e+01,
             -1.8421e+01, -1.8421e+01],
            [-1.8421e+01, -1.8421e+01, -1.8421e+01,  ..., -1.8421e+01,
             -1.8421e+01, -1.8421e+01],
            [-1.8421e+01, -1.8421e+01, -1.8421e+01,  ..., -1.8421e+01,
             -1.8421e+01, -1.8421e+01],
            ...,
            [-3.7329e+00, -4.1331e+00, -6.4639e+00,  ..., -1.0230e+01,
             -1.1257e+01, -1.2965e+01],
            [-5.8022e+00, -4.4692e+00, -4.4978e+00,  ..., -9.8265e+00,
             -1.1638e+01, -1.0675e+01],
            [-4.6353e+00, -4.5992e+00, -4.8927e+00,  ..., -1.0402e+01,
             -1.1788e+01, -1.1486e+01]],
  
           [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
              0.0000e+00,  0.0000e+00],
            [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
              0.0000e+00,  0.00

In [40]:
features_valid = pickle.load(open('./data/features_valid.pkl', 'rb'))

In [44]:
y_actu=[]
y_pred=[]
#tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model.to('cpu')
model.eval()
for every_test_list in tqdm(features_valid[:1000]):
    label1=every_test_list['label']
    label1=torch.tensor([label1])
    sprectrome=every_test_list['sprectrome']
    text=every_test_list['text']
    input_ids = torch.tensor(tokenizer.encode(text, add_special_tokens=True)).unsqueeze(0)
    with torch.no_grad():
        if(sprectrome.shape[2]>65):
            #sprectrome = sprectrome.to('cuda')
            #label1=label1.to('cuda')
            output = model(input_ids,sprectrome)
            _, preds = torch.max(output, 1)
            y_actu.append(label1.numpy()[0])
            y_pred.append(preds.numpy()[0])

100%|██████████| 1000/1000 [02:59<00:00,  5.56it/s]


In [45]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

y_pred = [id2label[i] for i in y_pred]
y_actu = [id2label[i] for i in y_actu]
print(classification_report(y_actu, y_pred))

              precision    recall  f1-score   support

       ANGRY       0.32      0.26      0.29       175
     DISGUST       0.00      0.00      0.00        59
        FEAR       0.00      0.00      0.00        77
   HAPPINESS       0.00      0.00      0.00        97
     NEUTRAL       0.57      0.02      0.05       169
     SADNESS       0.44      0.94      0.60       401
    SURPRISE       0.00      0.00      0.00        22

    accuracy                           0.43      1000
   macro avg       0.19      0.17      0.13      1000
weighted avg       0.33      0.43      0.30      1000



  _warn_prf(average, modifier, msg_start, len(result))


In [46]:
confusion_matrix(y_actu, y_pred)

array([[ 46,   0,   0,   0,   0, 129,   0],
       [ 12,   0,   0,   0,   0,  47,   0],
       [ 17,   0,   0,   0,   1,  59,   0],
       [ 25,   0,   0,   0,   1,  71,   0],
       [ 16,   0,   0,   0,   4, 149,   0],
       [ 24,   0,   0,   0,   1, 376,   0],
       [  6,   0,   0,   0,   0,  16,   0]])

In [60]:
for f in features_train[:30]:
    t = f['text']
    print(id2label[f['label']], t)

SADNESS 결과 듣고 지금 밖에 걸어가고 있는 중이야.
NEUTRAL 음 알았어. 고마워.
ANGRY 지금 한 시간도 넘었어.
SADNESS 주변에서 다 하길래. 돈도 많이 벌고. 그래서 나도 해봤어.
FEAR 친구랑 만나기로 한 달 전부터 약속한 거긴 한데 상황이 좋지 않으니까 약속을 미루고 싶어.
SADNESS 그래. 걱정해줘서 고마워.
NEUTRAL 그래 그게 좋겠다.
HAPPINESS 백화점 향수 이벤트야.
HAPPINESS 그런 것도 있냐? 어떤 건데?
ANGRY 처음엔 괜찮았는데 점점 걔가 날. 하.
NEUTRAL 엔터테인먼트 주식이야. 요새 케이팝이 대세잖아.
SADNESS 3년 전에 우리 오빠가 소개시켜 줬어.
FEAR 심장이 여전히 벌렁벌렁해.
SURPRISE 시키다가 엄청 깜짝 깜짝 놀랬어.
SADNESS 통장에 돈이 하나도 없어! 큰일 났네.
SADNESS 나 결국 헤어졌어.
ANGRY 집을 돼지우리로 만들어서 그래. 청소를 안해 청소를.
ANGRY 어! 진짜 기분 나빴어.
SADNESS 되게 많이 넘어지고 그래갖고 많이들 다쳤을거야, 아마. 내 친구는 발목도 다쳤다니까?
ANGRY 약속이 있어서 친구를 기다리는데 아직도 안 왔어.
NEUTRAL 나 오늘 짭새가 몰려있길래 궁금해서 구경 갔다왔어.
SADNESS 그러니까. 아. 이제 회사 그만두고 싶다.
SADNESS 엄마가 가장 힘들어하시지. 엄마가 제일 의지하고 계셨었거든.
NEUTRAL 다행이다. 거기 맛집이 많다는데 잘 몰라서 걱정했거든.
ANGRY 최근에 최근에 사람들이 반려견에 대한 관심이 높아지면서 많은 사람들이 반려견을 키우고 있대 그런데 키우다가 중간에 버리는 경우가 생긴다는 거야.
FEAR 생명에는 지장 없으시지만 병원에 며칠 입원해 계셔야해.
NEUTRAL 거의 선수급으로 잘하지.
HAPPINESS 회사에서 걸려 온 전화 때문에 너무 화가 나고 짜증이 났는데 너한테 이야기하고 나니까 마음이 풀렸어. 다음에도 너한테 이야기할게.
SADNESS 응. 자주 있는 일이야. 속

In [68]:
tokenizer.tokenize('[CLS] 안녕하세요 반갑습니다 [SEP]')

['[CLS]', '[UNK]', '[UNK]', '[SEP]']

In [64]:
t = '친구가 아직도 안 오고 있어!'

In [76]:
tokenizer.tokenize("[CLS] 한국어 ELECTRA를 공유합니다. [SEP]")

AttributeError: 'SentencePieceProcessor' object has no attribute 'encode'

In [71]:
tokenizer = BertTokenizer.from_pretrained("monologg/kobert")

In [None]:
>>> from tokenization_kobert import KoBertTokenizer
>>>  # monologg/distilkobert도 동일
>>> tokenizer.tokenize("[CLS] 한국어 모델을 공유합니다. [SEP]")
>>> ['[CLS]', '▁한국', '어', '▁모델', '을', '▁공유', '합니다', '.', '[SEP]']
>>> tokenizer.convert_tokens_to_ids(['[CLS]', '▁한국', '어', '▁모델', '을', '▁공유', '합니다', '.', '[SEP]'])
>>> [2, 4958, 6855, 2046, 7088, 1050, 7843, 54, 3]


In [77]:
from tokenization_kobert import KoBertTokenizer

In [78]:
from tokenization_kobert import KoBertTokenizer
tokenizer = KoBertTokenizer.from_pretrained('monologg/kobert')
tokenizer.tokenize("[CLS] 한국어 모델을 공유합니다. [SEP]")

In [79]:
tokenizer.tokenize("[CLS] 한국어 모델을 공유합니다. [SEP]")

AttributeError: 'SentencePieceProcessor' object has no attribute 'encode'