In [9]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from pytorch_transformers import BertTokenizer, BertForSequenceClassification, BertConfig
from torch.optim import Adam
import torch.nn.functional as F
# Read the tab-separated train/test rating files and immediately down-sample
# each one to a random 40% of its rows. The fixed random_state keeps the
# chosen subset identical from run to run.
train_df = pd.read_csv('../dataset/ratings_train.txt', sep='\t').sample(frac=0.4, random_state=999)
test_df = pd.read_csv('../dataset/ratings_test.txt', sep='\t').sample(frac=0.4, random_state=999)

In [10]:
class Document(Dataset):
    """Map-style dataset serving ``(text, label)`` pairs from a DataFrame.

    Rows are addressed purely by position: for a given index, column 1 is
    returned as the text and column 2 as the label, so the wrapped frame
    must have at least three columns.

    NOTE(review): the previous docstring called this the Naver Sentiment
    Movie Corpus and listed labels as 1 = positive / 2 = negative. Nothing
    in this class checks the column meaning or the label encoding --
    confirm both against the data files.
    """

    def __init__(self, df):
        # Keep a reference to the caller's frame; no copy is made.
        self.df = df

    def __len__(self):
        # Number of samples == number of rows in the frame.
        return self.df.shape[0]

    def __getitem__(self, idx):
        # Positional lookups: column 1 -> text, column 2 -> label.
        text, label = (self.df.iloc[idx, col] for col in (1, 2))
        return text, label

In [11]:
# Filesystem location of the saved weights that a later cell passes to torch.load.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# notebook cannot run on another machine without editing this line; consider a
# path relative to the repository instead.
PATH='D:\\RAPA\\VHEX-Tech-NLP\\outcomes\\model.pth'

In [12]:
# Build the tokenizer and classifier from the multilingual BERT checkpoint,
# then overwrite the classifier's weights with the state dict stored at PATH.
device = torch.device("cpu")
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-cased')
# Place the model on the chosen device and make sure it is in inference mode
# (dropout disabled) no matter what state from_pretrained left it in.
model.to(device)
model.eval()
# map_location remaps every stored tensor onto `device`; without it a
# checkpoint that was saved from a GPU fails to load on a CPU-only machine.
# Kept as the last expression so the cell still shows the key-matching report.
model.load_state_dict(torch.load(PATH, map_location=device))

<All keys matched successfully>

In [13]:
class Tester(Dataset):
    """Map-style dataset serving ``(text, label)`` pairs from a DataFrame.

    Rows are addressed purely by position: for a given index, column 0 is
    returned as the text and column 1 as the label, so the wrapped frame
    must have at least two columns.

    NOTE(review): the previous docstring listed labels as 1 = positive /
    2 = negative; the label encoding is not checked here -- confirm it
    against whatever the model was trained on.
    """

    def __init__(self, df):
        # Keep a reference to the caller's frame; no copy is made.
        self.df = df

    def __len__(self):
        # Number of samples == number of rows in the frame.
        return self.df.shape[0]

    def __getitem__(self, idx):
        # Positional lookups: column 0 -> text, column 1 -> label.
        frame = self.df
        return frame.iloc[idx, 0], frame.iloc[idx, 1]

def createtester(x,y):
    """Build a one-row DataFrame holding a single sample.

    Args:
        x: value stored in column ``"a"`` (row 0).
        y: value stored in column ``"b"`` (row 0).

    Returns:
        A ``pandas.DataFrame`` with columns ``"a"`` and ``"b"`` and one row.
    """
    # Each value is wrapped in a one-element list so it becomes a single cell.
    return pd.DataFrame({column: [value] for column, value in (("a", x), ("b", y))})

In [17]:
# Wrap one hand-written review in a Tester dataset for the evaluation cell.
# The active sample pairs a negatively worded sentence with label 0.
# Alternative sample (positively worded sentence, label 1):
# a = createtester('재미있고 정말정말 대단한 영화다. 10점 만점에 10점을 줄 수 밖에 없다!!!!!!!!!!', 1)
a = createtester('재미도 하나도 없는 정말 쓰레기 같은 영화다', 0)
b = Tester(a)

In [18]:
# Run the classifier over every sample in `b` and record, per sample, whether
# the predicted class equals the supplied label. Results are accumulated
# across batches so `pred`, `labels` and `correct` cover the whole dataset
# rather than only the final batch.
MAX_LEN = 512  # fixed number of token ids each input is padded/truncated to

eval_loader = DataLoader(b, batch_size=2, shuffle=False, num_workers=0)
batch_preds, batch_labels = [], []
with torch.no_grad():  # inference only: skip autograd bookkeeping
    for text, label in eval_loader:
        encoded_list = [tokenizer.encode(t, add_special_tokens=True) for t in text]
        # Over-long inputs: keep the first MAX_LEN - 1 ids plus the final id
        # (the closing special token added by add_special_tokens=True), so
        # every row ends up exactly MAX_LEN long.
        encoded_list = [e if len(e) <= MAX_LEN else e[:MAX_LEN - 1] + e[-1:]
                        for e in encoded_list]
        # Right-pad with id 0 (presumably BERT's [PAD] id -- TODO confirm).
        # NOTE(review): no attention_mask is passed, so padded positions are
        # attended to; left unchanged because it is not visible here how the
        # checkpoint was fine-tuned.
        padded_list = [e + [0] * (MAX_LEN - len(e)) for e in encoded_list]
        sample = torch.tensor(padded_list).to(device)
        # `label` is already a tensor produced by the DataLoader's collation;
        # move it to the device instead of re-wrapping it with torch.tensor.
        labels = label.to(device)
        # Without `labels` the model's first output is the logits tensor; the
        # loss was never used, so it is no longer computed.
        logits = model(sample)[0]
        # argmax over the class dimension; softmax is monotonic, so applying
        # it first would not change the selected class.
        batch_preds.append(torch.argmax(logits, dim=1))
        batch_labels.append(labels)

pred = torch.cat(batch_preds)
labels = torch.cat(batch_labels)
correct = pred.eq(labels)

  import sys
  # Remove the CWD from sys.path while we load stuff.


In [19]:
# Display the boolean tensor produced by pred.eq(labels) in the evaluation
# cell: True where the predicted class equals the supplied label.
correct

tensor([True])