In [3]:
!pip install torch torchtext numpy



In [0]:
from torchtext import data,datasets

TEXT = data.Field(lower=True,batch_first=True)
LABEL = data.LabelField()

# make splits for data
train, val, test = datasets.SST.splits(TEXT, LABEL, 'data/',fine_grained=True)


In [0]:
# TEXT.build_vocab(train, vectors="fasttext.en.300d")
TEXT.build_vocab(train, vectors="glove.6B.300d")
LABEL.build_vocab(train,val,test)


In [6]:
print('len(TEXT.vocab)', len(TEXT.vocab))
print(LABEL.vocab.itos)
print(LABEL.vocab.stoi)
print('len(LABEL.vocab)', len(LABEL.vocab))   # vocab include '<unk>'
print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size())


len(TEXT.vocab) 16581
['negative', 'positive', 'neutral', 'very positive', 'very negative']
defaultdict(<function _default_unk_index at 0x7f0e4bff70d0>, {'negative': 0, 'positive': 1, 'neutral': 2, 'very positive': 3, 'very negative': 4})
len(LABEL.vocab) 5
TEXT.vocab.vectors.size() torch.Size([16581, 300])


In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F

_DEBUG=False

def ilog(*args,**kwargs):
    if _DEBUG:
        print(*args,**kwargs)
    
class textCNN(nn.Module):
    def __init__(self,args):
        super().__init__()
        dim = args['dim']
        n_class = args['n_class']
        embedding_matrix=args['embedding_matrix']
        kernels=[3,4,5]
        kernel_number=[100,100,100]
        self.embeding = nn.Embedding.from_pretrained(embedding_matrix)
        self.convs = nn.ModuleList([nn.Conv2d(1, number, (size, dim),padding=(size-1,0)) for (size,number) in zip(kernels,kernel_number)])
        self.dropout=nn.Dropout()
        self.out = nn.Linear(sum(kernel_number), n_class)
 
    def forward(self, x):
        ilog('ori input',x.size())
        x = self.embeding(x)
        ilog('after embeding',x.size())
        x = x.unsqueeze(1)
        ilog('unsqueeze',x.size())
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        ilog(x[0].size())
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        x = torch.cat(x, 1)
        x = self.dropout(x)
        x = self.out(x)
        return x
    
class textCNNMulti(nn.Module):
    def __init__(self,args):
        super().__init__()
        dim = args['dim']
        n_class = args['n_class']
        embedding_matrix=args['embedding_matrix']
        kernels=[3,4,5]
        kernel_number=[100,100,100]
        self.static_embed = nn.Embedding.from_pretrained(embedding_matrix)
        self.non_static_embed = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.convs = nn.ModuleList([nn.Conv2d(2, number, (size, dim),padding=(size-1,0)) for (size,number) in zip(kernels,kernel_number)])
        self.dropout=nn.Dropout()
        self.out = nn.Linear(sum(kernel_number), n_class)
 
    def forward(self, x):
        ilog('ori input',x.size())
        non_static_input = self.non_static_embed(x)
        static_input = self.static_embed(x)
        x = torch.stack([non_static_input, static_input], dim=1)
        ilog('after embeding',x.size())
        ilog('unsqueeze',x.size())
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        ilog(x[0].size())
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        x = torch.cat(x, 1)
        x = self.dropout(x)
        x = self.out(x)
        return x


class textCNNNonStatic(nn.Module):
    def __init__(self,args):
        super().__init__()
        dim = args['dim']
        n_class = args['n_class']
        embedding_matrix=args['embedding_matrix']
        kernels=[3,4,5]
        kernel_number=[100,100,100]
        self.embeding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.convs = nn.ModuleList([nn.Conv2d(1, number, (size, dim),padding=(size-1,0)) for (size,number) in zip(kernels,kernel_number)])
        self.dropout=nn.Dropout()
        self.out = nn.Linear(sum(kernel_number), n_class)
 
    def forward(self, x):
        ilog('ori input',x.size())
        x = self.embeding(x)
        ilog('after embeding',x.size())
        x = x.unsqueeze(1)
        ilog('unsqueeze',x.size())
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        ilog(x[0].size())
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        x = torch.cat(x, 1)
        x = self.dropout(x)
        x = self.out(x)
        return x

In [0]:
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_sizes=(32, 256, 256),shuffle=True)


In [9]:
args={}
args['vocb_size']=len(TEXT.vocab)
args['dim']=300
args['n_class']=len(LABEL.vocab)
args['embedding_matrix']=TEXT.vocab.vectors
args['lr']=1e-5
args['epochs']=400
args['log_interval']=20
args['test_interval']=100
args['save_dir']='./'

print(args['vocb_size'])
print(args['n_class'])

16581
5


In [10]:
import os
import sys

import torch

import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(1)


<torch._C.Generator at 0x7f0e4c4c53b0>

In [11]:
torch.cuda.is_available()

True

In [13]:
from collections import deque

def save(model, save_dir, save_prefix, steps):
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    save_prefix = os.path.join(save_dir, save_prefix)
    save_path = '{}_steps_{}.pt'.format(save_prefix, steps)
    torch.save(model.state_dict(), save_path)



model=textCNNMulti(args)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
criterion = nn.CrossEntropyLoss()

best_acc = 0
last_step = 0
model.train()
steps=0


def create_early_stopping(patience):
    recent_metric = deque(maxlen=patience)
    best_metric = None

    def check(metric, model):
        nonlocal best_metric
        is_stop = False
        if not best_metric or metric > best_metric:
            print('save best_model.pt, metric: {}'.format(metric))
            best_metric = metric
            torch.save(model, 'best_model.pt')

        recent_metric.append(metric)

        if all([i < best_metric for i in recent_metric]):
            is_stop = True
        return is_stop

    return check


def eval(data_iter, model, args):
    model.eval()
    corrects, avg_loss = 0, 0
    for i,data in enumerate(data_iter):
        x, target = data.text, data.label
        x=x.to(device)

        target=target.to(device)

        logit = model(x)
        loss = F.cross_entropy(logit, target, reduction='sum')

        avg_loss += loss.item()
        corrects += (torch.max(logit, 1)
                     [1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    avg_loss /= size
    accuracy = 100.0 * int(corrects)/size
    print('\nEvaluation - loss: {:.6f}  acc: {:.4f}%({}/{}) \n'.format(avg_loss, 
                                                                       accuracy, 
                                                                       corrects, 
                                                                       size))
    model.train()
    return accuracy

early_stop = create_early_stopping(150)

for epoch in range(1, args['epochs']+1):
    for i,data in enumerate(train_iter):
        steps+=1

        x, target = data.text, data.label
        x=x.to(device)
        target=target.to(device)

        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if steps % args['log_interval'] == 0:
            corrects = (torch.max(output, 1)[1].view(target.size()).data == target.data).sum()
            accuracy = 100.0 * int(corrects)/data.batch_size
            print(
                'Epoch [{}] Batch[{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(epoch,
                                                                         steps, 
                                                                         loss.item(), 
                                                                         accuracy,
                                                                         corrects,
                                                                         data.batch_size))
        if steps % args['test_interval'] == 0:
            val_acc = eval(val_iter, model, args)
            is_stop = early_stop(val_acc, model)
            if is_stop:
                raise RuntimeError('early stop')

        model.train()
print('final_result')


Epoch [1] Batch[20] - loss: 1.548777  acc: 28.1250%(9/32)
Epoch [1] Batch[40] - loss: 1.583792  acc: 21.8750%(7/32)
Epoch [1] Batch[60] - loss: 1.607926  acc: 28.1250%(9/32)
Epoch [1] Batch[80] - loss: 1.596044  acc: 34.3750%(11/32)
Epoch [1] Batch[100] - loss: 1.547954  acc: 34.3750%(11/32)

Evaluation - loss: 1.572960  acc: 26.3397%(290/1101) 

save best_model.pt, metric: 26.33969118982743


  "type " + obj.__name__ + ". It won't be checked "


Epoch [1] Batch[120] - loss: 1.546566  acc: 25.0000%(8/32)
Epoch [1] Batch[140] - loss: 1.559595  acc: 25.0000%(8/32)
Epoch [1] Batch[160] - loss: 1.658371  acc: 15.6250%(5/32)
Epoch [1] Batch[180] - loss: 1.504653  acc: 34.3750%(11/32)
Epoch [1] Batch[200] - loss: 1.631015  acc: 28.1250%(9/32)

Evaluation - loss: 1.564610  acc: 27.1571%(299/1101) 

save best_model.pt, metric: 27.157129881925524
Epoch [1] Batch[220] - loss: 1.515379  acc: 34.3750%(11/32)
Epoch [1] Batch[240] - loss: 1.652367  acc: 25.0000%(8/32)
Epoch [1] Batch[260] - loss: 1.584523  acc: 21.8750%(7/32)
Epoch [2] Batch[280] - loss: 1.604073  acc: 25.0000%(8/32)
Epoch [2] Batch[300] - loss: 1.540003  acc: 34.3750%(11/32)

Evaluation - loss: 1.560351  acc: 28.9737%(319/1101) 

save best_model.pt, metric: 28.97366030881017
Epoch [2] Batch[320] - loss: 1.518684  acc: 34.3750%(11/32)
Epoch [2] Batch[340] - loss: 1.571397  acc: 34.3750%(11/32)
Epoch [2] Batch[360] - loss: 1.610183  acc: 18.7500%(6/32)
Epoch [2] Batch[380] - 

RuntimeError: ignored

In [14]:
best_model=torch.load('best_model.pt')
best_model.eval()
eval(test_iter, best_model, args)


Evaluation - loss: 1.249011  acc: 45.1584%(998/2210) 



45.158371040723985