In [3]:
!pip install torch torchtext numpy



In [0]:
from torchtext import data,datasets

# Label field: declared non-sequential.
LABEL = data.Field(sequential=False)
# Text field: lower-cased, with the batch dimension first.
TEXT = data.Field(batch_first=True, lower=True)

# Load the fine-grained SST train / validation / test splits.
# 'data/' is passed as the third positional argument
# (presumably the dataset root directory -- TODO confirm).
train, val, test = datasets.SST.splits(
    TEXT,
    LABEL,
    'data/',
    fine_grained=True,
)


In [5]:
# Alternative pretrained vectors, kept for reference:
# TEXT.build_vocab(train, vectors="fasttext.en.300d")
# Build the text vocabulary from the training split only and attach the
# "glove.840B.300d" vectors (the cell output shows them being fetched into
# .vector_cache on this run).
TEXT.build_vocab(train, vectors="glove.840B.300d")
# The label vocabulary is built from all three splits.
LABEL.build_vocab(train,val,test)

.vector_cache/glove.840B.300d.zip: 2.18GB [10:29, 3.46MB/s]                            
100%|█████████▉| 2195594/2196017 [04:51<00:00, 7968.55it/s]

In [6]:
# Report the vocabulary sizes and the shape of the pretrained vector table.
_label_count = len(LABEL.vocab) - 1   # the label vocab also contains '<unk>'
for _caption, _value in [
    ('len(TEXT.vocab)', len(TEXT.vocab)),
    ('len(LABEL.vocab)', _label_count),
    ('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size()),
]:
    print(_caption, _value)


len(TEXT.vocab) 16581
len(LABEL.vocab) 5
TEXT.vocab.vectors.size() torch.Size([16581, 300])


In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Set to True to make ilog() print; while False, ilog() is a no-op.
_DEBUG = False


def ilog(*args, **kwargs):
    """Debug logger: forward all arguments to print() only while _DEBUG is truthy."""
    if not _DEBUG:
        return
    print(*args, **kwargs)
    
class textCNN(nn.Module):
    """Text CNN classifier on top of a frozen pretrained embedding table.

    `args` is a dict read for three keys:
      'dim'              -- embedding width (also the width of every conv kernel)
      'n_class'          -- number of output logits
      'embedding_matrix' -- tensor handed to nn.Embedding.from_pretrained
    Three parallel Conv2d branches (kernel heights 3, 4, 5; 150 filters each)
    are max-pooled over the sequence axis, concatenated, passed through
    dropout and projected to `n_class` logits.
    """

    def __init__(self, args):
        super().__init__()
        emb_dim = args['dim']
        num_classes = args['n_class']
        pretrained = args['embedding_matrix']
        # (kernel height, number of filters) for each convolution branch.
        branch_specs = [(3, 150), (4, 150), (5, 150)]

        # from_pretrained defaults to freeze=True, so these weights stay fixed.
        self.embeding = nn.Embedding.from_pretrained(pretrained)

        branches = []
        for height, n_filters in branch_specs:
            # Padding of height-1 on the sequence axis keeps inputs shorter
            # than the kernel usable.
            branches.append(
                nn.Conv2d(1, n_filters, (height, emb_dim), padding=(height - 1, 0))
            )
        self.convs = nn.ModuleList(branches)

        self.dropout = nn.Dropout()
        self.out = nn.Linear(sum(n for _, n in branch_specs), num_classes)

    def forward(self, x):
        """Return class logits for `x` (assumed to be (batch, seq_len) token ids -- TODO confirm)."""
        ilog('ori input', x.size())
        embedded = self.embeding(x)
        ilog('after embeding', embedded.size())
        # Add the single input channel expected by Conv2d.
        embedded = embedded.unsqueeze(1)
        ilog('unsqueeze', embedded.size())

        activations = []
        for conv in self.convs:
            # The kernel spans the full embedding width, so the last axis has size 1.
            activations.append(F.relu(conv(embedded)).squeeze(3))
        ilog(activations[0].size())

        pooled = []
        for act in activations:
            # Max over the whole (padded) sequence axis.
            pooled.append(F.max_pool1d(act, act.size(2)).squeeze(2))

        features = torch.cat(pooled, 1)
        return self.out(self.dropout(features))
    
class textCNNMulti(nn.Module):
    """Two-channel text CNN: one frozen and one trainable copy of the pretrained embeddings.

    `args` is a dict read for three keys:
      'dim'              -- embedding width (also the width of every conv kernel)
      'n_class'          -- number of output logits
      'embedding_matrix' -- tensor handed to nn.Embedding.from_pretrained
    The two embedding lookups are stacked as the two input channels of three
    parallel Conv2d branches (kernel heights 3, 4, 5; 150 filters each), which
    are max-pooled over the sequence axis, concatenated, passed through dropout
    and projected to `n_class` logits.
    """

    def __init__(self,args):
        super().__init__()
        dim = args['dim']
        n_class = args['n_class']
        embedding_matrix=args['embedding_matrix']
        kernels=[3,4,5]
        kernel_number=[150,150,150]
        # from_pretrained wraps the tensor it is given without copying it.
        # Without the clones, both embeddings would share storage with each
        # other and with the caller's matrix. An in-place optimizer step on the
        # trainable channel would then also rewrite the "static" channel and the
        # caller's vectors (unless the module is later copied to another device).
        self.static_embed = nn.Embedding.from_pretrained(embedding_matrix.clone())
        self.non_static_embed = nn.Embedding.from_pretrained(embedding_matrix.clone(), freeze=False)
        # Two input channels: [non-static, static]. Padding of size-1 on the
        # sequence axis keeps inputs shorter than the kernel usable.
        self.convs = nn.ModuleList([nn.Conv2d(2, number, (size, dim),padding=(size-1,0)) for (size,number) in zip(kernels,kernel_number)])
        self.dropout=nn.Dropout()
        self.out = nn.Linear(sum(kernel_number), n_class)
 
    def forward(self, x):
        """Return class logits for `x` (assumed to be (batch, seq_len) token ids -- TODO confirm)."""
        ilog('ori input',x.size())
        non_static_input = self.non_static_embed(x)
        static_input = self.static_embed(x)
        # Stack the two lookups along a new channel axis (dim=1).
        x = torch.stack([non_static_input, static_input], dim=1)
        ilog('after embeding',x.size())
        ilog('unsqueeze',x.size())
        # The kernel spans the full embedding width, so the last axis has size 1.
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        ilog(x[0].size())
        # Max over the whole (padded) sequence axis of each branch.
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        x = torch.cat(x, 1)
        x = self.dropout(x)
        x = self.out(x)
        return x


class textCNNNonStatic(nn.Module):
    """Text CNN classifier whose pretrained embedding table is fine-tuned during training.

    `args` is a dict read for three keys:
      'dim'              -- embedding width (also the width of every conv kernel)
      'n_class'          -- number of output logits
      'embedding_matrix' -- tensor handed to nn.Embedding.from_pretrained
    Three parallel Conv2d branches (kernel heights 3, 4, 5; 150 filters each)
    are max-pooled over the sequence axis, concatenated, passed through
    dropout and projected to `n_class` logits.
    """

    def __init__(self,args):
        super().__init__()
        dim = args['dim']
        n_class = args['n_class']
        embedding_matrix=args['embedding_matrix']
        kernels=[3,4,5]
        kernel_number=[150,150,150]
        # from_pretrained wraps the tensor it is given without copying it.
        # Clone first so that fine-tuning (freeze=False) updates a private copy
        # instead of rewriting the caller's pretrained vectors in place.
        self.embeding = nn.Embedding.from_pretrained(embedding_matrix.clone(), freeze=False)
        # Padding of size-1 on the sequence axis keeps inputs shorter than the
        # kernel usable.
        self.convs = nn.ModuleList([nn.Conv2d(1, number, (size, dim),padding=(size-1,0)) for (size,number) in zip(kernels,kernel_number)])
        self.dropout=nn.Dropout()
        self.out = nn.Linear(sum(kernel_number), n_class)
 
    def forward(self, x):
        """Return class logits for `x` (assumed to be (batch, seq_len) token ids -- TODO confirm)."""
        ilog('ori input',x.size())
        x = self.embeding(x)
        ilog('after embeding',x.size())
        # Add the single input channel expected by Conv2d.
        x = x.unsqueeze(1)
        ilog('unsqueeze',x.size())
        # The kernel spans the full embedding width, so the last axis has size 1.
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        ilog(x[0].size())
        # Max over the whole (padded) sequence axis of each branch.
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        x = torch.cat(x, 1)
        x = self.dropout(x)
        x = self.out(x)
        return x

In [0]:
# Build one BucketIterator per split, with batch size 128 for train and 256
# for validation and test.
# NOTE(review): shuffle=True is passed to splits() as a whole, so it presumably
# applies to the validation and test iterators too -- confirm that is intended.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_sizes=(128, 256, 256),shuffle=True)


In [9]:
# Hyper-parameters and run settings consumed by the model classes and the
# training cell.
args = {
    'vocb_size': len(TEXT.vocab),
    'dim': 300,                           # width of the pretrained vectors
    'n_class': len(LABEL.vocab) - 1,      # label vocab also contains '<unk>'
    'embedding_matrix': TEXT.vocab.vectors,
    'lr': 0.001,
    'momentum': 0.8,
    'epochs': 180,
    'log_interval': 100,                  # steps between training-progress lines
    'test_interval': 500,                 # steps between validation runs
    'save_dir': './',
}

print(args['vocb_size'], args['n_class'], sep='\n')

16581
5


In [10]:
import os
import sys

import torch

import torch.nn as nn
import torch.nn.functional as F

# Seed torch's default random number generator. This runs before the model is
# instantiated in the next cell. As the last expression of the cell, the
# call's return value is what the notebook echoes below.
torch.manual_seed(1)


<torch._C.Generator at 0x7fbebeded330>

In [11]:
def save(model, save_dir, save_prefix, steps):
    """Write `model.state_dict()` to `<save_dir>/<save_prefix>_steps_<steps>.pt`.

    Args:
        model: object exposing `state_dict()` (e.g. an nn.Module).
        save_dir: target directory; created (with parents) if missing. An empty
            string means the current working directory.
        save_prefix: file-name prefix.
        steps: value interpolated into the file name.

    Raises:
        OSError: if the directory cannot be created or the file cannot be written.
    """
    if save_dir:
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(save_dir, exist_ok=True)
    save_path = '{}_steps_{}.pt'.format(os.path.join(save_dir, save_prefix), steps)
    torch.save(model.state_dict(), save_path)

# Instantiate the two-channel model and move it to the GPU
# (Module.cuda() returns the module itself, so the call can be chained).
model = textCNNMulti(args).cuda()
optimizer = torch.optim.SGD(
    model.parameters(),
    momentum=args['momentum'],
    lr=args['lr'],
)
criterion = nn.CrossEntropyLoss()

# Book-keeping shared with the training loop: best validation accuracy so far,
# the step at which it was reached, and the global step counter.
best_acc, last_step, steps = 0, 0, 0
model.train()


def eval(data_iter, model, args):
    """Evaluate `model` on every batch of `data_iter` and return the accuracy in percent.

    Note: this name shadows the built-in `eval`; it is kept because other
    cells call it under this name.

    Args:
        data_iter: iterable of batches exposing `.text` and `.label` tensors,
            with a `.dataset` attribute whose length is the number of examples.
        model: nn.Module mapping `batch.text` to per-class logits.
        args: accepted for call compatibility; not used here.

    Returns:
        Accuracy as a float in [0, 100].

    Side effects: switches `model` to eval mode (callers that keep training
    must call `model.train()` afterwards) and prints a summary line.
    """
    model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA. A
    # model without parameters falls back to CUDA when it is available.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    corrects, avg_loss = 0, 0
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for batch in data_iter:
            x = batch.text.to(device)
            # Shift labels down by one without mutating the batch's own tensor.
            # Presumably this maps 1-based label-vocab indices ('<unk>' at 0)
            # to 0-based class ids -- TODO confirm.
            target = (batch.label - 1).to(device)

            logit = model(x)
            loss = F.cross_entropy(logit, target, reduction='sum')

            avg_loss += loss.item()
            predicted = torch.max(logit, 1)[1].view(target.size())
            corrects += int((predicted == target).sum())

    size = len(data_iter.dataset)
    avg_loss /= size
    accuracy = 100.0 * corrects / size
    print('\nEvaluation - loss: {:.6f}  acc: {:.4f}%({}/{}) \n'.format(avg_loss, 
                                                                       accuracy, 
                                                                       corrects, 
                                                                       size))
    return accuracy



# Training loop: one optimizer step per batch, a progress line every
# args['log_interval'] steps and a validation run every args['test_interval']
# steps, saving a checkpoint whenever validation accuracy improves.
for epoch in range(1, args['epochs']+1):
    # The loop variable is `batch`, not `data`: at notebook scope `data` is the
    # imported torchtext module, and rebinding it breaks re-running earlier cells.
    for batch in train_iter:
        steps += 1

        x = batch.text.cuda()
        # Shift labels down by one without mutating the batch's own tensor.
        # Presumably this maps 1-based label-vocab indices ('<unk>' at 0) to
        # 0-based class ids -- TODO confirm.
        target = (batch.label - 1).cuda()

        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if steps % args['log_interval'] == 0:
            predicted = torch.max(output, 1)[1].view(target.size())
            corrects = int((predicted == target).sum())
            accuracy = 100.0 * corrects/batch.batch_size
            print(
                'Epoch [{}] Batch[{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(epoch,
                                                                         steps, 
                                                                         loss.item(), 
                                                                         accuracy,
                                                                         corrects,
                                                                         batch.batch_size))
        if steps % args['test_interval'] == 0:
            val_acc = eval(val_iter, model, args)
            if val_acc > best_acc:
                best_acc = val_acc
                last_step = steps
                save(model, args['save_dir'], 'best', steps)

        # eval() leaves the model in eval mode; switch back before the next step.
        model.train()
print('final_result')
# Evaluates the model as left by the last training step (no checkpoint is
# reloaded). As the last expression, the returned accuracy is echoed by the cell.
eval(test_iter, model, args)

Epoch [2] Batch[100] - loss: 1.575456  acc: 24.2188%(31/128)
Epoch [3] Batch[200] - loss: 1.565488  acc: 28.9062%(37/128)
Epoch [5] Batch[300] - loss: 1.486482  acc: 35.9375%(46/128)
Epoch [6] Batch[400] - loss: 1.529465  acc: 32.0312%(41/128)
Epoch [8] Batch[500] - loss: 1.528383  acc: 31.2500%(40/128)

Evaluation - loss: 1.529624  acc: 35.2407%(388/1101) 

Epoch [9] Batch[600] - loss: 1.485863  acc: 36.7188%(47/128)
Epoch [11] Batch[700] - loss: 1.497726  acc: 35.1562%(45/128)
Epoch [12] Batch[800] - loss: 1.530256  acc: 28.1250%(36/128)
Epoch [14] Batch[900] - loss: 1.498434  acc: 33.5938%(43/128)
Epoch [15] Batch[1000] - loss: 1.480918  acc: 37.5000%(48/128)

Evaluation - loss: 1.474611  acc: 36.6031%(403/1101) 

Epoch [17] Batch[1100] - loss: 1.431244  acc: 39.0625%(50/128)
Epoch [18] Batch[1200] - loss: 1.457355  acc: 40.6250%(52/128)
Epoch [20] Batch[1300] - loss: 1.423446  acc: 38.2812%(49/128)
Epoch [21] Batch[1400] - loss: 1.401799  acc: 36.7188%(47/128)
Epoch [23] Batch[1500

45.47511312217195