In [1]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext.legacy import data, datasets
import random

**Preprocessing the text dataset**

In [2]:
seed = 966

# Seed every random number generator this notebook relies on, in one place:
# Python's `random` (used for the train/validation split) and PyTorch.
random.seed(seed)
torch.manual_seed(seed)
# Ask cuDNN for deterministic kernels so repeated GPU runs are comparable.
# The flag is harmless when no GPU is present.
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:

# Field objects describing how each column of the dataset is processed.
# Labels use the default LabelField settings; question text is tokenized
# with the 'spacy' tokenizer and lower-cased.
LABEL = data.LabelField()
TEXT = data.Field(lower=True, tokenize='spacy')



In [4]:
# Download (if needed) and load the TREC train/test splits using the fields above.
train, test = datasets.TREC.splits(TEXT, LABEL)

# Carve a validation set out of the training data.
# random.seed() returns None, so seed first and then pass the resulting
# generator state explicitly instead of passing the call's return value.
# (torchtext documents random_state as a value returned by random.getstate().)
random.seed(seed)
train, val = train.split(random_state=random.getstate())

downloading train_5500.label


train_5500.label: 100%|██████████| 336k/336k [00:00<00:00, 361kB/s]


downloading TREC_10.label


TREC_10.label: 100%|██████████| 23.4k/23.4k [00:00<00:00, 101kB/s] 


In [5]:
# Show the attributes of the last training example as a dict (sanity check).
vars(train[-1])

{'text': ['how', 'do', 'you', 'say', '2', 'in', 'latin', '?'], 'label': 'ENTY'}

In [6]:

# Both vocabularies are built from the training split only.
# NOTE(review): min_freq presumably excludes tokens seen fewer than this many
# times from the text vocabulary -- confirm against the torchtext docs.
MIN_TOKEN_FREQ = 2

TEXT.build_vocab(train, min_freq=MIN_TOKEN_FREQ)
LABEL.build_vocab(train)

In [7]:
# Report the size of each vocabulary, then the label -> index mapping.
for field_name, field in (("TEXT", TEXT), ("LABEL", LABEL)):
    print(f"Vocabulary size of {field_name}:", len(field.vocab.stoi))
print(LABEL.vocab.stoi)

Vocabulary size of TEXT: 2643
Vocabulary size of LABEL: 6
defaultdict(None, {'ENTY': 0, 'HUM': 1, 'DESC': 2, 'NUM': 3, 'LOC': 4, 'ABBR': 5})


In [8]:
def _text_length(example):
    """Sort key for bucketing: number of tokens in the example's text."""
    return len(example.text)


# One iterator per split, all with the same batch size and target device.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train, val, test),
    batch_size=64,
    sort_key=_text_length,
    device=device,
)

**Building a Simple CNN Model**

In [9]:
class CNN(nn.Module):
  """Convolutional text classifier.

  Tokens are embedded, passed through several parallel convolutions (one per
  window height in `kernel_sizes`), max-pooled over the sequence dimension,
  concatenated, regularised with dropout and projected to class scores.

  Args:
    vocabulary_size: number of rows in the embedding table.
    embedding_size: dimensionality of each token embedding.
    kernels_number: number of output channels of every convolution.
    kernel_sizes: iterable of window heights (in tokens), one convolution each.
    output_size: number of classes scored by the final linear layer.
    dropout_rate: dropout probability applied to the pooled features.
  """
  def __init__(self, vocabulary_size, embedding_size, 
               kernels_number, kernel_sizes, output_size, dropout_rate):
    super().__init__()
    kernel_sizes = list(kernel_sizes)
    # Shortest sequence every convolution can slide over. Stored as a plain
    # attribute so the state_dict layout stays exactly as before.
    self.min_sequence_length = max(kernel_sizes, default=0)
    self.embedding = nn.Embedding(vocabulary_size, embedding_size)
    # Each kernel spans the full embedding width, so it only slides along the sequence.
    self.convolution_layers = nn.ModuleList([
        nn.Conv2d(in_channels=1, out_channels=kernels_number, kernel_size=(k, embedding_size))
        for k in kernel_sizes
    ])
    self.dropout = nn.Dropout(dropout_rate)
    self.fully_connected = nn.Linear(len(kernel_sizes) * kernels_number, output_size)

  def forward(self, text):
    """Score a batch of token-index sequences.

    Args:
      text: 2-D integer tensor laid out as (sequence_length, batch_size).

    Returns:
      Tensor of shape (batch_size, output_size) with unnormalised class scores.
    """
    text = text.permute(1, 0)                  # -> (batch, seq)
    input_embeddings = self.embedding(text)    # -> (batch, seq, emb)
    # A batch shorter than the tallest kernel would make the convolution raise.
    # Zero-pad the end of the sequence so every kernel fits at least once.
    shortfall = self.min_sequence_length - input_embeddings.shape[1]
    if shortfall > 0:
      input_embeddings = F.pad(input_embeddings, (0, 0, 0, shortfall))
    input_embeddings = input_embeddings.unsqueeze(1)   # add the single input channel
    # The convolution collapses the embedding axis to width 1; drop it.
    conved = [F.relu(convolution_layer(input_embeddings)).squeeze(3) for convolution_layer in self.convolution_layers]
    # Max over all window positions: one value per filter.
    pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
    concat = self.dropout(torch.cat(pooled, dim=1))
    final_output = self.fully_connected(concat)
    return final_output

In [10]:
# Hyperparameters passed to the CNN constructor in the next cell.
input_size = len(TEXT.vocab)  # vocabulary size: number of rows in the embedding table
embedding_size = 100  # dimensionality of each token embedding
kernels_number = 100  # output channels per convolution
kernel_sizes = [2, 3, 4]  # window heights in tokens; one convolution per entry
output_size = len(LABEL.vocab)  # one score per label in the label vocabulary
dropout_rate = 0.3  # dropout probability on the pooled features

In [11]:
# Instantiate the classifier with the hyperparameters defined above.
model = CNN(input_size, embedding_size, kernels_number, kernel_sizes, output_size, dropout_rate)

In [12]:
# Print the layer structure to check the configured sizes.
print(model)

CNN(
  (embedding): Embedding(2643, 100)
  (convolution_layers): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(2, 100), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
  )
  (dropout): Dropout(p=0.3, inplace=False)
  (fully_connected): Linear(in_features=300, out_features=6, bias=True)
)


In [13]:
# Move the model's parameters to the selected device; the returned module is shown as the cell output.
model.to(device)

CNN(
  (embedding): Embedding(2643, 100)
  (convolution_layers): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(2, 100), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
  )
  (dropout): Dropout(p=0.3, inplace=False)
  (fully_connected): Linear(in_features=300, out_features=6, bias=True)
)

**Train and Evaluate Functions**

In [14]:
import torch.optim as optim

# Cross-entropy over the per-class scores, placed on the same device as the model.
criterion = nn.CrossEntropyLoss().to(device)

# Adam over all model parameters, with the optimizer's default settings.
optimizer = optim.Adam(model.parameters())

In [15]:
def accuracy(predictions, actual_label):
    """Return the fraction of examples whose highest-scoring class matches the label.

    Works on whichever device the inputs live on (CPU or GPU).

    Args:
        predictions: 2-D tensor of per-class scores, one row per example.
        actual_label: 1-D tensor of class indices, one per row of `predictions`.

    Returns:
        A one-element float tensor holding the batch accuracy in [0, 1].
    """
    predicted_classes = predictions.argmax(dim=1)
    n_correct = predicted_classes.eq(actual_label).sum()
    batch_size = actual_label.shape[0]
    # Divide by a plain Python int rather than a device-specific tensor so the
    # function does not require CUDA. reshape(1) keeps the one-element result shape.
    return (n_correct.float() / batch_size).reshape(1)

In [16]:
def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over `iterator`.

    Args:
        model: module called on `batch.text` to produce class scores.
        iterator: sized iterable of batches exposing `.text` and `.label`.
        optimizer: optimizer stepped once per batch.
        criterion: loss applied to (scores, `batch.label`).

    Returns:
        Tuple (mean batch loss, mean batch accuracy); both are averaged over
        the number of batches, `len(iterator)`.
    """
    model.train()  # switch to training mode
    running_loss, running_acc = 0, 0

    for batch in iterator:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        scores = model(batch.text)
        batch_loss = criterion(scores, batch.label)
        batch_acc = accuracy(scores, batch.label)

        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        running_acc += batch_acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

In [17]:
def evaluate(model, iterator, criterion):
    """Score `model` on every batch of `iterator` without updating it.

    Args:
        model: module called on `batch.text` to produce class scores.
        iterator: sized iterable of batches exposing `.text` and `.label`.
        criterion: loss applied to (scores, `batch.label`).

    Returns:
        Tuple (mean batch loss, mean batch accuracy); both are averaged over
        the number of batches, `len(iterator)`.
    """
    model.eval()  # switch to evaluation mode
    running_loss, running_acc = 0, 0

    # Gradient tracking is disabled for the whole pass.
    with torch.no_grad():
        for batch in iterator:
            scores = model(batch.text)
            running_loss += criterion(scores, batch.label).item()
            running_acc += accuracy(scores, batch.label).item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

**Training the model**

In [18]:
number_of_epochs = 20

# Best validation accuracy seen so far; -inf guarantees the first epoch is saved.
best_acc = float('-inf')

for epoch in range(number_of_epochs):
    # One pass over the training batches, then a pass over the validation batches.
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    # Checkpoint only on a strict improvement in validation accuracy.
    if valid_acc > best_acc:
        best_acc = valid_acc
        torch.save(model.state_dict(), 'trec.pt')

    epoch_report = (
        f'Epoch {epoch+1} ',
        f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%',
        f'\t Validation Loss: {valid_loss:.3f} |  Validation Acc: {valid_acc*100:.2f}%',
    )
    print(*epoch_report, sep='\n')

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 1 
	Train Loss: 1.308 | Train Acc: 47.92%
	 Validation Loss: 0.956 |  Validation Acc: 64.57%
Epoch 2 
	Train Loss: 0.788 | Train Acc: 71.94%
	 Validation Loss: 0.736 |  Validation Acc: 72.54%
Epoch 3 
	Train Loss: 0.600 | Train Acc: 79.30%
	 Validation Loss: 0.637 |  Validation Acc: 75.61%
Epoch 4 
	Train Loss: 0.465 | Train Acc: 84.93%
	 Validation Loss: 0.615 |  Validation Acc: 75.99%
Epoch 5 
	Train Loss: 0.361 | Train Acc: 89.19%
	 Validation Loss: 0.554 |  Validation Acc: 78.60%
Epoch 6 
	Train Loss: 0.279 | Train Acc: 91.90%
	 Validation Loss: 0.532 |  Validation Acc: 80.71%
Epoch 7 
	Train Loss: 0.210 | Train Acc: 94.19%
	 Validation Loss: 0.514 |  Validation Acc: 80.57%
Epoch 8 
	Train Loss: 0.168 | Train Acc: 95.76%
	 Validation Loss: 0.522 |  Validation Acc: 81.49%
Epoch 9 
	Train Loss: 0.141 | Train Acc: 96.45%
	 Validation Loss: 0.515 |  Validation Acc: 82.44%
Epoch 10 
	Train Loss: 0.113 | Train Acc: 97.46%
	 Validation Loss: 0.509 |  Validation Acc: 82.37%
Epoch 11 

In [19]:
# Restore the weights saved at the best validation accuracy. map_location keeps
# the load working when the checkpoint was written on a different device
# (e.g. saved on a GPU, reloaded on a CPU-only machine).
model.load_state_dict(torch.load('trec.pt', map_location=device))

# Score the restored model once on the held-out test split.
test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.428 | Test Acc: 89.48%
