# Import Packages

In [40]:
from configs import get_config
from data_loader import get_loader

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable

from tqdm import tqdm

# Set Configuration

In [33]:
config = get_config(
    parse=False,
    vocab_size=20000,
    hidden_size=300,
    n_channel_per_window=2,
    label_size=2,
    dropout=0.5)

In [34]:
config

Configurations
{'batch_size': 100,
 'data_dir': PosixPath('/Users/jmin/workspace/fastcampus_chatbot/Day_02/CNN/datasets'),
 'dropout': 0.5,
 'epochs': 20,
 'hidden_size': 300,
 'label_size': 2,
 'log_every_epoch': 1,
 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>,
 'lr': 0.001,
 'n_channel_per_window': 2,
 'optimizer': <class 'torch.optim.sgd.SGD'>,
 'save_dir': PosixPath('/Users/jmin/workspace/fastcampus_chatbot/Day_02/CNN/log'),
 'save_every_epoch': 1,
 'vocab_size': 20000}

# Load training data loader

In [16]:
train_loader = get_loader(batch_size=20, max_size=config.vocab_size, is_train=True, data_dir='./datasets/')

Building Vocabulary 



In [68]:
vocab = train_loader.dataset.fields['text'].vocab

In [71]:
vocab.stoi['안']

30

In [19]:
len(train_loader)

7302

In [22]:
batch = next(iter(train_loader))
batch

<torchtext.data.batch.Batch at 0x106bf9c50>

In [24]:
# [max_seq_len, batch_size]
batch.text

Variable containing:

Columns 0 to 10 
  1991    503   1196    921    355      0      0     63    176     72     72
     5     15    818     28   3288   9169   2541      4     44    215   1658
    42   1350    764     26    114    106      1     10    314     10    130
   110      1     10    106     66    187      1    604     13    328   1658
    39      1    909    189      7      2      1   1051     15    274     95
  1482      1    159      4     53    117      1      2    367    166   1658
     1      1     16     55    403      1      1    814     10     15    241
     1      1     76    123    126      1      1      1    420    414    184
     1      1     24     90   5207      1      1      1      1     17    131
     1      1    706     50     10      1      1      1      1     38      8
     1      1    129     47   5181      1      1      1      1     63    182
     1      1     31    304     32      1      1      1      1      4    807
     1      1      2   4098   5149   

In [38]:
# [batch_size]
batch.label

Variable containing:
 0
 1
 1
 0
 0
 1
 0
 1
 0
 1
 0
 0
 1
 1
 1
 0
 1
 0
 1
 0
[torch.LongTensor of size 20]

## Model

<img src="../images/cnn_text_classification.png", width=600, height=60>

In [35]:
class CNN(nn.Module):
    def __init__(self, config):
        super(CNN, self).__init__()
        self.config = config
        
        self.embedding = nn.Embedding(config.vocab_size, config.hidden_size)
        
        self.conv = nn.ModuleList([
            nn.Conv2d(
                in_channels=1,
                out_channels=config.n_channel_per_window,
                kernel_size=(3, config.hidden_size)),
            
            nn.Conv2d(
                in_channels=1,
                out_channels=config.n_channel_per_window,
                kernel_size=(4, config.hidden_size)),

            nn.Conv2d(
                in_channels=1,
                out_channels=config.n_channel_per_window,
                kernel_size=(5, config.hidden_size))
        ])
        
        n_total_channels = len(self.conv) * config.n_channel_per_window
        
        self.dropout = nn.Dropout(config.dropout)
        self.fc = nn.Linear(n_total_channels, config.label_size)
        
    def forward(self, x):
        """
        Args:
            x: [batch_size, max_seq_len]
        Return:
            logit: [batch_size, label_size]
        """
        
        # [batch_size, max_seq_len, hidden_size]
        x = self.embedding(x)
        
        # [batch_size, 1, max_seq_len, hidden_size]
        x = x.unsqueeze(1)
        
        # Apply Convolution filter followed by Max-pool
        out_list = []
        for conv in self.conv:
            
            ########## Convolution #########
            
            # [batch_size, n_kernels, _, 1]
            x_ = F.relu(conv(x))
            
            # [batch_size, n_kernels, _]
            x_ = x_.squeeze(3)
            
            ########## Max-pool #########
            
            # [batch_size, n_kernels, 1]
            x_ = F.max_pool1d(x_, x_.size(2))
            
            # [batch_size, n_kernels]
            x_ = x_.squeeze(2)
            
            out_list.append(x_)
        
        # [batch_size, 3 x n_kernels]
        out = torch.cat(out_list, 1)
        
        ######## Dropout ########
        out = self.dropout(out)
        
        # [batch_size, label_size]
        logit = self.fc(out)
        
        return logit

In [36]:
model = CNN(config)

In [37]:
model

CNN (
  (embedding): Embedding(20000, 300)
  (conv): ModuleList (
    (0): Conv2d(1, 2, kernel_size=(3, 300), stride=(1, 1))
    (1): Conv2d(1, 2, kernel_size=(4, 300), stride=(1, 1))
    (2): Conv2d(1, 2, kernel_size=(5, 300), stride=(1, 1))
  )
  (dropout): Dropout (p = 0.5)
  (fc): Linear (6 -> 2)
)

# Build loss function

In [44]:
loss_fn = config.loss_fn()

loss_fn

# Build Optimizer 

In [50]:
optimizer = config.optimizer(model.parameters(), config.lr)
optimizer

<torch.optim.sgd.SGD at 0x11458ad30>

In [62]:
for epoch in range(2): # n_epochs
    print(f'Epoch: {epoch}')
    for batch_i, batch in enumerate(tqdm(train_loader)):
        # text: [max_seq_len, batch_size]
        # label: [batch_size]
        text, label = batch.text, batch.label

        # [batch_size, max_seq_len]
        text.data.t_()
        
        # [batch_size, 2]
        logit = model(text)
        
        # Calculate loss
        batch_loss = loss_fn(logit, label)
        batch_loss.backward()
        optimizer.step()
        
        if (batch_i + 1) % 50 == 0:
            tqdm.write(f'batch loss: {batch_loss.data}')


  0%|          | 0/7302 [00:00<?, ?it/s]

Epoch: 0


[A
  0%|          | 1/7302 [00:00<46:27,  2.62it/s][A
  0%|          | 3/7302 [00:00<35:12,  3.46it/s][A
  0%|          | 5/7302 [00:00<28:13,  4.31it/s][A
  0%|          | 7/7302 [00:00<23:11,  5.24it/s][A
  0%|          | 8/7302 [00:01<21:16,  5.72it/s][A
  0%|          | 9/7302 [00:01<18:53,  6.44it/s][A
  0%|          | 11/7302 [00:01<16:21,  7.43it/s][A
  0%|          | 12/7302 [00:01<15:33,  7.81it/s][A
  0%|          | 14/7302 [00:01<14:03,  8.64it/s][A
  0%|          | 16/7302 [00:01<13:04,  9.28it/s][A
  0%|          | 18/7302 [00:01<12:36,  9.63it/s][A
  0%|          | 20/7302 [00:02<12:39,  9.59it/s][A
  0%|          | 22/7302 [00:02<12:19,  9.84it/s][A
  0%|          | 24/7302 [00:02<12:53,  9.41it/s][A
  1%|          | 52/7302 [00:05<11:02, 10.94it/s]

batch loss: 
 20.5067
[torch.FloatTensor of size 1]



  1%|▏         | 101/7302 [00:09<12:42,  9.44it/s]

batch loss: 
 0.6732
[torch.FloatTensor of size 1]



  2%|▏         | 151/7302 [00:14<09:37, 12.38it/s]

batch loss: 
 0.6825
[torch.FloatTensor of size 1]



  3%|▎         | 201/7302 [00:19<11:25, 10.37it/s]

batch loss: 
 0.8007
[torch.FloatTensor of size 1]



  3%|▎         | 250/7302 [00:24<11:38, 10.09it/s]

batch loss: 
 0.6258
[torch.FloatTensor of size 1]



  4%|▍         | 301/7302 [00:29<12:12,  9.56it/s]

batch loss: 
 0.6719
[torch.FloatTensor of size 1]



  5%|▍         | 351/7302 [00:34<14:44,  7.86it/s]

batch loss: 
 0.6882
[torch.FloatTensor of size 1]



  5%|▌         | 401/7302 [00:40<12:21,  9.31it/s]

batch loss: 
 0.6841
[torch.FloatTensor of size 1]



  6%|▌         | 451/7302 [00:46<13:30,  8.45it/s]

batch loss: 
 0.7267
[torch.FloatTensor of size 1]



  7%|▋         | 501/7302 [00:51<12:24,  9.14it/s]

batch loss: 
 0.7666
[torch.FloatTensor of size 1]



  8%|▊         | 551/7302 [00:55<10:34, 10.63it/s]

batch loss: 
 0.6966
[torch.FloatTensor of size 1]



  8%|▊         | 601/7302 [01:00<09:57, 11.22it/s]

batch loss: 
 0.6733
[torch.FloatTensor of size 1]



  9%|▉         | 651/7302 [01:05<09:58, 11.11it/s]

batch loss: 
 2.6338
[torch.FloatTensor of size 1]



 10%|▉         | 701/7302 [01:09<09:45, 11.28it/s]

batch loss: 
 0.6943
[torch.FloatTensor of size 1]



 10%|█         | 753/7302 [01:14<08:23, 13.00it/s]

batch loss: 
 0.6882
[torch.FloatTensor of size 1]



 11%|█         | 801/7302 [01:19<10:18, 10.51it/s]

batch loss: 
 0.6577
[torch.FloatTensor of size 1]



 11%|█▏        | 839/7302 [01:22<12:51,  8.38it/s]

KeyboardInterrupt: 