In [1]:
!pip install torch-summary
!pip install line_profiler



In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from tqdm.notebook import tqdm
import math
from copy import deepcopy

In [3]:
char_len = 1014
batch_size = 128
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’’’/\|_@#$%ˆ&*˜‘+-=<>()[]{}"
char_map = {char:idx+1 for idx, char in enumerate(alphabet)}

def generate_embedding_matrix(size=len(alphabet)):
    return torch.vstack([torch.zeros(size), torch.eye(size)])

In [5]:
class CharCNN(nn.Module):
    def __init__(self, input_dim, enc_dim, output_dim, net_type="small"):
        super().__init__()
        
        self.input_dim = input_dim
        self.enc_dim = enc_dim
        
        if net_type == "small":
            self.latent_size = 1024
            self.cnn_features = 256
        else:
            self.latent_size = 2048
            self.cnn_features = 1024
        
        self.embed = nn.Embedding.from_pretrained(generate_embedding_matrix(), freeze=True)
        self.cnn, w_size = self.generate_cnn([
            # Kernel Size, Pooling Kernel Size
            [7, 3], # L1
            [7, 3], # L2
            [3, 0], # L3
            [3, 0], # L4
            [3, 0], # L5
            [3, 3]  # L6
        ])
        self.fc = nn.Sequential(
            nn.Linear(w_size * self.cnn_features, self.latent_size),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(self.latent_size, self.latent_size),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(self.latent_size, output_dim)
        )
    
    def generate_cnn(self, layer_params):
        cnn_layers = []
        feature_w = self.input_dim
        for idx, (kernel_size, pool_size) in enumerate(layer_params):
            inp_size = self.enc_dim if idx == 0 else self.cnn_features
            feature_w = feature_w - kernel_size + 1
            cnn_layers.append(nn.Conv1d(inp_size, self.cnn_features, kernel_size))
            cnn_layers.append(nn.ReLU())
            if pool_size != 0:
                feature_w = feature_w // pool_size
                cnn_layers.append(nn.MaxPool1d(pool_size))
        return nn.Sequential(*cnn_layers), feature_w
    
    def init_weights(self, m):
        if isinstance(m, nn.Linear) or isinstance(m, nn.Conv1d):
            m.weight.data.normal_(0, 0.05)
    
    def forward(self, x):
        embeddings = self.embed(x).permute(0, 2, 1)
        cnn_out = self.cnn(embeddings)
        fc_in = torch.flatten(cnn_out, start_dim=1)
        fc_out = self.fc(fc_in)
        return fc_out

In [6]:
class YelpPolarity(Dataset):
    def __init__(self, csv_path):
        self.data_frame = pd.read_csv(csv_path)
        self.weights = [0.5 for _ in range(len(self.data_frame))]
        self.embed = nn.Embedding.from_pretrained(generate_embedding_matrix(), freeze=True)
    
    def __len__(self):
        return len(self.data_frame)
    
    def __getitem__(self, idx):
        data_instance = self.data_frame.iloc[idx]
        inp_string = data_instance[1]
        inp_string = inp_string[:min(char_len, len(inp_string))]
        inp_string = inp_string.lower()
            
#         X_lis = [len(alphabet)] * char_len
        X_lis = [0] * char_len
        X_lis[:len(inp_string)] = [char_map.get(char, 0) for char in reversed(inp_string)]
        
        X = torch.LongTensor(X_lis)
#         X = self.embed(torch.LongTensor(X_lis))
        y = int(data_instance[0] - 1)
        return X, y

In [7]:
# train_dset = YelpPolarity('../input/yelp-review-polarity/yelp_review_polarity_csv/train.csv', 50000)
train_dset = YelpPolarity("../input/yelp-review-dataset/yelp_review_polarity_csv/train.csv")

# train_sampler = torch.utils.data.WeightedRandomSampler(train_dset.weights, 10000, False)
train_loader = torch.utils.data.DataLoader(train_dset, batch_size=batch_size, num_workers=2, pin_memory=True, shuffle=True)

# valid_sampler = torch.utils.data.WeightedRandomSampler(train_dset.weights, 3000, False)
# valid_loader = torch.utils.data.DataLoader(train_dset, batch_size=batch_size, num_workers=2, sampler=valid_sampler, pin_memory=True)

test_dset = YelpPolarity('../input/yelp-review-dataset/yelp_review_polarity_csv/test.csv')
test_loader = torch.utils.data.DataLoader(test_dset, batch_size=batch_size, shuffle=False, num_workers=2)

In [10]:
net_type = "small"
net = CharCNN(char_len, 70, 2, net_type=net_type).to(device)
# net = CharacterLevelCNN(n_classes=2, input_dim=70).to(device)
net.apply(lambda m: net.init_weights(m))
summary(net, torch.zeros((2, char_len)).long())
# summary(net, torch.zeros((2, 1014, 70)))

# criterion = nn.BCEWithLogitsLoss()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(net.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5 ** (1 / 3.))

Layer (type:depth-idx)                   Output Shape              Param #
├─Embedding: 1-1                         [-1, 1014, 70]            (4,970)
├─Sequential: 1-2                        [-1, 256, 34]             --
|    └─Conv1d: 2-1                       [-1, 256, 1008]           125,696
|    └─ReLU: 2-2                         [-1, 256, 1008]           --
|    └─MaxPool1d: 2-3                    [-1, 256, 336]            --
|    └─Conv1d: 2-4                       [-1, 256, 330]            459,008
|    └─ReLU: 2-5                         [-1, 256, 330]            --
|    └─MaxPool1d: 2-6                    [-1, 256, 110]            --
|    └─Conv1d: 2-7                       [-1, 256, 108]            196,864
|    └─ReLU: 2-8                         [-1, 256, 108]            --
|    └─Conv1d: 2-9                       [-1, 256, 106]            196,864
|    └─ReLU: 2-10                        [-1, 256, 106]            --
|    └─Conv1d: 2-11                      [-1, 256, 104]     

In [11]:
for epoch in tqdm(range(30)):   
    train_loss = 0.0
    train_acc = 0.0
    num_train = 0
    net.train()
    for indices, labels in tqdm(train_loader, leave=False):
        indices = indices.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = net(indices)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * len(labels)
        accuracy = (torch.argmax(outputs, axis=1) == labels.squeeze()).sum().item()
        train_acc += accuracy
        num_train += len(labels)
#         torch.nn.utils.clip_grad_norm_(filter(lambda p: p.requires_grad, net.parameters()), 1.0)
    
    net.eval()
    test_loss = 0.0
    test_acc = 0.0
    num_test = 0
    for indices, labels in tqdm(test_loader, leave=False):
        indices = indices.to(device)
        labels = labels.to(device)
        
        outputs = net(indices)
        loss = nn.functional.cross_entropy(outputs, labels.squeeze())
        
        test_loss += loss.item() * len(labels)
#         test_acc += (torch.round(outputs) == labels).sum().item()
        test_acc += (torch.argmax(outputs, axis=1) == labels.squeeze()).sum().item()
        num_test += len(labels)
    
    print(f"Training: {train_loss / num_train:.4f}, {train_acc / num_train:.4f} --- Testing: {test_loss / num_test:.4f}, {test_acc / num_test:.4f}", flush=True)
    
    if (test_acc / num_test) >= 0.94 or (train_acc / num_train) >= 0.94:
        print("Load onto hugging face")
        best_model = deepcopy(net)
        break
#     if epoch % 3 == 2:
    scheduler.step()
    print(f"LR Updated to {optimizer.param_groups[0]['lr']:.7f}", flush=True)

print('Finished Training')
if best_model is None:
    best_model = deepcopy(net)

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/4375 [00:00<?, ?it/s]

  0%|          | 0/297 [00:00<?, ?it/s]

Training: 0.4509, 0.7657 --- Testing: 0.2525, 0.8910
LR Updated to 0.0079370


  0%|          | 0/4375 [00:00<?, ?it/s]

  0%|          | 0/297 [00:00<?, ?it/s]

Training: 0.2174, 0.9112 --- Testing: 0.2161, 0.9102
LR Updated to 0.0062996


  0%|          | 0/4375 [00:00<?, ?it/s]

  0%|          | 0/297 [00:00<?, ?it/s]

Training: 0.1822, 0.9274 --- Testing: 0.1760, 0.9298
LR Updated to 0.0050000


  0%|          | 0/4375 [00:00<?, ?it/s]

  0%|          | 0/297 [00:00<?, ?it/s]

Training: 0.1621, 0.9358 --- Testing: 0.1637, 0.9364
LR Updated to 0.0039685


  0%|          | 0/4375 [00:00<?, ?it/s]

  0%|          | 0/297 [00:00<?, ?it/s]

Training: 0.1464, 0.9430 --- Testing: 0.1628, 0.9376
Load onto hugging face
Finished Training


In [13]:
torch.save(best_model.state_dict(), "./char-cnn-model.pth")