## Step 1: Dataset

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt

from torchtext import datasets
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import GloVe
# from tensorboardX import SummaryWriter

use_cuda = True
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

np.random.seed(42)
torch.manual_seed(42)

# reference: https://zhuanlan.zhihu.com/p/562565880
train_dataset, test_dataset = datasets.IMDB(root='./IMDB_data', split=('train', 'test'))

################################################################################
The 'datapipes', 'dataloader2' modules are deprecated and will be removed in a
future torchdata release! Please see https://github.com/pytorch/data/issues/1196
to learn more and leave feedback.
################################################################################



In [3]:
from torch.nn.utils.rnn import pad_sequence

tokenizer = get_tokenizer('basic_english')
tokenizer('UIUC Siebel School.')

GLOVE_DIM = 100
GLOVE = GloVe(name='6B', dim=GLOVE_DIM)

def collate_fn(batch):
    labels, texts = list(), list()
    for label, text in batch:
        # print('label', label)
        label = label - 1
        assert label in [0, 1]
        
        tokens = tokenizer(text)
        emb = list()
        for token in tokens:
            if token in GLOVE.stoi:
                emb.append(GLOVE[token])
        
        labels.append(label)
        # for e in emb:
        #     print(e.shape)
        texts.append(torch.stack(emb))
        
    labels = torch.tensor(labels)
    # print(texts)
    # print(f'len(texts)={len(texts)}')
    texts = pad_sequence(texts, batch_first=True)
    return texts, labels

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

## Step 2: IBP Modified Model

In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc0 = nn.Linear(100, 100)
        self.fc1 = nn.Linear(100, 100)
        self.fc2 = nn.Linear(100, 100)
        self.output = nn.Linear(100, 2)
        self.softmax = nn.Softmax(dim=1)
    
    def _prop_affine(self, l_x, u_x, W, b):
        sum = (u_x + l_x) / 2
        dif = (u_x - l_x) / 2
        
        pos = torch.matmul(sum, W.T) + b.unsqueeze(0)
        neg = torch.matmul(dif, torch.abs(W).T)
        
        l_x = pos - neg
        u_x = pos + neg
        return l_x, u_x
    
    def _prop_average(self, l_x, u_x, dim=2):
        lower = torch.mean(l_x, dim=dim)
        upper = torch.mean(u_x, dim=dim)
        return lower, upper
    
    def _prop_relu(self, l_x, u_x):
        l_x = torch.max(l_x, torch.zeros_like(l_x))
        u_x = torch.max(u_x, torch.zeros_like(u_x))
        return l_x, u_x
    
    def _prop_softmax(self, l_x, u_x, dim=1):
        upper_1 = torch.exp(u_x)
        upper_2 = torch.sum(torch.exp(l_x), dim=dim, keepdim=True) - torch.exp(l_x) + torch.exp(u_x)
        upper = upper_1 / upper_2
        
        lower_1 = torch.exp(l_x)
        lower_2 = torch.sum(torch.exp(u_x), dim=dim, keepdim=True) - torch.exp(u_x) + torch.exp(l_x)
        lower = lower_1 / lower_2
        return lower, upper
    
    def forward(self, x, l_x, u_x):
        # print(1, l_x.shape)
        l_x, u_x = self._prop_affine(l_x, u_x, self.fc0.weight, self.fc0.bias)
        # print(2, l_x.shape)
        l_x, u_x = self._prop_relu(l_x, u_x)
        l_x, u_x = self._prop_average(l_x, u_x, dim=1)
        # print(3, l_x.shape)
        
        l_x, u_x = self._prop_affine(l_x, u_x, self.fc1.weight, self.fc1.bias)
        l_x, u_x = self._prop_relu(l_x, u_x)
        l_x, u_x = self._prop_affine(l_x, u_x, self.fc2.weight, self.fc2.bias)
        l_x, u_x = self._prop_relu(l_x, u_x)
        l_y, u_y = self._prop_affine(l_x, u_x, self.output.weight, self.output.bias)
        l_y, u_y = self._prop_softmax(l_y, u_y, dim=1)
        
        x = self.fc0(x)
        x = F.relu(x)
        x = torch.mean(x, dim=1)
        x1 = F.relu(self.fc1(x))
        x2 = F.relu(self.fc2(x1))
        y = self.softmax(self.output(x2))
        
        return y, l_y, u_y

model = Net()

## Step 3: Training and testing

In [7]:
lr = 0.1

def criterion(x, l_x, u_x, y, kappa):
    cel = nn.CrossEntropyLoss()
    l_fit = cel(x, y)
    
    z = u_x.clone()
    z[:, y] = l_x[:, y]
    l_spec = cel(z, y)
    return kappa*l_fit + (1-kappa)*l_spec

optimizer = optim.SGD(model.parameters(), lr=lr)

In [8]:
import time

num_epochs = 10
kappa = 0.9
e_train = 0.01
warmup = 3
max_e_epoch = 8

model = model.to(device)

def test(model, eps):
    model.eval()
    num_correct = 0
    num_total = 0
    
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            yhat, _, _ = model(x, x-eps, x+eps)
            # print('yhat, y', yhat, y)
            _, yhat_label = torch.max(yhat, 1)
            
            num_total += y.shape[0]
            num_correct += (y == yhat_label).sum().item()
    print(f"Accuracy: {num_correct / num_total * 100}%")

start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    sum_loss = 0
    
    now_e_epoch = (
        0 if epoch <= warmup
        else epoch - warmup if warmup < epoch <= max_e_epoch
        else max_e_epoch - warmup
    )
    gradual_epochs = max_e_epoch - warmup
    eps = e_train * (now_e_epoch / gradual_epochs)
    kap = 1 * (1 - now_e_epoch / gradual_epochs) + kappa * (now_e_epoch / gradual_epochs)
    print(eps, kap)
    
    for batch_idx, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        # print(0, x.shape, y.shape)
        
        optimizer.zero_grad()
        
        yhat, l_yhat, u_yhat = model(x, x-eps, x+eps)
        loss = criterion(yhat, l_yhat, u_yhat, y, kap)
        
        loss.backward()
        optimizer.step()
        
        sum_loss += loss.item()
    print(f"Epoch {epoch}, loss {sum_loss}")
    test(model, eps)
torch.save(model, "model3_2.pth")
end_time = time.time()

print(end_time - start_time)

0.0 1.0


Epoch 0, loss 245.58007702231407
Accuracy: 50.0%
0.0 1.0
Epoch 1, loss 254.36994701623917
Accuracy: 50.0%
0.0 1.0
Epoch 2, loss 255.591561794281
Accuracy: 50.0%
0.0 1.0
Epoch 3, loss 255.63893893361092
Accuracy: 50.0%
0.002 0.9800000000000001
Epoch 4, loss 248.1787613928318
Accuracy: 50.0%
0.004 0.96
Epoch 5, loss 247.3724873661995
Accuracy: 50.0%
0.006 0.9400000000000001
Epoch 6, loss 243.39388191699982
Accuracy: 50.0%
0.008 0.92
Epoch 7, loss 243.62676694989204
Accuracy: 50.0%
0.01 0.9
Epoch 8, loss 241.41047129034996
Accuracy: 50.0%
0.01 0.9
Epoch 9, loss 242.0322293639183
Accuracy: 50.0%
432.3467676639557


In [18]:
model = torch.load('model3_2.pth')

def interval_eval(model, eps):
    model.eval()
    num_correct = 0
    num_total = 0
    
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            _, l_y, u_y = model(x, x-eps, x+eps)
            num_total += y.shape[0]
            u_y[torch.arange(u_y.shape[0]), y] = l_y[torch.arange(l_y.shape[0]), y] - 100
            # print(y.shape, l_y.shape, l_y[:, y].shape, torch.max(u_y, dim=1)[0].shape, (l_y[:, y] > torch.max(u_y, dim=1)[0]).sum().item())
            num_correct += (l_y[torch.arange(l_y.shape[0]), y] > torch.max(u_y, dim=1)[0]).sum().item()
    print(f"Accuracy: {num_correct / num_total * 100}%")
interval_eval(model, 0.01)

Accuracy: 50.0%
