# Train_Minimal & Apex

In [1]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

#from tqdm import tqdm
from tqdm import tqdm_notebook as tqdm

# <font color="Red">Import Apex</font>

In [2]:
# Apex mixed-precision tooling used by the cells below.
from apex.parallel import DistributedDataParallel as DDP
# NOTE(review): this star import is presumably what brings network_to_half and
# FP16_Optimizer into scope for later cells — consider importing them by name.
from apex.fp16_utils import *
from apex import amp
import torch.backends.cudnn as cudnn
# Turn on cuDNN benchmark mode for the whole session.
cudnn.benchmark = True



# DataLoader(CIFAR10)

In [3]:
# Convert images to tensors, then normalize every RGB channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: shuffled mini-batches of 100 images, loaded by 2 workers.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=100, shuffle=True, num_workers=2)

# Test split: same batch size, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

# Class names; their count sizes the final classifier layer in the model cell.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


# Model

In [4]:
# Start from the pretrained VGG11-BN and adapt its classifier head to CIFAR-10.
model = models.vgg11_bn(pretrained=True)
# The replacement first classifier layer below expects 512 input features, i.e.
# a 1x1 spatial map out of the conv stack for 32x32 images. torchvision
# releases whose VGG has an adaptive average pool resize that map to 7x7
# (25088 features), which would not fit Linear(512, ...); pin the pool to 1x1.
# On releases without `avgpool` in the forward pass this attribute is unused.
model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
model.classifier[0] = nn.Linear(512, 4096)
# Swap the last layer for one with a single output per CIFAR-10 class.
model.classifier[6] = nn.Linear(model.classifier[6].in_features, len(classes))
model = model.cuda()

# <font color="Red">Network to half</font>

In [5]:
# Convert the network to half precision and wrap it in DataParallel.
# Guarded so that re-running this cell does not wrap an already-wrapped model
# a second time.
if not isinstance(model, torch.nn.DataParallel):
    model = network_to_half(model)
    model = torch.nn.DataParallel(model)

# Optimizer

In [6]:
# Base optimizer: Adam with learning rate 1e-3 over all model parameters.
# SGD alternative kept for reference:
#   optim.SGD(model.parameters(), lr=0.01, momentum=0.9,
#             weight_decay=5e-4, nesterov=True)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# <font color="Red">FP16 Optimizer</font>

In [7]:
# Wrap the base optimizer with Apex's FP16_Optimizer using dynamic loss scaling.
# Guarded so that re-running this cell does not wrap an FP16_Optimizer inside
# another FP16_Optimizer.
if not isinstance(optimizer, FP16_Optimizer):
    optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
# Fixed-scale alternative: FP16_Optimizer(optimizer, static_loss_scale=128.0)

FP16_Optimizer processing param group 0:
FP16_Optimizer received torch.cuda.HalfTensor with torch.Size([64, 3, 3, 3])
FP16_Optimizer received torch.cuda.HalfTensor with torch.Size([64])
FP16_Optimizer received torch.cuda.FloatTensor with torch.Size([64])
FP16_Optimizer received torch.cuda.FloatTensor with torch.Size([64])
FP16_Optimizer received torch.cuda.HalfTensor with torch.Size([128, 64, 3, 3])
FP16_Optimizer received torch.cuda.HalfTensor with torch.Size([128])
FP16_Optimizer received torch.cuda.FloatTensor with torch.Size([128])
FP16_Optimizer received torch.cuda.FloatTensor with torch.Size([128])
FP16_Optimizer received torch.cuda.HalfTensor with torch.Size([256, 128, 3, 3])
FP16_Optimizer received torch.cuda.HalfTensor with torch.Size([256])
FP16_Optimizer received torch.cuda.FloatTensor with torch.Size([256])
FP16_Optimizer received torch.cuda.FloatTensor with torch.Size([256])
FP16_Optimizer received torch.cuda.HalfTensor with torch.Size([256, 256, 3, 3])
FP16_Optimizer rece

# Loss

In [8]:
# Loss shared by the training and validation loops; called as criterion(outputs, labels).
criterion = nn.CrossEntropyLoss()

# Train 1 epoch function 

In [9]:
def train_loop(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader`` with a plain backward pass.

    Args:
        model: Network to train; put into training mode via ``model.train()``.
        loader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``: the sample-weighted mean loss
        and the percentage of correctly classified samples over the epoch.
    """
    model.train()
    progress = tqdm(total=len(loader), leave=False)
    loss_sum = 0
    correct = 0
    seen = 0
    for inputs, labels in loader:
        # Move the batch to the GPU.
        inputs = inputs.cuda()
        labels = labels.cuda()

        # Forward pass and loss.
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Clear stale gradients, backpropagate, update parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest output per sample.
        _, pred = outputs.data.max(1, keepdim=True)
        hits = pred.eq(labels.data.view_as(pred)).sum()

        # Running totals, weighted by batch size so the mean is per sample.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        correct += hits.item()
        seen += batch_size

        progress.set_description(
            "Loss: {:.4f}, Accuracy: {:.2f}".format(
                loss_sum / seen, correct / seen * 100),
            refresh=True)
        progress.update()
    progress.close()
    return loss_sum / seen, correct / seen * 100

# <font color="Red">Train 1 epoch function for Apex</font>

In [10]:
# Shared amp handle. Reuse one left behind by an earlier execution of this cell
# so that re-defining the function does not trigger another amp.init().
_AMP_HANDLE = globals().get("_AMP_HANDLE")


def _get_amp_handle():
    """Return the shared amp handle, calling ``amp.init`` only on first use."""
    global _AMP_HANDLE
    if _AMP_HANDLE is None:
        _AMP_HANDLE = amp.init(enabled=True)
    return _AMP_HANDLE


def train_loop_apex(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader`` using Apex mixed precision.

    Inputs are cast to half precision before the forward pass, and the backward
    pass is delegated to ``optimizer.backward`` inside an amp ``scale_loss``
    context.

    Args:
        model: Network to train; put into training mode via ``model.train()``.
        loader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer exposing ``zero_grad()``, ``backward(loss)`` and
            ``step()`` (the notebook passes an ``FP16_Optimizer``).

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``: the sample-weighted mean loss
        and the percentage of correctly classified samples over the epoch.
    """
    # amp is initialised once and reused; this function is called once per
    # epoch and previously re-ran amp.init() on every call.
    amp_handle = _get_amp_handle()
    model.train()
    bar = tqdm(total=len(loader), leave=False)
    total_loss, total_acc, total_num = 0, 0, 0
    for inputs, labels in loader:
        # Prepare data: move to GPU, cast only the inputs to half precision.
        inputs = inputs.cuda().half()
        labels = labels.cuda()
        # Forward
        outputs = model(inputs)
        # Calculate loss
        loss = criterion(outputs, labels)
        # Reset gradients
        optimizer.zero_grad()
        # Backward
        # NOTE(review): the loss is scaled by amp here and then handed to
        # optimizer.backward; the notebook's optimizer is built with
        # dynamic_loss_scale=True, so the loss looks scaled twice — confirm
        # whether both mechanisms are intended.
        with amp_handle.scale_loss(loss, optimizer) as scaled_loss:
            optimizer.backward(scaled_loss)
        # Update parameters
        optimizer.step()
        # Accuracy: predicted class is the index of the largest output.
        pred = outputs.detach().argmax(dim=1)
        acc = (pred == labels).sum()
        # Running totals, weighted by batch size so the mean is per sample.
        total_loss += loss.item() * labels.size(0)
        total_acc += acc.item()
        total_num += labels.size(0)
        bar.set_description("Loss: {:.4f}, Accuracy: {:.2f}".format(
            total_loss / total_num, total_acc / total_num * 100), refresh=True)
        bar.update()
    bar.close()
    return total_loss / total_num, total_acc / total_num * 100

# Validation function

In [11]:
def valid_loop(model, loader, criterion):
    """Evaluate ``model`` on every batch of ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; put into eval mode via ``model.eval()``.
        loader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``: the sample-weighted mean loss
        and the percentage of correctly classified samples.
    """
    model.eval()
    total_loss, total_acc, total_num = 0, 0, 0
    # Size the bar from the loader actually being evaluated; this previously
    # read the notebook-global `test_loader` and ignored the `loader` argument.
    bar = tqdm(total=len(loader), leave=False)
    with torch.no_grad():
        for inputs, labels in loader:
            # Prepare data
            inputs = inputs.cuda()
            labels = labels.cuda()
            # Forward
            outputs = model(inputs)
            # Calculate loss
            loss = criterion(outputs, labels)
            # Accuracy: predicted class is the index of the largest output.
            pred = outputs.argmax(dim=1)
            acc = (pred == labels).sum()
            # Running totals, weighted by batch size so the mean is per sample.
            total_loss += loss.item() * labels.size(0)
            total_acc += acc.item()
            total_num += labels.size(0)

            bar.set_description("Loss: {:.4f}, Accuracy: {:.2f}".format(
                total_loss / total_num, total_acc / total_num * 100), refresh=True)
            bar.update()
    bar.close()
    return total_loss / total_num, total_acc / total_num * 100

In [12]:
# Number of full passes over the training set.
max_epochs = 5
for e in range(max_epochs):
    # One mixed-precision training epoch, then an evaluation pass on the test split.
    train_loss, train_acc = train_loop_apex(
        model, train_loader, criterion, optimizer)
    test_loss, test_acc = valid_loop(model, test_loader, criterion)
    # Epochs are reported 1-based.
    print(
        'Epoch: {}, Train Loss: {:.4f}, Train Accuracy: {:.2f}, Test Loss: {:.4f}, Test Accuracy: {:.2f}'.format(
            e + 1, train_loss, train_acc, test_loss, test_acc))


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

OVERFLOW! Skipping step. Attempted loss scale: 4294967296, reducing to 2147483648.0
OVERFLOW! Skipping step. Attempted loss scale: 2147483648.0, reducing to 1073741824.0
OVERFLOW! Skipping step. Attempted loss scale: 1073741824.0, reducing to 536870912.0
OVERFLOW! Skipping step. Attempted loss scale: 536870912.0, reducing to 268435456.0
OVERFLOW! Skipping step. Attempted loss scale: 268435456.0, reducing to 134217728.0
OVERFLOW! Skipping step. Attempted loss scale: 134217728.0, reducing to 67108864.0


HBox(children=(IntProgress(value=0), HTML(value='')))

Epoch: 1, Train Loss: 0.8808, Train Accuracy: 70.68, Test Loss: 0.6714, Test Accuracy: 78.27


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

OVERFLOW! Skipping step. Attempted loss scale: 67108864.0, reducing to 33554432.0
OVERFLOW! Skipping step. Attempted loss scale: 33554432.0, reducing to 16777216.0
OVERFLOW! Skipping step. Attempted loss scale: 16777216.0, reducing to 8388608.0
OVERFLOW! Skipping step. Attempted loss scale: 8388608.0, reducing to 4194304.0
OVERFLOW! Skipping step. Attempted loss scale: 4194304.0, reducing to 2097152.0


HBox(children=(IntProgress(value=0), HTML(value='')))

Epoch: 2, Train Loss: 0.4899, Train Accuracy: 84.42, Test Loss: 0.5312, Test Accuracy: 83.23


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

OVERFLOW! Skipping step. Attempted loss scale: 2097152.0, reducing to 1048576.0
OVERFLOW! Skipping step. Attempted loss scale: 1048576.0, reducing to 524288.0
OVERFLOW! Skipping step. Attempted loss scale: 524288.0, reducing to 262144.0
OVERFLOW! Skipping step. Attempted loss scale: 262144.0, reducing to 131072.0


HBox(children=(IntProgress(value=0), HTML(value='')))

Epoch: 3, Train Loss: 0.3451, Train Accuracy: 88.89, Test Loss: 0.5676, Test Accuracy: 82.42


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

OVERFLOW! Skipping step. Attempted loss scale: 131072.0, reducing to 65536.0
OVERFLOW! Skipping step. Attempted loss scale: 65536.0, reducing to 32768.0
OVERFLOW! Skipping step. Attempted loss scale: 32768.0, reducing to 16384.0
OVERFLOW! Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0


HBox(children=(IntProgress(value=0), HTML(value='')))

Epoch: 4, Train Loss: 0.2523, Train Accuracy: 92.01, Test Loss: 0.5921, Test Accuracy: 82.78


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

OVERFLOW! Skipping step. Attempted loss scale: 8192.0, reducing to 4096.0
OVERFLOW! Skipping step. Attempted loss scale: 4096.0, reducing to 2048.0
OVERFLOW! Skipping step. Attempted loss scale: 2048.0, reducing to 1024.0


HBox(children=(IntProgress(value=0), HTML(value='')))

Epoch: 5, Train Loss: 0.1985, Train Accuracy: 93.75, Test Loss: 0.5183, Test Accuracy: 85.08


In [13]:
# Make sure the target directory exists first: without it torch.save raises and
# the freshly trained weights are lost.
os.makedirs('params', exist_ok=True)
# NOTE(review): `model` is the wrapped network built in the earlier cells, so
# the saved keys presumably carry the wrappers' prefixes — confirm before
# loading into an unwrapped model.
torch.save(model.state_dict(), 'params/model.pth')