In [1]:
import torch
import torchvision
from torchvision.utils import save_image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import random
import numpy as np
import pandas as pd
from torchvision import transforms, datasets
from torch.utils.data import random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

In [2]:
img_rows, img_cols = 224, 224
batch_size = 16
random_state = 51
torch.manual_seed(random_state)

<torch._C.Generator at 0x7f4661d16ad0>

In [3]:
%cd ..

/kaggle


In [4]:
transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.2, 0.2, 0.2))])

PATH = './input/state-farm-distracted-driver-detection/imgs/train'

dataset = datasets.ImageFolder(root=PATH,transform=transform)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
test_dataset,valid_dataset = random_split(val_dataset, [int(0.5*val_size), val_size - int(0.5*val_size)])

tag2class = {'0': 'normal driving','1': 'texting - right','2': 'talking on the phone - right','3': 'texting - left','4': 'talking on the phone - left','5': 'operating the radio','6': 'drinking','7': 'reaching behind','8': 'hair and makeup','9': 'talking to passenger'}

train_dataloader = DataLoader(
            train_dataset,  # The training samples.
            sampler = RandomSampler(train_dataset), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )
validation_dataloader = DataLoader(
            valid_dataset, # The validation samples.
            sampler = SequentialSampler(valid_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )
test_dataloader = DataLoader(
            test_dataset, # The validation samples.
            sampler = SequentialSampler(test_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

In [5]:
class depthwise_conv(nn.Module):
    def __init__(self, nin, kernel_size, padding, bias=False, stride=1):
        super(depthwise_conv, self).__init__()
        self.depthwise = nn.Conv2d(nin, nin, kernel_size=kernel_size, stride=stride, padding=padding, groups=nin, bias=bias)

    def forward(self, x):
        out = self.depthwise(x)
        return out

In [6]:
class dw_block(nn.Module):
    def __init__(self, nin, kernel_size, padding=1, bias=False, stride=1):
        super(dw_block, self).__init__()
        self.dw_block = nn.Sequential(
            depthwise_conv(nin, kernel_size, padding, bias, stride),
            nn.BatchNorm2d(nin),
            nn.ReLU(True)
        )
    def forward(self, x):
        out = self.dw_block(x)
        return out

In [7]:
class one_by_one_block(nn.Module):
    def __init__(self, nin, nout, padding=1, bias=False, stride=1):
        super(one_by_one_block, self).__init__()
        self.one_by_one_block = nn.Sequential(
            nn.Conv2d(nin, nout, kernel_size=1, stride=stride, padding=padding, bias=bias),
            nn.BatchNorm2d(nout),
            nn.ReLU(True)
        )
    def forward(self, x):
        out = self.one_by_one_block(x)
        return out

In [8]:
class MobileNet(nn.Module):
    def __init__(self, input_channel, num_classes=10):
        super(MobileNet, self).__init__()
        
        self.network = nn.Sequential(
            nn.Conv2d(input_channel, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            
            dw_block(32, kernel_size=3),
            one_by_one_block(32, 64),
            
            dw_block(64, kernel_size=3, stride=2),
            one_by_one_block(64, 128),
            
            dw_block(128, kernel_size=3),
            one_by_one_block(128, 128),
            
            dw_block(128, kernel_size=3, stride=2),
            one_by_one_block(128, 256),
            
            dw_block(256, kernel_size=3),
            one_by_one_block(256, 256),
            
            dw_block(256, kernel_size=3, stride=2),
            one_by_one_block(256, 512),
            
            # 5 times 
            dw_block(512, kernel_size=3),
            one_by_one_block(512, 512),
            dw_block(512, kernel_size=3),
            one_by_one_block(512, 512),
            dw_block(512, kernel_size=3),
            one_by_one_block(512, 512),
            dw_block(512, kernel_size=3),
            one_by_one_block(512, 512),
            dw_block(512, kernel_size=3),
            one_by_one_block(512, 512),
            
            dw_block(512, kernel_size=3, stride=2),
            one_by_one_block(512, 1024),
            
            dw_block(1024, kernel_size=3, stride=2),
            one_by_one_block(1024, 1024),
        )
                
        self.linear = nn.Linear(1024, num_classes)
        
    def forward(self, x):
        body_output = self.network(x)
        
        avg_pool_output = F.adaptive_avg_pool2d(body_output, (1, 1))
        avg_pool_flat = avg_pool_output.view(avg_pool_output.size(0), -1)

        output = self.linear(avg_pool_flat)
        
        return output

In [9]:
model = MobileNet(3, 10) 

In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [11]:
model.cuda()

MobileNet(
  (network): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): dw_block(
      (dw_block): Sequential(
        (0): depthwise_conv(
          (depthwise): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        )
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (4): one_by_one_block(
      (one_by_one_block): Sequential(
        (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (5): dw_block(
      (dw_block): Sequential(
        (0): depthwise_conv(
          (depthwise): Conv2d(64, 64, kernel_size

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.145, momentum=0.9)

In [13]:
import numpy as np
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [14]:
import time
import datetime

def format_time(elapsed):
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [15]:
EPOCHS = 100

training_stats = []
total_eval_accuracy = 0
total_train_accuracy = 0
best_avg_val_accuracy = 0

for epoch in range(EPOCHS):  
    
    t0 = time.time()
    total_train_accuracy = 0
    if epoch == 0:
        lr = 0.145
    elif epoch % 2 == 0 and epoch != 0:
        lr *= 0.94
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    
    running_loss = 0.0
    
    for i, data in enumerate(train_dataloader, 0):
        
        if i % 100 == 0 and not i == 0:
            elapsed = format_time(time.time() - t0)
            print('Epoch {}  Batch {:>5,}  of  {:>5,}.  Elapsed: {:}. Loss {}'.format(epoch,i, len(train_dataloader), elapsed,
                                                                                       avg_train_loss))
            
        inputs, labels = data
        optimizer.zero_grad()

        outputs = model(inputs.cuda())
        loss = criterion(outputs, labels.type(torch.LongTensor).cuda())
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        avg_train_loss = running_loss / len(train_dataloader)
        outputs = outputs.detach().cpu().numpy()
        label_ids = labels.detach().to('cpu').numpy()

        total_train_accuracy += flat_accuracy(outputs, label_ids)

    avg_train_accuracy = total_train_accuracy / len(train_dataloader)
    print("  Accuracy: {0:.2f}".format(avg_train_accuracy))
    
    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))

    print("")
    print("Running Validation...")
    
    
    total_eval_accuracy = 0
    total_eval_loss = 0
    
    for i, data in enumerate(validation_dataloader, 0):
        
        inputs, labels = data
        outputs = model(inputs.cuda())
        
        with torch.no_grad():        
            outputs = model(inputs.cuda())
            
        # Accumulate the validation loss.
        loss = criterion(outputs,labels.type(torch.LongTensor).cuda())
        total_eval_loss += loss.item()

        # Move logits and labels to CPU
        outputs = outputs.detach().cpu().numpy()
        label_ids = labels.detach().to('cpu').numpy()

        total_eval_accuracy += flat_accuracy(outputs, label_ids)

    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    
    training_stats.append(
        {
            'epoch': epoch + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
        }
    )
    
    if best_avg_val_accuracy < avg_val_accuracy:
        best_avg_val_accuracy = avg_val_accuracy
        torch.save(model.state_dict(), 'working/checkpoint_'+str(epoch)+'.pth')
            
print('Finished Training')

Epoch 0  Batch   100  of  1,122.  Elapsed: 0:00:39. Loss 0.2404259800273467
Epoch 0  Batch   200  of  1,122.  Elapsed: 0:01:17. Loss 0.4474413409802578
Epoch 0  Batch   300  of  1,122.  Elapsed: 0:01:54. Loss 0.6530947810613322
Epoch 0  Batch   400  of  1,122.  Elapsed: 0:02:32. Loss 0.8557108523798924
Epoch 0  Batch   500  of  1,122.  Elapsed: 0:03:10. Loss 1.0577036815956102
Epoch 0  Batch   600  of  1,122.  Elapsed: 0:03:47. Loss 1.2588496535432105
Epoch 0  Batch   700  of  1,122.  Elapsed: 0:04:25. Loss 1.454557888435595
Epoch 0  Batch   800  of  1,122.  Elapsed: 0:05:02. Loss 1.6425933745455614
Epoch 0  Batch   900  of  1,122.  Elapsed: 0:05:39. Loss 1.8157208833889953
Epoch 0  Batch 1,000  of  1,122.  Elapsed: 0:06:17. Loss 1.9825498790026985
Epoch 0  Batch 1,100  of  1,122.  Elapsed: 0:06:54. Loss 2.1357865814856667
  Accuracy: 0.16

  Average training loss: 2.17

Running Validation...
  Accuracy: 0.29
  Validation Loss: 1.98
Epoch 1  Batch   100  of  1,122.  Elapsed: 0:00:35. L

KeyboardInterrupt: 

In [None]:
total_test_accuracy = 0
total_test_loss = 0

for i, batch in enumerate(test_dataloader):

    inputs, labels = batch
    with torch.no_grad():        
        outputs = model(inputs.cuda())

    # Accumulate the validation loss.
    loss = criterion(outputs,labels.type(torch.LongTensor).cuda())
    total_test_loss += loss.item()

    # Move logits and labels to CPU
    outputs = outputs.detach().cpu().numpy()
    label_ids = labels.detach().to('cpu').numpy()

    total_test_accuracy += flat_accuracy(outputs, label_ids)


avg_test_accuracy = total_test_accuracy / len(test_dataloader)
print("  Accuracy: {0:.2f}".format(avg_test_accuracy))

# Calculate the average loss over all of the batches.
avg_test_loss = total_test_loss / len(test_dataloader)
print("  Test Loss: {0:.2f}".format(avg_test_loss))