# **UTMIST: 3D ResNet Model**

## **Import Libraries**

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = "retina"

import torch
import numpy as np
from torchvision import datasets, models
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
from torch import optim
import torch.nn as nn
import torch.nn.functional as F

## **Loading Data**

In [None]:
#Potentially try to get kinetics dataset
!tar xvzf https://storage.googleapis.com/deepmind-media/Datasets/kinetics400.tar.gz

In [None]:
#Temporary Kinetics-400 dataset only for testing mode. Read more here: https://pytorch.org/docs/stable/torchvision/datasets.html#kinetics-400
#
# This cell originally failed with a TypeError. Background reading on video loading:
# https://github.com/pytorch/vision/issues/1271 and https://github.com/NVIDIA/nvvl

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# percentage of training data to be used as validation
valid_size = 0.2
# number of consecutive frames in every clip; Kinetics400 requires this argument
# and omitting it raises a TypeError at construction time
frames_per_clip = 16


def video_to_normalized_tensor(video):
    """Convert a uint8 video clip of shape (T, H, W, C) to float (C, T, H, W) in [-1, 1].

    Video counterpart of ``ToTensor()`` followed by ``Normalize(0.5, 0.5)``:
    values are scaled to [0, 1], shifted/scaled to [-1, 1], and the channel axis
    is moved to the front, which is the layout ``nn.Conv3d`` expects per sample.
    """
    video = video.permute(3, 0, 1, 2).float() / 255.0
    return (video - 0.5) / 0.5


# convert data to a normalized torch.FloatTensor
# (transforms.ToTensor only accepts PIL images / ndarrays, not video tensors)
transform = transforms.Compose([
    transforms.Lambda(video_to_normalized_tensor),
])

# choose the training and test datasets
# NOTE(review): both datasets read the same root, so they hold the same clips -
# point test_data at a held-out directory once one is available.
train_data = datasets.Kinetics400(root='data', frames_per_clip=frames_per_clip, transform=transform)
test_data = datasets.Kinetics400(root='data', frames_per_clip=frames_per_clip, transform=transform)

# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders(combine dataset and sampler)
# NOTE(review): each sample is a (video, audio, label) tuple; default collation
# needs every clip in a batch to share one spatial size and will also try to
# stack the audio tensors - a custom collate_fn may be required. Confirm on real data.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size, num_workers=num_workers)


# print out some data stats
print('Num training images: ', len(train_data))
print('Num test images: ', len(test_data))

TypeError: ignored

## **Visualize A Batch Of Training Data**

## **Checking If GPU Is Available**

In [None]:
# Pick the compute device: CUDA when a GPU is visible, CPU otherwise
# (may not work with Azure).
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("The ML code in this project will be trained on " + str(device))

## **Defining The ML Model**

This is done as per section **3.1 Network Architecture** in the [paper](https://arxiv.org/pdf/1708.07632.pdf) we are reproducing.

### **3D ResNet-18**

In [None]:
# The building blocks below are defined in this cell so that it can be run
# independently of the other model cells in the notebook.

def conv3x3x3(in_channels, out_channels, stride=1):
    """Return a bias-free 3x3x3 convolution with padding 1 (a BatchNorm layer follows it)."""
    return nn.Conv3d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)


def conv1x1x1(in_channels, out_channels, stride=1):
    """Return a bias-free 1x1x1 convolution, used to project the shortcut branch."""
    return nn.Conv3d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)


class BasicBlock3d(nn.Module):
    """Residual block made of two 3x3x3 conv + BatchNorm layers and a shortcut.

    When the block changes the channel count or the resolution (``stride != 1``),
    the shortcut is projected with a strided 1x1x1 convolution + BatchNorm so it
    can be added to the main branch; otherwise the input is added unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock3d, self).__init__()
        self.conv1 = conv3x3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm3d(num_features=out_channels)
        self.conv2 = conv3x3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm3d(num_features=out_channels)
        self.relu = nn.ReLU()

        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(conv1x1x1(in_channels, out_channels, stride),
                                            nn.BatchNorm3d(num_features=out_channels))
        else:
            self.downsample = None

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # the ReLU is applied after the residual addition
        return self.relu(out + shortcut)


def make_stage(in_channels, out_channels, num_blocks, stride):
    """Stack ``num_blocks`` residual blocks; only the first one changes channels/stride."""
    blocks = [BasicBlock3d(in_channels, out_channels, stride)]
    blocks.extend(BasicBlock3d(out_channels, out_channels) for _ in range(num_blocks - 1))
    return nn.Sequential(*blocks)


class Net(nn.Module):
    """3D ResNet-18 for 400-way video classification.

    Input:  float tensor of shape (batch, 3, frames, height, width).
    Output: raw class scores (logits) of shape (batch, 400). No softmax is
    applied here; ``nn.CrossEntropyLoss`` expects logits, and probabilities can
    be obtained with ``F.softmax(logits, dim=1)`` when needed.

    Stages conv_2 .. conv_5 hold 2 residual blocks each (64, 128, 256, 512
    channels); conv_3 .. conv_5 halve the temporal and spatial resolution.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Stem: 7x7x7 convolution, stride 1 in time and 2 in space.
        # NOTE(review): stride/padding follow the common 3D ResNet stem - confirm against the paper's table.
        self.conv_1 = nn.Sequential(nn.Conv3d(in_channels=3, out_channels=64, kernel_size=7, stride=(1, 2, 2), padding=3, bias=False),
                                    nn.BatchNorm3d(num_features=64),
                                    nn.ReLU())

        self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1) #swap w/ avg pool if this doesn't work

        self.conv_2 = make_stage(in_channels=64, out_channels=64, num_blocks=2, stride=1)
        self.conv_3 = make_stage(in_channels=64, out_channels=128, num_blocks=2, stride=2)
        self.conv_4 = make_stage(in_channels=128, out_channels=256, num_blocks=2, stride=2)
        self.conv_5 = make_stage(in_channels=256, out_channels=512, num_blocks=2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))

        self.fc = nn.Linear(in_features=512, out_features=400)

    def forward(self, x):
        #conv_1
        x = self.conv_1(x)
        x = self.maxpool(x)
        #conv_2 .. conv_5
        x = self.conv_2(x)
        x = self.conv_3(x)
        x = self.conv_4(x)
        x = self.conv_5(x)
        #avgpool
        x = self.avgpool(x)
        # (batch, 512, 1, 1, 1) -> (batch, 512) so the linear layer can consume it
        x = torch.flatten(x, 1)
        #fully_connected_layer
        x = self.fc(x)
        return x


# initialize the CNN
model = Net()

#Sending model to CPU/GPU, may not work with Azure (check)
# Done before the optimizer is built so the optimizer tracks the on-device parameters.
model.to(device);

#Defining the loss
# nn.CrossEntropyLoss applies log-softmax internally, so it must be fed raw
# class scores (logits) and integer class labels. (`nn.softmax` does not exist
# and softmax is an activation, not a loss.)
criterion = nn.CrossEntropyLoss()

# Defining the optimizer
# optim.SGD needs the iterable of parameters, not the module itself.
optimizer = optim.SGD(model.parameters(), lr=0.001)



### **3D ResNet-34**

In [None]:
# The building blocks below are defined in this cell so that it can be run
# independently of the other model cells in the notebook.

def conv3x3x3(in_channels, out_channels, stride=1):
    """Return a bias-free 3x3x3 convolution with padding 1 (a BatchNorm layer follows it)."""
    return nn.Conv3d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)


def conv1x1x1(in_channels, out_channels, stride=1):
    """Return a bias-free 1x1x1 convolution, used to project the shortcut branch."""
    return nn.Conv3d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)


class BasicBlock3d(nn.Module):
    """Residual block made of two 3x3x3 conv + BatchNorm layers and a shortcut.

    When the block changes the channel count or the resolution (``stride != 1``),
    the shortcut is projected with a strided 1x1x1 convolution + BatchNorm so it
    can be added to the main branch; otherwise the input is added unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock3d, self).__init__()
        self.conv1 = conv3x3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm3d(num_features=out_channels)
        self.conv2 = conv3x3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm3d(num_features=out_channels)
        self.relu = nn.ReLU()

        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(conv1x1x1(in_channels, out_channels, stride),
                                            nn.BatchNorm3d(num_features=out_channels))
        else:
            self.downsample = None

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # the ReLU is applied after the residual addition
        return self.relu(out + shortcut)


def make_stage(in_channels, out_channels, num_blocks, stride):
    """Stack ``num_blocks`` residual blocks; only the first one changes channels/stride."""
    blocks = [BasicBlock3d(in_channels, out_channels, stride)]
    blocks.extend(BasicBlock3d(out_channels, out_channels) for _ in range(num_blocks - 1))
    return nn.Sequential(*blocks)


class Net34(nn.Module):
    """3D ResNet-34 for 400-way video classification.

    Input:  float tensor of shape (batch, 3, frames, height, width).
    Output: raw class scores (logits) of shape (batch, 400). No softmax is
    applied here; ``nn.CrossEntropyLoss`` expects logits, and probabilities can
    be obtained with ``F.softmax(logits, dim=1)`` when needed.

    Stages conv_2 .. conv_5 hold 3, 4, 6 and 3 residual blocks (64, 128, 256,
    512 channels); conv_3 .. conv_5 halve the temporal and spatial resolution.
    """

    def __init__(self):
        # must name this class; super(Net, self) raises TypeError because Net34 is not a Net
        super(Net34, self).__init__()

        # Stem: 7x7x7 convolution, stride 1 in time and 2 in space.
        # NOTE(review): stride/padding follow the common 3D ResNet stem - confirm against the paper's table.
        self.conv_1 = nn.Sequential(nn.Conv3d(in_channels=3, out_channels=64, kernel_size=7, stride=(1, 2, 2), padding=3, bias=False),
                                    nn.BatchNorm3d(num_features=64),
                                    nn.ReLU())

        # same pooling configuration as the 18-layer model in this notebook
        self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1) #swap w/ avg pool if this doesn't work

        self.conv_2 = make_stage(in_channels=64, out_channels=64, num_blocks=3, stride=1)
        self.conv_3 = make_stage(in_channels=64, out_channels=128, num_blocks=4, stride=2)
        self.conv_4 = make_stage(in_channels=128, out_channels=256, num_blocks=6, stride=2)
        self.conv_5 = make_stage(in_channels=256, out_channels=512, num_blocks=3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))

        self.fc = nn.Linear(in_features=512, out_features=400)

    def forward(self, x):
        #conv_1
        x = self.conv_1(x)
        x = self.maxpool(x)
        #conv_2 .. conv_5
        x = self.conv_2(x)
        x = self.conv_3(x)
        x = self.conv_4(x)
        x = self.conv_5(x)
        #avgpool
        x = self.avgpool(x)
        # (batch, 512, 1, 1, 1) -> (batch, 512) so the linear layer can consume it
        x = torch.flatten(x, 1)
        #fully_connected_layer
        x = self.fc(x)
        return x


# Initialize Model, Criterion, Optimizer
# NOTE: running this cell rebinds `model`, `criterion` and `optimizer`, so the
# training loop uses whichever model cell was executed last.

# initialize the CNN
model = Net34()

#Sending model to CPU/GPU
# Done before the optimizer is built so the optimizer tracks the on-device parameters.
model.to(device);

#Defining the loss
# nn.CrossEntropyLoss applies log-softmax internally, so it must be fed raw
# class scores (logits) and integer class labels. (`nn.softmax` does not exist
# and softmax is an activation, not a loss.)
criterion = nn.CrossEntropyLoss()

# Defining the optimizer
# optim.SGD needs the iterable of parameters, not the module itself.
optimizer = optim.SGD(model.parameters(), lr=0.001)



## **Training loop**

In [None]:
# TODO: Training Loop, azure machine learning distributive?

n_epochs = 1
epochs_no_improve = 0
n_epochs_stop = 5

# initialize tracker for minimum validation loss
# (np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0)
valid_loss_min = np.inf # set initial "min" to infinity

for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    valid_loss = 0.0

    ###################
    # train the model #
    ###################
    model.train() # prep model for training
    for batch in train_loader:
        # A batch is either (data, target) or, for torchvision video datasets,
        # (video, audio, label); the input is always first and the label last.
        # move data and target tensors to the default device
        data, target = batch[0].to(device), batch[-1].to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss (loss is a batch mean, so weight by batch size)
        train_loss += loss.item()*data.size(0)

    ######################
    # validate the model #
    ######################
    model.eval() # prep model for evaluation
    # no gradients are needed for validation; skipping them saves memory and time
    with torch.no_grad():
        for batch in valid_loader:
            # move data and target tensors to the default device
            data, target = batch[0].to(device), batch[-1].to(device)
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the loss
            loss = criterion(output, target)
            # update running validation loss
            valid_loss += loss.item()*data.size(0)

    # print training/validation statistics
    # calculate average loss over an epoch (max(..., 1) guards against an empty split)
    train_loss = train_loss/max(len(train_loader.sampler), 1)
    valid_loss = valid_loss/max(len(valid_loader.sampler), 1)

    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch+1,
        train_loss,
        valid_loss
        ))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        # NOTE(review): the checkpoint name mentions CIFAR although this notebook
        # targets Kinetics-400 - consider renaming once nothing depends on it.
        torch.save(model.state_dict(), 'model_cifar.pt')
        valid_loss_min = valid_loss
        epochs_no_improve = 0

    else:
        epochs_no_improve += 1
        # Check early stopping condition
        if epochs_no_improve >= n_epochs_stop:
            print('Early stopping!')
            break