## Imports

In [2]:
import torch
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2D, BatchNorm, Loss Functions
import torch.optim as optim # All optimization algorithms, SGD, Adam, etc
import torch.nn.functional as F # All functions that don't have any parameter
import torchvision
import torchvision.datasets as datasets # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms # Transformation we can perform on our dataset
from torch.utils.data import Dataset 
from torch.utils.data import DataLoader # Gives easier dataset management and create mini batches
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter # To print to Tensorboard
from torchvision import models

# Utils
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import os
import copy
import re
import cv2
from pathlib import Path

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from PIL import Image
import imageio
from skimage.transform import resize
from skimage import img_as_ubyte, img_as_float32
from sklearn import preprocessing
# import rasterio

%matplotlib inline



In [None]:
# Patch for (https://github.com/pytorch/tutorials/issues/762)
# for Tensorboard projector T-SNE, PCA
# import tensorflow as tf
# import tensorboard as tb
# tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

## Create DNN

In [3]:
VGG16 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
# Then we flatten and 4096x4096x1000 Linear Layers

class VGG_net(nn.Module):
    def __init__(self, in_channels=3, num_classes=1000):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG16)
        #make more compact with nn.Sequential
        #Final channel=512 and since it has 5 max pools (224/(2^5)=7.0)
        self.fcs = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096,4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes)
            )
    
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1) # flatten for fcs
        x = self.fcs(x)
        return x
    
    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels
        
        for x in architecture:
            if type(x) == int:
                out_channels = x
                
                layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                    kernel_size=(3,3), stride=(1,1), padding=(1,1)),
                          nn.BatchNorm2d(x),
                          nn.ReLU()]
                in_channels = x #update in_channel as prev_output
            elif x == 'M':
                layers += [nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))]
                
        return nn.Sequential(*layers)        

In [4]:
model = VGG_net(in_channels=3, num_classes=21)
x = torch.randn(32, 3, 224, 224)
print(model(x).shape)

torch.Size([32, 21])


## Set Device

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## Data Process

In [6]:
class UCMlanduseDataset(Dataset):
    def __init__(self, train, root_dir, transform=None):
        self.seed = 42
        self.size = (224, 224)
        self.num_channels = 3
        self.num_classes = 21
        self.root_dir = root_dir
        self.transform = transform
        self.test_ratio = 0.2
        self.N = 2100 
        self.train = train
        self._load_data()
        
    def _load_data(self):
        """
        Loads the data from the passes root directory.
        Splits in test/train based on seed
        """
        images = np.zeros([self.N, self.size[0], self.size[1], 3], dtype="uint8")
        labels = []
        filenames = []
        i = 0
        
        ucm_path = Path(self.root_dir)
        
        # read all the files from the image folder
        for item in tqdm(os.listdir(ucm_path)):
            f = ucm_path / item
            if os.path.isfile(f):
                continue
            for tif_name in os.listdir(f):
                sub_f = f / tif_name
                filenames.append(sub_f)
                
                # a few images are a few pixels off, resize them
                image = imageio.imread(sub_f)
                if image.shape[0] != self.size[0] or image.shape[1] != self.size[1]:
#                     print("Resizing image..")
                    image = img_as_ubyte(
                        resize(image, (self.size[0], self.size[1]), anti_aliasing=True)
                    )
                images[i] = img_as_ubyte(image)
                i += 1
                labels.append(item)
        
        labels = np.asarray(labels)
        filenames = np.asarray(filenames)
        
        # sort by filenames
        images = images[filenames.argsort()]
        labels = labels[filenames.argsort()]
        
        # convert to integer labels
        le = preprocessing.LabelEncoder()
        le.fit(np.sort(np.unique(labels)))
        labels = le.transform(labels)
        labels = np.asarray(labels)
        self.label_encoding = list(le.classes_) # remember label encoding
        
        # split into a train and test set as provided data is not presplit
        X_train, X_test, y_train, y_test = train_test_split(
            images,
            labels,
            test_size=self.test_ratio,
            random_state=self.seed,
            stratify=labels,
        )
        
        if self.train:
            self.data = X_train
            self.targets = y_train
        else:
            self.data = X_test
            self.targets = y_test
            

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        img = self.data[idx]
        
        # doing this so that it is consistent with all other datasets
        # to return a PIL image
        img = Image.fromarray(img)
        
        if self.transform:
            img = self.transform(img)
            
        return img, self.targets[idx]
    

## Hyperparameters

In [14]:
# Batch Size defines the num of samples to be propagated
# through the network before updating the model parameters

learning_rate = 1e-3
in_channels = 3
num_classes = 21
batch_size = 64
num_epochs = 3
idx_to_class = ['agricultural', 'airplane', 'baseballdiamond', 'beach', 'buildings', 'chaparral', 'denseresidential', 'forest', 'freeway', 'golfcourse', 'harbor', 'intersection', 'mediumresidential', 'mobilehomepark', 'overpass', 'parkinglot', 'river', 'runway', 'sparseresidential', 'storagetanks', 'tenniscourt']

In [15]:
train_dataset = UCMlanduseDataset(train=1, root_dir='./data/UCMerced_LandUse/Images', transform=transforms.ToTensor())
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = UCMlanduseDataset(train=0, root_dir='./data/UCMerced_LandUse/Images', transform=transforms.ToTensor())
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:13<00:00,  1.58it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:13<00:00,  1.61it/s]


In [10]:
# Total training samples (images) = 1680
# print(len(train_dataset))
# batch_size = 21
# epochs = 10
# Then,
# 21 samples will be taken at a time to train network
# To go through all 1680 samples it takes 80(1680/21) iterations -> 1 epoch
# This process continues 10 times (epochs).

# One may be limited to small batch sizes based on hardware
# Smaller batches mean each step in gradient descent may be
# less accurate, so it may take longer for the algorithm to 
# converge. But, it has been observed for larger batch size (1k or 2k)
# there is a significant degradation in the quality of the
# model, as measured by its ability to generalize.
# Batch size of 32 or 64 is a good starting point.

1680


## Model and Optimizer

In [11]:
model = VGG_net(in_channels=in_channels, num_classes=num_classes)
model.to(device)

VGG_net(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3),

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)

## Overfit a single batch

In [35]:
# Take out a single batch
data, targets = next(iter(train_loader))

In [37]:
# Initially see if it can overfit single example
batch_size = 1

In [36]:
# Check if all classes are included in batch
sorted(np.unique(targets.numpy()))

[0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

In [38]:
for epoch in range(num_epochs):
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    
    # Get data to cuda if possible
    data = data.to(device)
    targets = targets.to(device)
    
    # forward
    scores = model(data)
    loss = criterion(scores, targets)
    print(loss)
    
    # backward
    optimizer.zero_grad()
    loss.backward()
    
    # gradient descent or adam step
    optimizer.step()

Epoch [1/3]


RuntimeError: CUDA out of memory. Tried to allocate 784.00 MiB (GPU 0; 3.95 GiB total capacity; 2.83 GiB already allocated; 74.50 MiB free; 2.84 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

### CLI setup

Make sure run Tensorboard in this directory
```bash
$ docker exec -it ml_lab1 bash
$ conda activate pytorch
$ cd D/pytorch_practice/
$ tensorboard --logdir=runs
```

## Predefined utility functions

In [None]:
def show(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1,2,0)), interpolation='nearest')

In [None]:
def save(model, optimizer, loss, filename):
    save_dict = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss.item()
        }
    torch.save(save_dict, filename)

In [None]:
def get_mean_std(loader):
    # VAR[x] = E[x^2] - E[x]^2
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0

    for data, _ in loader:
        channels_sum += torch.mean(data, dim=[0,2,3])
        channels_squared_sum += torch.mean(data**2, dim=[0,2,3])
        num_batches += 1

    mean = channels_sum/num_batches
    std = (channels_squared_sum/num_batches - mean**2)**0.5

    return mean, std

# mean, std = get_mean_std(train_loader)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=2):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    step = 0

    for epoch in range(num_epochs):
        losses = []
        accuracies = []
        
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
#         for phase in ['train', 'val']:
        for phase in ['train']:
            if phase == 'train':
                dataset_size = dataset_sizes[0]
                dataloader = train_loader
                model.train()  # Set model to training mode
            else:
                dataset_size = dataset_sizes[1]
                dataloader = test_loader
                model.eval()   # Set model to evaluate mode

            # Iterate over data.
            for batch_idx, (inputs, labels) in enumerate(dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                num_corrects = torch.sum(preds == labels.data)
                running_train_acc = float(num_corrects)/float(inputs.shape[0])
                accuracies.append(running_train_acc)
                losses.append(loss.item())
                
                # Tensorboard Running
                ## Data
                img_grid = torchvision.utils.make_grid(inputs)
                writer.add_image('mnist_images', img_grid)
                
                ## Check how the weight changes in specific layer
                writer.add_histogram('fc1', model.fc1.weight)
                
                ## Metrics
                writer.add_scalar('Training Loss', loss.item(), global_step=step)
                writer.add_scalar('Training Accuracy', running_train_acc, global_step=step)
                step += 1
                
                ## Check how the model predicts eg. PCA, T-SNE
                features = inputs.reshape(inputs.shape[0], -1)
                class_labels = [idx_to_class[label] for label in preds]
                if batch_idx == 230:
                    writer.add_embedding(features, metadata=class_labels, label_img=inputs, global_step=batch_idx)

            if phase == 'train' and scheduler != None:
                scheduler.step()

            # Tensorboard Epoch
            writer.add_hparams({'lr': learning_rate, 'bsize': batch_size},
                       {'accuracy': sum(accuracies)/len(accuracies), 
                       'loss': sum(losses)/len(losses)})
    
            epoch_loss = sum(losses)/len(losses)
            epoch_acc = sum(accuracies)/len(accuracies)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    
    # save model
#     save(model, optimizer, loss, 'resnet18-mask-saved.pth')
    
    return model

## Hyper Parameter search

In [None]:
mean, std = get_mean_std(train_loader)
print(mean, std)

In [None]:
# Num of parameters to train
sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
# Test Dataloader and Model
train_iter = iter(train_loader)
samples , labels = train_iter.next()
print(samples.shape, labels.shape)
print([idx_to_class[i] for i in labels[:6]])
img_grid = torchvision.utils.make_grid(samples)
show(img_grid)

print(model(samples.to(device)))

In [None]:
batch_sizes = [256]
learning_rates = [0.01, 0.001]
dataset_sizes = [len(train_dataset), len(test_dataset)]

# scheduler
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        # Reset Model
        model = CNN(in_channels=in_channels, num_classes=num_classes)
        model.to(device)
        # Dataset and Optimizer according to hyperparameter
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
        # Tensorboard file
        writer = SummaryWriter(f'runs/<Project>/MiniBatchSize {batch_size} LR {learning_rate}')
        train_model(model, criterion, optimizer, None, num_epochs)

## Deploy

In [None]:
load_checkpoint = torch.load('resnet18-mask-saved.pth')

In [None]:
resnet = models.resnet18(pretrained=True)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 2)

resnet.load_state_dict(load_checkpoint['model_state_dict'])
resnet.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet.parameters(), lr=0)

optimizer.load_state_dict(load_checkpoint['optimizer_state_dict'])

In [None]:
# scheduler
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
resnet_trained = train_model(resnet, criterion, optimizer, step_lr_scheduler, num_epochs=2)

## Inference

In [None]:
test_compose = transforms.Compose([transforms.ToPILImage(),
                                   transforms.Resize(224),
                                   transforms.ToTensor(),
                                  ])

In [None]:
def predict(model, numpy_image):
    plt.imshow(numpy_image)
     
    test_image_tensor = test_compose(numpy_image)
    test_image_tensor = test_image_tensor.unsqueeze_(0).to(device)
     
    with torch.no_grad():
        model.eval()
        # Model outputs log probabilities
        out = model(test_image_tensor)
        ps = torch.exp(out)
        topk, topclass = ps.topk(1, dim=1)
        print("Output class :  ", idx_to_class[topclass.cpu().numpy()[0][0]])
        

In [None]:
predict(resnet_trained, test_dataset[47,0])