In [0]:
%matplotlib inline

import numpy as np

from torchvision import utils
from dataloader_resize import *
from utils import *
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.autograd import Variable
import torch
import time
from datetime import datetime

import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()   # interactive mode

In [0]:
class AverageBase(object):
    """Base holder for a single scalar statistic.

    The current value is kept in ``self.value`` as a ``float``, or as ``None``
    when the constructor was given ``None`` (meaning "nothing recorded yet").
    Subclasses are expected to provide an ``update`` method.
    """

    def __init__(self, value=0):
        """Store ``value`` as a float; ``None`` is kept as ``None``."""
        self.value = float(value) if value is not None else None

    def __str__(self):
        """Return the value rounded to four decimals, or ``'None'`` if unset."""
        # round(None, 4) raises TypeError, so an unset average needs its own path.
        if self.value is None:
            return str(None)
        return str(round(self.value, 4))

    def __repr__(self):
        """Return the unrounded value as a string."""
        # __repr__ must return a str; returning the float itself makes
        # repr(obj) (and a notebook's bare-expression display) raise TypeError.
        return repr(self.value)

    def __format__(self, fmt):
        """Format the underlying value with the format spec ``fmt``."""
        return format(self.value, fmt)

    def __float__(self):
        """Return the underlying value (``float(obj)`` fails while it is unset)."""
        return self.value
    

class RunningAverage(AverageBase):
    """
    Cumulative moving average (CMA): the plain mean of every sample seen so far.

    ``value`` is the current mean and ``count`` the number of samples folded
    into it; both can be seeded through the constructor.
    """

    def __init__(self, value=0, count=0):
        super(RunningAverage, self).__init__(value)
        self.count = count

    def update(self, value):
        """Fold one more sample into the mean and return the new mean."""
        # Rebuild the running sum from the stored mean, then re-divide.
        running_sum = self.value * self.count + float(value)
        self.count += 1
        self.value = running_sum / self.count
        return self.value


class MovingAverage(AverageBase):
    """
    Exponentially decaying moving average (EMA).

    ``alpha`` is the weight kept on the previous average; each new sample
    contributes ``1 - alpha``. The average is unset (``None``) until the first
    call to ``update``.
    """

    def __init__(self, alpha=0.99):
        super(MovingAverage, self).__init__(None)
        self.alpha = alpha

    def update(self, value):
        """Blend ``value`` into the average and return the new average."""
        sample = float(value)
        if self.value is None:
            # First sample seeds the average directly.
            self.value = sample
        else:
            self.value = self.alpha * self.value + (1 - self.alpha) * sample
        return self.value

In [0]:
# Directory that receives the run log and the saved model weights.
path = '/content/drive/My Drive/CSE253/PA3/Base_TL/'
# Month_day_hour_minute stamp so that each run writes to its own files.
dt = datetime.now().strftime("%m_%d_%H_%M")
# Text log, best-validation weights, and most-recent weights, in that order.
output_fn, best_model_fn, model_fn = (
    "".join((path, stem, dt, ext))
    for stem, ext in (("model_output_", ".txt"), ("best_model_", ".pt"), ("model_", ".pt"))
)

def print_info(out_str):
    """Print ``out_str`` and append it, unchanged, to the run log ``output_fn``.

    No newline is added to what is written to the file, so callers include
    their own trailing "\\n".

    Args:
        out_str: Text to echo and log.
    """
    # The context manager closes the handle even if print/write raises,
    # which the manual open()/close() pair did not guarantee.
    with open(output_fn, "a") as log_file:
        print(out_str)
        log_file.write(out_str)

# print_info("Started: %s\nFrom a previously trained model which left off on start of epoch 9.\n" % datetime.now())
print_info("Started: %s\n" % datetime.now())

In [0]:
class FCN_TL(torch.nn.Module):
    """Transfer-learning segmentation network.

    A torchvision ResNet-50 (loaded with ``pretrained=True``) has all of its
    parameters frozen. Its 1000-value output is treated as a 1x1 feature map
    with 1000 channels, then expanded by two convolutions and six transposed
    convolutions. A final 1x1 convolution produces ``n_class`` channels.

    Args:
        n_class: Number of output channels (one per class).
    """

    def __init__(self, n_class):
        super(FCN_TL, self).__init__()
        self.n_class = n_class
        self.relu = nn.ReLU(inplace=True)

        # Frozen backbone: only the layers created below keep requires_grad=True.
        self.model_conv = torchvision.models.resnet50(pretrained=True)
        self.in_ftrs = 1000  # channel count fed to conv1 (the backbone's output width)
        for frozen_param in self.model_conv.parameters():
            frozen_param.requires_grad = False

        # Two convolutions whose padding grows the 1x1 map before upsampling.
        self.conv1 = nn.Conv2d(self.in_ftrs, 512, kernel_size=2, padding=1)
        self.cbnd1 = nn.BatchNorm2d(512)
        self.conv2 = nn.Conv2d(512, 256, kernel_size=3, padding=2)
        self.cbnd2 = nn.BatchNorm2d(256)

        # Three stride-2 transposed convolutions.
        self.deconv1 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn1 = nn.BatchNorm2d(128)
        self.deconv2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.deconv3 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn3 = nn.BatchNorm2d(32)

        # Two stride-4 transposed convolutions.
        self.deconv4 = nn.ConvTranspose2d(32, 16, kernel_size=5, stride=4, padding=1, output_padding=1)
        self.bn4 = nn.BatchNorm2d(16)
        self.deconv5 = nn.ConvTranspose2d(16, 8, kernel_size=5, stride=4, padding=1, output_padding=1)
        self.bn5 = nn.BatchNorm2d(8)

        # Last upsampling step uses a different stride per axis (2 and 4).
        self.deconv6 = nn.ConvTranspose2d(8, 4, kernel_size=(3, 5), stride=(2, 4), padding=1, output_padding=1)
        self.bn6 = nn.BatchNorm2d(4)

        # 1x1 convolution mapping the 4 remaining channels to per-class scores.
        self.classifier = nn.Conv2d(4, self.n_class, kernel_size=1)

    def forward(self, x):
        """Return per-class score maps for the image batch ``x``.

        Every stage is applied as layer -> ReLU -> BatchNorm. No softmax is
        applied here.

        NOTE(review): working the layer hyperparameters through gives spatial
        sizes 1 -> 2 -> 4 -> 8 -> 16 -> 32 -> 128 -> 512 -> (1024, 2048), so the
        output size does not depend on the input size. Confirm with a real
        forward pass.
        """
        # Adds two trailing singleton axes in place; assumes the backbone
        # returns a (N, 1000) tensor -- TODO confirm.
        z = self.model_conv(x).unsqueeze_(-1).unsqueeze_(-1)

        stages = (
            (self.conv1, self.cbnd1),
            (self.conv2, self.cbnd2),
            (self.deconv1, self.bn1),
            (self.deconv2, self.bn2),
            (self.deconv3, self.bn3),
            (self.deconv4, self.bn4),
            (self.deconv5, self.bn5),
            (self.deconv6, self.bn6),
        )
        for layer, norm in stages:
            z = norm(self.relu(layer(z)))

        return self.classifier(z)

In [0]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


def print_GPU_stats():
    """Print total, cached and allocated memory (in bytes) for ``device``.

    Also prints the difference between total and allocated memory in units of
    10**9 bytes. When ``device`` is not a CUDA device the CUDA queries would
    raise, so a short notice is printed instead.
    """
    if device.type != "cuda":
        print("No CUDA device in use; skipping GPU memory stats.")
        return

    total = torch.cuda.get_device_properties(device).total_memory
    allocated = torch.cuda.memory_allocated(device)
    # memory_cached is deprecated in favour of memory_reserved; fall back to
    # the old name only on PyTorch versions that predate the new one.
    cached_fn = getattr(torch.cuda, "memory_reserved", None) or torch.cuda.memory_cached

    print("total GPU Mem: ", total)
    print("total GPU Cached: ", cached_fn(device))
    print("total GPU Allocated: ", allocated)
    print("Available GB: ", (total - allocated)/(10**9))


print_GPU_stats()

In [0]:
# Loader settings shared by the train, validation and test splits.
batch_size = 8
num_wrkrs = 4

# One dataset per split, each described by its own CSV file.
train_dataset = CityScapesDataset(csv_file='train_local.csv')
val_dataset = CityScapesDataset(csv_file='val_local.csv')
test_dataset = CityScapesDataset(csv_file='test_local.csv')

# NOTE(review): shuffling is enabled for the validation and test loaders as
# well as for training -- confirm that is intended.
_loader_opts = dict(batch_size=batch_size, num_workers=num_wrkrs, shuffle=True)
train_loader = DataLoader(dataset=train_dataset, **_loader_opts)
val_loader = DataLoader(dataset=val_dataset, **_loader_opts)
test_loader = DataLoader(dataset=test_dataset, **_loader_opts)

In [0]:
def init_weights(m):
    """Xavier-initialise transposed-convolution layers; leave others untouched.

    Intended for ``Module.apply``. For an ``nn.ConvTranspose2d`` the weight is
    re-drawn with Xavier-uniform initialisation and the bias, if the layer has
    one, is set to zero. Any other module type is ignored.

    Args:
        m: The module to (possibly) initialise.
    """
    if not isinstance(m, nn.ConvTranspose2d):
        return
    torch.nn.init.xavier_uniform_(m.weight.data)
    # Layers built with bias=False have m.bias set to None.
    if m.bias is not None:
        m.bias.data.zero_()



# Build the network, re-initialise its transposed convolutions, and move it to
# the selected device (apply() and to() both return the module itself).
fcn_model = FCN_TL(n_class=34).apply(init_weights).to(device)

# Epoch range consumed by train(): range(start_epoch, epochs).
epochs, start_epoch = 100, 1

criterion = nn.CrossEntropyLoss()
# To resume from saved weights, load them before the optimizer is created:
# fcn_model.load_state_dict(torch.load(path + 'model_02_13_08_47.pt'))
optimizer = optim.Adam(fcn_model.parameters(), weight_decay=1e-3, lr=5e-4)
# Alternative optimizer that was tried:
# optimizer = optim.SGD(model_conv.parameters(), lr=5e-4, momentum=0.9)

In [0]:
# Whether batches (and the model) are moved to `device`; flip the commented
# line to force CPU execution.
use_gpu = torch.cuda.is_available()
# use_gpu = False
if use_gpu:
    fcn_model = fcn_model.to(device)

# Module-level bookkeeping that evaluate() updates through `global`:
#   best_loss    - lowest validation loss seen so far
#   prev_loss    - comparison value for the increase counter
#   loss_inc_cnt - consecutive validation runs with a higher loss
#   stop_early   - set once the counter exceeds its threshold
best_loss = prev_loss = float('inf')
loss_inc_cnt, stop_early = 0, False

def _batch_to_device(X, Y):
    """Return ``(inputs, labels)`` for one batch, moved to ``device`` when ``use_gpu`` is set."""
    if use_gpu:
        return X.to(device), Y.to(device)
    return X, Y


def train():
    """Train ``fcn_model`` and validate after every epoch.

    For each epoch in ``range(start_epoch, epochs)`` the function iterates over
    ``train_loader``, takes one optimizer step per batch, and tracks
    exponential moving averages of the loss, pixel accuracy and mean IoU. The
    batch loss is logged every 10th iteration and a summary at the end of the
    epoch. ``val(epoch)`` is then called, and training returns as soon as the
    module-level ``stop_early`` flag is set.

    NOTE(review): the model is never explicitly switched to train mode here --
    confirm that relying on the default mode is intended.
    """
    print("Starting Training")

    # NOTE(review): the upper bound is exclusive, so with start_epoch=1 this
    # runs epochs - 1 passes -- confirm that is intended.
    for epoch in range(start_epoch, epochs):
        trn_losses = MovingAverage()
        trn_accs = MovingAverage()
        trn_ious = MovingAverage()
        ts = time.time()
        # Each batch is a triple; the middle element is not used here.
        for batch_idx, (X, tar, Y) in enumerate(train_loader):
            optimizer.zero_grad()
            inputs, labels_cat = _batch_to_device(X, Y)

            # CrossEntropyLoss applies log-softmax internally and expects raw
            # scores; feeding it softmax outputs would apply softmax twice.
            logits = fcn_model(inputs)
            loss = criterion(logits, labels_cat)
            loss.backward()
            optimizer.step()
            loss_value = loss.item()

            # Softmax preserves the ordering of scores along the class axis,
            # so the argmax of the raw scores is the predicted class.
            output_labels = logits.argmax(dim=1)

            trn_losses.update(loss_value)
            trn_accs.update(pixel_acc(output_labels, labels_cat))
            trn_ious.update(np.nanmean(iou(output_labels, labels_cat)))

            if batch_idx % 10 == 0:
                print_info("epoch{}, iter{}, loss: {} \n".format(epoch, batch_idx, loss_value))

        print_info("Finish epoch {}, time elapsed {} \n".format(epoch, time.time() - ts))

        print_info("Training Check:\tLoss: %f\tAccuracy: %f\tIoU: %f \n"
                   % (trn_losses.value, trn_accs.value * 100, trn_ious.value))

        val(epoch)
        if stop_early:
            return


def evaluate(data_loader, validation=False, verbose=False):
    """Compute mean loss, pixel accuracy and IoU of ``fcn_model`` over a loader.

    Gradients are disabled while iterating. Each metric is the unweighted mean
    of the per-batch values, so a smaller final batch counts as much as a full
    one.

    When ``validation`` is true the function also:
      * saves the weights to ``best_model_fn`` if the loss beats ``best_loss``;
      * counts consecutive runs whose loss is higher than the previous run's
        and sets ``stop_early`` once that count exceeds 3;
      * saves the current weights to ``model_fn``.

    NOTE(review): the model is not switched to eval mode here (the call in
    ``val`` is commented out), so BatchNorm layers are not in inference mode
    during evaluation -- confirm that is intended.

    Args:
        data_loader: Iterable yielding ``(X, tar, Y)`` batches.
        validation: Enables checkpointing and early-stopping bookkeeping.
        verbose: Currently unused; kept so existing callers keep working.

    Returns:
        Tuple ``(loss, acc, IoU)`` of the averaged metrics.
    """
    global best_loss
    global prev_loss
    global loss_inc_cnt
    global stop_early

    losses = RunningAverage()
    accs = RunningAverage()
    ious = RunningAverage()
    ts = time.time()
    print("Starting Evaluation")

    with torch.no_grad():
        for X, tar, Y in data_loader:
            inputs, labels_cat = _batch_to_device(X, Y)

            # Raw scores go to the loss (CrossEntropyLoss applies log-softmax
            # itself); the argmax is the same with or without softmax.
            logits = fcn_model(inputs)
            output_labels = logits.argmax(dim=1)

            losses.update(criterion(logits, labels_cat).item())
            accs.update(pixel_acc(output_labels, labels_cat))
            ious.update(np.nanmean(iou(output_labels, labels_cat)))

    print("Finished evaluation. Time elapsed %f" % (time.time() - ts))

    # This probably should not be a straight average, but just doing this for now
    loss, acc, IoU = losses.value, accs.value, ious.value

    if validation:
        if best_loss > loss:
            best_loss = loss
            print_info("Best Loss: " + str(best_loss) + "\n")
            torch.save(fcn_model.state_dict(), best_model_fn)
        loss_inc_cnt = loss_inc_cnt + 1 if prev_loss < loss else 0
        # Remember this run's loss for the next comparison. Without this
        # prev_loss stays at +inf and the counter can never increase.
        prev_loss = loss
        if loss_inc_cnt > 3:
            stop_early = True
        torch.save(fcn_model.state_dict(), model_fn)

    return loss, acc, IoU

def val(epoch):
    """Evaluate on the validation loader and log the results.

    Calls ``evaluate(val_loader, validation=True)`` and logs loss, accuracy
    (scaled by 100) and IoU. If the module-level ``stop_early`` flag is set
    afterwards, a stop notice tagged with ``epoch`` is logged as well.

    Args:
        epoch: Epoch number, used only in the early-stopping message.
    """
    # fcn_model.eval()
    # NOTE(review): the eval-mode switch above is disabled -- confirm intended.
    loss, acc, IoU = evaluate(val_loader, validation=True)
    print_info("Validation Results: Loss: %f\tAccuracy: %f\tIoU: %f \n" % (loss, acc * 100, IoU))
    if stop_early:
        # print_info writes the text to the log file verbatim, so the message
        # needs its own newline or the next log entry lands on the same line.
        print_info("Epoch %d:\tStopping Early\n" % (epoch))
    
def test():
    """Evaluate on the test loader and log loss, accuracy (x100) and IoU.

    Runs ``evaluate(test_loader)`` without the validation bookkeeping, so no
    checkpoint is written and the early-stopping state is left alone.
    """
    print(' ')
    metrics = evaluate(test_loader)
    test_loss, test_acc, test_iou = metrics
    print_info("Test Results:\tLoss: %f\tAccuracy: %f\tIoU: %f \n" % (test_loss, test_acc * 100, test_iou))
    
if __name__ == "__main__":
    # Optional baseline: uncomment the next two lines to log validation
    # metrics before any training has happened.
    # val(0)  # show the accuracy before training
    # print_info("---------Above is accuracy before training.---------\n")
    # Train (validating after every epoch), then report metrics on the test loader.
    train()
    test()