In [0]:
from torchvision import utils
from dataloader import *
from utils import *
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.autograd import Variable
import torch
import time
from datetime import datetime

In [0]:
# Use the first CUDA device when one is present, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
def print_GPU_stats():
    """Print total, cached (reserved) and allocated memory for ``device``.

    The ``torch.cuda`` queries used here only accept CUDA devices, so when
    ``device`` is the CPU fallback a short notice is printed instead of
    raising.
    """
    if device.type != "cuda":
        print("CUDA not available; skipping GPU memory stats.")
        return
    total_memory = torch.cuda.get_device_properties(device).total_memory
    # memory_cached() was renamed to memory_reserved(); keep a fallback for
    # torch versions that only ship the old name.
    reserved_fn = getattr(torch.cuda, "memory_reserved", None) or torch.cuda.memory_cached
    print("total GPU Mem: ", total_memory)
    print("total GPU Cached: ", reserved_fn(device))
    print("total GPU Allocated: ", torch.cuda.memory_allocated(device))
    print("Available GB: ", (total_memory - torch.cuda.memory_allocated(device))/(10**9))
print_GPU_stats()

cuda:0
total GPU Mem:  17071734784
total GPU Cached:  0
total GPU Allocated:  0
Available GB:  17.071734784


In [0]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    """Display an image tensor with matplotlib.

    Args:
        img: 3-D tensor whose first axis is moved to the end before display
            (i.e. channels-first layout). Values are mapped with
            ``img / 2 + 0.5`` first -- presumably undoing a mean=0.5/std=0.5
            normalisation; confirm against the dataloader.
    """
    img = img / 2 + 0.5     # unnormalize
    # .numpy() only works on CPU tensors that do not require grad, so detach
    # and copy to host memory first (both are no-ops for plain CPU tensors).
    npimg = img.detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [0]:
# NOTE: rename this class to FCN to use this model instead (unverified).

class FCN_bak(torch.nn.Module):
    """Convolutional encoder/decoder that outputs per-pixel class scores.

    Encoder: five ``Conv2d`` layers with stride 2 (the first uses a (3, 5)
    kernel with stride (2, 4)), each followed by ReLU and a 2x2 max-pool whose
    indices are kept. BatchNorm is applied to the pooled activations before
    the next convolution.

    Decoder: five stages of ``MaxUnpool2d`` (re-using the encoder indices in
    reverse order) -> ReLU -> BatchNorm -> ``ConvTranspose2d``, followed by a
    final BatchNorm and a 1x1 convolution that produces ``n_class`` channels.

    The decoder activations have to line up with the stored pooling indices,
    which constrains the usable input size: a 1024x2048 input works and gives
    a 1024x2048 output. Other sizes should be verified before use.

    Args:
        n_class: number of output channels of the final classifier.
    """

    def __init__(self, n_class):
        super().__init__()
        self.n_class = n_class
        self.conv1   = nn.Conv2d(3, 32, kernel_size=(3,5), stride=(2,4), padding=1, dilation=1)
        self.bnd1    = nn.BatchNorm2d(32)
        self.conv2   = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, dilation=1)
        self.bnd2    = nn.BatchNorm2d(64)
        self.conv3   = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, dilation=1)
        self.bnd3    = nn.BatchNorm2d(128)
        self.conv4   = nn.Conv2d(128,256, kernel_size=3, stride=2, padding=1, dilation=1)
        self.bnd4    = nn.BatchNorm2d(256)
        self.conv5   = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1, dilation=1)
        self.bnd5    = nn.BatchNorm2d(512)
        self.relu    = nn.ReLU(inplace=True)

        self.deconv1 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1     = nn.BatchNorm2d(256)
        self.deconv2 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2     = nn.BatchNorm2d(128)
        self.deconv3 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3     = nn.BatchNorm2d(64)
        self.deconv4 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4     = nn.BatchNorm2d(32)
        self.deconv5  = nn.ConvTranspose2d(32, 3, kernel_size=(3, 5), stride=(2,4), padding=1, dilation=1, output_padding=1)
        self.bn5= nn.BatchNorm2d(3)
        self.classifier = nn.Conv2d(3,n_class, kernel_size=1, stride=1, padding=0, dilation=1)

        # Built once here instead of on every forward() call. Both modules are
        # parameter-free, so state_dict keys (and saved checkpoints) are
        # unaffected by registering them.
        self.pool    = nn.MaxPool2d(2, stride=2, return_indices=True)
        self.unpool  = nn.MaxUnpool2d(2, stride=2)

    def forward(self, x):
        """Compute class scores for a batch of 3-channel images.

        Args:
            x: input batch with 3 channels in dimension 1.

        Returns:
            Tensor with ``n_class`` channels in dimension 1 (raw scores; no
            softmax is applied here).
        """
        # Encoder: keep the pooling indices of every stage for the decoder.
        x1, indice1 = self.pool(self.relu(self.conv1(x)))
        x2, indice2 = self.pool(self.relu(self.conv2(self.bnd1(x1))))
        x3, indice3 = self.pool(self.relu(self.conv3(self.bnd2(x2))))
        x4, indice4 = self.pool(self.relu(self.conv4(self.bnd3(x3))))
        x5, indice5 = self.pool(self.relu(self.conv5(self.bnd4(x4))))

        # Decoder: unpool with the matching encoder indices, deepest stage first.
        z1 = self.deconv1(self.bnd5(self.relu(self.unpool((x5), indice5))))
        z2 = self.deconv2(self.bn1(self.relu(self.unpool((z1), indice4))))
        z3 = self.deconv3(self.bn2(self.relu(self.unpool((z2), indice3))))
        z4 = self.deconv4(self.bn3(self.relu(self.unpool((z3), indice2))))
        z5 = self.deconv5(self.bn4(self.relu(self.unpool((z4), indice1))))

        out_decoder = self.classifier(self.bn5(z5))

        return out_decoder  # size=(N, n_class, x.H/1, x.W/1)

In [0]:
class AverageBase(object):
    """Base class holding a single scalar statistic in ``self.value``.

    ``value`` is stored as a float, or as ``None`` when no value has been
    recorded yet. Subclasses provide ``update``.
    """

    def __init__(self, value=0):
        self.value = float(value) if value is not None else None

    def __str__(self):
        # round() rejects None, so report an unset average explicitly.
        if self.value is None:
            return str(None)
        return str(round(self.value, 4))

    def __repr__(self):
        # __repr__ must return a str; returning the raw float makes repr() raise.
        return "{}({!r})".format(type(self).__name__, self.value)

    def __format__(self, fmt):
        return self.value.__format__(fmt)

    def __float__(self):
        if self.value is None:
            raise TypeError("average has no value yet; call update() first")
        return self.value
    

class RunningAverage(AverageBase):
    """
    Cumulative moving average (CMA): the plain mean of every value seen so far.
    """

    def __init__(self, value=0, count=0):
        super().__init__(value)
        # number of samples already folded into self.value
        self.count = count

    def update(self, value):
        """Fold ``value`` into the mean and return the new mean."""
        weighted_sum = self.value * self.count + float(value)
        self.count += 1
        self.value = weighted_sum / self.count
        return self.value


class MovingAverage(AverageBase):
    """
    Exponential moving average (EMA) with decay factor ``alpha``.
    """

    def __init__(self, alpha=0.99):
        # start without a value; the first update() seeds the average
        super().__init__(None)
        self.alpha = alpha

    def update(self, value):
        """Blend ``value`` into the average and return the new average."""
        if self.value is not None:
            self.value = self.alpha * self.value + (1 - self.alpha) * float(value)
            return self.value
        self.value = float(value)
        return self.value

In [0]:
# Loader configuration shared by all three splits.
batch_size = 7
num_wrkrs= 2
train_dataset = CityScapesDataset(csv_file='train_local.csv')
val_dataset = CityScapesDataset(csv_file='val_local.csv')
test_dataset = CityScapesDataset(csv_file='test_local.csv')
# Only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          num_workers=num_wrkrs,
                          shuffle=True)
# Evaluation metrics are averaged per batch, so a fixed batch composition
# (shuffle=False) keeps validation/test numbers comparable between runs.
val_loader = DataLoader(dataset=val_dataset,
                          batch_size=batch_size,
                          num_workers=num_wrkrs,
                          shuffle=False)
test_loader = DataLoader(dataset=test_dataset,
                          batch_size=batch_size,
                          num_workers=num_wrkrs,
                          shuffle=False)

In [0]:
def dice_loss(pred, target, smooth=1e-7):
    """Dice loss computed over the whole batch.

    Both tensors are flattened to ``(batch, -1)`` and multiplied element-wise,
    so they must hold the same number of elements per sample. The overlap and
    the two sums are taken over every element of the batch at once (not per
    sample).

    Args:
        pred: predicted scores/probabilities; dimension 0 is the batch.
        target: reference tensor with the same per-sample element count.
        smooth: small constant added to numerator and denominator so the
            ratio stays defined when both tensors sum to zero.

    Returns:
        Scalar tensor ``1 - (2 * sum(pred * target) + smooth) /
        (sum(pred) + sum(target) + smooth)``.
    """
    num = pred.size(0)
    m1 = pred.reshape(num, -1)  # Flatten
    m2 = target.reshape(num, -1)  # Flatten
    intersection = (m1 * m2).sum()

    return 1 - ((2. * intersection + smooth) / (m1.sum() + m2.sum() + smooth))

In [0]:
def init_weights(m):
    """Xavier-initialise conv / transposed-conv layers; meant for ``Module.apply``.

    Weights of ``nn.Conv2d`` and ``nn.ConvTranspose2d`` modules are drawn with
    ``xavier_uniform_`` and their biases are zeroed. Every other module type is
    left untouched.

    Args:
        m: the module to initialise.
    """
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        torch.nn.init.xavier_uniform_(m.weight.data)
        # Layers created with bias=False expose bias as None.
        if m.bias is not None:
            m.bias.data.zero_()
# Training resumes from a saved checkpoint: the training loop iterates over
# range(start_epoch, epochs).
epochs     = 100
start_epoch = 30
fcn_model = FCN_bak(n_class=34)
# fcn_model.apply(init_weights)
# map_location="cpu" lets a checkpoint saved from a GPU run load on a CPU-only
# host as well; the model itself is moved to the target device later.
fcn_model.load_state_dict(torch.load('/content/drive/My Drive/CSE253/PA3/Dice/model_02_15_23_42.pt', map_location="cpu"))
# NOTE: only model weights are restored; the optimizer starts with fresh state.
optimizer = optim.Adam(fcn_model.parameters(), lr = 1e-4, weight_decay= 1e-2)

In [0]:
# Timestamp (month_day_hour_minute) that tags every file written by this run.
dt = datetime.now().strftime("%m_%d_%H_%M")
# Text log of the run.
output_fn = "".join(["/content/drive/My Drive/CSE253/PA3/Dice/model_output_", dt, ".txt"])
# Checkpoint with the lowest validation loss seen so far.
best_model_fn = "".join(["/content/drive/My Drive/CSE253/PA3/Dice/best_model_", dt, ".pt"])
# Checkpoint written after every validation pass.
model_fn = "".join(["/content/drive/My Drive/CSE253/PA3/Dice/model_", dt, ".pt"])

def print_info(out_str):
    """Echo ``out_str`` to stdout and append it to the run log ``output_fn``.

    Args:
        out_str: text to log; written as-is (no newline is added).
    """
    # The context manager closes the log even if print() or write() raises.
    with open(output_fn, "a") as f:
        print(out_str)
        f.write(out_str)

# print_info("Started: %s\nFrom a previously trained model which left off on start of epoch 9.\n" % datetime.now())

In [0]:
# Early-stopping / checkpoint bookkeeping, shared with evaluate() via `global`.
best_loss, prev_loss = float('inf'), float('inf')
loss_inc_cnt, stop_early = 0, False

# Move the model to the GPU when one is available (set use_gpu = False to force CPU).
use_gpu = torch.cuda.is_available()
if use_gpu:
    fcn_model = fcn_model.to(device)
    print("GPU works")

def train():
    """Train ``fcn_model`` for epochs ``start_epoch`` .. ``epochs - 1``.

    For every batch the model output is passed through a softmax over
    dimension 1, scored with ``dice_loss`` against the second item of the
    batch, and one optimizer step is taken. Loss, pixel accuracy and mean IoU
    are tracked as exponential moving averages that are NOT reset between
    epochs, so the per-epoch "Training Check" line reports the running EMA.

    After each epoch ``val(epoch)`` is called; training returns early once the
    module-level ``stop_early`` flag is set.
    """
    softmax = nn.Softmax(dim=1)
    print("Starting Training")
    trn_losses = MovingAverage()
    trn_accs = MovingAverage()
    trn_ious = MovingAverage()

    for epoch in range(start_epoch, epochs):
        # Set training mode explicitly so the loop does not depend on whatever
        # mode earlier code (e.g. an evaluation pass) left the model in.
        fcn_model.train()

        ts = time.time()
        # Each batch is (input, encoded labels, categorical labels).
        for batch_idx, (X, tar, Y) in enumerate(train_loader):
            optimizer.zero_grad()

            if use_gpu:
                inputs = X.to(device)
                labels_cat = Y.to(device)
                labels_enc = tar.to(device)
            else:
                inputs, labels_cat, labels_enc = X, Y, tar

            outputs = softmax(fcn_model(inputs))
            loss = dice_loss(outputs, labels_enc)
            loss.backward()
            optimizer.step()
            # Keep the Python float separate from the loss tensor.
            loss_value = loss.item()

            # Hard per-pixel prediction used for the metrics below.
            output_labels = outputs.argmax(dim=1)

            trn_losses.update(loss_value)
            trn_accs.update(pixel_acc(output_labels, labels_cat))
            # nanmean: iou() may return NaN entries, which are skipped here.
            trn_ious.update(np.nanmean(iou(output_labels, labels_cat)))

            if batch_idx % 10 == 0:
                print_info("epoch{}, iter{}, loss: {} \n".format(epoch, batch_idx, loss_value))

        print_info("Finish epoch {}, time elapsed {} \n".format(epoch, time.time() - ts))

        loss, acc, IoU = trn_losses.value, trn_accs.value, trn_ious.value

        print_info("Training Check:\tLoss: %f\tAccuracy: %f\tIoU: %f \n" % (loss, acc * 100, IoU))

        val(epoch)
        if stop_early: return
  
def evaluate(data_loader, validation=False, verbose=False):
    """Compute mean loss, pixel accuracy and IoU over ``data_loader``.

    Runs under ``torch.no_grad()``. Metrics are computed per batch and then
    averaged with a plain mean over batches.

    NOTE(review): the model mode is not changed here (no ``fcn_model.eval()``),
    so layers such as BatchNorm behave as in whatever mode the model is
    currently in -- confirm this is intended.

    Args:
        data_loader: iterable of (input, encoded labels, categorical labels).
        validation: when True, also update the module-level early-stopping
            state (``best_loss``, ``prev_loss``, ``loss_inc_cnt``,
            ``stop_early``) and save checkpoints to ``best_model_fn`` /
            ``model_fn``.
        verbose: currently unused.

    Returns:
        Tuple ``(loss, acc, IoU)`` of per-batch means.
    """

    global best_loss
    global prev_loss
    global loss_inc_cnt
    global stop_early

    with torch.no_grad():
        losses = []
        accs = []
        ious = []
        softmax = nn.Softmax(dim=1)
        ts = time.time()
        print("Starting Evaluation")

        for X, tar, Y in data_loader:

            if use_gpu:
                inputs = X.to(device)
                labels_cat = Y.to(device)
                labels_enc = tar.to(device)
            else:
                inputs, labels_cat, labels_enc = X, Y, tar

            outputs = softmax(fcn_model(inputs))

            losses.append(dice_loss(outputs, labels_enc).item())

            # Hard per-pixel prediction used for accuracy / IoU.
            output_labels = outputs.argmax(dim=1)

            accs.append(pixel_acc(output_labels, labels_cat))

            ious.append(np.nanmean(iou(output_labels, labels_cat)))

        print("Finished evaluation. Time elapsed %f" % (time.time() - ts))

        # This probably should not be a straight average, but just doing this for now
        loss = np.mean(losses)
        acc = np.mean(accs)
        IoU = np.mean(ious)

        if validation:
            if best_loss > loss:
                best_loss = loss
                print_info("Best Loss: " + str(best_loss) + "\n")
                torch.save(fcn_model.state_dict(), best_model_fn)
            # Count consecutive validation passes whose loss went up.
            loss_inc_cnt = loss_inc_cnt + 1 if prev_loss < loss else 0
            # Remember this pass's loss for the next comparison; without this
            # prev_loss stays at inf and the counter can never increase.
            prev_loss = loss
            if loss_inc_cnt > 3: stop_early = True
            torch.save(fcn_model.state_dict(), model_fn)

        return loss, acc, IoU

def val(epoch):
    """Run one validation pass over ``val_loader`` and log the metrics.

    The model's train/eval mode is not touched here. ``evaluate`` is called
    with ``validation=True``; if the module-level ``stop_early`` flag is set
    afterwards, an early-stopping notice for ``epoch`` is logged as well.
    """
    val_loss, val_acc, val_iou = evaluate(val_loader, validation=True)
    summary = "Validation Results: Loss: %f\tAccuracy: %f\tIoU: %f \n" % (val_loss, val_acc * 100, val_iou)
    print_info(summary)
    if not stop_early:
        return
    print_info("Epoch %d:\tStopping Early" % (epoch))
    
def test():
    """Evaluate on ``test_loader`` and log loss, accuracy (in percent) and IoU."""
    print(' ')
    test_loss, test_acc, test_iou = evaluate(test_loader)
    report = "Test Results:\tLoss: %f\tAccuracy: %f\tIoU: %f \n" % (test_loss, test_acc * 100, test_iou)
    print_info(report)
    
# Entry point of the cell: runs training when executed as the main module.
if __name__ == "__main__":
    # Optional baseline before training (disabled):
    # val(0)  # show the accuracy before training
    # print_info("---------Above is accuracy before training.---------\n")
    train()
    # Optional final evaluation on the test split (disabled):
    # test()

GPU works
Starting Training
epoch30, iter0, loss: 0.471845805644989 

epoch30, iter10, loss: 0.48630237579345703 

epoch30, iter20, loss: 0.5327383279800415 

epoch30, iter30, loss: 0.4254082441329956 

epoch30, iter40, loss: 0.5251901149749756 

epoch30, iter50, loss: 0.5396333932876587 

epoch30, iter60, loss: 0.49754029512405396 

epoch30, iter70, loss: 0.5022299289703369 

epoch30, iter80, loss: 0.471383273601532 

epoch30, iter90, loss: 0.49218785762786865 

epoch30, iter100, loss: 0.4511209726333618 

epoch30, iter110, loss: 0.48105841875076294 

epoch30, iter120, loss: 0.5135829448699951 

epoch30, iter130, loss: 0.43207961320877075 

epoch30, iter140, loss: 0.45331746339797974 

epoch30, iter150, loss: 0.4256807565689087 

epoch30, iter160, loss: 0.45807355642318726 

epoch30, iter170, loss: 0.4973229765892029 

epoch30, iter180, loss: 0.5460232496261597 

epoch30, iter190, loss: 0.44363176822662354 

epoch30, iter200, loss: 0.5729594230651855 

epoch30, iter210, loss: 0.530315