In [0]:
# Mount Google Drive into the Colab runtime at /content/drive; the mount
# prompts for an authorization code the first time it runs.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import os.path
import pandas as pd
import numpy as np
import math
import scipy.ndimage as ndimage
import random
import matplotlib
import matplotlib.pyplot as plt
from skimage import measure
%matplotlib inline
?
if os.getcwd() != '/content/drive/My Drive/DL_Final_Project':
  %cd 'drive/My Drive/DL_Final_Project'

from data_helper import LabeledDataset
from torch.utils.data import Dataset

matplotlib.rcParams['figure.figsize'] = [5, 5]
matplotlib.rcParams['figure.dpi'] = 200

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from data_helper import UnlabeledDataset, LabeledDataset
from helper import collate_fn, draw_box

/content/drive/My Drive/DL_Final_Project


In [0]:
################################ UNet Model ######################################

def initialize_weights(*models):
    """Re-initialise the layers of every model passed in.

    Conv2d and Linear layers receive Kaiming-normal weights and, when they
    have a bias, a zero bias. BatchNorm2d layers are reset to weight 1 and
    bias 0. All other layer types are left as they are.
    """
    weighted_layers = (nn.Conv2d, nn.Linear)
    for net in models:
        for layer in net.modules():
            if isinstance(layer, weighted_layers):
                nn.init.kaiming_normal_(layer.weight)
                if layer.bias is None:
                    continue
                layer.bias.data.zero_()
            elif isinstance(layer, nn.BatchNorm2d):
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()
                
class _EncoderBlock(nn.Module):
    def __init__(self, in_channels, out_channels, dropout=False):
        super(_EncoderBlock, self).__init__()
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers.append(nn.Dropout())
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.encode = nn.Sequential(*layers)

    def forward(self, x):
        return self.encode(x)


class _DecoderBlock(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels):
        super(_DecoderBlock, self).__init__()
        self.decode = nn.Sequential(
            nn.Conv2d(in_channels, middle_channels, kernel_size=3),
            nn.BatchNorm2d(middle_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(middle_channels, middle_channels, kernel_size=3),
            nn.BatchNorm2d(middle_channels),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=2, stride=2),
        )

    def forward(self, x):
        return self.decode(x)
    
class _DecoderBlock2(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels):
        super(_DecoderBlock2, self).__init__()
        self.decode = nn.Sequential(
            #nn.Conv2d(in_channels, middle_channels, kernel_size=3),
            nn.Conv2d(in_channels, middle_channels, kernel_size=2),
            nn.BatchNorm2d(middle_channels),
            nn.ReLU(inplace=True),
            #nn.Conv2d(middle_channels, middle_channels, kernel_size=3),
            nn.Conv2d(middle_channels, middle_channels, kernel_size=2),
            nn.BatchNorm2d(middle_channels),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=2, stride=2),
        )

    def forward(self, x):
        return self.decode(x)
    
class UNet(nn.Module):
    """U-Net style encoder/decoder that emits per-pixel class scores.

    Args:
        num_classes: number of output channels of the final 1x1 convolution
            (one score map per class).
        semi_supervised: when False the network takes a 3-channel input and
            uses `_DecoderBlock` (3x3 convolutions); when True it takes a
            1-channel input and uses `_DecoderBlock2` (2x2 convolutions).
        output_size: spatial size the score maps are bilinearly resized to
            before being returned. An int is applied to both spatial
            dimensions. Defaults to 800, the value that used to be hard-coded.
    """

    def __init__(self, num_classes, semi_supervised=False, output_size=800):
        super(UNet, self).__init__()
        self.output_size = output_size

        if semi_supervised:
            in_channels, decoder_block = 1, _DecoderBlock2
        else:
            in_channels, decoder_block = 3, _DecoderBlock

        self.enc1 = _EncoderBlock(in_channels, 64)
        self.enc2 = _EncoderBlock(64, 128)
        self.enc3 = _EncoderBlock(128, 256)
        self.enc4 = _EncoderBlock(256, 512, dropout=True)

        self.center = decoder_block(512, 1024, 512)
        # Decoder inputs are twice as wide as the previous stage's output
        # because each one is concatenated with an encoder feature map.
        self.dec4 = decoder_block(1024, 512, 256)
        self.dec3 = decoder_block(512, 256, 128)
        self.dec2 = decoder_block(256, 128, 64)

        self.dec1 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        # One output channel per class (2 for the binary masks used in this notebook).
        self.final = nn.Conv2d(64, num_classes, kernel_size=1)
        initialize_weights(self)

    @staticmethod
    def _merge_skip(decoded, skip):
        """Resize `skip` to the spatial size of `decoded` and concatenate on channels."""
        resized = F.interpolate(skip, decoded.size()[2:], mode='bilinear', align_corners=True)
        return torch.cat([decoded, resized], 1)

    def forward(self, x):
        """Return class-score maps resized to `output_size`."""
        enc1 = self.enc1(x)
        enc2 = self.enc2(enc1)
        enc3 = self.enc3(enc2)
        enc4 = self.enc4(enc3)
        center = self.center(enc4)
        dec4 = self.dec4(self._merge_skip(center, enc4))
        dec3 = self.dec3(self._merge_skip(dec4, enc3))
        dec2 = self.dec2(self._merge_skip(dec3, enc2))
        dec1 = self.dec1(self._merge_skip(dec2, enc1))
        final = self.final(dec1)
        # Upsample the score maps to the requested resolution (800x800 by default).
        output = F.interpolate(final, self.output_size, mode='bilinear', align_corners=True)
        return output

################################ End of UNet Model ###########################################
    
################################ Bounding Box Functions ######################################

def get_bboxes_from_output(model_output): #v2
    """Turn a predicted mask into a tensor of bounding boxes.

    The mask is labelled with `measure.label`, and one box is produced per
    region reported by `measure.regionprops`.

    Args:
        model_output: tensor holding the predicted mask; it is moved to the
            CPU before labelling. (The 799 clamp below assumes an 800x800
            mask - confirm against the caller.)

    Returns:
        A float64 tensor of shape (N, 2, 4). For a region whose pixel bbox is
        (fy, fx, by, bx), clamped to at most 799, row 0 holds the x values
        [fx, fx, bx, bx] and row 1 the y values [fy, by, fy, by], each mapped
        with (value - 400) / 10. With no regions the result is an empty
        tensor of shape (0, 2, 4).
    """
    labeled = measure.label(model_output.cpu())
    bboxes = []

    for prop in measure.regionprops(labeled):
        # prop.bbox is (min_row, min_col, max_row, max_col); the max values are
        # exclusive, so clamp everything to the last valid pixel index.
        fy, fx, by, bx = (min(coord, 799) for coord in prop.bbox)
        corners = np.array([[fx, fx, bx, bx], [fy, by, fy, by]])
        # Inverse of the mapping used by bbox_to_label_bionary (pixel = 10 * coord + 400).
        bboxes.append((corners - 400) / 10)

    if not bboxes:
        # Keep the (N, 2, 4) layout even when nothing was detected.
        return torch.empty((0, 2, 4), dtype=torch.float64)

    # Stack in numpy first: building a tensor from a list of arrays is slow.
    return torch.tensor(np.stack(bboxes))

def bbox_to_label_bionary(target_object):
    """Rasterise bounding boxes into an 800x800 binary mask.

    Args:
        target_object: sequence whose first element is a mapping with a
            'bounding_box' entry. Each box is indexed as box[0] = four x
            values (flx, frx, blx, brx) and box[1] = four y values
            (fly, fry, bly, bry).

    Returns:
        A float64 tensor of shape (800, 800) that is 1 inside every box's
        axis-aligned extent and 0 elsewhere.

    Each coordinate pair is averaged and mapped to a pixel index with
    floor(10 * value + 400). The two pixel bounds on each axis are sorted
    independently, so a box is painted whatever the orientation of its
    corners, and they are clipped to the mask so out-of-range boxes cannot
    wrap around through negative indices.
    """
    bboxes = target_object[0]['bounding_box']

    output = np.zeros((800, 800))
    height, width = output.shape

    def to_pixel(first, second):
        # Midpoint of the two coordinates, converted to a pixel index.
        return math.floor(float(10 * ((first + second) / 2) + 400))

    def clip(value, upper):
        return min(max(value, 0), upper)

    for this_bbox in bboxes:
        flx, frx, blx, brx = this_bbox[0]
        fly, fry, bly, bry = this_bbox[1]

        x_lo, x_hi = sorted((to_pixel(flx, frx), to_pixel(blx, brx)))
        y_lo, y_hi = sorted((to_pixel(fly, bly), to_pixel(fry, bry)))

        output[clip(y_lo, height):clip(y_hi, height), clip(x_lo, width):clip(x_hi, width)] = 1

    return torch.tensor(output)

def frankenstein(image_object):
    """Stitch the six images of one sample into a single 2x3 mosaic.

    `image_object[0]` is indexed 0..5. Images 0, 1, 2 are joined side by
    side (dim 2) to form the top row; images 5, 4, 3 form the bottom row;
    the rows are then stacked along dim 1 and a leading batch dimension is
    added.
    """
    views = image_object[0]
    top_row = torch.cat([views[idx] for idx in (0, 1, 2)], dim=2)
    bottom_row = torch.cat([views[idx] for idx in (5, 4, 3)], dim=2)
    return torch.cat([top_row, bottom_row], dim=1).unsqueeze(0)

################################ End Bounding Box Functions ######################################

In [0]:
################################ Pretrained Model/Feature Extractor ######################################
class Unsupervised_Model(nn.Module):
    """Small convolutional feature extractor used for pretraining.

    Three unpadded conv -> BatchNorm -> ReLU -> 2x2 max-pool stages
    (kernels 10, 5, 5; channels 3 -> 20 -> 10 -> 3) are followed by two
    Linear layers applied after fixed-size reshapes. The reshapes require
    the pooled feature map to hold a multiple of 81 * 34 values; a
    3x256x306 input, for example, ends the conv stack as 3x27x34 and the
    model returns a (batch, 256, 306) tensor.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=10, stride=1)
        self.conv1_bn = nn.BatchNorm2d(20)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=10, kernel_size=5, stride=1)
        self.conv2_bn = nn.BatchNorm2d(10)
        self.conv3 = nn.Conv2d(in_channels=10, out_channels=3, kernel_size=5, stride=1)
        self.conv3_bn = nn.BatchNorm2d(3)
        self.linear1 = nn.Linear(in_features=34, out_features=256)
        self.linear2 = nn.Linear(in_features=81, out_features=306)

    def forward(self, x):
        """Apply the three conv stages, then the two reshape + Linear steps."""
        conv_stages = (
            (self.conv1, self.conv1_bn),
            (self.conv2, self.conv2_bn),
            (self.conv3, self.conv3_bn),
        )
        for conv, norm in conv_stages:
            x = F.max_pool2d(F.relu(norm(conv(x))), kernel_size=2, stride=2)

        # Regroup the pooled values as rows of 34 so linear1 can widen each row to 256.
        x = F.relu(self.linear1(x.view(-1, 81, 34)))
        # Reinterpret the buffer as rows of 81 (a view, not a transpose) for linear2.
        return self.linear2(x.view(-1, 256, 81))
    
################################ End of Pretrained Model/Feature Extractor ######################################

################################ Freeze/Unfreeze Methods ######################################
def rgb2gray(rgb):
    """Collapse a channel-first RGB image into a single grayscale plane.

    `rgb` is indexed as rgb[channel, :, :] with channels 0, 1, 2 taken as
    red, green and blue; the result is their weighted sum with weights
    0.2989, 0.5870 and 0.1140.
    """
    red, green, blue = (rgb[channel, :, :] for channel in range(3))
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue

class Identity(torch.nn.Module):
    """Pass-through layer: returns its input untouched."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

def freeze_model(model):
    """Turn off gradient tracking for every parameter of `model`."""
    for weight in model.parameters():
        weight.requires_grad_(False)
        
def unfreeze_model(model):
    """Turn gradient tracking back on for every parameter of `model`."""
    for weight in model.parameters():
        weight.requires_grad_(True)
        
 ################################ End of Freeze/Unfreeze Methods ######################################


In [0]:
# Seed every random number generator used in this notebook.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# Use the first GPU when CUDA is available, otherwise the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if cuda else torch.device("cpu")

image_folder = 'data'
annotation_csv = 'data/annotation.csv'

# The first 106 scenes (0-105) are unlabeled; this index should not be changed.
unlabeled_scene_index = np.arange(106)
# Scenes 106-133 are labeled and are divided into a training and a validation subset.
labeled_scene_index = np.arange(106, 134)
train_scene_index = np.arange(106, 130)
val_scene_index = np.arange(130, 134)

transform = torchvision.transforms.ToTensor()

# The labeled dataset can only be retrieved sample by sample, and everything
# it returns is a tuple of tensors because the number of bounding boxes
# varies between samples. Returning extra_info is optional.
_labeled_kwargs = dict(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    transform=transform,
    extra_info=True,
)
labeled_trainset = LabeledDataset(scene_index=train_scene_index, **_labeled_kwargs)
labeled_valset = LabeledDataset(scene_index=val_scene_index, **_labeled_kwargs)

# All loaders yield one shuffled sample at a time using two worker processes.
_loader_kwargs = dict(batch_size=1, shuffle=True, num_workers=2)
trainloader = torch.utils.data.DataLoader(labeled_trainset, collate_fn=collate_fn, **_loader_kwargs)
valloader = torch.utils.data.DataLoader(labeled_valset, collate_fn=collate_fn, **_loader_kwargs)

unlabeled_trainset = UnlabeledDataset(
    image_folder=image_folder,
    scene_index=unlabeled_scene_index,
    first_dim='sample',
    transform=transform,
)
unlabeled_trainloader = torch.utils.data.DataLoader(unlabeled_trainset, **_loader_kwargs)

def test_supervised_model(model, is_bbox, loader=None):
    """
    Helper that measures a supervised model's average cross-entropy loss on a dataset.
    @param: model - network to evaluate; it is switched to eval mode and left there
    @param: is_bbox - if True the label is the mask built by bbox_to_label_bionary
                      from the sample's target, otherwise it is the sample's road image
    @param: loader - data loader for the dataset to test against; defaults to
                     the global `valloader`
    @return: summed per-sample loss divided by len(loader)
    """
    if loader is None:
        loader = valloader

    total_loss = 0
    model.eval()
    with torch.no_grad():
        for sample, target, road_image, extra in loader:
            # Average the images of the sample (dim 0) and add a batch dimension.
            model_input = torch.mean(sample[0], axis=0).unsqueeze(0)
            model_input = model_input.to(device)

            if is_bbox:
                label = bbox_to_label_bionary(target).unsqueeze(0)
            else:
                label = road_image[0].unsqueeze(0)
            label = label.to(device)

            outputs = model(model_input)
            total_loss += criterion_entropy(outputs, label.long())

    return total_loss/len(loader)

def test_semisupervised_model(model, feature_extractor, is_bbox, loader=None):
    """
    Helper that measures a semi-supervised model's average cross-entropy loss on a dataset.
    @param: model - network fed with the feature extractor's output; switched to eval mode
    @param: feature_extractor - network applied to the averaged input images; switched to eval mode
    @param: is_bbox - if True the label is the mask built by bbox_to_label_bionary
                      from the sample's target, otherwise it is the sample's road image
    @param: loader - data loader for the dataset to test against; defaults to
                     the global `valloader`
    @return: summed per-sample loss divided by len(loader)
    """
    if loader is None:
        loader = valloader

    total_loss = 0
    model.eval()
    feature_extractor.eval()
    with torch.no_grad():
        for sample, target, road_image, extra in loader:
            # Average the images of the sample (dim 0) and add a batch dimension.
            model_input = torch.mean(sample[0], axis=0).unsqueeze(0)
            model_input = model_input.to(device)

            if is_bbox:
                label = bbox_to_label_bionary(target).unsqueeze(0)
            else:
                label = road_image[0].unsqueeze(0)
            label = label.to(device)

            # The extractor's output gets an extra leading dimension before entering the model.
            model_input = feature_extractor(model_input).unsqueeze(0)
            outputs = model(model_input)
            total_loss += criterion_entropy(outputs, label.long())

    return total_loss/len(loader)

### Supervised Models

In [0]:
# Initialise the models: one 2-class U-Net per supervised task, both in the
# regular (3-channel input) configuration and moved to the selected device.
supervised_roadgraph = UNet(num_classes=2, semi_supervised=False).to(device)
supervised_bbox = UNet(num_classes=2, semi_supervised=False).to(device)

# Loss functions used by the training and evaluation cells.
criterion_entropy = nn.CrossEntropyLoss()
criterion_mse = torch.nn.MSELoss(reduction='mean')

In [0]:
# Training roadgraph: supervised U-Net trained on the road images for 10
# epochs with SGD, one sample per step. A checkpoint is written after every epoch.

optimizer = torch.optim.SGD(supervised_roadgraph.parameters(), lr=.01, momentum=0.9)
for epoch in range(1, 11):
    for batch_idx, (sample, target, road_image, extra) in enumerate(trainloader):
        # Re-enter train mode on every step: the periodic validation below
        # leaves the model in eval mode.
        supervised_roadgraph.train()

        label = road_image[0].unsqueeze(0).to(device)
        # Average the images of the sample (dim 0) and add a batch dimension.
        model_input = torch.mean(sample[0], axis=0).unsqueeze(0).to(device)

        optimizer.zero_grad()
        preds = supervised_roadgraph(model_input)
        # The loss stays on the training device; no copy to a CPU double
        # tensor is needed before calling backward().
        loss = criterion_entropy(preds, label.long())
        loss.backward()
        optimizer.step()

        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx, len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
            print('avg. validation loss:',float(test_supervised_model(supervised_roadgraph, is_bbox=False)))

    torch.save(supervised_roadgraph.state_dict(), 'supervised_roadgraph_final')

avg. validation loss: 0.7177907824516296


KeyboardInterrupt: ignored

In [0]:
# Training bboxes: supervised U-Net trained on the bounding-box masks for 10
# epochs with SGD, one sample per step. A checkpoint is written after every epoch.

optimizer = torch.optim.SGD(supervised_bbox.parameters(), lr=.01, momentum=0.9)
for epoch in range(1, 11):
    for batch_idx, (sample, target, road_image, extra) in enumerate(trainloader):
        # Re-enter train mode on every step: the periodic validation below
        # leaves the model in eval mode.
        supervised_bbox.train()

        label = bbox_to_label_bionary(target).unsqueeze(0).to(device)
        # Average the images of the sample (dim 0) and add a batch dimension.
        model_input = torch.mean(sample[0], axis=0).unsqueeze(0).to(device)

        optimizer.zero_grad()
        preds = supervised_bbox(model_input)
        # The loss stays on the training device; no copy to a CPU double
        # tensor is needed before calling backward().
        loss = criterion_entropy(preds, label.long())
        loss.backward()
        optimizer.step()

        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx, len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
            print('avg. validation loss:',float(test_supervised_model(supervised_bbox, is_bbox=True)))

    torch.save(supervised_bbox.state_dict(), 'supervised_bbox_final')



KeyboardInterrupt: ignored

### Training the Feature Extractor

In [0]:
# Fresh, untrained feature extractor placed on the selected device; the bare
# name on the last line makes the cell display the layer summary.
feature_extractor = Unsupervised_Model().to(device)
feature_extractor

Unsupervised_Model(
  (conv1): Conv2d(3, 20, kernel_size=(10, 10), stride=(1, 1))
  (conv1_bn): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(20, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2_bn): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(10, 3, kernel_size=(5, 5), stride=(1, 1))
  (conv3_bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear1): Linear(in_features=34, out_features=256, bias=True)
  (linear2): Linear(in_features=81, out_features=306, bias=True)
)

In [0]:
# Training the feature extractor for one epoch on the unlabeled samples:
# the input is the average of a sample's images and the regression target is
# the grayscale version of that same averaged image (MSE loss).

optimizer = torch.optim.SGD(feature_extractor.parameters(), lr=.01, momentum=0.9)
for epoch in range(1, 2):
    for batch_idx, image in enumerate(unlabeled_trainloader):
        feature_extractor.train()

        model_input = torch.mean(image[0], axis=0).unsqueeze(0)
        # The target is derived from the input before either is moved to the device.
        label = rgb2gray(model_input[0]).unsqueeze(0).to(device)
        model_input = model_input.to(device)

        optimizer.zero_grad()
        preds = feature_extractor(model_input)
        loss = criterion_mse(preds, label)
        loss.backward()
        optimizer.step()

        if batch_idx % 500 != 0:
            continue

        # Every 500 steps: checkpoint, then report progress.
        torch.save(feature_extractor.state_dict(), 'feature_extractor_final')
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx, len(unlabeled_trainloader.dataset),
            100. * batch_idx / len(unlabeled_trainloader), loss.item()))

    torch.save(feature_extractor.state_dict(), 'feature_extractor_final')
          





KeyboardInterrupt: ignored

### Semi-Supervised Models

In [0]:
# One 2-class U-Net per task, both in the semi-supervised configuration
# (1-channel input fed by the feature extractor).
semisupervised_roadgraph = UNet(num_classes=2,semi_supervised=True)
semisupervised_roadgraph.to(device)

semisupervised_bbox = UNet(num_classes=2,semi_supervised=True)
semisupervised_bbox.to(device)

# Restore the pretrained feature extractor. map_location lets a checkpoint
# saved on a GPU be loaded on a CPU-only runtime (and vice versa).
feature_extractor = Unsupervised_Model()
feature_extractor.load_state_dict(torch.load('feature_extractor_final', map_location=device))
# Start with the extractor frozen; the training cells unfreeze it later.
freeze_model(feature_extractor)
feature_extractor.to(device)

Unsupervised_Model(
  (conv1): Conv2d(3, 20, kernel_size=(10, 10), stride=(1, 1))
  (conv1_bn): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(20, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2_bn): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(10, 3, kernel_size=(5, 5), stride=(1, 1))
  (conv3_bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear1): Linear(in_features=34, out_features=256, bias=True)
  (linear2): Linear(in_features=81, out_features=306, bias=True)
)

In [0]:
# Training the semi-supervised roadgraph model for 6 epochs.
# Epochs 1-2: the feature extractor is frozen and only the U-Net is trained.
# Epochs 3-6: the extractor is unfrozen and fine-tuned together with the U-Net.

optimizer = torch.optim.SGD(semisupervised_roadgraph.parameters(), lr=.01, momentum=0.9)

# Make sure the frozen phase is really frozen, even if this cell is re-run
# after the extractor has been unfrozen.
freeze_model(feature_extractor)
frozen_indicator = True
for epoch in range(1, 7):
    if epoch > 2 and frozen_indicator:
        frozen_indicator = False
        unfreeze_model(feature_extractor)
        # The optimizer was built from the U-Net's parameters only; without
        # registering the extractor's parameters here they would receive
        # gradients but never be updated.
        optimizer.add_param_group({'params': list(feature_extractor.parameters())})

    for batch_idx, (sample, target, road_image, extra) in enumerate(trainloader):
        # Re-apply the modes on every step: the validation at the end of each
        # epoch leaves both networks in eval mode.
        semisupervised_roadgraph.train()
        feature_extractor.train(not frozen_indicator)

        label = road_image[0].unsqueeze(0).to(device)
        # Average the images of the sample (dim 0) and add a batch dimension.
        model_input = torch.mean(sample[0], axis=0).unsqueeze(0).to(device)

        optimizer.zero_grad()

        # The extractor's output gets an extra leading dimension before entering the U-Net.
        model_input = feature_extractor(model_input).unsqueeze(0)
        preds = semisupervised_roadgraph(model_input)

        # The loss stays on the training device; no copy to a CPU double
        # tensor is needed before calling backward().
        loss = criterion_entropy(preds, label.long())
        loss.backward()
        optimizer.step()

        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx, len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))

    torch.save(semisupervised_roadgraph.state_dict(), 'semisupervised_roadgraph_final')
    torch.save(feature_extractor.state_dict(), 'roadgraph_feature_extractor_final')
    print('avg. validation loss:',float(test_semisupervised_model(semisupervised_roadgraph, feature_extractor, is_bbox=False)))

avg. validation loss: 0.7018569111824036


KeyboardInterrupt: ignored

In [0]:
# Training the semi-supervised bounding-box model for 6 epochs.
# Epochs 1-2: the feature extractor is frozen and only the U-Net is trained.
# Epochs 3-6: the extractor is unfrozen and fine-tuned together with the U-Net.

optimizer = torch.optim.SGD(semisupervised_bbox.parameters(), lr=.01, momentum=0.9)

# This task keeps its own fine-tuned extractor (saved below as
# 'bbox_feature_extractor_final'), so start again from the pretrained
# checkpoint instead of whatever state an earlier training cell left behind,
# and freeze it for the first phase.
feature_extractor.load_state_dict(torch.load('feature_extractor_final', map_location=device))
freeze_model(feature_extractor)
frozen_indicator = True
for epoch in range(1, 7):
    if epoch > 2 and frozen_indicator:
        frozen_indicator = False
        unfreeze_model(feature_extractor)
        # The optimizer was built from the U-Net's parameters only; without
        # registering the extractor's parameters here they would receive
        # gradients but never be updated.
        optimizer.add_param_group({'params': list(feature_extractor.parameters())})

    for batch_idx, (sample, target, road_image, extra) in enumerate(trainloader):
        # Re-apply the modes on every step: the validation at the end of each
        # epoch leaves both networks in eval mode.
        semisupervised_bbox.train()
        feature_extractor.train(not frozen_indicator)

        label = bbox_to_label_bionary(target).unsqueeze(0).to(device)
        # Average the images of the sample (dim 0) and add a batch dimension.
        model_input = torch.mean(sample[0], axis=0).unsqueeze(0).to(device)

        optimizer.zero_grad()

        # The extractor's output gets an extra leading dimension before entering the U-Net.
        model_input = feature_extractor(model_input).unsqueeze(0)
        preds = semisupervised_bbox(model_input)

        # The loss stays on the training device; no copy to a CPU double
        # tensor is needed before calling backward().
        loss = criterion_entropy(preds, label.long())
        loss.backward()
        optimizer.step()

        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx, len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))

    torch.save(semisupervised_bbox.state_dict(), 'semisupervised_bbox_final')
    torch.save(feature_extractor.state_dict(), 'bbox_feature_extractor_final')
    print('avg. validation loss:',float(test_semisupervised_model(semisupervised_bbox, feature_extractor, is_bbox=True)))