# Colab formalities

## Connect to gDrive

In [0]:
# Mount Google Drive inside the Colab VM at /content/drive/ so that the project
# and dataset folders referenced by later cells are reachable as ordinary paths.
# Running this cell prompts for an authorization code.
from google.colab import drive
drive.mount('/content/drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/


## Download PyTorch

In [0]:
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision tensorboardX

tcmalloc: large alloc 1073750016 bytes == 0x5825a000 @  0x7f8af7d972a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641


## Change to ConvCRF directory

In [0]:
import os

# Make the ConvCRF checkout on Drive the working directory so that the project
# imports and the relative paths used by later cells resolve against it.
CONVCRF_DIR = '/content/drive/My Drive/colab/ConvCRF'
os.chdir(CONVCRF_DIR)

## In case of PIL error

In [0]:
# Workaround for PIL import errors: uncomment the two pip lines below to replace
# the installed Pillow with version 5.3.0, then re-run this cell.
# ! pip uninstall -y pillow
# ! pip install pillow==5.3.0
import PIL
# Display the Pillow version that is actually loaded in this kernel.
PIL.__version__

'5.3.0'

## Tensorboard

In [0]:
# Start TensorBoard as a background process from inside the project's `logs`
# folder, then switch back to the project root so later cells keep resolving
# relative paths against the ConvCRF directory.
# NOTE(review): from this working directory './log' resolves to
# ConvCRF/logs/log, while the SummaryWriter created later with log_dir='./log'
# runs from ConvCRF and would write to ConvCRF/log - confirm both point at the
# intended directory.
LOG_DIR = './log'
tensorboard_cmd = 'tensorboard --logdir {} --host 0.0.0.0 --port 6606 &'.format(LOG_DIR)

os.chdir('/content/drive/My Drive/colab/ConvCRF/logs')
get_ipython().system_raw(tensorboard_cmd)
os.chdir('/content/drive/My Drive/colab/ConvCRF')

In [0]:
# Sanity check: request the page served on port 6606, the port TensorBoard was
# launched on.
! curl http://localhost:6606

In [0]:
# Install the localtunnel command-line client (`lt`) globally through npm; it is
# used to tunnel the TensorBoard port.
! npm install -g localtunnel

[K[?25h/tools/node/bin/lt -> /tools/node/lib/node_modules/localtunnel/bin/client
[K[?25h+ localtunnel@1.9.1
added 54 packages from 32 contributors in 2.95s


In [0]:
# Tunnel port 6606 (the port TensorBoard was launched on) in the background.
# Everything `lt` prints, stderr included, is appended to url.txt on Drive.
# The command is a raw string so the shell escape `\ ` in the path is passed
# through verbatim instead of relying on an invalid Python escape sequence.
get_ipython().system_raw(r'lt --port 6606 >> /content/drive/My\ Drive/url.txt 2>&1 &')

In [0]:
# Print the file that the background `lt` process appends its output to;
# the tunnel URL is expected to show up here once the tunnel is established.
! cat /content/drive/My\ Drive/url.txt

## Check GPU Utilization

In [0]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

# This notebook contains the code for training the CRF

In [0]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline

# Re-import modified modules automatically before each cell runs, so edits to
# the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [0]:
# Install dotmap, which provides the DotMap class imported in the next cell.
! pip install dotmap

In [0]:
import os
import sys
import timeit
import time
import warnings
import logging as logger
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from dotmap import DotMap
from tensorboardX import SummaryWriter

import torch
import torch.nn as nn

from convcrf.convcrf import GaussCRF, default_conf
from utils.synthetic import augment_label
from utils.metrics import Metrics, Averages
from demo import do_crf_inference

# Send timestamped INFO-level log records to stdout so they show up in the
# cell output.
logger.basicConfig(
    stream=sys.stdout,
    level=logger.INFO,
    format='%(asctime)s %(levelname)s %(message)s',
)

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Use the GPU when CUDA is available, otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

logger.info('Device is {}'.format(device))

2019-01-10 08:28:06,358 INFO Device is cuda


## Load Data

In [0]:
from torch.utils.data import DataLoader, WeightedRandomSampler
from utils.pascal_loader import PascalDatasetLoader

# Root of the PASCAL VOC 2012 dataset on Drive and the class count used for it
# throughout the notebook.
path = '/content/drive/My Drive/Research/Datasets/VOCdevkit/VOC2012'
num_classes = 21

# CRF training uses a 200-sample subset of the train split; validation uses
# the whole val split.
traincrf_dataset = PascalDatasetLoader(path, split='train', sample_size=200)
val_dataset = PascalDatasetLoader(path, split='val')

# Show the default ConvCRF configuration for reference.
print(default_conf)

# Both loaders keep DataLoader's default batch size; only training is shuffled.
traincrf_loader = DataLoader(dataset=traincrf_dataset, shuffle=True, num_workers=8)
val_loader = DataLoader(dataset=val_dataset, num_workers=8)

print(len(val_loader), len(traincrf_loader))

{'filter_size': 7, 'blur': 1, 'merge': True, 'norm': 'none', 'weight': 'vector', 'unary_weight': 1, 'weight_init': 0.2, 'trainable': True, 'convcomp': False, 'logsoftmax': True, 'softmax': True, 'final_softmax': False, 'pos_feats': {'sdims': 3, 'compat': 3}, 'col_feats': {'sdims': 80, 'schan': 13, 'compat': 10, 'use_bias': False}, 'trainable_bias': False, 'pyinn': False}
1449 200


## Load stored model parameters

In [0]:
# Location of the checkpoint that the training loop writes whenever the
# validation mean IoU matches or beats the best value so far.
save_path = os.path.join("/content/drive/My Drive/", "best_crf_model.pth")

# `saved_state` is the checkpoint dict, or None when no checkpoint exists yet.
# Loading onto the CPU keeps this working on runtimes without a GPU; the model
# is moved to the target device after its weights are restored.
saved_state = None
if os.path.isfile(save_path):
    saved_state = torch.load(save_path, map_location='cpu')

# Later cells test `save_state` but read from `saved_state`. Previously
# `save_state` was always None (so the checkpoint was silently never restored)
# and `saved_state` was undefined when the file was missing. Bind both names
# to the same object so either spelling works.
save_state = saved_state

## Define the model

In [0]:
# Build the Gaussian ConvCRF from the library's default configuration for
# 500x500 inputs and `num_classes` output classes.
config = default_conf
crf = GaussCRF(nclasses=num_classes, shape=(500, 500), conf=config)

# Restore weights from the checkpoint: the guard tests `save_state`, the
# weights are read from `saved_state`.
if save_state:
    crf.load_state_dict(saved_state['model_state'])

# Move the parameters to the selected device.
model = crf.to(device)

## Define the loss function and optimizer

In [0]:
import torch.optim as optim

# Cross-entropy over the class scores as the training objective.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a fixed learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

## Initialize training 

In [0]:
# One running-loss average and one metrics accumulator per split.
train_loss_avg, val_loss_avg = Averages(), Averages()
train_running_metrics, val_running_metrics = Metrics(num_classes), Metrics(num_classes)

# Start from the checkpoint's best IoU when `save_state` is set; otherwise
# use -1 as the starting value.
best_iou = saved_state['best_iou'] if save_state else -1
logger.info('Starting from iou: {}'.format(best_iou))

num_epochs = 5
# lowest_loss = 0.4491

# TensorBoard event files are written to ./log relative to the current
# working directory.
writer = SummaryWriter(log_dir='./log')

2019-01-10 08:29:31,731 INFO Starting from iou: -1


## Define a method to run `augment_label` on a batch

In [0]:
def batch_augment_label(labels):
    """Apply `augment_label` to every label map of a batch.

    Each element of `labels` is passed to `augment_label` together with the
    notebook-level `num_classes`. The returned array has its axes reordered
    with `transpose(2, 0, 1)` (presumably channels-last to channels-first -
    confirm against `augment_label`). The per-sample results are stacked into
    a single numpy array in batch order.
    """
    unaries = [
        augment_label(label, num_classes=num_classes).transpose(2, 0, 1)
        for label in labels
    ]
    return np.array(unaries)

## Define the learning rate decay

In [0]:
# lambda_lr_decay = lambda epoch: ((1 - (epoch / num_epochs)) ** 0.9) ** 2
# scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_lr_decay)

## Train the network

In [0]:
# Train the CRF for `num_epochs` epochs. After every epoch the model is
# evaluated on the whole validation loader and a checkpoint is written to
# `save_path` whenever the validation mean IoU matches or beats `best_iou`.

# Key under which Metrics.get_scores() reports the mean IoU.
MIOU_KEY = "Mean IoU : \t"

for epoch in range(num_epochs):
    actual_epoch = epoch + 1
#     scheduler.step()

    # The validation pass at the end of each epoch puts the model in eval
    # mode; switch back so every epoch (not only the first) trains in
    # training mode.
    model.train()

    for i, (images, labels) in enumerate(tqdm(traincrf_loader)):
        # Step counter that keeps increasing across epochs, so the per-batch
        # TensorBoard curve of one epoch does not overwrite the previous one.
        train_step = epoch * len(traincrf_loader) + i + 1

        images = images.to(device)

        optimizer.zero_grad()

        # The unaries are derived from the labels while they are still on the
        # CPU (batch_augment_label returns a numpy array).
        unary = torch.from_numpy(batch_augment_label(labels)).float().to(device)
        labels = labels.to(device)

        predictions = model(unary=unary, img=images)

        # Flatten to one row of class scores per pixel
        # (assumes predictions are laid out as (N, C, H, W) - TODO confirm).
        outputs = predictions.transpose(1, 2).transpose(2, 3).contiguous().view(-1, num_classes)

        loss = criterion(outputs, labels.view(-1))
        loss.backward()
        optimizer.step()

        # Arg-max over the class dimension gives the predicted label map.
        train_preds_np = predictions.detach().max(1)[1].cpu().numpy()
        train_labels_np = labels.detach().cpu().numpy()
        train_running_metrics.update(train_labels_np, train_preds_np)

        train_loss_avg.update(loss.item())

        writer.add_scalar('running_loss/train_running_loss', loss.item(), train_step)

    train_score, _ = train_running_metrics.get_scores()

    print('Epoch: {} Train loss: {:.4f}'.format(actual_epoch, train_loss_avg.avg))
    print('Epoch: {} Train Summary'.format(actual_epoch))

    for tk, tv in train_score.items():
        print(tk, '{:.4f}'.format(tv))

    writer.add_scalar('mIou/train_mIoU', train_score[MIOU_KEY], actual_epoch)

    train_running_metrics.reset()
    train_loss_avg.reset()

    # ---- validation: no gradients, eval mode ----
    model.eval()
    with torch.no_grad():
        for i_val, (images_val, labels_val) in enumerate(tqdm(val_loader)):
            val_step = epoch * len(val_loader) + i_val + 1

            unary = torch.from_numpy(batch_augment_label(labels_val)).float().to(device)

            images_val = images_val.to(device)
            labels_val = labels_val.to(device)

            predictions = model(unary=unary, img=images_val)
            pred = predictions.transpose(1, 2).transpose(2, 3).contiguous().view(-1, num_classes)

            val_loss = criterion(pred, labels_val.view(-1))

            preds_np = predictions.max(1)[1].cpu().numpy()
            labels_np = labels_val.cpu().numpy()

            val_running_metrics.update(labels_np, preds_np)
            val_loss_avg.update(val_loss.item())

            # Log the validation loss of this batch (the original logged the
            # last training loss here).
            writer.add_scalar('running_loss/val_running_loss', val_loss.item(), val_step)

    val_score, _ = val_running_metrics.get_scores()

    print('Epoch: {} Val loss: {:.4f}'.format(actual_epoch, val_loss_avg.avg))
    print('Epoch: {} Val Summary'.format(actual_epoch))

    for vk, vv in val_score.items():
        print(vk, '{:.4f}'.format(vv))

    writer.add_scalar('mIou/val_mIoU', val_score[MIOU_KEY], actual_epoch)

    val_running_metrics.reset()
    val_loss_avg.reset()

    # Checkpoint on ties as well as on improvements.
    if val_score[MIOU_KEY] >= best_iou:
        best_iou = val_score[MIOU_KEY]
        logger.info('Found new best_iou: {}'.format(best_iou))
        state = {
            "epoch": actual_epoch,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "best_iou": best_iou,
        }
        logger.info(save_path)
        torch.save(state, save_path)

200it [04:53,  1.45s/it]

Epoch: 1 Train loss: 0.9877
Epoch: 1 Train Summary
Overall Acc: 	 0.9646
Mean Acc : 	 0.9167
FreqW Acc : 	 0.9351
Mean IoU : 	 0.7839



1449it [22:07,  1.10it/s]

Epoch: 1 Val loss: 0.8305
Epoch: 1 Val Summary
Overall Acc: 	 0.9701
Mean Acc : 	 0.9074
FreqW Acc : 	 0.9434
Mean IoU : 	 0.8402
2019-01-10 08:56:49,200 INFO Found new best_iou: 0.840171133612765
2019-01-10 08:56:49,205 INFO /content/drive/My Drive/best_crf_model.pth



200it [04:55,  1.46s/it]

Epoch: 2 Train loss: 0.7590
Epoch: 2 Train Summary
Overall Acc: 	 0.9723
Mean Acc : 	 0.8952
FreqW Acc : 	 0.9472
Mean IoU : 	 0.8270



1449it [22:18,  1.11it/s]

Epoch: 2 Val loss: 0.7781
Epoch: 2 Val Summary
Overall Acc: 	 0.9709
Mean Acc : 	 0.8973
FreqW Acc : 	 0.9444
Mean IoU : 	 0.8448
2019-01-10 09:24:03,529 INFO Found new best_iou: 0.8448403048737958
2019-01-10 09:24:03,538 INFO /content/drive/My Drive/best_crf_model.pth



200it [04:55,  1.46s/it]

Epoch: 3 Train loss: 0.7191
Epoch: 3 Train Summary
Overall Acc: 	 0.9724
Mean Acc : 	 0.8873
FreqW Acc : 	 0.9472
Mean IoU : 	 0.8297



1449it [22:16,  1.07it/s]

Epoch: 3 Val loss: 0.7397
Epoch: 3 Val Summary
Overall Acc: 	 0.9706
Mean Acc : 	 0.8950
FreqW Acc : 	 0.9438
Mean IoU : 	 0.8439



200it [04:56,  1.47s/it]

Epoch: 4 Train loss: 0.6958
Epoch: 4 Train Summary
Overall Acc: 	 0.9719
Mean Acc : 	 0.8835
FreqW Acc : 	 0.9462
Mean IoU : 	 0.8304



1449it [22:22,  1.10it/s]

Epoch: 4 Val loss: 0.7103
Epoch: 4 Val Summary
Overall Acc: 	 0.9700
Mean Acc : 	 0.8937
FreqW Acc : 	 0.9427
Mean IoU : 	 0.8408



200it [04:56,  1.47s/it]

Epoch: 5 Train loss: 0.6578
Epoch: 5 Train Summary
Overall Acc: 	 0.9719
Mean Acc : 	 0.8841
FreqW Acc : 	 0.9462
Mean IoU : 	 0.8311



1449it [22:29,  1.09it/s]

Epoch: 5 Val loss: 0.6876
Epoch: 5 Val Summary
Overall Acc: 	 0.9697
Mean Acc : 	 0.8897
FreqW Acc : 	 0.9420
Mean IoU : 	 0.8393



200it [04:57,  1.48s/it]

Epoch: 6 Train loss: 0.6357
Epoch: 6 Train Summary
Overall Acc: 	 0.9714
Mean Acc : 	 0.8824
FreqW Acc : 	 0.9453
Mean IoU : 	 0.8287



1449it [22:44,  1.04it/s]

Epoch: 6 Val loss: 0.6648
Epoch: 6 Val Summary
Overall Acc: 	 0.9693
Mean Acc : 	 0.8893
FreqW Acc : 	 0.9414
Mean IoU : 	 0.8374



200it [04:57,  1.46s/it]

Epoch: 7 Train loss: 0.6253
Epoch: 7 Train Summary
Overall Acc: 	 0.9709
Mean Acc : 	 0.8791
FreqW Acc : 	 0.9443
Mean IoU : 	 0.8255



1449it [22:33,  1.08it/s]

Epoch: 7 Val loss: 0.6488
Epoch: 7 Val Summary
Overall Acc: 	 0.9692
Mean Acc : 	 0.8838
FreqW Acc : 	 0.9409
Mean IoU : 	 0.8376



200it [04:58,  1.47s/it]

Epoch: 8 Train loss: 0.5980
Epoch: 8 Train Summary
Overall Acc: 	 0.9709
Mean Acc : 	 0.8783
FreqW Acc : 	 0.9443
Mean IoU : 	 0.8259



1449it [22:34,  1.09it/s]

Epoch: 8 Val loss: 0.6305
Epoch: 8 Val Summary
Overall Acc: 	 0.9686
Mean Acc : 	 0.8842
FreqW Acc : 	 0.9400
Mean IoU : 	 0.8344



200it [04:58,  1.47s/it]

Epoch: 9 Train loss: 0.5789
Epoch: 9 Train Summary
Overall Acc: 	 0.9708
Mean Acc : 	 0.8757
FreqW Acc : 	 0.9441
Mean IoU : 	 0.8243



1449it [22:33,  1.09it/s]

Epoch: 9 Val loss: 0.6145
Epoch: 9 Val Summary
Overall Acc: 	 0.9683
Mean Acc : 	 0.8831
FreqW Acc : 	 0.9395
Mean IoU : 	 0.8328



200it [04:57,  1.48s/it]

Epoch: 10 Train loss: 0.5697
Epoch: 10 Train Summary
Overall Acc: 	 0.9702
Mean Acc : 	 0.8753
FreqW Acc : 	 0.9429
Mean IoU : 	 0.8227



1449it [22:41,  1.07it/s]

Epoch: 10 Val loss: 0.5977
Epoch: 10 Val Summary
Overall Acc: 	 0.9682
Mean Acc : 	 0.8822
FreqW Acc : 	 0.9392
Mean IoU : 	 0.8321



