# VGG16_LargeFOV

## 1. Environment Setup

### 1.0. Check GPU


In [1]:
from data import LungImageDataset, colorDict
from torch.utils.data import DataLoader
import augmentation
import model
import train

import torch
import torchvision

# Ignore Warning
import warnings

warnings.filterwarnings(action="ignore")

test


2024-01-17 00:11:38.174744: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-17 00:11:38.240299: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-17 00:11:38.655598: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-17 00:11:38.655663: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-17 00:11:38.709667: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
# Set to True when running on Google Colab (enables the bootstrap cell below).
use_colab = False

# Pick the compute device from what is actually available on this machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Keep the GPU flag in lock-step with the detected device instead of
# hard-coding it, so the two can never disagree.
use_gpu = device.type == "cuda"
print(device)

cpu


In [3]:
if use_colab:
    ! git clone https://github.com/chendonghp/deeplab-v1.git
    !pip install -U cython
    !pip install git+https://github.com/lucasb-eyer/pydensecrf.git

    from google.colab import drive
    drive.mount('/content/drive')

    %cd deeplab-v1/Implementation/
    ! unzip /content/drive/MyDrive/data.zip -d ./dataset
    use_gpu = True

In [4]:
# List the working directory to confirm the project modules
# (data.py, model.py, train.py, augmentation.py, ...) are present.
! ls

'DeepLab_v1 - SBD, VOC 2012.ipynb'    __init__.py       best.pt      model.py
 Inference.ipynb		      __pycache__       data.py      train.py
 VGG16_LargeFOV_SBD,_VOC_2012.ipynb   augmentation.py   experiment   utils.py


In [5]:
import os

# --- Environment-specific settings --------------------------------------
# Only the values that actually differ between Colab and the local machine
# live in the branch; everything shared is defined once below.
if use_colab:
    data_root = r"./dataset"
    train_batch_size = 130
    test_batch_size = 60
    size = 12000  # select num of images to put in dataset
    epochs = 100
    path = "/content/drive/MyDrive/experiment/vgg16_largefov"
else:
    data_root = r"/mnt/d/data/"
    train_batch_size = 10
    test_batch_size = 10
    size = 120  # select num of images to put in dataset
    epochs = 20
    path = "experiment/vgg16_largefov"

# --- Dataset split -------------------------------------------------------
# The first `train_ratio` share of the selected images is used for training,
# the remainder for validation.
train_ratio = 0.9
train_size = int(size * train_ratio)
train_range, val_range = (0, train_size), (train_size, size)

# --- Model parameters ----------------------------------------------------
num_classes = len(colorDict)  # one class per entry in the colour map
ignore_index = 255  # forwarded to train.VGG16_LargeFOV as ignore_index

# --- Train hyperparameters -----------------------------------------------
lr = 0.001
momentum = 0.9
weight_decay = 0.0005
init_weights = True

# --- Log and save --------------------------------------------------------
print_freq = 5
epoch_print = 1
# Checkpoints and logs all share the same experiment directory.
load_path = path
save_path = path
log_path = path

# exist_ok avoids the check-then-create race and makes the cell re-runnable.
os.makedirs(path, exist_ok=True)

## 2. Data Preprocessing


In [6]:
# Training-time augmentation: convert, then random 224x224 crop and random flip.
# Disabled alternatives kept for reference:
#   augmentation.Resize((256, 256))
#   augmentation.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_steps = [
    augmentation.ToTensor(),
    augmentation.PILToTensor(),  # HWC to CHW
    augmentation.RandomCrop((224, 224)),
    augmentation.RandomHorizontalFlip(),
]
train_tf = augmentation.Mask_Aug(transforms=train_steps)

# Validation uses the conversion steps only (no crop, no flip).
val_steps = [
    augmentation.ToTensor(),
    augmentation.PILToTensor(),
]
val_tf = augmentation.Mask_Aug(transforms=val_steps)

# Each dataset is restricted to its own index range of the image pool.
train_dataset = LungImageDataset(data_root, size=train_range, transform=train_tf)
val_dataset = LungImageDataset(data_root, size=val_range, transform=val_tf)

# Loader options common to both splits; batch size and shuffling differ.
loader_common = dict(num_workers=4, pin_memory=True)

train_loader = DataLoader(
    train_dataset, batch_size=train_batch_size, shuffle=True, **loader_common
)
val_loader = DataLoader(
    val_dataset, batch_size=test_batch_size, shuffle=False, **loader_common
)

# Alternative public datasets (disabled):
# train_dataset = torchvision.datasets.SBDataset(root='./', image_set='train_noval', mode='segmentation', download=False, transforms=train_tf)
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=4, pin_memory=True)
# val_dataset = torchvision.datasets.VOCSegmentation(root='./', year='2012', image_set='val', download=False, transforms=val_tf)
# val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=test_batch_size, shuffle=False, num_workers=4, pin_memory=True)

In [7]:
# Pull a single batch from the training loader as a quick sanity check of the pipeline.
train_features, train_labels = next(iter(train_loader))

In [8]:
# Display the batch shapes; note the order is (labels, features).
train_labels.shape, train_features.shape

(torch.Size([10, 1, 224, 224]), torch.Size([10, 3, 224, 224]))

## 3. Train Model


In [9]:
# Gather the constructor arguments first so the full configuration is
# visible in one place, then build the trainer from it.
trainer_config = dict(
    num_classes=num_classes,
    init_weights=init_weights,
    ignore_index=ignore_index,
    use_gpu=use_gpu,
    device=device,
    print_freq=print_freq,
    epoch_print=epoch_print,
)
vgg16_largefov = train.VGG16_LargeFOV(**trainer_config)

# Earlier call form, kept for reference (took gpu_id instead of use_gpu/device):
# VGG16_LargeFOV = train.VGG16_LargeFOV(num_classes=num_classes, init_weights=init_weights, ignore_index=ignore_index,
#                                       gpu_id=gpu_id, print_freq=print_freq, epoch_print=epoch_print)

In [12]:
if use_colab:
    %load_ext tensorboard
    %tensorboard --logdir  {path}/runs
else:
    %load_ext tensorboard
    %tensorboard --logdir  {path}/runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 6239), started 0:03:48 ago. (Use '!kill 6239' to kill it.)

In [11]:
# Optimizer hyperparameters, grouped so they read as one unit.
optimizer_settings = dict(lr=lr, momentum=momentum, weight_decay=weight_decay)

# Start training; load_path=None is passed explicitly, so the configured
# load_path variable is not used for this run.
vgg16_largefov.train(
    train_loader,
    val_loader,
    load_path=None,
    save_path=save_path,
    log_path=log_path,
    epochs=epochs,
    **optimizer_settings,
)
# Earlier call form, kept for reference:
# VGG16_LargeFOV.train(train_loader, val_loader, save=save, epochs=epochs, lr=lr, momentum=momentum, weight_decay=weight_decay)

Epoch 0 Started...
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.

 *********************************** Best mIoU Updated ***********************************
Iteration : 0 - Train Loss : 1.826583, Test Loss : 1.802277, Test mIoU : 0.8244, Test mpa : 0.8706
Saved Model at experiment/vgg16_largefov/vgg16_large_fov_best.pt.

Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.

 *********************************** Best mIoU Updated ***********************************
Iteration : 1 - Train Loss : 1.813102, Test Loss : 1.798792, Test mIoU : 1.2279, Test mpa : 1.3119
Saved Model at experiment/vgg16_largefov/vgg16_large_fov_best.pt.

Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.

 *********************************** Best mIoU Updated ***********************************
Iteration : 2 - Train Loss : 1.804427, Test Loss : 1.793897, Test mIoU : 2.0515, Test mpa : 2.2382
Saved Model at experiment/vgg16_large

Bad pipe message: %s [b'nnection: keep-alive\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mo', b'lla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/0.22.1 Chrome/114.0.5735.289 Electron', b'5.9.7 Safari/537.36\r\nAccept: text/html,applica']


Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Iteration : 4 - Train Loss : 1.729167, Test Loss : 1.704724, Test mIoU : 7.7139, Test mpa : 14.0193
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Iteration : 9 - Train Loss : 1.681343, Test Loss : 1.622072, Test mIoU : 7.8371, Test mpa : 15.1909
Training log saved to experiment/vgg16_largefov/vgg_largefov_training_log.csv.
Saved Model at experiment/vgg16_largefov/vgg16_large_fov_latest.pt.
Epoch 2 Started...
Tr

KeyboardInterrupt: 