<a href="https://colab.research.google.com/github/hardyjeremy98/FloodNet/blob/main/FloodNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

---

In [None]:
# Import PyTorch
import torch
import torch.nn as nn

# Import torchvision
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

# Import matplotlib for visualization
import matplotlib.pyplot as plt
import numpy as np

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
input_channels = 3
num_classes = 10

---
# Getting data

In [None]:
# !cp -r /content/gdrive/MyDrive/Colab_Notebooks/FloodNet/FloodNet_Data /content/local_dataset-floodnet

In [None]:
# data_folder = r'/content/gdrive/MyDrive/Colab_Notebooks/FloodNet/FloodNet_Data'
# train_folder = r'/content/gdrive/MyDrive/Colab_Notebooks/FloodNet/FloodNet_Data/train'

# train_image_folder = r'/content/gdrive/MyDrive/Colab_Notebooks/FloodNet/FloodNet_Data/train/train-org-img'
# train_mask_folder = r'/content/gdrive/MyDrive/Colab_Notebooks/FloodNet/FloodNet_Data/train/train-label-img'

In [None]:
data_folder = r'/content/local_dataset-floodnet'
train_folder = r'/content/local_dataset-floodnet/train'

train_image_folder = r'/content/local_dataset-floodnet/train/train-org-img'
train_mask_folder = r'/content/local_dataset-floodnet/train/train-label-img'

In [None]:
import os

def walk_through_dir(dir_path):
  """
  Print, for `dir_path` and every directory below it, how many
  sub-directories and files it directly contains.

  Args:
    dir_path: Root directory to walk.
  """
  for current_dir, subdirs, files in os.walk(dir_path):
    n_dirs = len(subdirs)
    n_files = len(files)
    print(f"There are {n_dirs} directories and {n_files} files/images in {current_dir}")

In [None]:
walk_through_dir(data_folder)

In [None]:
# prompt: Write code for a function that will plot a random image from the folder given

import torchvision.io as tv
import matplotlib.pyplot as plt
from matplotlib.pyplot import imread

import os
import random

def plot_random_image(im_folder, label_folder):
  """
  Plot a randomly chosen image and its label mask, one figure after the other.

  Args:
    im_folder: Folder containing the input images.
    label_folder: Folder containing the label masks. The mask for an image
      file `<stem>.<ext>` is looked up as `<stem>_lab.png` in this folder.

  Raises:
    ValueError: If `im_folder` contains no files.
  """

  # Only regular files are candidates; a sub-directory cannot be read as an image
  files = [name for name in os.listdir(im_folder)
           if os.path.isfile(os.path.join(im_folder, name))]
  if not files:
    raise ValueError(f"No image files found in {im_folder}")

  # Choose a random image and open it under its real file name
  random_image_file = random.choice(files)
  random_image_path = os.path.join(im_folder, random_image_file)

  # splitext handles extensions of any length (.jpg, .jpeg, ...),
  # unlike slicing off a fixed four characters
  random_image_name = os.path.splitext(random_image_file)[0]
  random_label_path = os.path.join(label_folder, random_image_name + '_lab.png')

  # Load both pictures using matplotlib
  img = plt.imread(random_image_path)
  label = plt.imread(random_label_path)

  # Show the image first, then its mask, each in its own figure
  for picture in (img, label):
    plt.imshow(picture)
    plt.axis('off')
    plt.show()

In [None]:
plot_random_image(train_image_folder, train_mask_folder)

---
# Creating data loaders

In [None]:
from torchvision.transforms import v2

# Resize to a fixed 128x128 and convert to a float32 tensor scaled to [0, 1].
# v2.ToTensor() is deprecated; ToImage + ToDtype(scale=True) is its documented replacement.
# NOTE(review): this pipeline is also applied to the label masks by the dataset,
# and v2.Resize interpolates bilinearly by default, which can blend class ids at
# region borders -- consider a separate nearest-neighbour pipeline for masks.
data_transformer = v2.Compose([
    v2.Resize((128, 128)),
    # v2.RandomHorizontalFlip(p=0.5),
    # v2.RandomRotation(30),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True)
])

In [None]:
from torch.utils.data.dataset import Dataset
from PIL import Image

class floodDataset(Dataset):
  """
  Dataset of (image, segmentation mask) pairs read from two folders.

  Images and masks are paired purely by position after sorting both folder
  listings, so the two folders must hold matching files in matching sort order.

  Args:
    img_path: Folder containing the input images.
    mask_path: Folder containing the label masks.
    transform: Optional callable applied to each image.
    mask_transform: Optional callable applied to each mask. Defaults to
      `transform`, so existing three-argument calls keep working. It must
      return a channel-first tensor; float outputs are assumed to be scaled
      to [0, 1] (as ToTensor does) and are mapped back to integer class ids.
      NOTE(review): random augmentations are drawn independently for image
      and mask, so they would fall out of sync -- confirm before enabling any.
  """
  def __init__(self, img_path, mask_path, transform = None, mask_transform = None):
    self.image_paths = sorted([os.path.join(img_path, path) for path in os.listdir(img_path)])#[:160]
    self.mask_paths = sorted([os.path.join(mask_path, path) for path in os.listdir(mask_path)])#[:160]

    self.transform = transform
    # Fall back to the image transform to stay backward-compatible
    self.mask_transform = mask_transform if mask_transform is not None else transform

  def load_image_pair(self, index):
    """Open and return the (image, mask) PIL pair stored at position `index`."""
    image = self.image_paths[index]
    mask = self.mask_paths[index]
    return Image.open(image), Image.open(mask)

  def __getitem__(self, index):
    """
    Return `(img, mask)` for `index`.

    With transforms set, `mask` is a 2-D int64 tensor of class ids. Without
    transforms the untouched pair from `load_image_pair` is returned (the
    previous implementation silently returned None in that case).
    """
    img, mask = self.load_image_pair(index)

    if self.transform is not None:
      img = self.transform(img)

    if self.mask_transform is not None:
      mask = self.mask_transform(mask)
      # Collapse the channel axis: keep the largest value across channels
      mask = torch.max(mask, dim=0)[0]
      if mask.is_floating_point():
        # Undo the [0, 1] scaling. Round instead of truncating: k/255*255 can
        # land a hair below k in float32 and would otherwise become k-1.
        mask = torch.round(mask * 255)
      mask = mask.long()

    return img, mask

  def __len__(self):
    """Number of image files found in the image folder."""
    return len(self.image_paths)

In [None]:
train_dataset = floodDataset(train_image_folder, train_mask_folder, data_transformer)

In [None]:
print(len(train_dataset))

In [None]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(dataset=train_dataset, # use custom created train Dataset
                                     batch_size=4, # how many samples per batch?
                                     num_workers=0, # how many subprocesses to use for data loading? (0 = load in the main process)
                                     prefetch_factor=None, # batches pre-loaded by each worker; must stay None while num_workers=0
                                     shuffle=True) # shuffle the data?

# test_dataloader_custom = DataLoader(dataset=test_dataset, # use custom created test Dataset
#                                     batch_size=1,
#                                     num_workers=0,
#                                     shuffle=False) # don't usually need to shuffle testing data

train_dataloader

In [None]:
# Take the first batch and keep its first sample; `img` and `mask` are reused
# by the plotting cell that follows.
for img_batch, mask_batch in train_dataloader:
  img = img_batch[0,:,:,:]
  mask = mask_batch[0,:,:]

  # The batch axis has been indexed away, so only per-sample axes remain
  print(f"Image shape: {img.shape} -> [color_channels, height, width]")
  print(f"Mask shape: {mask.shape} -> [height, width]")

  print(mask)

  break

In [None]:
import matplotlib.pyplot as plt

# Display the image
plt.imshow(img.permute(1, 2, 0))  # imshow expects channels last: reorder [C, H, W] -> [H, W, C]
plt.axis('off')
plt.show()

# Display the mask
plt.imshow(mask)  # 2-D class-id mask; no cmap is passed, so matplotlib's default colormap is used (not grayscale)
plt.axis('off')
plt.show()

---
# Class weights

In [None]:
from collections import Counter

def compute_class_weights(dataset, num_classes, target_device=None):
    """
    Compute normalized inverse-frequency class weights from the dataset's masks.

    Args:
        dataset: Iterable yielding `(image, mask)` pairs; only `mask` is read.
            Each mask holds integer class ids.
        num_classes: Number of classes. Ids outside `[0, num_classes)` are ignored.
        target_device: Device for the returned tensor. Defaults to the
            notebook-level `device`.

    Returns:
        float32 tensor of shape `(num_classes,)`. Weights are proportional to
        `1 / pixel_count` and sum to 1. A class that never occurs gets weight 0
        (instead of the inf/NaN a plain division would produce); if no class
        occurs at all, every weight is 0.
    """
    class_counts = torch.zeros(num_classes, dtype=torch.int64)

    # Count pixels per class with bincount instead of a per-pixel Python Counter
    for _, mask in dataset:
        labels = torch.as_tensor(mask).reshape(-1).to("cpu", torch.int64)
        labels = labels[(labels >= 0) & (labels < num_classes)]
        class_counts += torch.bincount(labels, minlength=num_classes)

    counts = class_counts.to(torch.float64)
    present = counts > 0

    # Inverse frequency, only where the count is non-zero
    class_weights = torch.zeros(num_classes, dtype=torch.float64)
    class_weights[present] = 1.0 / counts[present]

    total = class_weights.sum()
    if total > 0:
        class_weights /= total  # Normalize to sum to 1

    if target_device is None:
        target_device = device
    return class_weights.to(torch.float32).to(target_device)

# num_classes = 10  # Number of classes in your dataset
# class_weights = compute_class_weights(train_dataset, num_classes)

---
# Create UNet

In [None]:
class DoubleConv(nn.Module):
  """
  Two 3x3 convolutions with padding 1, each followed by a ReLU.

  Args:
    in_channels: Channels of the input tensor.
    out_channels: Channels produced by both convolutions.
  """
  def __init__(self, in_channels, out_channels):
    super().__init__()
    conv_options = {"kernel_size": 3, "padding": 1}
    first_conv = nn.Conv2d(in_channels, out_channels, **conv_options)
    second_conv = nn.Conv2d(out_channels, out_channels, **conv_options)
    # Attribute name and layer order determine the state_dict keys
    self.double_conv = nn.Sequential(first_conv, nn.ReLU(), second_conv, nn.ReLU())

  def forward(self, x):
    """Run `x` through conv -> ReLU -> conv -> ReLU."""
    features = self.double_conv(x)
    return features

In [None]:
class Down(nn.Module):
  """
  Encoder stage: a DoubleConv followed by 2x2 max pooling.

  Args:
    in_channels: Channels of the input tensor.
    out_channels: Channels produced by the DoubleConv.
  """
  def __init__(self, in_channels, out_channels):
    super().__init__()
    self.double_conv = DoubleConv(in_channels, out_channels)
    self.down = nn.MaxPool2d(2)

  def forward(self, x):
    """Return `(pooled, features)`; `features` is the pre-pooling skip tensor."""
    features = self.double_conv(x)
    pooled = self.down(features)
    return pooled, features

In [None]:
class Up(nn.Module):
  """
  Decoder stage: transposed-conv upsampling, concatenation with the skip
  tensor along the channel axis, then a DoubleConv.

  Args:
    in_channels: Channels of the tensor coming from the deeper stage. The
      transposed convolution halves them; the concatenated tensor is fed to
      a DoubleConv that expects `in_channels` again, so the skip tensor must
      carry the other `in_channels // 2` channels.
    out_channels: Channels produced by the DoubleConv.
  """
  def __init__(self, in_channels, out_channels):
    super().__init__()
    self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
    self.double_conv = DoubleConv(in_channels, out_channels)

  def forward(self, x, skip_con):
    """Upsample `x`, align it with `skip_con`, concatenate and convolve."""
    x = self.up(x)

    # Max pooling floors odd sizes, so the upsampled map can be smaller than
    # the skip tensor. Zero-pad it to the skip size so the concat cannot fail.
    diff_h = skip_con.size(2) - x.size(2)
    diff_w = skip_con.size(3) - x.size(3)
    if diff_h != 0 or diff_w != 0:
      x = nn.functional.pad(x, [diff_w // 2, diff_w - diff_w // 2,
                                diff_h // 2, diff_h - diff_h // 2])

    x = torch.cat([x, skip_con], dim=1)
    x = self.double_conv(x)
    return x

In [None]:
class UNet(nn.Module):
  """
  U-Net with four encoder stages, a bottleneck and four decoder stages,
  finished by a 1x1 convolution that emits one channel per class.

  Args:
    in_channels: Channels of the input image.
    num_classes: Number of output channels.
  """
  def __init__(self, in_channels, num_classes):
    super().__init__()
    # Encoder: 64 -> 128 -> 256 -> 512 channels
    self.down1 = Down(in_channels, 64)
    self.down2 = Down(64, 128)
    self.down3 = Down(128, 256)
    self.down4 = Down(256, 512)

    self.bottleneck = DoubleConv(512, 1024)

    # Decoder mirrors the encoder: 1024 -> 512 -> 256 -> 128 -> 64 channels
    self.up1 = Up(1024, 512)
    self.up2 = Up(512, 256)
    self.up3 = Up(256, 128)
    self.up4 = Up(128, 64)

    self.out = nn.Conv2d(in_channels=64, out_channels=num_classes, kernel_size=1)

  def forward(self, x):
    """Return the output of the final 1x1 convolution for input `x`."""
    # Encoder: remember every skip tensor, shallowest first
    skips = []
    for encoder in (self.down1, self.down2, self.down3, self.down4):
      x, skip_con = encoder(x)
      skips.append(skip_con)

    x = self.bottleneck(x)

    # Decoder: consume the skips deepest first
    for decoder in (self.up1, self.up2, self.up3, self.up4):
      x = decoder(x, skips.pop())

    return self.out(x)

---
# Dummy run of the UNet and model summary

In [None]:
model = UNet(input_channels, num_classes).to(device)

In [None]:
img_batch, mask_batch = next(iter(train_dataloader))

img_single, mask_single = img_batch[0].unsqueeze(dim=0).to(device), mask_batch[0].unsqueeze(dim=0).to(device)
print(f"Single image shape: {img_single.shape}\n")
print(f"Single mask shape: {mask_single.shape}\n")

In [None]:
output = model(img_single)
print(output)

In [None]:
try:
    import torchinfo
except:
    !pip install torchinfo
    import torchinfo

from torchinfo import summary
summary(model, input_size=[1,3,128,128])

---
# Train UNet

In [None]:
from tqdm.auto import tqdm
from torch.nn import functional as F

def train_step(model, dataloader, loss_fn, optimizer):
  """
  Train `model` for one pass over `dataloader`.

  Args:
    model: Network being trained; called as `model(img)`.
    dataloader: Iterable of batches; element 0 is the image batch and
      element 1 the mask batch. Must support `len()`.
    loss_fn: Callable `loss_fn(prediction, mask)` returning a scalar loss tensor.
    optimizer: Optimizer holding `model`'s parameters.

  Returns:
    `(train_loss, train_acc)`: the loss averaged over batches and the
    fraction of mask elements whose arg-max prediction matches the target.

  Raises:
    ZeroDivisionError: If `dataloader` has length 0.
  """
  # Put model in training mode
  model.train()

  # Initiate train loss/accuracy
  train_loss, correct_pixels, total_pixels = 0, 0, 0

  for batch in tqdm(dataloader):

    img = batch[0].to(device)
    mask = batch[1].to(device)

    # Forward pass
    mask_pred = model(img)

    # Calculate and accumulate loss
    loss = loss_fn(mask_pred, mask)
    train_loss += loss.item()

    # Reset optimizer to zero gradient
    optimizer.zero_grad()

    # Back prop
    loss.backward()

    # Optimizer step (change params)
    optimizer.step()

    # Softmax is monotonic, so the arg-max of the raw logits is already the
    # predicted class. Detach so the metric holds no autograd graph.
    predicted = mask_pred.detach().argmax(dim=1)

    correct_pixels += (predicted == mask).sum().item()
    total_pixels += mask.numel()

  train_loss = train_loss / len(dataloader)
  train_acc = correct_pixels / total_pixels

  return train_loss, train_acc

In [None]:
from timeit import default_timer as timer

def train(model, dataloader, loss_fn, optimizer, epochs=5):
  """
  Run `train_step` for `epochs` epochs, printing metrics after each one.

  Args:
    model: Network to train.
    dataloader: Training batches, passed through to `train_step`.
    loss_fn: Loss callable, passed through to `train_step`.
    optimizer: Optimizer, passed through to `train_step`.
    epochs: Number of passes over `dataloader`.

  Returns:
    Dict with keys "train_loss" and "train_acc", each a list holding one
    value per epoch.
  """
  start_time = timer()

  metric_names = ("train_loss", "train_acc")
  results = {name: [] for name in metric_names}

  for epoch in range(epochs):
    epoch_metrics = train_step(model, dataloader, loss_fn, optimizer)
    train_loss, train_acc = epoch_metrics

    # Record this epoch's values before reporting them
    for name, value in zip(metric_names, epoch_metrics):
      results[name].append(value)

    print(
        f"Epoch: {epoch+1} | train_loss: {train_loss:.4f} | train_acc: {train_acc:.4f}"
    )

  end_time = timer()

  print(f"Total training time: {(end_time - start_time):.3f} seconds")
  return results

In [161]:
model_1 = UNet(input_channels, num_classes).to(device)

loss_fn = nn.CrossEntropyLoss(weight=None)
optimizer = torch.optim.Adam(params=model_1.parameters(), lr=0.001)

# Request CUDA tracing only when a GPU is actually present
profiler_activities = [torch.profiler.ProfilerActivity.CPU]
if torch.cuda.is_available():
    profiler_activities.append(torch.profiler.ProfilerActivity.CUDA)

# NOTE(review): the profiler stays active for the whole run and records every
# op of all 10 epochs -- consider profiling only a few batches if memory is tight.
with torch.profiler.profile(activities=profiler_activities, profile_memory=True) as prof:
    # Run the training loop and keep the per-epoch history instead of discarding it
    model_1_results = train(model_1, train_dataloader, loss_fn, optimizer, epochs=10)

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

  0%|          | 0/362 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.5173 | train_acc: 0.5287


  0%|          | 0/362 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 1.1510 | train_acc: 0.6041


  0%|          | 0/362 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 1.1008 | train_acc: 0.6222


  0%|          | 0/362 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7e3f99d870>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7e3f99d870>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Epoch: 4 | train_loss: 1.0613 | train_acc: 0.6301


  0%|          | 0/362 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7e3f99d870>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7e3f99d870>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Epoch: 5 | train_loss: 1.0327 | train_acc: 0.6407


  0%|          | 0/362 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Show one sample next to its ground-truth mask and the model's prediction.
# Switch to eval mode for inference; train_step() puts the model back into
# training mode on its next call.
model_1.eval()

for img_batch, mask_batch in train_dataloader:
  img = img_batch[0,:,:,:]
  mask = mask_batch[0,:,:]

  # No gradients are needed for a prediction, so skip building the autograd graph
  with torch.no_grad():
    mask_pred = model_1(img.unsqueeze(dim=0).to(device))
    mask_pred_prob = F.softmax(mask_pred, dim=1)
  # Most probable class for every pixel
  predicted = mask_pred_prob.argmax(dim=1)
  predicted = predicted.squeeze().cpu().numpy()

  print(predicted.shape)
  print(mask.shape)
  print(img.shape)

  # Input image (channels moved last for imshow)
  plt.imshow(img.permute(1, 2, 0))
  plt.axis('off')
  plt.show()

  # Ground-truth mask
  plt.imshow(mask)
  plt.axis('off')
  plt.show()

  # Predicted mask
  plt.imshow(predicted)
  plt.axis('off')
  plt.show()

  break

In [None]:
# import os
# import shutil

# # Path to the shared folder (add the shared folder to your drive first)
# shared_drive_path = '/content/gdrive/MyDrive/FloodNet-Supervised_v1.0'

# # Path to your own Google Drive
# my_drive_path = '/content/gdrive/MyDrive/Colab_Notebooks/FloodNet/FloodNet_Data'

# # Create the destination directory if it doesn't exist
# os.makedirs(my_drive_path, exist_ok=True)

# # Function to copy files and directories
# def copy_files_and_directories(src, dst):
#     if os.path.isdir(src):
#         # If the item is a directory, copy it recursively
#         shutil.copytree(src, dst, dirs_exist_ok=True)
#         print(f"Directory copied: {src} to {dst}")
#     else:
#         # If the item is a file, copy it
#         shutil.copy(src, dst)
#         print(f"File copied: {src} to {dst}")

# # Copy files and directories from shared drive to your own drive
# for item in os.listdir(shared_drive_path):
#     src_path = os.path.join(shared_drive_path, item)
#     dst_path = os.path.join(my_drive_path, item)

#     # Call the function to copy files and directories
#     copy_files_and_directories(src_path, dst_path)

# print("Files and directories copied successfully.")