In [None]:
import numpy as np 
import pandas as pd
import os
import PIL
import glob 
import matplotlib.pyplot as plt
from PIL import Image

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.optim as optim

import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

import time
from tqdm.notebook import tqdm
import torchvision.transforms as transforms

from torchsummary import summary

from google.colab import drive

# Mount Google Drive so the dataset paths under /content/drive resolve.
# mount() is called for its side effect only: assigning its result back to
# `drive` would replace the imported module, and re-running this cell would
# then fail because `drive` no longer has a `mount` attribute.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Dataset locations on the mounted Google Drive.
# Training images; the dataset class below reads them as '<id>.tif'.
train_images = r'/content/drive/MyDrive/DSBA/Hurricane_Harvey/train'
# Segmentation masks for the training images; read as '<id>.png'.
train_masks = r'/content/drive/MyDrive/DSBA/Hurricane_Harvey/Masks'
# Images that the trained model is asked to predict on; read as '<id>.tif'.
test_images = r'/content/drive/MyDrive/DSBA/Hurricane_Harvey/test'

In [None]:
# Image IDs are the file names (text before the first '.') of every entry in
# the training folder whose name starts with a digit.
# os.listdir() returns entries in arbitrary order, so the IDs are sorted to
# make the list - and therefore the split below - reproducible.
train_files = os.listdir(train_images)
train_files = sorted(x.split('.')[0] for x in train_files if x[0].isnumeric())
train_files = np.array(train_files)
#train_files = np.random.choice(train_files, size=1000, replace=False)
# Split data: 80% training / 20% validation. A fixed random_state keeps the
# same images in each split across kernel restarts.
X_train, X_val = train_test_split(train_files, test_size=0.2, random_state=42)

In [None]:
# Show how many image IDs ended up in the training and validation splits.
print(*map(len, (X_train, X_val)))

239 60


In [None]:
# Side length (in pixels) of the square that the albumentations Resize
# transforms below scale every image and mask to.
# NOTE(review): the training cell in this notebook ends with an
# OutOfMemoryError; 2048x2048 inputs combined with batch_size = 4 are a likely
# cause - consider a smaller value (a commented-out `dim = 512` appears in the
# test-statistics cell) and confirm against the available GPU memory.
dim = 2048
#img_list = []

#for im_file in train_files:
#    img = cv2.imread(f'{train_images}/{im_file}.tif')
#    img = cv2.resize(img,(dim,dim))
#    img = img / 255.0

#    img_list.append(img)

#img_list = np.stack(img_list)
#print(img_list.shape)

#means = np.mean(img_list,axis=(0,1,2))
#stdevs = np.std(img_list,axis=(0,1,2))

#print(means,stdevs)

In [None]:
class UAVDataset(Dataset):
    """Image / mask pairs for semantic segmentation, read from disk on demand.

    For an ID ``x`` in ``X`` the image is read from ``{img_path}/{x}.tif`` and
    its mask from ``{mask_path}/{x}.png`` (as a single-channel image).

    Args:
        img_path: Directory containing the ``.tif`` images.
        mask_path: Directory containing the ``.png`` masks.
        X: Indexable collection of image IDs (file names without extension).
        mean: Per-channel means passed to ``transforms.Normalize``.
        std: Per-channel standard deviations passed to ``transforms.Normalize``.
        aug: Optional callable invoked as ``aug(image=..., mask=...)`` that
            returns a mapping with ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, img_path, mask_path, X, mean, std, aug=None):
        self.img_path = img_path
        self.mask_path = mask_path
        self.X = X
        self.aug = aug
        self.mean = mean
        self.std = std
        # Built once here instead of on every __getitem__ call.
        self.transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(self.mean, self.std)]
        )

    def __len__(self):
        """Return the number of image IDs in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Load, augment and normalise the sample at position ``idx``.

        Returns:
            ``(img, mask)`` where ``img`` is the normalised image tensor and
            ``mask`` is a ``torch.long`` tensor built from the mask array.

        Raises:
            FileNotFoundError: If the image or the mask cannot be read.
        """
        img_file = f'{self.img_path}/{self.X[idx]}.tif'
        img = cv2.imread(img_file)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_file}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        mask_file = f'{self.mask_path}/{self.X[idx]}.png'
        mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f'Could not read mask: {mask_file}')

        if self.aug is not None:
            # Image and mask go through the augmentation together.
            aug = self.aug(image=img, mask=mask)
            img = aug['image']
            mask = aug['mask']

        img = self.transform(Image.fromarray(img))
        mask = torch.from_numpy(mask).long()

        return img, mask

In [None]:
# Per-channel statistics handed to transforms.Normalize by the dataset.
# NOTE(review): the commented-out computation above reads images with
# cv2.imread (BGR order) without converting, while the dataset converts to RGB
# before normalising - confirm these values are in the intended channel order.
means = [0.38980951, 0.46497215, 0.43926388]
stdevs = [0.21997646, 0.19580692, 0.21212297]


def _resize_only():
    """Build an albumentations pipeline that only resizes to dim x dim."""
    return A.Compose([A.Resize(dim, dim, interpolation=cv2.INTER_NEAREST)])


# Training and validation currently share the same resize-only pipeline.
transform_train = _resize_only()
transform_val = _resize_only()

In [None]:
#datasets
# The training set must be built from X_train only: passing the full
# train_files list would also train on the images held out in X_val and make
# the validation loss meaningless.
train_set = UAVDataset(train_images, train_masks, X_train, means, stdevs, transform_train)
val_set = UAVDataset(train_images, train_masks, X_val, means, stdevs, transform_val)

#dataloader
batch_size = 4

train_loader = DataLoader(train_set, batch_size=batch_size, num_workers = 1, shuffle=True)
# Validation only averages the loss over all batches, so shuffling is unnecessary.
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

## Model

In [None]:
# Define double convolution structure
def double_conv(in_c, out_c):
    """Return two stacked 3x3 Conv -> BatchNorm -> ReLU stages.

    Args:
        in_c: Number of input channels of the first convolution.
        out_c: Number of output channels of both convolutions.

    Returns:
        An ``nn.Sequential`` that preserves the spatial size of its input
        (stride 1 with ``padding='same'``).
    """
    return nn.Sequential(
        nn.Conv2d(in_c, out_c, 3, 1, padding='same'),
        nn.BatchNorm2d(out_c),
        nn.ReLU(),
        nn.Conv2d(out_c, out_c, 3, 1, padding='same'),
        nn.BatchNorm2d(out_c),
        nn.ReLU(),
    )


class Generator(nn.Module):
    """U-Net-style encoder/decoder with skip connections for segmentation.

    The encoder applies four ``double_conv`` + max-pool + dropout stages
    (16, 32, 64, 128 channels) followed by a 256-channel bottleneck; the
    decoder mirrors it with transposed convolutions whose outputs are
    concatenated with the matching encoder feature maps.

    Args:
        in_channels: Channels of the input image (default 3).
        n_classes: Number of output classes (default 27).
    """

    def __init__(self, in_channels=3, n_classes=27):
        super().__init__()

        # Downsampling path
        self.downlayer1 = double_conv(in_channels, 16)
        self.maxpool = nn.MaxPool2d(2)

        # Dropout applied after every pooling step of the encoder
        self.dropout = nn.Dropout(p=0.1)

        self.downlayer2 = double_conv(16, 32)
        self.downlayer3 = double_conv(32, 64)
        self.downlayer4 = double_conv(64, 128)
        self.downlayer5 = double_conv(128, 256)

        # Upsampling path: each transposed conv doubles the resolution and
        # halves the channels; the following double_conv consumes the
        # concatenation with the skip connection (hence twice the channels).
        self.up_trans1 = nn.ConvTranspose2d(256, 128, 2, 2)
        self.up_conv1 = double_conv(256, 128)

        self.up_trans2 = nn.ConvTranspose2d(128, 64, 2, 2)
        self.up_conv2 = double_conv(128, 64)

        self.up_trans3 = nn.ConvTranspose2d(64, 32, 2, 2)
        self.up_conv3 = double_conv(64, 32)

        self.up_trans4 = nn.ConvTranspose2d(32, 16, 2, 2)
        self.up_conv4 = double_conv(32, 16)

        # 1x1 convolution producing one score map per class
        self.output = nn.Conv2d(16, n_classes, kernel_size=1)

        # Not applied in forward(); kept so callers that need probabilities
        # can use ``model.softmax(model(x))``.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw per-pixel class scores (logits).

        The scores are intentionally NOT passed through softmax:
        ``nn.CrossEntropyLoss`` applies log-softmax itself, so feeding it
        probabilities would normalise twice and flatten the gradients.
        ``argmax`` over dim 1 is unaffected by this change.

        Args:
            x: Batch of images, ``(N, in_channels, H, W)``. H and W need to be
                multiples of 16 so the upsampled maps line up with the skip
                connections after four 2x poolings.

        Returns:
            Tensor of shape ``(N, n_classes, H, W)``.
        """
        # encoder: keep the pre-pooling feature maps for the skip connections
        skip1 = self.downlayer1(x)
        x = self.dropout(self.maxpool(skip1))

        skip2 = self.downlayer2(x)
        x = self.dropout(self.maxpool(skip2))

        skip3 = self.downlayer3(x)
        x = self.dropout(self.maxpool(skip3))

        skip4 = self.downlayer4(x)
        x = self.dropout(self.maxpool(skip4))

        x = self.downlayer5(x)

        # decoder
        x = self.up_trans1(x)
        x = self.up_conv1(torch.cat([x, skip4], 1))

        x = self.up_trans2(x)
        x = self.up_conv2(torch.cat([x, skip3], 1))

        x = self.up_trans3(x)
        x = self.up_conv3(torch.cat([x, skip2], 1))

        x = self.up_trans4(x)
        x = self.up_conv4(torch.cat([x, skip1], 1))

        return self.output(x)

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Optimisation hyper-parameters
lr = 0.001
epochs = 25

# Network, moved to the selected device (Module.to returns the module itself)
model = Generator().to(device)

# Per-pixel multi-class loss and Adam optimiser over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
# Mean loss per epoch, used for the learning-curve plot
total_train_losses = []
total_val_losses = []

for epoch in range(epochs):
    train_losses = []
    val_losses = []

    # ---- training pass ----
    model.train()
    for images, labels in train_loader:
        images = images.to(device=device, dtype=torch.float)
        labels = labels.to(device=device, dtype=torch.int64)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward propagation
        outputs = model(images)

        # Cross-entropy loss between the per-pixel class scores and the labels
        loss = criterion(outputs, labels)

        # Calculating gradients
        loss.backward()

        # Update parameters
        optimizer.step()

        train_losses.append(loss.item())

    train_loss_mean = np.mean(train_losses)
    total_train_losses.append(train_loss_mean)

    # ---- validation pass ----
    model.eval()
    # No gradients are needed here; without no_grad() every forward pass
    # would build an autograd graph and hold on to GPU memory for nothing.
    with torch.no_grad():
        for images, labels in val_loader:
            # Same device/dtype handling as in the training pass
            images = images.to(device=device, dtype=torch.float)
            labels = labels.to(device=device, dtype=torch.int64)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_losses.append(loss.item())

    val_loss_mean = np.mean(val_losses)
    total_val_losses.append(val_loss_mean)

    print(f"Epoch {epoch} Training loss: {train_loss_mean} Validation loss: {val_loss_mean}")




OutOfMemoryError: ignored

In [None]:
# Learning curves: mean training and validation loss for every epoch.
fig, ax = plt.subplots()
for curve in (total_train_losses, total_val_losses):
    ax.plot(curve)
ax.set_title("CNN: Training & Val Loss VS Number of iteration")
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['train', 'val'], loc='upper left')
plt.show()

Predictions on testing data

In [None]:
# Sorted IDs (file name up to the first '.') of every entry in the test
# folder whose name starts with a digit.
test_files = np.array(
    sorted(x.split('.')[0] for x in os.listdir(test_images) if x[0].isnumeric())
)

#dim = 512
#img_list = []

#for im_file in test_files:
#    img = cv2.imread(f'{test_images}/{im_file}.tif')
#    img = cv2.resize(img,(dim,dim))
#    img = img / 255.0

#    img_list.append(img)

#img_list = np.stack(img_list)
#print(img_list.shape)

#means = np.mean(img_list,axis=(0,1,2))
#stdevs = np.std(img_list,axis=(0,1,2))

#print(means,stdevs)

In [None]:
# Per-channel statistics measured on the test images, kept for reference only.
# They are deliberately NOT bound to `means` / `stdevs`: the model was trained
# on inputs normalised with the training statistics, so the test images must
# be normalised with those same values, and rebinding the names would also
# silently change the training datasets if earlier cells were re-run.
test_means = [0.3720225,  0.44640904, 0.42115532]
test_stdevs = [0.21046157, 0.18250105, 0.20091524]

In [None]:
class testDataset(Dataset):
    """Unlabelled images for inference, read from disk on demand.

    For an ID ``x`` in ``X`` the image is read from ``{img_path}/{x}.tif``.

    Args:
        img_path: Directory containing the ``.tif`` images.
        X: Indexable collection of image IDs (file names without extension).
        mean: Per-channel means passed to ``transforms.Normalize``.
        std: Per-channel standard deviations passed to ``transforms.Normalize``.
        aug: Optional callable invoked as ``aug(image=...)`` that returns a
            mapping with an ``'image'`` entry.
    """

    def __init__(self, img_path, X, mean, std, aug=None):
        self.img_path = img_path
        self.X = X
        self.aug = aug
        self.mean = mean
        self.std = std
        # Built once here instead of on every __getitem__ call.
        self.transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(self.mean, self.std)]
        )

    def __len__(self):
        """Return the number of image IDs in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Load, augment and normalise the image at position ``idx``.

        Returns:
            The normalised image tensor.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        img_file = f'{self.img_path}/{self.X[idx]}.tif'
        img = cv2.imread(img_file)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_file}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.aug is not None:
            img = self.aug(image=img)['image']

        return self.transform(Image.fromarray(img))

In [None]:

# Test images go through the same resize pipeline as the validation images.
test_set = testDataset(
    img_path=test_images,
    X=test_files,
    mean=means,
    std=stdevs,
    aug=transform_val,
)

#dataloader
# One image per batch, in file order, so predictions line up with test_files.
batch_size = 1

test_loader = DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
)

In [None]:
from itertools import islice
#Predict the test set maps
model.eval()
test_predictions = []
# A single no_grad() context around the loop: inference needs no autograd graph.
with torch.no_grad():
    for image in test_loader:
        image = image.to(device)
        prediction = model(image)
        # Most likely class per pixel, stored as uint8.
        class_maps = prediction.argmax(1).cpu().numpy().astype(np.uint8)
        # Keep every map of the batch; indexing with [0] would silently drop
        # predictions if the loader's batch size were ever raised above 1.
        test_predictions.extend(class_maps)


In [None]:
#Saving images to output path
test_dir = r'/content/drive/MyDrive/DSBA/Hurricane_Harvey/double_conv/pred/'
# Make sure the output folder exists before writing into it.
os.makedirs(test_dir, exist_ok=True)

test_img_names = list(test_files)
# zip() stops at the shorter input, so a length mismatch would silently skip
# images; fail loudly instead.
if len(test_img_names) != len(test_predictions):
    raise ValueError(
        f'{len(test_img_names)} test images but {len(test_predictions)} predictions'
    )

d = dict(zip(test_img_names,test_predictions))
for name, pred in d.items():
    im = Image.fromarray(pred)
    im.save(test_dir+name+'.png')
    print("Image saved: ", test_dir + name + '.png')

In [None]:
# Dictionary to store the image sizes

# File names in the test folder that start with a digit
images = [x for x in os.listdir(test_images) if x[0].isnumeric()]


def get_size(images):
    """Map each image ID to the (width, height) of its file in ``test_images``.

    Args:
        images: File names (with extension) located in ``test_images``.

    Returns:
        dict keyed by the file name up to its first '.', with the size that
        PIL reports for that file as value.
    """
    sizes = {}
    for image in images:
        with PIL.Image.open(f'{test_images}/{image}') as im:
            # PIL exposes the size as a (width, height) tuple
            sizes[image.split('.')[0]] = im.size
    return sizes


image_size = get_size(images)

In [None]:
image_paths = r'/content/drive/MyDrive/DSBA/Hurricane_Harvey/double_conv/pred'
reshape_dir = r'/content/drive/MyDrive/DSBA/Hurricane_Harvey/double_conv/pred_resize'
# cv2.imwrite does not create missing folders (it just returns False), so make
# sure the destination exists.
os.makedirs(reshape_dir, exist_ok=True)

images = os.listdir(image_paths)
images = [x for x in images if x[0].isnumeric()]

# the target size is image_size
for file_path in images:
    image_id = file_path.split('.')[0]
    width, height = image_size[image_id]

    im = cv2.imread(f'{image_paths}/{file_path}', cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None instead of raising when a file cannot be read.
    if im is None:
        raise FileNotFoundError(f'Could not read prediction: {image_paths}/{file_path}')

    # Nearest-neighbour keeps the values valid class indices (no blending).
    im = cv2.resize(im, (width, height), interpolation=cv2.INTER_NEAREST)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(f'{reshape_dir}/{file_path}', im):
        raise IOError(f'Could not write resized prediction: {reshape_dir}/{file_path}')

In [None]:
#palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
#colors = torch.as_tensor([i for i in range(27)])[:, None] * palette
#colors = (colors % 255).numpy().astype("uint8")

#reshape_dir = r'/content/drive/MyDrive/DSBA/Hurricane_Harvey/rasters/double_conv/pred_resize'
#images = os.listdir(reshape_dir)
#images = [x for x in images if x[0].isnumeric()]

#for img in images:
#  image = cv2.imread(f'{reshape_dir}/{img}', cv2.IMREAD_GRAYSCALE)
#  plt.figure()
#  plt.imshow(image)
#  plt.title(img)


Preparing data for submission

In [None]:
# zip file
import zipfile

# Write a real ZIP archive: tarfile.open(..., "w") produces a TAR file, which
# ZIP tools cannot open even when the file is named "submission.zip".
with zipfile.ZipFile("submission.zip", "w", compression=zipfile.ZIP_DEFLATED) as archive:
    for root, _dirs, files in os.walk(reshape_dir):
        for file in files:
            fullpath = os.path.join(root, file)
            # arcname=file stores each mask at the top level of the archive
            archive.write(fullpath, arcname=file)

In [None]:
# check if reshape is successful

# Sizes of the resized masks are collected under their own name so that
# `image_size` (the expected sizes of the test images) is not overwritten and
# the resize cell stays safe to re-run.
resized_size = {}
for image in os.listdir(reshape_dir):
    image_id = image.split('.')[0]
    with PIL.Image.open(f'{reshape_dir}/{image}') as im:
        # PIL exposes the size as a (width, height) tuple
        resized_size[image_id] = im.size
print(resized_size)

# Actually compare against the expected sizes instead of only printing them.
mismatched = {
    image_id: (size, image_size.get(image_id))
    for image_id, size in resized_size.items()
    if image_size.get(image_id) != size
}
if mismatched:
    print("Size mismatch (resized, expected):", mismatched)
else:
    print("All resized masks match the original test image sizes.")