<a href="https://colab.research.google.com/github/alongstar518/cs221_finalproject/blob/master/cs221_final_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### CS221 Final Project: Steel Defect Detection 


### install packages

In [0]:
# None for now

### Import necessary libraries

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from tqdm import tqdm_notebook
import cv2
from PIL import Image
import shutil

import torch
import torch.nn as nn
import torch.optim as optim 
import torchvision
from torchvision import models
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as utils
from torchvision import transforms
import torch.nn.functional as F
import tensorboard
from tensorboard import notebook
from torch.utils.tensorboard import SummaryWriter

### Print torch and cuda information

In [0]:
# Report the PyTorch build and, when a CUDA device is present, its details.
print(f"torch version: {torch.__version__}")
cuda_available = torch.cuda.is_available()
print(f"Cuda device avaliablity: {cuda_available}")
if cuda_available:
  # The device queries below raise when no CUDA device exists, so they are
  # only issued once availability has been confirmed.
  print(f"Cuda device name: {torch.cuda.get_device_name()}")
  print(
      "Cuda device Capilibility: "
      f"{torch.cuda.get_device_capability(device=None)}"
  )
  # total_memory is reported in bytes; convert to GiB for display.
  gpu_ram = (
      torch.cuda.get_device_properties(device=None).total_memory / (1024) ** 3
  )
  print(f"GPU RAM: {gpu_ram} GB")
else:
  # Keep the name defined so later cells can still reference it.
  gpu_ram = None
  print("No CUDA device detected; skipping GPU details.")

### configs


basic config:

In [0]:
# Notebook-wide switches; each one is read by a later cell.
reload_data = True  # on Colab: re-copy and unzip the dataset and resnet18 folder
clear_tensor_board_history = False  # wipe /content/runs before creating the SummaryWriter
reload_last_model = False  # load weights from model_save_path when that file exists
leave_progress_bar = False  # forwarded as `leave=` to the tqdm progress bars
inference_mode = False  # when True the training cell sets total_epochs to 0
# Path to the local project folder that contains the data
# (the download cell replaces these with Colab paths when google.colab imports).
path = "/home/user/git/cs221_finalproject"
data_path = os.path.join(path, "data")
# Fractions of the filtered rows used for the sequential train/eval split.
train_data_percent = 0.8
eval_data_percent = 0.1
# NOTE(review): not read by the visible cells; the test split is whatever
# remains after the train and eval slices. Originally described as "used only
# when we don't have any eval set" - confirm.
test_data_percent = 0.1

hyperparameters:

In [0]:
batch_size = 32  # samples per batch for the training DataLoader
n_classes = 1  # number of output mask channels the model is meant to predict
total_epochs = 10  # passes over the training set (forced to 0 in inference mode)
weight_decay=1e-4  # SGD weight decay
lr = 0.01  # SGD learning rate
momentum=0.9  # SGD momentum
# NOTE(review): eval_interval is not referenced by any visible cell - confirm
# whether it is still needed.
eval_interval = 200

### download data

In [0]:
try:
  # Deferred import: google.colab only exists on Colab. When the import fails
  # the except branch below runs and the local `path` / `data_path` values
  # from the config cell stay in effect.
  from google.colab import drive
  path = "/content/drive/My Drive/share/"
  remote_data_path = os.path.join(path, "data")
  data_path = "/content/data"
  data_zip_path = os.path.join(
        remote_data_path, "severstal-steel-defect-detection.zip"
      )
  # NOTE: this value is overwritten by the `finally` clause below.
  model_save_path = path
  data_zip_path_dst = "/content/severstal-steel-defect-detection.zip"
  res18_path = os.path.join(remote_data_path, "resnet18")
  os.makedirs(data_path, exist_ok=True)
  res18_path_dst = os.path.join(data_path, "resnet18")
  # you may find res18 folder here:
  # "https://drive.google.com/drive/folders/
  #  1KFKRraGbNUICgkgSabzrfHeJzYc5YE0W?usp=sharing"

  if reload_data:
    print("login to google drive")
    drive.mount('/content/drive')
    print("Copy res18 folder:")
    # NOTE(review): shutil.copytree presumably fails if the destination
    # already exists (e.g. when this cell is re-run) - confirm.
    shutil.copytree(res18_path, res18_path_dst)
    print("Copy raw data from {} to {}".format(
        data_zip_path, data_zip_path_dst)
    )
    shutil.copy(data_zip_path, "/content")

    print("unzip raw data...")
    # The outer archive contains train_images.zip and test_images.zip,
    # which are unpacked in a second step below.
    os.system("unzip {} -d {}".format(data_zip_path_dst, data_path))
    
    print("give back missing permissions for unzipping")
    os.system(
      "chmod +wrx /content/data/train_images.zip /content/data/test_images.zip"
    )
    
    print("unzipping data...")
    os.system("unzip {} -d {}".format(
        os.path.join(data_path,"train_images.zip"), 
        os.path.join(data_path,"train_images")
        )
    )
    os.system("unzip {} -d {}".format(
        os.path.join(data_path,"test_images.zip"), 
        os.path.join(data_path,"test_images")
      )
    )
    # IPython shell escape (not plain Python): list the unpacked data folder.
    !ls /content/data
except Exception as e:
  # Catches the failed google.colab import, but also any error raised by the
  # mount/copy steps above; in every case the message below is printed.
  print(e)
  print("Running on local machine.")
finally:
  # Runs on both Colab and local: the best checkpoint lives under `path`.
  model_save_path = os.path.join(path, "best.pth")


In [0]:
# Load the annotation table shipped with the dataset and preview it.
train_csv_path = os.path.join(data_path, 'train.csv')
tr = pd.read_csv(train_csv_path)
print(len(tr))
tr.head()

In [0]:
# Keep only rows that carry an encoded mask, then only those whose
# "<image>_<class>" id ends in class 4.
annotated_rows = tr[tr['EncodedPixels'].notnull()].reset_index(drop=True)
is_class_4 = annotated_rows['ImageId_ClassId'].apply(
    lambda image_class_id: image_class_id.split('_')[1] == '4'
)
df_train = annotated_rows[is_class_4].reset_index(drop=True)
print(len(df_train))
df_train.head()

### data preprocessing function

In [0]:
# Decode
def rle2mask(rle, imgshape):
    """Decode a run-length string into a 2-D uint8 mask.

    :param rle: whitespace-separated "start length start length ..." string;
        positions index the image flattened column by column.
    :param imgshape: (rows, cols) of the mask to produce.
    :return: uint8 array of shape (imgshape[0], imgshape[1]) holding 1 inside
        every run and 0 elsewhere.

    NOTE(review): starts are used directly as 0-based offsets. The Kaggle
    competition describes its RLE as 1-indexed - confirm whether a one-pixel
    shift is intended.
    """
    n_rows = imgshape[0]
    n_cols = imgshape[1]

    flat = np.zeros(n_rows * n_cols, dtype=np.uint8)

    tokens = np.asarray(list(map(int, rle.split())))
    run_starts = tokens[0::2]
    run_lengths = tokens[1::2]

    for k, begin in enumerate(run_starts):
        end = begin + run_lengths[k]
        flat[int(begin):int(end)] = 1

    # The flat buffer is column-major: view it as (cols, rows) and transpose.
    return flat.reshape(n_cols, n_rows).T

# Encode
def mask2rle(img):
    """Encode a 2-D mask as a run-length string.

    The mask is read column by column (axis 0 varies fastest). Every maximal
    run of positive values becomes a "start length" pair, where start is the
    0-based offset into that flattened order; this matches how `rle2mask`
    in this file consumes starts.

    :param img: array-like mask; values > 0 are foreground.
    :return: space-separated "start length ..." string, "" for an empty mask.
    """
    # Swapping the first two axes and flattening yields column-major order.
    flat = np.swapaxes(np.asarray(img), 0, 1).reshape(-1) > 0

    # Pad with background on both sides so every run has an explicit start
    # and end, including a run that touches the very last pixel.
    padded = np.concatenate(([False], flat, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    starts = edges[0::2]
    lengths = edges[1::2] - starts

    return " ".join(
        f"{start} {length}" for start, length in zip(starts, lengths)
    )

### Display some sample images

In [0]:
# Plot a few training images with their defect mask painted into the red
# channel. Rows are taken with iloc[i] for i = 1..rows*columns, so the very
# first row of df_train is not shown.
columns = 1
rows = 4
fig = plt.figure(figsize=(20, columns*rows+6))
image_names = df_train['ImageId_ClassId'].str[:-2]  # drop the "_<class>" suffix
for i in range(1, columns*rows+1):
    fn = image_names.iloc[i]
    axis = fig.add_subplot(rows, columns, i)
    axis.set_title(fn)
    img = cv2.imread(data_path + '/train_images/'+fn)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    mask = rle2mask(df_train['EncodedPixels'].iloc[i], (256, 1600))
    img[mask==1,0] = 255
    plt.imshow(img)
plt.show()

### Create train Dataset, augmentation and DataLoader

In [0]:
class ImageData(Dataset):
    """Dataset of steel images and, for the "train" subset, their masks.

    :param df: DataFrame with an 'ImageId_ClassId' column ("<file>_<class>")
        and, for the "train" subset, an 'EncodedPixels' RLE column.
    :param transform: callable applied to the PIL image and to the PIL mask.
    :param subset: "train" (returns image and mask, reads train_images/) or
        "test" (returns the image only, reads test_images/).
    :raises ValueError: if `subset` is neither "train" nor "test".
    """

    def __init__(self, df, transform, subset="train"):
        super().__init__()
        # Reject unknown subsets up front; otherwise data_path would stay
        # unset and the first __getitem__ would fail with AttributeError.
        if subset not in ("train", "test"):
            raise ValueError(
                "subset must be 'train' or 'test', got {!r}".format(subset)
            )
        self.df = df
        self.transform = transform
        self.subset = subset

        if self.subset == "train":
            self.data_path = data_path + '/train_images/'
        else:
            self.data_path = data_path + '/test_images/'

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return (image, mask) for "train", or just the image for "test"."""
        # 'ImageId_ClassId' looks like "<file>_<class>"; keep the file part.
        fn = self.df['ImageId_ClassId'].iloc[index].split('_')[0]
        img = Image.open(self.data_path + fn)
        img = self.transform(img)

        if self.subset != 'train':
            return img

        mask = rle2mask(self.df['EncodedPixels'].iloc[index], (256, 1600))
        # NOTE(review): the mask holds 0/1 as uint8; ToTensor presumably
        # rescales uint8 input by 1/255, which would make foreground 1/255
        # rather than 1 - confirm against the loss and thresholding code.
        mask = transforms.ToPILImage()(mask)
        mask = self.transform(mask)
        return img, mask

In [0]:
# Augmentation is not enabled yet: any augmentation would also have to be
# applied to the ground-truth mask.
data_transf = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Sequential (unshuffled) split of df_train: first the training rows, then
# the eval rows, and whatever is left becomes the test rows.
train_indx = int(len(df_train) * train_data_percent)
eval_indx = int(len(df_train) * eval_data_percent) + train_indx

df_tr = df_train[:train_indx]
df_eval = df_train[train_indx:eval_indx]
df_test = df_train[eval_indx:]

# All three splits use the default "train" subset so that masks are returned.
train_data = ImageData(df=df_tr, transform=data_transf)
eval_data = ImageData(df=df_eval, transform=data_transf)
test_data = ImageData(df=df_test, transform=data_transf)

# Only the training loader shuffles and batches; eval/test use the
# DataLoader defaults.
train_loader = DataLoader(
    dataset=train_data, batch_size=batch_size, shuffle=True
)
eval_loader = DataLoader(dataset=eval_data)
test_loader = DataLoader(dataset=test_data)

### Show some image and ground truth (masks)

In [0]:
# Show the transformed image of training sample 3; permute moves the first
# axis last (presumably CxHxW -> HxWxC, the layout matplotlib expects).
plt.imshow(train_data[3][0].permute(1, 2, 0))

In [0]:
# Show the ground-truth mask of training sample 3; squeeze drops the
# length-1 axis left after the permute.
plt.imshow(np.squeeze(train_data[3][1].permute(1, 2, 0)))

In [0]:
# Show the ground-truth mask of eval sample 10, same layout handling as above.
plt.imshow(np.squeeze(eval_data[10][1].permute(1,2,0)))

### Models

UNet:

In [0]:
def convrelu(in_channels, out_channels, kernel, padding):
    """Build a Conv2d followed by an in-place ReLU as one nn.Sequential.

    :param in_channels: input channels of the convolution.
    :param out_channels: output channels of the convolution.
    :param kernel: kernel size passed to nn.Conv2d.
    :param padding: padding passed to nn.Conv2d.
    :return: nn.Sequential with the convolution at index 0, the ReLU at 1.
    """
    convolution = nn.Conv2d(in_channels, out_channels, kernel, padding=padding)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(convolution, activation)


class UNet(nn.Module):
    """U-Net style segmentation network with a ResNet-18 encoder.

    The encoder stages are slices of a torchvision resnet18 whose weights are
    loaded from "<data_path>/resnet18/resnet18.pth". Each encoder output goes
    through a 1x1 conv+ReLU and is concatenated with the upsampled decoder
    features; a final 1x1 convolution produces `n_class` output channels
    (raw scores, no activation is applied here).
    """

    def __init__(self, n_class):
        """Build encoder slices, skip-connection convs and decoder convs.

        :param n_class: number of output channels of the final convolution.
        """
        super().__init__()
        
        # Encoder: resnet18 initialised from a local weights file.
        self.base_model = models.resnet18()
        self.base_model.load_state_dict(torch.load(os.path.join(data_path, "resnet18/resnet18.pth")))
        self.base_layers = list(self.base_model.children())

        # Encoder stages are consecutive slices of resnet18's child modules
        # (presumably conv1/bn1/relu, then maxpool+layer1, layer2, layer3,
        # layer4 - confirm against torchvision). Each stage is paired with a
        # 1x1 conv+ReLU applied to its output before it is used as a skip.
        self.layer0 = nn.Sequential(*self.base_layers[:3])
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5])
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = self.base_layers[5]
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = self.base_layers[6]
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = self.base_layers[7]
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        # Shared x2 bilinear upsampling used at every decoder step.
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Decoder convs: input channels = skip channels + upsampled channels.
        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        # Full-resolution branch applied directly to the 3-channel input and
        # merged with the decoder output at the end.
        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        # 1x1 convolution producing the n_class output channels.
        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, input):
        """Run the encoder, then decode with skip connections.

        :param input: image batch with 3 channels (conv_original_size0
            expects 3 input channels).
        :return: tensor with n_class channels produced by conv_last.
        """
        # Full-resolution features taken straight from the input.
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)

        # Encoder pass; every stage output is kept for a skip connection.
        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        # Decoder: upsample, concatenate with the matching skip on the
        # channel axis, then convolve. Repeated from deepest to shallowest.
        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)

        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)

        # Final upsample and merge with the full-resolution branch.
        x = self.upsample(x)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)

        out = self.conv_last(x)

        return out

### Train Preparation

In [0]:
# Build the network on the GPU. The output channel count comes from the
# `n_classes` hyperparameter instead of a hard-coded literal, so changing
# the config actually changes the model.
model = UNet(n_class=n_classes).cuda()
if reload_last_model and os.path.exists(model_save_path):
  # Resume from the best checkpoint saved by a previous run.
  print("Loading model from {}".format(model_save_path))
  model.load_state_dict(torch.load(model_save_path))
else:
  print("Not load model, train from scrath")
# The model emits raw scores, so the loss applies the sigmoid itself.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(
    model.parameters(), weight_decay=weight_decay, lr=lr, momentum=momentum
)

### training eval and test functions

Get LR value if we use lr decay

In [0]:
def get_lr(optimizer):
    """Return the 'lr' of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

Eval Metrics:

In [0]:
# Added to intersection and union so an empty union does not divide by zero.
SMOOTH = 1e-6


def dice_coefficient(x, y):
    """Return the Dice coefficient of one prediction/ground-truth pair.

    :param x: numpy array, binary mask (pixels equal to 1 are foreground).
    :param y: numpy array, binary mask of the same shape after squeezing.
    :return: float, 2 * |x & y| / (|x| + |y|); 1.0 when both masks are empty.
    """
    x_fg = np.squeeze(x) == 1
    y_fg = np.squeeze(y) == 1
    total = x_fg.sum() + y_fg.sum()
    if total == 0:
        # Two empty masks agree perfectly; avoid 0 / 0.
        return 1.0
    return float(2.0 * np.logical_and(x_fg, y_fg).sum() / total)


def iou_numpy(outputs: np.ndarray, labels: np.ndarray):
    """Return the thresholded IoU score of every image in a batch.

    :param outputs: binary predictions shaped (batch, 1, H, W).
    :param labels: binary ground truth shaped (batch, H, W) or
        (batch, 1, H, W).
    :return: array of shape (batch,). Each IoU is mapped to a step score:
        0 for IoU <= 0.5, then rising in steps of 0.1 up to 1.0.
    """
    outputs = np.asarray(outputs).squeeze(1)
    labels = np.asarray(labels)
    # Drop the channel axis from the labels as well. Without this a
    # (batch, 1, H, W) label broadcasts against the squeezed outputs and the
    # sums below run over the wrong axes, giving one score per column.
    if labels.ndim == outputs.ndim + 1 and labels.shape[1] == 1:
        labels = labels.squeeze(1)

    intersection = ((outputs == 1) & (labels == 1)).sum((1, 2))
    union = ((outputs == 1) | (labels == 1)).sum((1, 2))

    iou = (intersection + SMOOTH) / (union + SMOOTH)

    thresholded = np.ceil(np.clip(20 * (iou - 0.5), 0, 10)) / 10

    return thresholded


Inference:

In [0]:
def predict(model, data_loader, resize=False, out_mask=True):
    """Run the model over a loader and return binarised masks and the loss.

    For every batch the raw model output is negated, a loss is computed
    between the negated output and the ground truth, and both the negated
    output and the ground truth are binarised around 1.2x their own mean.

    :param model: network to evaluate; put into eval mode here.
    :param data_loader: loader yielding (image, mask) batches.
    :param resize: accepted for interface compatibility; not read.
    :param out_mask: accepted for interface compatibility; not read.
    :return: (predictions, Y, raw, predict_loss) - lists of binarised
        prediction arrays, binarised ground-truth tensors and input batches,
        plus the average loss over all batches.
    """
    predictions = []
    Y = []
    raw = []
    losses = []
    model.eval()
    print("Predicting...")
    # No gradients are needed for inference; skipping them saves memory.
    with torch.no_grad(), tqdm_notebook(
        total=len(data_loader.dataset), leave=leave_progress_bar
    ) as pbar:
        for data, y in data_loader:
            o = model(data.cuda())
            # NOTE(review): the sign flip looks like a workaround tied to
            # the thresholding below - confirm it is intended.
            o = o.cpu().detach().numpy() * (-1)
            tmp = np.copy(o)
            # Store a plain float so no tensors are kept alive per batch.
            losses.append(criterion(torch.tensor(tmp), y).item())

            # Binarise in two passes: values below the threshold become 0
            # first, then everything above the threshold becomes 1.
            mn = np.mean(o)*1.2
            tmp[tmp < mn] = 0
            tmp[tmp > mn] = 1
            # The ground truth is binarised the same way, in place.
            y_tmp = np.mean(y.numpy())*1.2
            y[y < y_tmp] = 0
            y[y > y_tmp] = 1

            predictions.append(tmp)
            raw.append(data)
            Y.append(y)

            # The bar total counts samples, so advance by the batch size.
            pbar.update(len(data))
    predict_loss = np.average(losses)
    print(f"Predict Loss = {predict_loss}")
    return predictions, Y, raw, predict_loss


Eval Data:

In [0]:
def eval_data_set(model, data_loader, matrics):
  """Score a model on a loader with the given metric function.

  :param model: network handed to `predict`.
  :param data_loader: loader handed to `predict`.
  :param matrics: callable(prediction_array, truth_array) returning a score.
  :return: (mean score, predictions, ground truths, raw inputs, loss), the
      last four exactly as returned by `predict`.
  """
  predictions, Y, raw, loss = predict(model, data_loader)
  scores = [
      matrics(mask, truth.numpy()) for mask, truth in zip(predictions, Y)
  ]
  return np.mean(scores), predictions, Y, raw, loss

TensorBoard Functions

In [0]:
# Optionally start from an empty log directory.
# NOTE(review): the directory is addressed by the absolute Colab path
# /content/runs while SummaryWriter() below uses its default location -
# confirm they refer to the same folder when running outside Colab.
if clear_tensor_board_history:
  shutil.rmtree("/content/runs", ignore_errors=True)
  os.mkdir("/content/runs")

# Shared writer used by the training loop and by write_image_to_tb.
summary_writter = SummaryWriter() # writes to ./runs by default

def write_image_to_tb(
    predictions,
    raw_truth,
    raw,
    epoch,
    category="eval",
    update_frequency=10
):
  """Log image grids of inputs, predictions and ground truth.

  Three grids are written with tags "<epoch>_<category>_raw_img",
  "<epoch>_<category>_prediction" and "<epoch>_<category>_ground_truth",
  each using `epoch` as the global step, and the writer is then flushed.

  :param predictions: sequence of prediction arrays.
  :param raw_truth: sequence of ground-truth tensors, aligned by index.
  :param raw: sequence of input image tensors, aligned by index.
  :param epoch: used in the tags and as the step.
  :param category: label inserted into the tags.
  :param update_frequency: items whose index satisfies
      `i // update_frequency == 0` are logged, i.e. the first
      `update_frequency` items.
      NOTE(review): the name suggests "every n-th item" - confirm.
  """
  picked = [
      (i, prediction)
      for i, prediction in enumerate(predictions)
      if i // update_frequency == 0
  ]
  panels = (
      ("raw_img", [raw[i].squeeze(0) for i, _ in picked]),
      (
          "prediction",
          [torch.tensor(prediction).squeeze(1) for _, prediction in picked],
      ),
      ("ground_truth", [raw_truth[i].squeeze(1) for i, _ in picked]),
  )

  for suffix, images in panels:
    summary_writter.add_image(
        f'{epoch}_{category}_{suffix}',
        torchvision.utils.make_grid(images, nrow=100),
        epoch
    )
  summary_writter.flush()

Launch TensorBoard:

In [0]:
# IPython magics (not plain Python): load the TensorBoard notebook extension
# and open it on the relative "runs" log directory.
%load_ext tensorboard
%tensorboard --logdir=runs

### Training

In [0]:
# In inference mode the epoch loop below is skipped entirely.
if inference_mode:
  total_epochs = 0

total_it = 0      # samples seen so far; used as the TensorBoard step
last_result = 0   # best eval score so far
current_it = 1
for epoch in range(total_epochs):
  print(f"Epoch: {epoch}")
  model.train()
  with tqdm_notebook(
      total=len(train_loader.dataset), leave=leave_progress_bar
  ) as pbar:
    for images, masks in train_loader:
      images = images.cuda()
      masks = masks.cuda()

      # One optimisation step.
      optimizer.zero_grad()
      train_loss = criterion(model(images), masks)
      train_loss.backward()
      optimizer.step()

      # Progress bar and TensorBoard bookkeeping.
      data_len = len(images)
      loss_value = train_loss.item()
      pbar.update(data_len)
      pbar.set_postfix(epoch=epoch, loss=loss_value)
      total_it += data_len
      summary_writter.add_scalar("Loss/train", loss_value, total_it)
      summary_writter.add_scalar("LR/train", get_lr(optimizer), total_it)
      summary_writter.flush()

    # Evaluate on the eval split after each epoch.
    corr, predictions, raw_truth, raw, loss = eval_data_set(
        model, eval_loader, iou_numpy
    )

  write_image_to_tb(predictions, raw_truth, raw, epoch, "eval")
  print(f"Result = {corr}")
  # Keep only the checkpoint with the best eval score.
  if corr > last_result:
    print("Saving Model...")
    torch.save(model.state_dict(), model_save_path)
    last_result = corr
  summary_writter.flush()

### Test on Test Set

In [0]:
# Score the current in-memory model on the held-out test split and log a few
# of its predictions under the "test" category at step 1.
test_outcome = eval_data_set(model, test_loader, iou_numpy)
corr, predictions, raw_truth, raw, loss = test_outcome
print(f"Test set Corr = {corr}")
write_image_to_tb(predictions, raw_truth, raw, 1, "test")

In [0]:
# Close the shared TensorBoard writer now that all logging is done.
summary_writter.close()