In [1]:
!gdown --id 1wjrfo7xljD3qUmoXHr2pyigSzHCxHMlv --output hw1_data.zip
!mkdir hw1_data
!unzip -q hw1_data.zip -d hw1_data
!rm hw1_data.zip

Downloading...
From: https://drive.google.com/uc?id=1wjrfo7xljD3qUmoXHr2pyigSzHCxHMlv
To: /content/hw1_data.zip
100% 168M/168M [00:01<00:00, 156MB/s]


In [2]:
# import packages
import os
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
from torch.utils import data
from torchvision.transforms import transforms
import torchvision.models as models
import torchvision.transforms.functional as tvF
from PIL import Image
import glob
import imageio
from tqdm.auto import tqdm
import random
import matplotlib.pyplot as plt

In [3]:
# seed
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True

In [4]:
# get files path
path = "./hw1_data/p2_data/"
train_files_path = os.path.join(path, 'train')
test_files_path = os.path.join(path, 'validation')

split = [4, 9]
train_files = glob.glob(path + 'train/*')
valid_files = [train_file for train_file in train_files if (int(train_file.split('/')[-1].split('_')[0]) % 10) in split]
train_files = [train_file for train_file in train_files if (int(train_file.split('/')[-1].split('_')[0]) % 10) not in split]
# test_files = glob.glob(path + 'validation/*')
test_files = ['./hw1_data/p2_data/validation/0010_sat.jpg', './hw1_data/p2_data/validation/0097_sat.jpg', './hw1_data/p2_data/validation/0107_sat.jpg', './hw1_data/p2_data/validation/0010_mask.png', './hw1_data/p2_data/validation/0097_mask.png', './hw1_data/p2_data/validation/0107_mask.png']

train_inputs_files = [train_file for train_file in train_files if train_file.split('.')[-1] == "jpg"]
train_labels_files = [train_file for train_file in train_files if train_file.split('.')[-1] == "png"]

valid_inputs_files = [valid_file for valid_file in valid_files if valid_file.split('.')[-1] == "jpg"]
valid_labels_files = [valid_file for valid_file in valid_files if valid_file.split('.')[-1] == "png"]

test_inputs_files = [test_file for test_file in test_files if test_file.split('.')[-1] == "jpg"]
test_labels_files = [test_file for test_file in test_files if test_file.split('.')[-1] == "png"]

train_inputs_files.sort()
train_labels_files.sort()
valid_inputs_files.sort()
valid_labels_files.sort()
test_inputs_files.sort()
test_labels_files.sort()

In [5]:
# define dataset
class SegDataset(data.Dataset):
  def __init__(self, inputs_path: list, labels_path: list, transforms=None):
    self.inputs_path = inputs_path
    self.labels_path = labels_path
    self.transforms = transforms
  
  def __len__(self):
    return len(self.inputs_path)
  
  def __getitem__(self, index: int):
    input_path = self.inputs_path[index]
    label_path = self.labels_path[index]

    input = Image.open(input_path)
    label = Image.open(label_path)

    if self.transforms is not None:
      transform = transforms.ToTensor()
      if random.random() > 0.5:
        input = tvF.hflip(input)
        label = tvF.hflip(label)
      
      input, label = self.transforms(input), transform(label)

    else:
      transform = transforms.ToTensor()
      input, label = transform(input), transform(label)
    
    return input, label

In [6]:
# transforms
train_transforms = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

test_transforms = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
image_size = 512

In [7]:
# dataloader
n = 12
m = 12
train_set = SegDataset(train_inputs_files[:n], train_labels_files[:n], train_transforms)
valid_set = SegDataset(valid_inputs_files[:m], valid_labels_files[:m], test_transforms)
test_set = SegDataset(test_inputs_files, test_labels_files, test_transforms)
print(test_set[0])

batch_size = 4
n_workers = 0
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=n_workers, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=n_workers, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

(tensor([[[-1.1247, -1.0733, -1.1247,  ..., -1.1932, -1.2274, -1.3302],
         [-1.0904, -1.1418, -1.1418,  ..., -1.0390, -1.0219, -1.0904],
         [-1.1075, -1.1760, -1.1589,  ..., -0.9192, -0.9192, -0.9705],
         ...,
         [-0.9534, -1.0048, -0.9705,  ..., -0.3369, -0.3198, -0.3198],
         [-0.9363, -1.0048, -0.9363,  ..., -0.3369, -0.3198, -0.3198],
         [-0.9705, -1.0390, -1.0048,  ..., -0.3369, -0.3369, -0.3198]],

        [[-0.6877, -0.6877, -0.7752,  ..., -0.8803, -0.9153, -1.0203],
         [-0.7052, -0.7577, -0.8102,  ..., -0.7227, -0.7052, -0.7752],
         [-0.7227, -0.8277, -0.8277,  ..., -0.6176, -0.6001, -0.6527],
         ...,
         [-0.7227, -0.7752, -0.7402,  ..., -0.3550, -0.3375, -0.3375],
         [-0.7052, -0.7752, -0.7052,  ..., -0.3550, -0.3375, -0.3375],
         [-0.7402, -0.8102, -0.7752,  ..., -0.3550, -0.3550, -0.3375]],

        [[-0.9504, -0.9330, -1.0027,  ..., -0.8633, -0.8981, -1.0027],
         [-0.9504, -1.0027, -1.0376,  ..., -

In [8]:
print(len(train_loader))

3


In [None]:
for i, batch in enumerate(test_loader):
  imgs, labels = batch
  if i == 0:
    break

# size = 512
# transform = transforms.ToTensor()
# test = transform(Image.open(test_labels_files[0]))

a = imgs.argmax(dim=1).cpu().numpy()
# print(a.shape)
# print(a.argmax(dim=1).cpu().numpy())

  # masks[index].save(f'{index}.png')

# img = Image.open(test_labels_files[:1])
# transform = transforms.ToTensor()
# img = transform(img)
# print(read_masks(img))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)


In [None]:
class SegNet(nn.Module):
  def __init__(self, num_classes):
    super().__init__()

    resnet_50 = models.resnet50(pretrained=True)

    self.resnet = nn.Sequential(
        resnet_50.conv1,
        resnet_50.bn1,
        resnet_50.relu,
        resnet_50.maxpool,
        resnet_50.layer1,
        resnet_50.layer2,
        resnet_50.layer3,
        resnet_50.layer4,
    )

    def decode(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
      dec = nn.Sequential(
          nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
          nn.BatchNorm2d(out_channels),
          nn.ReLU(inplace=True)
      )
      return dec

    self.decode = nn.Sequential(
        decode(2048, 512, 4, 2, 1),
        decode(512, 512),
        decode(512, 512),
        decode(512, 128, 4, 2, 1),
        decode(128, 128),
        decode(128, 128),
        decode(128, 32, 4, 2, 1),
        decode(32, 32),
        decode(32, 16, 4, 2, 1),
        decode(16, 16),
        decode(16, 8, 4, 2, 1),
        decode(8, num_classes),
    )

  def forward(self, x):
    x = self.resnet(x)
    x = self.decode(x)
    return x

In [None]:
def read_masks(data, image_size=512):
  masks = torch.rand((data.shape[0], image_size, image_size)) * 6
  masks = torch.floor(masks).numpy()
  # masks = (masks - np.min(masks)) / np.max(masks) * 6
  for i, d in enumerate(data):
    # mask = d.cpu().detach().numpy() * 255
    mask = d.cpu().detach().numpy()
    # mask = (mask >= 128).astype(int)
    mask = 4 * mask[0, :, :] + 2 * mask[1, :, :] + mask[2, :, :]
    masks[i, mask == 3] = 0  # (Cyan: 011) Urban land 
    masks[i, mask == 6] = 1  # (Yellow: 110) Agriculture land 
    masks[i, mask == 5] = 2  # (Purple: 101) Rangeland 
    masks[i, mask == 2] = 3  # (Green: 010) Forest land 
    masks[i, mask == 1] = 4  # (Blue: 001) Water 
    masks[i, mask == 7] = 5  # (White: 111) Barren land 
    masks[i, mask == 0] = 6  # (Black: 000) Unknown 
  return masks

def mean_iou_score(pred, labels):
    tp_fps = []
    tp_fns = []
    tps = []
    for i in range(6):
      tp_fp = np.sum(pred == i)
      tp_fn = np.sum(labels == i)
      tp = np.sum((pred == i) * (labels == i))
      tp_fps.append(tp_fp)
      tp_fns.append(tp_fn)
      tps.append(tp)
    return tp_fps, tp_fns, tps

In [None]:
# train


In [None]:

device = "cuda" if torch.cuda.is_available() else "cpu"
num_classes = 7
# model = vgg16_fcn32(num_classes).to(device)
resnet_models = []
resnet_models.append(models.resnet50(pretrained=True))
resnet_models.append(models.resnet101(pretrained=True))
resnet_models.append(models.resnet152(pretrained=True))
for model_num, resnet_model in enumerate(resnet_models):
  model = SegNet(resnet_model, num_classes).to(device)
  model.device = device

  loss_fn = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters())

  n_epochs = 2

  # accum_steps = 4
  max_valid_miou = 0

  if os.path.isfile(f'./model_{model_num}.ckpt'):
    ckpt = torch.load(f'./model_{model_num}.ckpt')
    model.load_state_dict(ckpt['model'])
    optimizer.load_state_dict(ckpt['optimizer'])
    # scheduler.load_state_dict(ckpt['scheduler'])
    start_epoch = ckpt['last_epoch'] + 1
    max_valid_miou = ckpt['max_valid_miou']
  else:
    start_epoch = 0
    with open(f'./record_{model_num}.txt', 'w') as f:
      f.write('')


  for epoch in range(start_epoch, n_epochs):

    model.train()
    train_loss = []
    train_fps = []
    train_fns = []
    train_tps = []
    optimizer.zero_grad()
    index = 0
    for batch in tqdm(train_loader):

      inputs, labels = batch
      
      pred = model(inputs.to(device))

      labels = read_masks(labels)
      labels = torch.from_numpy(labels).long().to(device)
      
      loss = loss_fn(pred, labels)
      loss.backward()

      tp_fps, tp_fns, tps = mean_iou_score(pred.argmax(dim=1).cpu().numpy(), labels.cpu().numpy())
      train_fps.append(tp_fps)
      train_fns.append(tp_fns)
      train_tps.append(tps)

      # if ((index + 1) % accum_steps == 0) or ((index + 1) == len(train_loader)):
      optimizer.step()
      optimizer.zero_grad()
      
      train_loss.append(loss.item())
      index += 1

    train_loss = sum(train_loss) / len(train_loss)
    train_fps = np.sum(np.array(train_fps), axis=0)
    train_fns = np.sum(np.array(train_fns), axis=0)
    train_tps = np.sum(np.array(train_tps), axis=0)
    train_mious = train_tps / (train_fps + train_fns - train_tps)
    train_miou = np.sum(train_mious) / len(train_mious)

    with open(f'./record_{model_num}.txt', 'a') as f:
      f.write(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, mean iou = {train_miou:.5f}\n")
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, mean iou = {train_miou:.5f}\n")

    model.eval()
    valid_fps = []
    valid_fns = []
    valid_tps = []
    valid_loss = []

    with torch.no_grad():
      for batch in tqdm(valid_loader):
        
        inputs, labels = batch
        
        pred = model(inputs.to(device))
        
        labels = read_masks(labels)
        labels = torch.from_numpy(labels).long().to(device)
        loss = loss_fn(pred, labels)

        tp_fps, tp_fns, tps = mean_iou_score(pred.argmax(dim=1).cpu().numpy(), labels.cpu().numpy())
        valid_fps.append(tp_fps)
        valid_fns.append(tp_fns)
        valid_tps.append(tps)

        valid_loss.append(loss.item())
    
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_fps = np.sum(np.array(valid_fps), axis=0)
    valid_fns = np.sum(np.array(valid_fns), axis=0)
    valid_tps = np.sum(np.array(valid_tps), axis=0)
    valid_mious = valid_tps / (valid_fps + valid_fns - valid_tps)
    valid_miou = np.sum(valid_mious) / len(valid_mious)

    with open(f'./record{model_num}.txt', 'a') as f:
      f.write(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, mean iou = {valid_miou:.5f}\n")
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, mean iou = {valid_miou:.5f}\n")

    if max_valid_miou < valid_miou:
        max_valid_miou = valid_miou
        torch.save({'last_epoch': epoch + 1,
              'model': model.state_dict(),
              'optimizer': optimizer.state_dict(),
              'max_valid_miou': max_valid_miou,
              # 'scheduler': scheduler.state_dict(),
              }, f'./model_{model_num}.ckpt')
        with open('./record2.txt', 'a') as f:
          f.write(f'Saving model_{model_num}\n')

    with open(f'./record{model_num}.txt', 'a') as f:
      f.write(f"max_valid_miou = {max_valid_miou:.5f}\n")
    print(f"max_valid_miou = {max_valid_miou:.5f}\n")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth


  0%|          | 0.00/171M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth


  0%|          | 0.00/230M [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[ Train | 001/002 ] loss = 2.03145, mean iou = 0.05987



  0%|          | 0/3 [00:00<?, ?it/s]

[ Valid | 001/002 ] loss = 2.00242, mean iou = 0.05076

max_valid_miou = 0.05076



  0%|          | 0/3 [00:00<?, ?it/s]

[ Train | 002/002 ] loss = 1.88212, mean iou = 0.08033



  0%|          | 0/3 [00:00<?, ?it/s]

[ Valid | 002/002 ] loss = 10.78954, mean iou = 0.04776

max_valid_miou = 0.05076



  0%|          | 0/3 [00:00<?, ?it/s]

[ Train | 001/002 ] loss = 2.05702, mean iou = 0.04600



  0%|          | 0/3 [00:00<?, ?it/s]

[ Valid | 001/002 ] loss = 2.64528, mean iou = 0.02694

max_valid_miou = 0.02694



  0%|          | 0/3 [00:00<?, ?it/s]

[ Train | 002/002 ] loss = 1.92253, mean iou = 0.07145



  0%|          | 0/3 [00:00<?, ?it/s]

[ Valid | 002/002 ] loss = 2.00670, mean iou = 0.01256

max_valid_miou = 0.02694



  0%|          | 0/3 [00:00<?, ?it/s]

[ Train | 001/002 ] loss = 2.05458, mean iou = 0.04813



  0%|          | 0/3 [00:00<?, ?it/s]

[ Valid | 001/002 ] loss = 2.93076, mean iou = 0.08926

max_valid_miou = 0.08926



  0%|          | 0/3 [00:00<?, ?it/s]

[ Train | 002/002 ] loss = 1.91952, mean iou = 0.07090



  0%|          | 0/3 [00:00<?, ?it/s]

[ Valid | 002/002 ] loss = 2.81505, mean iou = 0.08496

max_valid_miou = 0.08926



In [None]:
print(test_inputs_files)

['./hw1_data/p2_data/validation/0000_sat.jpg', './hw1_data/p2_data/validation/0001_sat.jpg', './hw1_data/p2_data/validation/0002_sat.jpg', './hw1_data/p2_data/validation/0003_sat.jpg', './hw1_data/p2_data/validation/0004_sat.jpg', './hw1_data/p2_data/validation/0005_sat.jpg', './hw1_data/p2_data/validation/0006_sat.jpg', './hw1_data/p2_data/validation/0007_sat.jpg', './hw1_data/p2_data/validation/0008_sat.jpg', './hw1_data/p2_data/validation/0009_sat.jpg', './hw1_data/p2_data/validation/0010_sat.jpg', './hw1_data/p2_data/validation/0011_sat.jpg', './hw1_data/p2_data/validation/0012_sat.jpg', './hw1_data/p2_data/validation/0013_sat.jpg', './hw1_data/p2_data/validation/0014_sat.jpg', './hw1_data/p2_data/validation/0015_sat.jpg', './hw1_data/p2_data/validation/0016_sat.jpg', './hw1_data/p2_data/validation/0017_sat.jpg', './hw1_data/p2_data/validation/0018_sat.jpg', './hw1_data/p2_data/validation/0019_sat.jpg', './hw1_data/p2_data/validation/0020_sat.jpg', './hw1_data/p2_data/validation/00

In [None]:
# test
device = "cuda" if torch.cuda.is_available() else "cpu"
output_repo = './drive/MyDrive/DLCV/output3'
os.makedirs(output_repo, exist_ok=True)
with open('./record2.txt', 'a') as f:
    f.write("testing\n")
test_mious = []
num_classes = 7
model = SegNet(num_classes).to(device)
ckpt = torch.load(f'./drive/MyDrive/DLCV/model_34.ckpt', map_location='cpu')
model.load_state_dict(ckpt['model'])
model.eval()
test_fps = []
test_fns = []
test_tps = []
preds = []

for batch in tqdm(test_loader):

    inputs, labels = batch
    
    with torch.no_grad():
        pred = model(inputs.to(device))
    
    labels = read_masks(labels)
    labels = torch.from_numpy(labels).long().to(device)
    preds.append(pred.argmax(dim=1).cpu().numpy())

    tp_fps, tp_fns, tps = mean_iou_score(pred.argmax(dim=1).cpu().numpy(), labels.cpu().numpy())
    test_fps.append(tp_fps)
    test_fns.append(tp_fns)
    test_tps.append(tps)

test_fps = np.sum(np.array(test_fps), axis=0)
test_fns = np.sum(np.array(test_fns), axis=0)
test_tps = np.sum(np.array(test_tps), axis=0)
test_mious = test_tps / (test_fps + test_fns - test_tps)
test_miou = np.sum(test_mious) / len(test_mious)

with open('./record2.txt', 'a') as f:
  f.write(f"test miou = {test_miou:.5f}\n")

for j, pred in enumerate(preds):
  masks = np.zeros((pred.shape[0], 512, 512, 3))
  for index, i in enumerate(pred):
    masks[index, i == 0, 2] = 1
    masks[index, i == 2, 2] = 1
    masks[index, i == 4, 2] = 1
    masks[index, i == 5, 2] = 1

    masks[index, i == 0, 1] = 1
    masks[index, i == 1, 1] = 1
    masks[index, i == 3, 1] = 1
    masks[index, i == 5, 1] = 1

    masks[index, i == 1, 0] = 1
    masks[index, i == 2, 0] = 1
    masks[index, i == 5, 0] = 1

    fn = test_inputs_files[j * 4 + index].split('/')[-1].split('.')[0] + '.png'
    output_path = os.path.join(output_repo, fn)
    plt.imsave(output_path, masks[index])

# masks[i, mask == 3] = 0  # (Cyan: 011) Urban land 
# masks[i, mask == 6] = 1  # (Yellow: 110) Agriculture land 
# masks[i, mask == 5] = 2  # (Purple: 101) Rangeland 
# masks[i, mask == 2] = 3  # (Green: 010) Forest land 
# masks[i, mask == 1] = 4  # (Blue: 001) Water 
# masks[i, mask == 7] = 5  # (White: 111) Barren land 
# masks[i, mask == 0] = 6  # (Black: 000) Unknown 

  0%|          | 0/1 [00:00<?, ?it/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
