In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
from glob import glob
import copy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchvision import transforms, utils
from torchsummary import summary

In [6]:
device = None

if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

print('Using PyTorch Version: ', torch.__version__, 'Device: ', device)

Using PyTorch Version:  1.8.0+cu101 Device:  cuda


In [7]:
data_path = './drive/MyDrive/northpole/data/'
train_data_path = os.path.join(data_path, "train")

print(train_data_path)

./drive/MyDrive/northpole/data/train


In [8]:
file_list = os.listdir(train_data_path)
file_list.sort()

In [9]:
torch.set_printoptions(threshold=10000)

In [10]:
class SealceDataset(Dataset):
  def __init__(self, data_dir, transform, data_type="train", frame_num=6, predict_num=2, stride=1):
    super(SealceDataset, self).__init__()
    data_to_path = os.path.join(data_dir, data_type)
    filenames = os.listdir(data_to_path)
    self.filepaths = [os.path.join(data_to_path, filename) for filename in sorted(filenames)]
    self.transform = transform
    self.frame_num = frame_num
    self.predict_num = predict_num
    self.stride = stride

  def __len__(self):
    return len(self.filepaths) - (self.frame_num + self.predict_num - 1) * self.stride
  
  def __getitem__(self, idx):
    dataset = []

    for id in range(idx, idx+self.frame_num+self.predict_num, self.stride):
      cur_npy = np.load(self.filepaths[id])[:, :, 0] / 250
      cur_tensor = self.transform(cur_npy)
      dataset.append(cur_tensor)
    
    x = torch.stack(dataset[:self.frame_num])
    x = x.transpose(0, 1).to(dtype=torch.float)

    y = torch.stack(dataset[self.frame_num:])
    y = y.transpose(0, 1)

    return x, y
  
def getTransform():
  return transforms.Compose([transforms.ToTensor()])

transform = getTransform()

ice_dataset = SealceDataset(data_path, transform, 'train', 6, 2, 1)

a, b = ice_dataset[1]

print(len(ice_dataset))
print(a.shape, b.shape)

475
torch.Size([1, 6, 448, 304]) torch.Size([1, 2, 448, 304])


In [11]:
len_ice_dataset = len(ice_dataset)

len_ice_train = int(0.8 * len_ice_dataset)
len_ice_valid = len_ice_dataset - len_ice_train

train_dataset, valid_dataset = random_split(ice_dataset, [len_ice_train, len_ice_valid])

print(f"train dataset length : {len(train_dataset)}")
print(f"valid dataset length : {len(valid_dataset)}")

train dataset length : 380
valid dataset length : 95


In [12]:
BATCH_SIZE = 12

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

for x, y in train_dataloader:
  print(x.shape, y.shape)
  break

torch.Size([12, 1, 6, 448, 304]) torch.Size([12, 1, 2, 448, 304])


In [16]:
MODEL_PARAMS = {
    "shape": (6, 1, 448, 304),
    "init_filters": 8,
    "dropout_rate": 0.5
}

In [17]:
class CustomNet(nn.Module):
  def __init__(self, params):
    super(CustomNet, self).__init__()
    input_frames, input_channel, input_height, input_width = params["shape"]
    init_filters = params["init_filters"]
    self.dropout_rate = params["dropout_rate"]
    self.conv1 = nn.Conv3d(input_channel, init_filters, kernel_size=3, padding=1)
    self.conv2 = nn.Conv3d(init_filters, init_filters*2, kernel_size=3, padding=1)
    self.conv3 = nn.ConvTranspose3d(init_filters*2, 1, kernel_size=3, padding=1)

  def forward(self, x):
    input = x
    x = F.relu(self.conv1(x))
    x = F.max_pool3d(x, 2, 2)
    x = F.relu(self.conv2(x))
    x = F.relu(self.conv3(x))
    x = F.upsample(x, size=(2, 448, 304))
    print("input : ", input.shape)
    print("output : ", x.shape)
    return x


In [19]:
my_model = CustomNet(MODEL_PARAMS).to(device)
print(my_model)

CustomNet(
  (conv1): Conv3d(1, 8, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv2): Conv3d(8, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv3): ConvTranspose3d(16, 1, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
)


In [20]:
summary(my_model, input_size=(1, 6, 448, 304), device=device.type)

input :  torch.Size([2, 1, 6, 448, 304])
output :  torch.Size([2, 1, 2, 448, 304])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1       [-1, 8, 6, 448, 304]             224
            Conv3d-2      [-1, 16, 3, 224, 152]           3,472
   ConvTranspose3d-3       [-1, 1, 3, 224, 152]             433
Total params: 4,129
Trainable params: 4,129
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 3.12
Forward/backward pass size (MB): 63.12
Params size (MB): 0.02
Estimated Total Size (MB): 66.26
----------------------------------------------------------------




In [21]:
def mae_score(true, pred):
    true, pred = numpy_to_tensor(true, pred)
    score = np.mean(np.abs(true-pred))
    
    return score

# metrics
def f1_score(true, pred):
    true, pred = numpy_to_tensor(true, pred)

    target = np.where((1*0.05 < true)&(true < 1*0.5))
    # target = np.where((true>1*0.05)<1*0.5))
    
    true = true[target]
    pred = pred[target]
    true = np.where(true < 1*0.15, 0, 1)
    pred = np.where(pred < 1*0.15, 0, 1)
    
    right = np.sum(true * pred == 1)
    precision = right / np.sum(true+1e-8)
    recall = right / np.sum(pred+1e-8)
    score = 2 * precision*recall/(precision+recall+1e-8)
    
    return score
    
# loss function
def mae_over_f1(true, pred):
    mae = mae_score(true, pred)
    f1 = f1_score(true, pred)
    score = mae/(f1+1e-8)
    
    return score

def numpy_to_tensor(true, pred):
    return true.cpu().detach().numpy(), pred.cpu().detach().numpy()

In [22]:
opt_adam = optim.Adam(my_model.parameters(), lr=3e-4)

def get_lr(opt):
  for param_group in opt.param_groups:
    return param_group["lr"]

current_lr = get_lr(opt_adam)
print(f"current_lr = {current_lr}")

current_lr = 0.0003


In [23]:
lr_scheduler = ReduceLROnPlateau(opt_adam, mode="min", factor=0.5, patience=20, verbose=1)

In [24]:
for i in range(100):
  lr_scheduler.step(1)

Epoch    22: reducing learning rate of group 0 to 1.5000e-04.
Epoch    43: reducing learning rate of group 0 to 7.5000e-05.
Epoch    64: reducing learning rate of group 0 to 3.7500e-05.
Epoch    85: reducing learning rate of group 0 to 1.8750e-05.


In [25]:
# Training 

def metrics_batch(pred, true, metrics):
    # if needed add param "metrics" to custom
    """
    output will be pred
    target will be corrects
    """
    if metrics:
        return list(map(lambda x: x(true, pred), metrics))
    mae_score = mae_score(true, pred)
    f1_score = f1_score(true, pred)
    return (mae_score, f1_score)

def loss_batch(loss_func, pred, true, opt=None):
    """
    loss_func => mae_over_f1
    """
    loss = loss_func(true, pred)
    with torch.no_grad():
        metrics = metrics_batch(pred, true, [mae_score, f1_score])
    if opt is not None:
        opt.zero_grad()
        # loss.backward()
        opt.step()  # 학습이 이뤄지는 곳
    return loss, metrics

def loss_epoch(model, loss_func, dataset_dataloader, sanity_check=False, opt=None):
    running_loss = 0.0
    running_metric = [0.0, 0.0]
    len_data = len(dataset_dataloader.dataset)

    for x, y in dataset_dataloader:
        x = x.to(device)
        y = y.to(device)
        # 모델 결과
        pred = model(x)
        # 손실함수 구하기
        loss, metrics = loss_batch(loss_func, pred, y, opt)
        # 손실함수 
        running_loss += loss
        if metrics is not None:
            for idx, metric_value in enumerate(metrics):
                running_metric[idx] += metric_value
        
        # 문제 있으면 break, 여기서는 True 일때 바로 break
        if sanity_check is True:
            break
    
    loss = running_loss / float(len_data)
    metrics = list(map(lambda x: x/float(len_data), metrics))
    print(loss, metrics)
    return loss, metrics

In [26]:
loss_func = mae_over_f1
opt_adam = optim.Adam(my_model.parameters(), lr=3e-4)
lr_scheduler = ReduceLROnPlateau(opt_adam, mode="min", factor=0.5, patience=20, verbose=1)

TRAIN_PARAMS = {
    "num_epochs" : 10,
    "loss_func" : loss_func,
    "optimizer" : opt_adam,
    "train_dataloader" : train_dataloader,
    "valid_dataloader" : valid_dataloader,
    "sanity_check" : True,
    "lr_scheduler" : lr_scheduler,
    "save_path" : "./weights.pt"
}

In [27]:
def train(model, params):
    num_epochs = params['num_epochs']
    loss_func = params['loss_func']
    opt = params["optimizer"]
    train_dataloader = params['train_dataloader']
    valid_dataloader = params['valid_dataloader']
    sanity_check = params['sanity_check']
    lr_scheduler = params['lr_scheduler']
    save_path = params['save_path']

    # keep history of the loss and metric
    loss_hist = {
        "train" : [],
        "valid" : []
    }

    metrics_hist = {
        "train" : [],
        "valid" : []
    }

    # copy best weights
    best_model_weights = copy.deepcopy(model.state_dict())
    # init best loss
    best_loss = float("inf")

    for epoch in range(num_epochs):
        current_lr = get_lr(opt)
        print(f'Epoch:{epoch}/{num_epochs-1}, current lr:{current_lr}')
        model.train()
        train_loss, train_metrics = loss_epoch(model, loss_func, train_dataloader, sanity_check, opt)

        # save history
        loss_hist["train"].append(train_loss)
        metrics_hist["train"].append(train_metrics)

        # model.eval()
        # with torch.no_grad():
    

    return model, loss_hist, metrics_hist

In [28]:
my_model, loss_hist, metrics_hist = train(my_model, TRAIN_PARAMS)

Epoch:0/9, current lr:0.0003




input :  torch.Size([12, 1, 6, 448, 304])
output :  torch.Size([12, 1, 2, 448, 304])
0.0009662873702467364 [0.00037221453585803253, 0.0010136859294660745]
Epoch:1/9, current lr:0.0003
input :  torch.Size([12, 1, 6, 448, 304])
output :  torch.Size([12, 1, 2, 448, 304])
0.0009662873700507026 [0.0003722145357825202, 0.0010136859294660745]
Epoch:2/9, current lr:0.0003
input :  torch.Size([12, 1, 6, 448, 304])
output :  torch.Size([12, 1, 2, 448, 304])
0.0009662873701573348 [0.000372214535823595, 0.0010136859294660745]
Epoch:3/9, current lr:0.0003
input :  torch.Size([12, 1, 6, 448, 304])
output :  torch.Size([12, 1, 2, 448, 304])
0.0009662873701392008 [0.0003722145358166097, 0.0010136859294660745]
Epoch:4/9, current lr:0.0003
input :  torch.Size([12, 1, 6, 448, 304])
output :  torch.Size([12, 1, 2, 448, 304])
0.0009662873701349806 [0.0003722145358149841, 0.0010136859294660745]
Epoch:5/9, current lr:0.0003
input :  torch.Size([12, 1, 6, 448, 304])
output :  torch.Size([12, 1, 2, 448, 304])


In [29]:
print(loss_hist)

{'train': [0.0009662873702467364, 0.0009662873700507026, 0.0009662873701573348, 0.0009662873701392008, 0.0009662873701349806, 0.0009662873701037502, 0.0009662873700445594, 0.0009662873701575684, 0.000966287370042932, 0.0009662873700496437], 'valid': []}
