In [1]:
from google.colab import drive 
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd drive/My Drive/data/open

/content/drive/My Drive/data/open


In [3]:
import warnings
warnings.simplefilter("ignore")

In [4]:
!pip install albumentations
!pip install --upgrade albumentations
!pip install torchinfo 

Installing collected packages: torchinfo
Successfully installed torchinfo-0.0.8


#Import

In [5]:
import pandas as pd
import numpy as np
import os
import torchvision
from torchvision import transforms
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim 
from torchvision import datasets, models, transforms
from sklearn.model_selection import train_test_split
# For image-keypoints data augmentation
import albumentations as A
from albumentations.pytorch import ToTensor
import cv2 
import torch.onnx
import logging
import time
import copy
from tqdm import tqdm
import matplotlib.pyplot as plt

[Reproducable Code](https://hoya012.github.io/blog/reproducible_pytorch/)

In [6]:
random_seed = 42

import random
random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)

In [7]:
# logging.basicConfig(filename='./train.log', filemode='w',level=logging.INFO)
# logger = logging.getLogger()  

In [19]:


# Prefix data directory
prefix_dir = '.'

# Top level data directory. Here we assume the format of the directory conforms
# to the ImageFolder structure
train_dir = f'{prefix_dir}/train_imgs'

# Models to choose from torchvision
model_name = 'SoftGateModel' 

# Number of classes in the dataset
num_classes = 48

# Batch size for training (change depending on how much memory you have)
batch_size = 8

# Number of epochs and earlystop to train for
num_epochs = 30

num_splits = 20
num_earlystop = 20

# Iput size for resize image
width =  512
height = 512


# Learning rate for optimizer
learning_rate = 0.01

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'device name {device}')

device name cuda:0


#DataLoad

In [9]:
# dir = os.listdir(train_dir)
# img = Image.open(os.path.join(train_dir,dir[0]))
# np.array(img).shape

In [10]:
df = pd.read_csv('train_df.csv') 
imgs = df.iloc[:, 0].to_numpy()
motions = df.iloc[:, 1:]
columns = motions.columns.to_list()[::2]  
class_labels = [label.replace('_x', '').replace('_y', '') for label in columns] 
keypoints = motions.to_numpy().reshape(-1,24,2) 
keypoints.shape

(4195, 24, 2)

In [11]:


# Data augmentation and normalization for training with Albumentations
A_transforms = {
    'train':
        A.Compose([
            A.augmentations.transforms.PadIfNeeded(min_height=1920, min_width=1920,border_mode=0),
            A.Resize(height, width, always_apply=True),
            A.OneOf([A.HorizontalFlip(p=1),
                     A.RandomRotate90(p=1),
                     A.VerticalFlip(p=1)            
            ], p=0.5),
            A.OneOf([A.MotionBlur(p=1),
                     A.GaussNoise(p=1)                 
            ], p=0.5),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensor()],\
            keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=True, angle_in_degrees=True)),
    'val':
        A.Compose([ 
            A.augmentations.transforms.PadIfNeeded(min_height=1920, min_width=1920,border_mode=0),
            A.Resize(height, width, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensor()
        ], keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=True, angle_in_degrees=True)),
    
    'test':
        A.Compose([ 
            A.augmentations.transforms.PadIfNeeded(min_height=1920, min_width=1920,border_mode=0),
            A.Resize(height, width, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensor()
        ])
    ,
    'no_tf':
        A.Compose([
            A.augmentations.transforms.PadIfNeeded(min_height=1920, min_width=1920,border_mode=0),
            A.Resize(height, width, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensor()],\
            keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=True, angle_in_degrees=True)),
   
}


In [12]:

class Dataset(torch.utils.data.Dataset):
    """__init__ and __len__ functions are the same as in TorchvisionDataset"""
    def __init__(self, 
                 data_dir : "image directory",
                 imgs : "x", 
                 keypoints : "y", 
                 phase : "train or val", 
                 class_labels : "name of labels"=None , 
                 data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.keypoints = keypoints
        self.phase = phase
        self.class_labels = class_labels
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        # Read an image with OpenCV
        img = cv2.imread(os.path.join(self.data_dir, self.imgs[idx]))
        keypoints = self.keypoints[idx]
    
        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img, keypoints=keypoints, class_labels=self.class_labels)
            img = augmented['image']
            keypoints = augmented['keypoints']
        keypoints = np.array(keypoints).flatten()

        return img, keypoints
    
    def __len__(self):
        return len(self.imgs)

#Model
keras의 padding same 을 활용하기 때문에 conv 및 maxpool 후 같은 크기로 만들어 준다. 

conv output = (input + 2p -k) / s + 1
if k = 3 -> p = 1 , s = 1
if k = 5 -> p = 2, s = 1

maxpool output = (input + 2p - k) /s + 1

In [13]:
class SoftGateModel(nn.Module):
    def __init__(self, width, height, num_classes):
        super(SoftGateModel, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64,64, 3, padding=1),
            nn.ReLU(),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 1, 3, padding=1),
            nn.ReLU(),
        )
        self.pool = nn.MaxPool2d(2)  
        self.upsample = nn.Upsample(scale_factor=2)
        self.relu = nn.ReLU()
        self.decode = nn.Sequential(
          self.conv2,
          self.relu,
          self.upsample
      )
        self.fc = nn.Linear(width * height, num_classes)
        
        self.width = width
        self.height = height
    def forward(self, x):
        # x = self.pool(F.relu(self.conv1(x))) 
      x1 = self.conv1(x) 
      x2 = self.conv2(self.pool(x1))
      x3 = self.conv2(self.pool(x2))
      x4 = self.conv2(self.pool(x3))
      encoded = self.pool(x4)

      #decoding
      u0 = self.decode(encoded)
      w  = torch.add(u0,self.conv2(x4))

      u1 = self.decode(w)   
      w  = torch.add(u1, self.conv2(x3))

      u2 = self.decode(w)  
      w  = torch.add(u2, self.conv2(x2))

      u3 = self.decode(w)  
      w  = torch.add(u3, self.conv2(x1))

      decoded = self.conv3(w) 
      out = decoded.view(-1, self.width * self.height)  
      out = self.fc(out)

      return out


In [14]:
model = SoftGateModel(width, height, num_classes).to(device)
from torchinfo import summary
# summary(model, (16, 3,512,512))

In [15]:

# Setup the loss fxn
criterion = nn.MSELoss()
# Observe that all parameters are being optimized
optimizer = optim.Adam(model.parameters(), lr=learning_rate) 

X_train, X_val, y_train, y_val = train_test_split(imgs, keypoints, test_size=1/num_splits, random_state=random_seed)

train_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='train')
no_tf_train_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='train')
val_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='val')


train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
no_tf_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)

dataloaders = {'train': train_loader, 'val': val_loader, 'no_tf': no_tf_loader} 

#Train

In [17]:


def train_model(model, dataloaders, criterion, optimizer, earlystop=0, num_epochs=25):
    since = time.time()
    
    val_acc_history = []
    val_loss_history = []
    earlystop_value = 0

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0
    best_loss = 999999999
    
    #scheduler
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, 
                                              T_mult=1, eta_min=0.00001)
    for epoch in range(num_epochs):
        epoch_since = time.time()
        if earlystop and earlystop_value >= earlystop:
            break

        print('Epoch {}/{}'.format(epoch + 1, num_epochs), end=' ')

        # Each epoch has a training and validation phase
        for phase in ['train', 'val','no_tf']:
            if phase == 'val':
                model.eval()   # Set model to evaluate mode 
            else:
                model.train()  # Set model to training mode
        
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for i,(inputs, labels) in enumerate(dataloaders[phase]): 
                inputs = inputs.to(device)
                labels = labels.to(device) 
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train' or phase == 'no_tf'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    outputs = model(inputs) 
                    loss = criterion(outputs.float(), labels.float())

                    # backward + optimize only if in training phase
                    if phase == 'train' or phase == 'no_tf':
                        if i % 30 == 0:
                          print('#',end='')
                        loss.backward()
                        optimizer.step()
                        scheduler.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                # for regression
                running_corrects += torch.sum(outputs == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            
            epoch_time_elapsed = time.time() - epoch_since
            print('{} ({}) Loss: {:.4f} Elapsed time: {:.0f}m {:.0f}s'.format(
                phase, len(dataloaders[phase].dataset), epoch_loss, epoch_time_elapsed // 60, epoch_time_elapsed % 60))
              
            # deep copy the model
            if phase == 'val':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    earlystop_value = 0
                    torch.save(model.state_dict(), f'{prefix_dir}/models/{model_name}_val.pt')
                else:
                    earlystop_value += 1
                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best validation Acc: {:4f}\n'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, {'acc': val_acc_history, 'loss': val_loss_history}


In [None]:
since = time.time()

# Train and evaluate
model, hists = train_model(
    model, dataloaders, criterion, optimizer,
    num_epochs=num_epochs, earlystop=num_earlystop)

torch.save(model.state_dict(), f'{prefix_dir}/models/{model_name}_best_model.pt')
time_elapsed = time.time() - since

print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))


Epoch 1/30 #################train (3985) Loss: 2707.0461 Elapsed time: 31m 16s
val (210) Loss: 1423.6858 Elapsed time: 32m 45s
#################no_tf (3985) Loss: 1083.5811 Elapsed time: 39m 36s
Epoch 2/30 #################train (3985) Loss: 734.9625 Elapsed time: 6m 47s
val (210) Loss: 416.2091 Elapsed time: 6m 58s
#################no_tf (3985) Loss: 627.4568 Elapsed time: 13m 43s
Epoch 3/30 #################train (3985) Loss: 521.9095 Elapsed time: 6m 45s
val (210) Loss: 281.1134 Elapsed time: 6m 56s
#################no_tf (3985) Loss: 450.2137 Elapsed time: 13m 43s
Epoch 4/30 #################train (3985) Loss: 335.4170 Elapsed time: 6m 51s
val (210) Loss: 213.2727 Elapsed time: 7m 1s
#################no_tf (3985) Loss: 363.4924 Elapsed time: 13m 49s
Epoch 5/30 #################train (3985) Loss: 295.0937 Elapsed time: 6m 47s
val (210) Loss: 175.9058 Elapsed time: 6m 57s
#################no_tf (3985) Loss: 273.2380 Elapsed time: 13m 43s
Epoch 6/30 #################train (3985) Loss:

In [None]:
plt.figure(1)
plt.plot(hists.acc)
# plt.xlabel
plt.title('validation accuracy')

plt.figure(2)
plt.plot(hists.loss)
# plt.xlabel
plt.title('validation loss')

plt.show()

In [None]:


def evaluate(model, dataloaders, criterion, optimizer):
    since = time.time()
    
    val_acc_history = []
    val_loss_history = []
    earlystop_value = 0
  

    model.eval()   # Set model to evaluate mode
        
    running_loss = 0.0
    running_corrects = 0
    # Iterate over data.
    for inputs, labels in tqdm(dataloaders): 
        inputs = inputs.to(device)
        labels = labels.to(device) 
        # zero the parameter gradients
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs.float(), labels.float())

            # statistics
        running_loss += loss.item() * inputs.size(0)
        # for regression
        running_corrects += torch.sum(outputs == labels.data) 

        epoch_loss = running_loss / len(dataloaders.dataset)
        epoch_acc = running_corrects.double() / len(dataloaders.dataset)
        
        print(f'loss {epoch_loss} acc {epoch_acc}') 

 evaluate(
    model, dataloaders, criterion, optimizer)


#Test

In [None]:
test_dir = f'{prefix_dir}/test_imgs'
test_imgs = os.listdir(test_dir)
print(len(test_imgs), device)
model = SoftGateModel(width, height, num_classes).to(device) 
model.load_state_dict(torch.load(f'{prefix_dir}/models/{model_name}_best_model.pt',map_location=device), strict=False) 
model.eval() 

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """__init__ and __len__ functions are the same as in TorchvisionDataset"""
    def __init__(self, data_dir, imgs, phase, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.phase = phase
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        filename = self.imgs[idx]
        # Read an image with OpenCV
        img = cv2.imread(os.path.join(self.data_dir, self.imgs[idx]))

        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img)
            img = augmented['image']
        return filename, img
    
    def __len__(self):
        return len(self.imgs)

In [None]:
test_data = TestDataset(test_dir, test_imgs, data_transforms=A_transforms, phase='test')
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size * 4, shuffle=False)

In [None]:
all_predictions = []
files = []
with torch.no_grad():
    for filenames, inputs in tqdm(test_loader):
        predictions = list(model(inputs.to(device)).cpu().numpy())
        files.extend(filenames)
        all_predictions.extend(predictions)
all_predictions = np.array(all_predictions)
for i in range(all_predictions.shape[0]):
    all_predictions[i, [2*j for j in range(num_classes//2)]] /= width / 1920
    all_predictions[i, [2*j + 1 for j in range(num_classes//2)]] /= height / 1080


#Submission

In [None]:
df_sub = pd.read_csv(f'{prefix_dir}/sample_submission.csv')
df = pd.DataFrame(columns=df_sub.columns)
df['image'] = files
df.iloc[:, 1:] = all_predictions
df.head()

In [None]:
df.to_csv(f'{prefix_dir}/submission_{model_name}.csv', index=False)