In [None]:
# import package

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchsummary import summary


from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import os

from torchvision import utils
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau

import numpy as np
import time
import copy

from tqdm import tqdm

## hyperparams

In [None]:
# Mini-batch size shared by the training and validation DataLoaders.
batch_size = 128

## dataloader

In [None]:
# CIFAR-10 datasets and loaders (train_ds / train_dl, val_ds / val_dl).
# The only preprocessing is conversion of each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Training split: downloaded into ./data on first use, reshuffled every epoch.
train_ds = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Held-out split: same preprocessing, iterated in a fixed order.
val_ds = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

In [None]:
# Depthwise separable convolution (depthwise 3x3 followed by pointwise 1x1),
# the building block used to assemble the MobileNet variants in this notebook.
class depth_point(nn.Module):
    """Depthwise 3x3 conv -> BN -> ReLU6, then pointwise 1x1 conv -> BN -> ReLU6.

    Args:
        input_size: number of input channels.
        output_size: number of channels produced by the pointwise conv.
        stride: stride of the depthwise conv (default 1).
    """

    def __init__(self, input_size, output_size, stride=1):
        super().__init__()

        # groups == in_channels gives every input channel its own 3x3 filter.
        per_channel_conv = nn.Conv2d(
            in_channels=input_size,
            out_channels=input_size,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=input_size,
        )
        self.depthwise = nn.Sequential(
            per_channel_conv,
            nn.BatchNorm2d(input_size),
            nn.ReLU6(),
        )

        # A 1x1 conv mixes the channels and sets the output width.
        channel_mixing_conv = nn.Conv2d(
            in_channels=input_size,
            out_channels=output_size,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.pointwise = nn.Sequential(
            channel_mixing_conv,
            nn.BatchNorm2d(output_size),
            nn.ReLU6(),
        )

    def forward(self, x):
        """Apply the depthwise stage, then the pointwise stage."""
        return self.pointwise(self.depthwise(x))


## shufflenet

In [None]:
# Channel shuffle and group convolution ---------------------------------
# Channel shuffle, applied after the first group convolution.
def channel_shuffle(x, groups):
    """Interleave the channels of ``x`` across ``groups`` channel groups.

    The channel axis is viewed as (groups, channels_per_group), the two axes
    are swapped, and the result is flattened back to a single channel axis.

    Args:
        x: 4-D tensor unpacked as (batch, channels, height, width).
        groups: number of groups; must divide the channel count.

    Returns:
        A tensor with the same shape as ``x`` and permuted channels.

    Raises:
        AssertionError: if the channel count is not divisible by ``groups``.
    """
    n, c, h, w = x.size()
    assert (c % groups == 0), ('num_channels should be '
                               'divisible by groups')
    per_group = c // groups

    grouped = x.view(n, groups, per_group, h, w)
    # Swap the group axis with the within-group axis, then make the memory
    # contiguous so the final view is legal.
    swapped = grouped.permute(0, 2, 1, 3, 4).contiguous()
    return swapped.view(n, -1, h, w)

# Group convolution layer: 3x3 conv with groups fixed to 8, then BatchNorm and ReLU6.
class g_conv(nn.Module):
    """Grouped 3x3 convolution (8 groups) -> BatchNorm -> ReLU6.

    Args:
        input_size: number of input channels.
        output_size: number of output channels.
        stride: stride of the convolution (default 1).
    """

    def __init__(self, input_size, output_size, stride=1):
        super().__init__()

        grouped_conv = nn.Conv2d(
            in_channels=input_size,
            out_channels=output_size,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=8,
        )
        self.gconv = nn.Sequential(
            grouped_conv,
            nn.BatchNorm2d(output_size),
            nn.ReLU6(),
        )

    def forward(self, x):
        """Run the grouped conv stack on ``x``."""
        return self.gconv(x)
#-----------------------------------------------------------------------
# MobileNetV1 with ShuffleNet-style group convolution and channel shuffle
class MobileNetv4(nn.Module):
    """Shortened MobileNet whose downsampling stages are grouped convs (``g_conv``).

    Pipeline: stem conv -> g_conv (32 -> 512) -> channel shuffle ->
    eight 512-channel ``depth_point`` blocks -> g_conv (512 -> 1024) ->
    global average pool -> flatten.

    The ``fc`` classifier is constructed but its call in ``forward`` is
    commented out, so the model returns the flattened pooled features.

    NOTE(review): ``g_conv`` hard-codes groups=8 while the shuffle uses
    ``group`` (default 2) -- confirm this mismatch is intended.

    Args:
        group: number of groups handed to ``channel_shuffle``.
    """

    def __init__(self, group=2):
        super().__init__()
        self.group = group

        stem_layers = [
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        ]
        self.init = nn.Sequential(*stem_layers)

        # A single g_conv replaces the depthwise + pointwise pair here.
        self.conv4 = g_conv(32, 512, stride=2)

        # Eight identical 512 -> 512 depthwise separable blocks.
        self.conv5 = nn.Sequential(*[depth_point(512, 512) for _ in range(8)])

        # A single g_conv replaces the depthwise + pointwise pair here as well.
        self.conv6 = nn.Sequential(g_conv(512, 1024, stride=2))

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, 10)

    def forward(self, x):
        """Return the flattened, globally pooled feature map for ``x``."""
        feats = self.conv4(self.init(x))
        # Shuffle channels right after the first group convolution.
        feats = channel_shuffle(feats, self.group)
        feats = self.conv6(self.conv5(feats))
        pooled = self.pool(feats)
        # The classifier call (self.fc) is intentionally left disabled.
        return pooled.view(pooled.size(0), -1)

## resnet

In [None]:
# Depthwise separable convolution whose depthwise stage is dilated (dilation=2).
class depth_point2(nn.Module):
    """Dilated depthwise 3x3 conv -> BN -> ReLU6, then pointwise 1x1 conv -> BN -> ReLU6.

    Args:
        input_size: number of input channels.
        output_size: number of channels produced by the pointwise conv.
        padding: padding of the dilated depthwise conv (required, no default).
        stride: stride of the depthwise conv (default 1).
    """

    def __init__(self, input_size, output_size, padding, stride=1):
        super().__init__()

        # One dilated 3x3 filter per input channel.
        dilated_conv = nn.Conv2d(
            in_channels=input_size,
            out_channels=input_size,
            kernel_size=3,
            stride=stride,
            padding=padding,
            dilation=2,
            groups=input_size,
        )
        self.depthwise = nn.Sequential(
            dilated_conv,
            nn.BatchNorm2d(input_size),
            nn.ReLU6(),
        )

        # A 1x1 conv mixes the channels and sets the output width.
        mixing_conv = nn.Conv2d(
            in_channels=input_size,
            out_channels=output_size,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.pointwise = nn.Sequential(
            mixing_conv,
            nn.BatchNorm2d(output_size),
            nn.ReLU6(),
        )

    def forward(self, x):
        """Apply the dilated depthwise stage, then the pointwise stage."""
        return self.pointwise(self.depthwise(x))

# MobileNetV1 with ResNet-style additive shortcuts
class MobileNetv1(nn.Module):
    """MobileNetV1 backbone with an additive shortcut around every stage.

    Where a stage changes the channel count or the resolution, its shortcut is
    a 1x1 conv (``convd*``) followed by average pooling; where the shape is
    unchanged the shortcut is the identity.

    The ``fc`` classifier is constructed but its call in ``forward`` is
    commented out, so the model returns the flattened 1024-dimensional pooled
    features.

    The identity shortcut around ``conv7`` (a stride-2 block) only lines up
    when its input is already 1x1, which is the case for 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        
        self.init = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1, dilation=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.conv1 = depth_point(32, 64)

        self.conv2 = nn.Sequential(
            depth_point(64, 128 , stride=2),
            depth_point(128, 128)
        )

        self.conv3 = nn.Sequential(
            depth_point(128, 256, stride=2),
            depth_point(256, 256)
        )
        
        self.conv4 = depth_point(256, 512, stride=2)
        
        self.conv5 = nn.Sequential(
            depth_point(512, 512),
            depth_point(512, 512),
            depth_point(512, 512),
            depth_point(512, 512),
            depth_point(512, 512),
        )

        self.conv6 = nn.Sequential(
            depth_point(512, 1024, stride=2)
        )

        self.conv7 = nn.Sequential(
            depth_point(1024, 1024, stride=2)
        )

        # 1x1 projection convs for the shortcuts: each one matches the channel
        # count of the stage it bypasses.
        self.convd1 = nn.Conv2d(3, 32, 1, stride=1, padding=0, dilation=1)      ## channel
        # Halves the resolution on shortcuts that bypass a stride-2 stage.
        self.pool1 = nn.AvgPool2d(2,2)
      
        self.convd2 = nn.Conv2d(32, 64, 1, stride=1, padding=0, dilation=1)      
        # Kernel 1 / stride 1 pooling leaves the tensor unchanged (conv1 keeps the resolution).
        self.pool2 = nn.AvgPool2d(1,1)

        self.convd3 = nn.Conv2d(64, 128, 1, stride=1, padding=0, dilation=1)      

        self.convd4 = nn.Conv2d(128, 256, 1, stride=1, padding=0, dilation=1)
        self.convd5 = nn.Conv2d(256, 512, 1, stride=1, padding=0, dilation=1)
        self.convd7 = nn.Conv2d(512, 1024, 1, stride=1, padding=0, dilation=1)

        self.pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(1024, 10)

    def forward(self, x):
        """Return the flattened pooled features, shape (batch, 1024)."""
        x1 = self.init(x) + self.pool1(self.convd1(x))
        x2 = self.conv1(x1) + self.pool2(self.convd2(x1))
        x3 = self.conv2(x2) + self.pool1(self.convd3(x2))
        x4 = self.conv3(x3) + self.pool1(self.convd4(x3))
        x5 = self.conv4(x4) + self.pool1(self.convd5(x4))       
        x6 = self.conv5(x5) + x5
        x7 = self.conv6(x6) + self.pool1(self.convd7(x6)) 
        x8 = self.conv7(x7) + x7
        x9 = self.pool(x8)
        x10 = x9.view(x9.size(0), -1)
        # x11 = self.fc(x10)
        # The classifier is disabled above, so the features are the output.
        # Returning the never-assigned x11 raised NameError on every call.
        return x10


## densenet

In [None]:
# Depthwise separable convolution with a dilated (dilation=2) depthwise stage.
# NOTE: this repeats the depth_point2 definition from the resnet cell.
class depth_point2(nn.Module):
    """Dilated depthwise 3x3 conv -> BN -> ReLU6, then pointwise 1x1 conv -> BN -> ReLU6.

    Args:
        input_size: number of input channels.
        output_size: number of channels produced by the pointwise conv.
        padding: padding of the dilated depthwise conv (required, no default).
        stride: stride of the depthwise conv (default 1).
    """

    def __init__(self, input_size, output_size, padding, stride=1):
        super().__init__()

        # Depthwise stage: groups == in_channels, dilation fixed to 2.
        depthwise_layers = [
            nn.Conv2d(
                input_size,
                input_size,
                kernel_size=3,
                stride=stride,
                padding=padding,
                dilation=2,
                groups=input_size,
            ),
            nn.BatchNorm2d(input_size),
            nn.ReLU6(),
        ]
        self.depthwise = nn.Sequential(*depthwise_layers)

        # Pointwise stage: 1x1 conv that sets the output channel count.
        pointwise_layers = [
            nn.Conv2d(input_size, output_size, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(output_size),
            nn.ReLU6(),
        ]
        self.pointwise = nn.Sequential(*pointwise_layers)

    def forward(self, x):
        """Apply the dilated depthwise stage, then the pointwise stage."""
        hidden = self.depthwise(x)
        return self.pointwise(hidden)



# MobileNetV1 with DenseNet-style concatenated shortcuts
class MobileNetv2(nn.Module):
    """MobileNetV1 backbone with dense (concatenation) connectivity.

    Each stage's output is concatenated along the channel axis with
    average-pooled copies of earlier feature maps and of the raw input; a 1x1
    conv (``convd*``) then reduces the concatenation to the stage width. The
    ``in_channels`` of every ``convd*`` is the sum of the concatenated widths
    (e.g. 35 = 32 + 3).

    The ``fc`` classifier is constructed but its call in ``forward`` is
    commented out, so the model returns the flattened 1024-dimensional pooled
    features.

    The pooling factors (2, 4, 8, 16, 32) bring the earlier maps to the
    resolution of each stage for 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        
        self.init = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1, dilation=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.conv1 = depth_point(32, 64)

        self.conv2 = nn.Sequential(
            depth_point(64, 128 , stride=2),
        )

        self.conv3 = nn.Sequential(
            depth_point(128, 256, stride=2),
        )
        
        self.conv4 = depth_point(256, 512, stride=2)
        
        self.conv5 = nn.Sequential(
            depth_point(512, 512),
            depth_point(512, 512),
            depth_point(512, 512),
            depth_point(512, 512),
            depth_point(512, 512)
        )

        self.conv6 = nn.Sequential(
            depth_point(512, 1024, stride=2)
        )

        self.conv7 = nn.Sequential(
            depth_point(1024, 1024, stride=2)
        )

        # 1x1 reduction convs applied to each concatenation; poolN downsamples
        # by 2, 4, 8, 16 and 32 respectively.
        self.convd1 = nn.Conv2d(35, 32, 1, stride=1, padding=0, dilation=1)      
        self.pool1 = nn.AvgPool2d(2,2)
         
        self.convd2 = nn.Conv2d(99, 64, 1, stride=1, padding=0, dilation=1)      
        self.pool2 = nn.AvgPool2d(4,4)

        self.convd3 = nn.Conv2d(163, 128, 1, stride=1, padding=0, dilation=1)      
        self.pool3 = nn.AvgPool2d(8,8)

        self.convd4 = nn.Conv2d(419, 256, 1, stride=1, padding=0, dilation=1)
        self.convd5 = nn.Conv2d(931, 512, 1, stride=1, padding=0, dilation=1)

        self.pool4= nn.AvgPool2d(16,16)
        self.convd6 = nn.Conv2d(1443, 512, 1, stride=1, padding=0, dilation=1)

        self.convd7 = nn.Conv2d(1955, 1024, 1, stride=1, padding=0, dilation=1)
        self.pool5= nn.AvgPool2d(32,32)
        self.convd8 = nn.Conv2d(2979, 1024, 1, stride=1, padding=0, dilation=1)
        self.pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(1024, 10)

    def forward(self, x):
        """Return the flattened pooled features, shape (batch, 1024)."""
        x1 = self.convd1(torch.cat([self.init(x) , self.pool1(x)], dim=1))
        x2 = self.convd2(torch.cat([self.conv1(x1), x1, self.pool1(x)], dim =1))
        x3 = self.convd3(torch.cat([self.conv2(x2), self.pool1(x1), self.pool2(x)], dim=1))
        x4 = self.convd4(torch.cat([self.conv3(x3), self.pool1(x3), self.pool2(x1), self.pool3(x)], dim=1))
        x5 = self.convd5(torch.cat([self.conv4(x4), self.pool1(x4), self.pool2(x3), self.pool3(x1), self.pool4(x)], dim=1))
        x6 = self.convd6(torch.cat([self.conv5(x5), x5 , self.pool1(x4), self.pool2(x3), self.pool3(x1), self.pool4(x)], dim=1))
        x7 = self.convd7(torch.cat([self.conv6(x6) , self.pool1(x5), self.pool2(x4), self.pool3(x3), self.pool4(x1), self.pool5(x)], dim=1))
        x8 = self.convd8(torch.cat([self.conv7(x7), x7 , self.pool1(x5), self.pool2(x4), self.pool3(x3), self.pool4(x1), self.pool5(x) ], dim=1))
        x9 = self.pool(x8)
        x10 = x9.view(x9.size(0), -1)
        # x11 = self.fc(x10)
        # The classifier is disabled above, so the features are the output.
        # Returning the never-assigned x11 raised NameError on every call.
        return x10
    

## resnext

In [None]:
# A single unit (branch) of the ResNeXt block applied to MobileNet
class BottleNeck(nn.Module):
    """One branch of the ResNeXt-style block: five stacked ``depth_point`` layers.

    The block replaces the part of MobileNet made of five consecutive
    (depthwise + pointwise) convs with 512 channels. With 32 groups, every
    layer of a unit works on 16 channels except the first, which receives the
    full 512-channel input.

    Args:
        in_planes: channels entering the first layer.
        inner_planes: channels used by every subsequent layer and the output.
        group: accepted for signature compatibility; not used by this class.
    """

    def __init__(self, in_planes,inner_planes, group = 32):
        super().__init__()
        # The first layer maps in_planes -> inner_planes; the remaining four
        # keep the width at inner_planes.
        layers = [depth_point(in_planes, inner_planes, stride=1)]
        layers += [depth_point(inner_planes, inner_planes, stride=1) for _ in range(4)]
        self.dw = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the five stacked layers."""
        return self.dw(x)

# ResNeXt block applied to MobileNet (32 groups by default)
class MobileNetBottleNeck(nn.Module):
    """ResNeXt-style block: ``group`` parallel ``BottleNeck`` branches plus a skip.

    Every branch receives the full input; the branch outputs are concatenated
    along the channel axis and the input is added back. For the addition to
    be valid, ``group * inner_planes`` must equal ``in_planes``.

    The branches are held in an ``nn.ModuleList`` so that they are registered
    as submodules. With a plain Python list their parameters are invisible to
    ``parameters()`` / ``state_dict()``, and ``.to(device)``, ``.eval()`` and
    ``.train()`` on the parent never reach them. The block is also no longer
    pinned to ``"cuda"``; it follows the device of the parent module.

    Args:
        in_planes: channels of the input (and of the output).
        inner_planes: channels produced by each branch.
        out_plane: accepted for signature compatibility; not used.
        group: number of parallel branches.
    """

    def __init__(self, in_planes=512, inner_planes=16, out_plane = 256, group = 32):
        super(MobileNetBottleNeck, self).__init__()
        self.group = group
        self.bottleneck_list = nn.ModuleList(
            [BottleNeck(in_planes, inner_planes, group) for _ in range(self.group)]
        )

    def forward(self, x):
        """Concatenate the output of every branch and add the skip connection."""
        # Each branch is an independent unit applied to the same input.
        out = [branch(x) for branch in self.bottleneck_list]
        out = torch.cat(out, dim=1)     # concatenate the branch outputs
        out += x                        # skip connection
        return out

# MobileNetV1 with a ResNeXt-style block in place of the 512-channel stage
class MobileNetv3(nn.Module):
    """MobileNetV1 backbone whose ``conv5`` stage is a ``MobileNetBottleNeck``.

    The ``fc`` classifier is constructed but its call in ``forward`` is
    commented out, so the model returns the flattened pooled features.
    """

    def __init__(self):
        super().__init__()

        self.init = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )
        self.conv1 = depth_point(32, 64)

        # Downsampling stages (the first block of each has stride 2).
        self.conv2 = nn.Sequential(depth_point(64, 128, stride=2), depth_point(128, 128))
        self.conv3 = nn.Sequential(depth_point(128, 256, stride=2), depth_point(256, 256))
        self.conv4 = depth_point(256, 512, stride=2)

        # ResNeXt-style block: 32 branches of 16 channels on a 512-channel input.
        self.conv5 = MobileNetBottleNeck(512, 16, 512, 32)

        # Two further downsampling stages.
        self.conv6 = nn.Sequential(depth_point(512, 1024, stride=2))
        self.conv7 = nn.Sequential(depth_point(1024, 1024, stride=2))

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, 10)

    def forward(self, x):
        """Return the flattened, globally pooled feature map for ``x``."""
        pipeline = (
            self.init,
            self.conv1,
            self.conv2,
            self.conv3,
            self.conv4,
            self.conv5,
            self.conv6,
            self.conv7,
            self.pool,
        )
        for stage in pipeline:
            x = stage(x)
        # The classifier call (self.fc) is intentionally left disabled.
        return x.view(x.size(0), -1)

## load checkpoints

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [None]:
# Checkpoint file paths for the four models (left empty here).
# NOTE(review): no cell visible in this notebook loads these paths, so the
# models appear to be used with freshly initialised weights -- confirm
# whether a checkpoint-loading step is missing.
model1_chkp_pth = ""
model2_chkp_pth = ""
model3_chkp_pth = ""
model4_chkp_pth = ""

In [None]:
# Instantiate the four MobileNet variants and move each one to `device`.
# The construction order is kept as written because it determines the order
# in which random initial weights are drawn.
model2 = MobileNetv2().to(device)
model1 = MobileNetv1().to(device)
model3 = MobileNetv3().to(device)
model4 = MobileNetv4().to(device)


In [None]:
def get_ypred(trainloader,model, device):
    """Run ``model`` over every batch of ``trainloader`` and collect its outputs.

    The model is switched to evaluation mode and the forward passes run
    without gradient tracking.

    Args:
        trainloader: iterable yielding ``(inputs, labels)`` pairs; the labels
            are ignored.
        model: module called on each batch of inputs.
        device: device the inputs are moved to before the forward pass.

    Returns:
        A list with one NumPy array per batch, each holding the model output
        for that batch (moved to the CPU). Use ``np.concatenate`` on the list
        to obtain a single ``(num_samples, ...)`` array.
    """
    ypred_list = []
    model.eval()
    # Inference only: no autograd graph is needed, which saves memory.
    with torch.no_grad():
        for inputs, _ in tqdm(trainloader):
            inputs = inputs.to(device)

            # Keep the whole batch output. `.item()` only converts
            # single-element tensors and raised on (batch, features) outputs.
            outputs = model(inputs).detach().cpu().numpy()
            ypred_list.append(outputs)

    return ypred_list

        


## ensemble w/ xgboost

In [None]:
import xgboost as xgb
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score

In [None]:

# Extract features from each model for the ensemble.
# train_dl reshuffles on every pass, so running the four models over it would
# visit the samples in four different orders and the resulting rows could not
# be aligned with each other (or with the labels). Use a fixed-order loader
# over the same training set instead.
feature_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=False)

ypred_model1 = get_ypred(feature_dl, model1, device) # bs, 1024
ypred_model2 = get_ypred(feature_dl, model2, device) # bs, 1024
ypred_model3 = get_ypred(feature_dl, model3, device) # bs, 1024
ypred_model4 = get_ypred(feature_dl, model4, device) # bs, 1024


In [None]:
# XGBoost classifier for the ensemble stage.
# NOTE(review): the objective is 'binary:logistic' while CIFAR-10 has 10
# classes -- confirm whether a multi-class objective (e.g. 'multi:softprob')
# or a per-label wrapper such as the imported MultiOutputClassifier is intended.
xgb_estimator = xgb.XGBClassifier(objective='binary:logistic')