In [7]:
import glob
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader,random_split,SubsetRandomSampler, ConcatDataset
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torchvision.utils import make_grid
from torchsummary import summary
import tqdm



In [8]:
# Mount Google Drive (Colab-only API) so the files stored there become reachable.
from google.colab import drive
drive.mount('/gdrive')
# Switch the working directory: later cells read the .npy inputs and write the
# checkpoint / result files through relative paths.
%cd /gdrive/MyDrive/challenge
# Echo the working directory as a quick confirmation.
!pwd

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
/gdrive/MyDrive/challenge
/gdrive/MyDrive/challenge


In [9]:
# !pip install matplotlib torchvision torchsummary pandas
# !pip list | grep torch
# !pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116

In [10]:
# Run configuration shared by the rest of the notebook.
kwargs = {}  # kept for compatibility; not read by any cell shown in this notebook
args = {
    'batch_size': 64,        # mini-batch size for the training loader
    'test_batch_size': 64,   # mini-batch size intended for evaluation loaders
    'epochs': 250,           # number of full passes over the training split
    'lr': 0.01,              # nominal learning rate (step size of the descent)
    'momentum': 0.5,         # SGD momentum: moving average of the gradients
    'seed': 1,               # random seed
}
args['log_interval'] = 100 // args['batch_size']
# Derive the flag from the runtime instead of hard-coding False, which
# contradicted the notebook actually running on a GPU.
args['cuda'] = torch.cuda.is_available()

# Apply the configured seed: it was declared but never used, so weight
# initialisation, augmentation and loader shuffling differed on every run.
torch.manual_seed(args['seed'])


In [11]:
#load npy files
# Training inputs, training labels and the unlabeled test inputs, read from the
# current working directory.
train_dataset = np.load('trainset.npy')
train_label = np.load('trainlabel.npy')
test_dataset = np.load('testset.npy')

# The recorded run printed (50000, 32, 32, 3) here — presumably
# (samples, height, width, channels); TODO confirm the axis order.
print(train_dataset.shape)


(50000, 32, 32, 3)


In [12]:
# Pair every sample with its label as [sample, label] so the two stay together
# when the list is shuffled and split later on.
train_data = [[data, train_label[idx]] for idx, data in enumerate(train_dataset)]

In [13]:
# Function: Split Data
def shuffle_train_valid_split(X, test_size=0.2, shuffle=True, random_state=1004):
    """Split a sequence into a training part and a validation part.

    The last ``int(len(X) * test_size)`` items (after optional shuffling) form
    the validation part; everything before them is the training part.

    Unlike the previous version this neither reorders the caller's ``X`` nor
    reseeds NumPy's global random generator. For a given ``random_state`` the
    resulting split is the same as ``np.random.seed(random_state)`` followed by
    ``np.random.shuffle(X)`` would have produced.

    Args:
        X: list or NumPy array to split along its first axis. It must support
            ``len``, slicing and ``.copy()``.
        test_size: fraction of items placed in the validation part, in [0, 1].
        shuffle: shuffle a copy of ``X`` before splitting when True.
        random_state: seed for the private generator used for shuffling.

    Returns:
        Tuple ``(X_train, X_valid)``.

    Raises:
        ValueError: if ``test_size`` is outside ``[0, 1]``.
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            "test_size must be within [0, 1], but got {}".format(test_size))

    test_num = int(len(X) * test_size)
    train_num = len(X) - test_num

    if shuffle:
        # Work on a shallow copy so the caller's container keeps its order.
        X = X.copy()
        # A dedicated RandomState leaves the global np.random stream untouched.
        np.random.RandomState(random_state).shuffle(X)

    return X[:train_num], X[train_num:]

In [14]:
# Hold out a validation split using the function's defaults
# (test_size=0.2, shuffle=True, random_state=1004).
train_data, valid_data = shuffle_train_valid_split(train_data)

# Number of samples left for training.
print(len(train_data))

40000


In [15]:
# torch.manual_seed(42)


# dataset = ConcatDataset([train_data,valid_data])

# num_epochs=10
# batch_size=128
# k=10
# splits=KFold(n_splits=k,shuffle=True,random_state=42)
# foldperf={}

# print(len(train_data))
# print(len(valid_data))

In [16]:
#custom dataset 
class CustomDataset(Dataset):
    """Map-style dataset over ``[sample, label]`` pairs.

    Args:
        input: iterable of pairs; element 0 of each pair is the sample and
            element 1 is its label.
        transform: optional callable applied to the sample (never to the
            label) every time an item is fetched.
    """

    def __init__(self, input, transform=None):
        super().__init__()
        self.x_data = [pair[0] for pair in input]
        self.y_data = [pair[1] for pair in input]
        self.transform = transform

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` at ``idx``, with the transform applied to the sample."""
        sample, label = self.x_data[idx], self.y_data[idx]
        if not self.transform:
            return sample, label
        return self.transform(sample), label
        

In [17]:
#custom dataset 
class testDataset(Dataset):
    """Map-style dataset over unlabeled samples.

    Args:
        input: indexable, sized collection of samples; stored as given
            (not copied).
        transform: optional callable applied to a sample when it is fetched.
    """

    def __init__(self, input, transform=None):
        super().__init__()
        self.x_data = input
        self.transform = transform

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return the sample at ``idx``, transformed when a transform is set."""
        sample = self.x_data[idx]
        return self.transform(sample) if self.transform else sample
        

In [18]:
# Per-channel normalisation constants shared by both pipelines.
# NOTE(review): these look like the widely used ImageNet mean/std values —
# confirm they are appropriate for this dataset.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: tensor conversion, then random flip / rotation /
# resized crop to 28 px, then normalisation.
_train_steps = [
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(28),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
transform_train = transforms.Compose(_train_steps)

# Evaluation pipeline: no random steps, only a 28 px centre crop so the
# spatial size matches what the training pipeline produces.
_test_steps = [
    transforms.ToTensor(),
    transforms.CenterCrop(28),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
transform_test = transforms.Compose(_test_steps)


In [19]:
# Wrap the splits in datasets: random augmentation for training only,
# the deterministic pipeline for validation and test.
traindata = CustomDataset(train_data, transform=transform_train)
validdata = CustomDataset(valid_data, transform=transform_test)
testdata = testDataset(test_dataset, transform=transform_test)


#data loader
# Only the training loader is shuffled. Validation metrics do not depend on
# batch order, and the test loader must keep the file order because the
# prediction ids are assigned by position.
train_loader = DataLoader(traindata, batch_size=args['batch_size'], shuffle=True)
# Evaluation loaders use the dedicated 'test_batch_size' setting, which was
# configured but previously never read.
valid_loader = DataLoader(validdata, batch_size=args['test_batch_size'], shuffle=False)
test_loader = DataLoader(testdata, batch_size=args['test_batch_size'], shuffle=False)


In [20]:
import torch
import torch.nn as nn


class DepthSeperabelConv2d(nn.Module):
    """Depthwise convolution followed by a 1x1 pointwise convolution.

    Each of the two convolutions is followed by BatchNorm2d and Mish.

    Args:
        input_channels: channels of the incoming feature map.
        output_channels: channels produced by the pointwise convolution.
        kernel_size: kernel size of the depthwise convolution.
        **kwargs: forwarded to the depthwise ``nn.Conv2d`` only (for example
            ``stride``, ``padding``, ``bias``); the pointwise convolution
            always uses ``nn.Conv2d`` defaults.
    """

    def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
        super().__init__()

        # groups == input_channels: every input channel gets its own filter.
        per_channel_conv = nn.Conv2d(
            input_channels, input_channels, kernel_size,
            groups=input_channels, **kwargs)
        self.depthwise = nn.Sequential(
            per_channel_conv, nn.BatchNorm2d(input_channels), nn.Mish())

        # The 1x1 convolution mixes channels and sets the output width.
        channel_mixer = nn.Conv2d(input_channels, output_channels, 1)
        self.pointwise = nn.Sequential(
            channel_mixer, nn.BatchNorm2d(output_channels), nn.Mish())

    def forward(self, x):
        """Apply the depthwise stage, then the pointwise stage."""
        return self.pointwise(self.depthwise(x))


class BasicConv2d(nn.Module):
    """Convolution -> BatchNorm2d -> Mish.

    Args:
        input_channels: channels of the incoming feature map.
        output_channels: channels produced by the convolution.
        kernel_size: convolution kernel size.
        **kwargs: forwarded to ``nn.Conv2d`` (for example ``padding``, ``bias``).
    """

    def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(
            input_channels, output_channels, kernel_size, **kwargs)
        self.bn = nn.BatchNorm2d(output_channels)
        # The attribute keeps its historical name although the activation is Mish.
        self.relu = nn.Mish()

    def forward(self, x):
        """Run the convolution, normalisation and activation in sequence."""
        return self.relu(self.bn(self.conv(x)))


class MobileNet(nn.Module):

    """MobileNet-style classifier assembled from depthwise-separable blocks.

    Args:
        width multipler: The role of the width multiplier α is to thin
                         a network uniformly at each layer. Every base
                         channel count C used below becomes int(C * α).
        class_num: number of outputs of the final linear layer.
    """

    def __init__(self, width_multiplier=1, class_num=100):
        super().__init__()

        alpha = width_multiplier

        def width(base):
            # Channel count after applying the width multiplier.
            return int(base * alpha)

        def separable(in_base, out_base, stride=1, padding=1):
            # 3x3 depthwise-separable block; the depthwise conv carries no bias.
            return DepthSeperabelConv2d(
                width(in_base), width(out_base), 3,
                stride=stride, padding=padding, bias=False)

        self.stem = nn.Sequential(
            BasicConv2d(3, width(32), 3, padding=1, bias=False),
            separable(32, 32),
        )

        # No stride here: the first block's unpadded 3x3 conv only trims the
        # map by 2 pixels per spatial dimension (28 -> 26 in the recorded summary).
        self.conv1 = nn.Sequential(
            separable(32, 32, padding=0),
            separable(32, 64),
        )

        # Same pattern as conv1: an unpadded stride-1 block, then a padded one.
        self.conv2 = nn.Sequential(
            separable(64, 64, padding=0),
            separable(64, 64),
        )

        # Strided downsampling, four same-width blocks, then widen to 128.
        self.conv3 = nn.Sequential(
            separable(64, 64, stride=2),
            *[separable(64, 64) for _ in range(4)],
            separable(64, 128),
        )

        # Second strided downsampling.
        self.conv4 = nn.Sequential(
            separable(128, 128, stride=2),
            separable(128, 128),
        )

        self.fc = nn.Linear(width(128), class_num)
        self.avg = nn.AdaptiveAvgPool2d(1)

        # He-normal init for every conv weight; BatchNorm reset to identity.
        # Conv biases and the linear layer keep PyTorch's default init.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def forward(self, x):
        """Run all stages, global-average-pool, flatten and classify."""
        for stage in (self.stem, self.conv1, self.conv2, self.conv3, self.conv4):
            x = stage(x)

        pooled = self.avg(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)


def mobilenet(alpha=1, class_num=100):
    """Build a ``MobileNet`` with width multiplier ``alpha`` and ``class_num`` outputs."""
    return MobileNet(width_multiplier=alpha, class_num=class_num)

In [21]:

class Net_1(nn.Module):  # nickname from the original (Korean) comment: "Fourteen at heart" v4.0.5
    """Plain stack of seven 3x3 convolutions with a global-average-pooled head.

    Every convolution uses padding=1, and none of the registered max-pool
    layers is applied in ``forward``, so the spatial size of the input is kept
    until the global average pool. Channel widths are 3 -> 32 (six
    convolutions at 32) -> 128, followed by dropout and a 128 -> 100 linear
    layer.
    """

    def __init__(self):
        super().__init__()

        def conv3x3(c_in, c_out):
            return nn.Conv2d(c_in, c_out, 3, padding=1)

        def halve():
            return nn.MaxPool2d(2, 2)

        # Attributes are created in the original order so module traversal,
        # state_dict layout and RNG consumption stay the same.
        self.conv1 = conv3x3(3, 32)
        self.pool = halve()              # registered but unused in forward
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.conv2 = conv3x3(32, 32)
        self.pool2 = halve()             # registered but unused in forward
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.conv3 = conv3x3(32, 32)
        self.batchnorm3 = nn.BatchNorm2d(32)
        self.conv3_1 = conv3x3(32, 32)
        self.batchnorm3_1 = nn.BatchNorm2d(32)
        self.conv3_2 = conv3x3(32, 32)
        self.batchnorm3_2 = nn.BatchNorm2d(32)
        self.conv3_3 = conv3x3(32, 32)
        self.batchnorm3_3 = nn.BatchNorm2d(32)
        self.pool3 = halve()             # registered but unused in forward
        self.conv4 = conv3x3(32, 128)
        self.pool4 = halve()             # registered but unused in forward
        self.batchnorm4 = nn.BatchNorm2d(128)
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(128, 100)
        self.gelu = nn.GELU()

        #weight initialization
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1)
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Return class logits of shape (batch, 100)."""
        # First two stages: conv -> batch norm -> GELU.
        x = self.gelu(self.batchnorm1(self.conv1(x)))
        x = self.gelu(self.batchnorm2(self.conv2(x)))

        # Remaining stages use the other order: conv -> GELU -> batch norm.
        act_then_norm = (
            (self.conv3, self.batchnorm3),
            (self.conv3_1, self.batchnorm3_1),
            (self.conv3_2, self.batchnorm3_2),
            (self.conv3_3, self.batchnorm3_3),
            (self.conv4, self.batchnorm4),
        )
        for conv, norm in act_then_norm:
            x = norm(self.gelu(conv(x)))

        x = self.gap(x)
        x = x.view(x.size(0), -1)
        return self.fc1(self.dropout(x))

In [22]:
# Sanity check: fetch a single batch from the training loader and print its
# index together with the input and label tensor shapes.
for batch, (X,y)in enumerate(train_loader):
    print(batch)
    print(X.shape)
    print(y.shape)
    break  # one batch is enough for the check

0
torch.Size([64, 3, 28, 28])
torch.Size([64])


In [23]:
# Use the first CUDA GPU when one is present; otherwise fall back to the CPU so
# the notebook still runs (slowly) on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [24]:

def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch, then advance the LR schedule by one step.

    Besides its arguments this function uses three module-level names that
    must exist before it is called: ``device`` (where batches are moved),
    ``lrs`` (list receiving the learning rate used during this epoch) and
    ``scheduler`` (stepped once at the end of the epoch).

    Args:
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        model: network to optimise; switched to training mode here.
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        optimizer: optimizer holding the parameters of ``model``.
    """
    model.train()
    # Pass the loader itself to tqdm (not enumerate(...)) so it can read the
    # number of batches and render a real progress bar with an ETA.
    for X, y in tqdm.tqdm(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Log the rate that was in effect for this epoch before the scheduler changes it.
    current_lr = optimizer.param_groups[0]["lr"]
    lrs.append(current_lr)
    print(current_lr)
    scheduler.step()


def test_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on a labeled loader and print accuracy and mean loss.

    Uses the module-level ``device`` to place each batch.

    Args:
        dataloader: loader yielding ``(inputs, labels)`` batches; must expose
            ``.dataset`` and support ``len``.
        model: network to evaluate; switched to eval mode here.
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar loss.

    Returns:
        Fraction of correctly classified samples (correct / dataset size).
    """
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            # Move each batch once; the labels used to be copied to the
            # device twice per iteration.
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches  # mean of the per-batch losses
    correct /= size           # accuracy over all samples
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct



In [25]:
import math
from torch.optim.lr_scheduler import _LRScheduler

class CosineAnnealingWarmUpRestarts(_LRScheduler):
    """Cyclic schedule: linear warm-up followed by cosine decay, with restarts.

    Within a cycle of length ``T_i`` the rate rises linearly from each param
    group's base lr to ``eta_max`` during the first ``T_up`` steps, then
    follows a half cosine from ``eta_max`` back towards the base lr over the
    remaining ``T_i - T_up`` steps. After each completed cycle the peak becomes
    ``base_eta_max * gamma ** cycle``.

    Args:
        optimizer: wrapped optimizer; passed on to the parent class, which is
            expected to provide ``self.base_lrs`` and ``self.last_epoch``.
        T_0: length of the first cycle in scheduler steps (positive int).
        T_mult: integer >= 1 controlling how the cycle length grows (see ``step``).
        eta_max: peak learning rate of the first cycle.
        T_up: number of warm-up steps at the start of every cycle (int >= 0).
        gamma: multiplicative factor applied to the peak per completed cycle.
        last_epoch: forwarded to the parent class and used as the initial ``T_cur``.

    Raises:
        ValueError: if ``T_0``, ``T_mult`` or ``T_up`` fails the checks below.
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        # Zero is accepted here even though the message says "positive".
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError("Expected positive integer T_up, but got {}".format(T_up))
        self.T_0 = T_0
        self.T_mult = T_mult
        self.base_eta_max = eta_max  # peak of the very first cycle, never modified
        self.eta_max = eta_max       # peak of the current cycle (decayed by gamma)
        self.T_up = T_up
        self.T_i = T_0               # length of the current cycle
        self.gamma = gamma
        self.cycle = 0               # number of completed cycles
        self.T_cur = last_epoch      # position inside the current cycle
        # All attributes above are set before the parent constructor runs.
        # NOTE(review): the parent presumably calls step() during construction,
        # which would need them — confirm against the installed PyTorch version.
        super(CosineAnnealingWarmUpRestarts, self).__init__(optimizer, last_epoch)
    
    def get_lr(self):
        """Return one learning rate per param group for the current ``T_cur``."""
        if self.T_cur == -1:
            # Not stepped yet: use the base learning rates unchanged.
            return self.base_lrs
        elif self.T_cur < self.T_up:
            # Warm-up: linear interpolation from base_lr (T_cur = 0) towards eta_max.
            return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs]
        else:
            # Decay: equals eta_max at T_cur == T_up and approaches base_lr as
            # T_cur approaches T_i.
            # NOTE(review): divides by (T_i - T_up); verify that branch is
            # unreachable when T_up == T_i.
            return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule and write the new rates into the optimizer.

        Args:
            epoch: when None, advance by exactly one step; otherwise recompute
                the cycle state directly from this (possibly fractional) value.
        """
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                # Cycle finished: restart; only the decay part of the cycle
                # length (T_i - T_up) is scaled by T_mult.
                self.cycle += 1
                self.T_cur = self.T_cur - self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        else:
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    # Constant cycle length: position and cycle index follow
                    # from modulo / integer division.
                    self.T_cur = epoch % self.T_0
                    self.cycle = epoch // self.T_0
                else:
                    # Growing cycles: n is the index of the cycle containing `epoch`.
                    # NOTE(review): here T_i is T_0 * T_mult**n, which does not
                    # treat T_up separately as the branch above does, and the
                    # true division makes T_cur a float — confirm this is intended.
                    n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
                    self.cycle = n
                    self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
                    self.T_i = self.T_0 * self.T_mult ** (n)
            else:
                # Still inside the first cycle.
                self.T_i = self.T_0
                self.T_cur = epoch
                
        # Shrink the peak according to the number of completed cycles.
        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

In [26]:

print(f"Using {device} device")

# Default MobileNet (width multiplier 1, 100 outputs), placed on the device.
model = mobilenet().to(device)

loss_fn = nn.CrossEntropyLoss()

# The optimizer starts at lr=0, which is the base rate of the schedule.
# The scheduler warms up to eta_max=0.1 over T_up=10 steps, decays along a
# cosine until step 150, then restarts with the peak halved (gamma=0.5).
# train_loop steps it once per epoch.
optimizer = optim.Adam(model.parameters(), lr=0)
scheduler = CosineAnnealingWarmUpRestarts(
    optimizer,
    T_0=150,
    T_mult=1,
    eta_max=0.1,
    T_up=10,
    gamma=0.5,
)

# Filled by train_loop with the learning rate used in every epoch.
lrs = []


Using cuda:0 device


In [27]:
# Total number of model parameters. A bare expression in the middle of a cell
# is not displayed, so print it explicitly.
pytorch_total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {pytorch_total_params:,}")

# Layer-by-layer summary for a 3x28x28 input (the size the transforms produce).
# torchsummary takes "cuda" or "cpu" rather than an indexed name like "cuda:0",
# so strip the index from `device`.
summary(model, (3, 28, 28), device=device.split(":")[0])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             864
       BatchNorm2d-2           [-1, 32, 28, 28]              64
              Mish-3           [-1, 32, 28, 28]               0
       BasicConv2d-4           [-1, 32, 28, 28]               0
            Conv2d-5           [-1, 32, 28, 28]             288
       BatchNorm2d-6           [-1, 32, 28, 28]              64
              Mish-7           [-1, 32, 28, 28]               0
            Conv2d-8           [-1, 32, 28, 28]           1,056
       BatchNorm2d-9           [-1, 32, 28, 28]              64
             Mish-10           [-1, 32, 28, 28]               0
DepthSeperabelConv2d-11           [-1, 32, 28, 28]               0
           Conv2d-12           [-1, 32, 26, 26]             288
      BatchNorm2d-13           [-1, 32, 26, 26]              64
             Mish-14           [-1, 

In [None]:

# Train for the configured number of epochs and checkpoint the weights
# whenever validation accuracy beats the best value seen so far.
base_correct = 0
for t in range(args['epochs']):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, model, loss_fn, optimizer)
    print("test loop")
    correct = test_loop(valid_loader, model, loss_fn)
    if correct <= base_correct:
        continue  # no improvement: keep the previous checkpoint
    print('save model')
    torch.save(model.state_dict(), 'model2.pt')
    base_correct = correct
print("Done!")

Epoch 1
-------------------------------


467it [00:32, 14.90it/s]

In [None]:
# Learning rate recorded by train_loop, one value per completed epoch.
fig, ax = plt.subplots()
ax.plot(lrs)
ax.set(xlabel="Epoch index", ylabel="Learning rate", title="Learning-rate schedule");

In [None]:
import pandas as pd

# Restore the best checkpoint written by the training loop. map_location lets
# a checkpoint saved on a GPU load on whatever `device` currently is.
model.load_state_dict(torch.load('model2.pt', map_location=device))
model.eval()

pred_list = []
with torch.no_grad():
    for x in test_loader:
        # Use the shared `device` instead of a hard-coded .cuda() call.
        logits = model(x.to(device))
        # One device-to-host copy per batch instead of one .item() per sample.
        pred_list.extend(logits.argmax(1).cpu().tolist())

# Ids are positions in the test file. The test loader is not shuffled, so
# predictions are already in id order and no sorting is required.
id_list = list(range(len(pred_list)))

res = pd.DataFrame({
    'id_idx': id_list,
    'label': pred_list
})

res.to_csv('./result.csv', index=False)
print("result saved")