In [1]:
import os, time, sys
import numpy as np
import torch
from torchvision import transforms, datasets
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from torchsummary import summary
import imgaug
from AdaBound import adabound
# Use the GPU when one is visible; otherwise fall back to the CPU so that cells
# reading `device` still work on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Se_Net3

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class Sq_Ex_Block(nn.Module):
    """Squeeze-and-excitation gate that rescales the channels of a feature map.

    Args:
        in_ch: number of channels of the incoming feature map.
        r: reduction ratio; the hidden layer has ``in_ch // r`` units.
    """

    def __init__(self, in_ch, r=16):
        super().__init__()
        hidden = in_ch // r
        # Squeeze (global average) -> bottleneck MLP -> sigmoid gate in (0, 1).
        gate_layers = [
            GlobalAvgPool(),
            nn.Linear(in_ch, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, in_ch),
            nn.Sigmoid(),
        ]
        self.se = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate."""
        gate = self.se(x)
        # Two trailing singleton axes let the gate broadcast over the last two dims of x.
        gate = gate[..., None, None]
        return x * gate

class GlobalAvgPool(nn.Module):
    """Average a tensor over its last two dimensions, keeping all leading ones."""

    def forward(self, x):
        leading_shape = x.shape[:-2]
        # Merge the last two axes into one, then average along it.
        flattened = x.view(*leading_shape, -1)
        return flattened.mean(dim=-1)

class SE_Net3(nn.Module):
    """Three-stage CNN with a squeeze-and-excitation block before the last pooling.

    Every convolution is followed by LeakyReLU(0.05) and then batch norm
    (activation first, normalisation second). The classifier head expects a
    1x1 feature map after the third pooling, which is what a 28x28 input
    produces with the kernel sizes and paddings below.

    Args:
        in_channels: number of channels of the input image.
    """

    def __init__(self,in_channels):
        super().__init__()
        bn_cfg = dict(eps=1e-3, momentum=0.01)  # shared by every batch-norm layer

        # Stage 1: four 64-channel convolutions.
        self.c1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(64, **bn_cfg)
        self.c2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(64, **bn_cfg)
        self.c3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64, **bn_cfg)
        self.c4 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2)
        self.bn4 = nn.BatchNorm2d(64, **bn_cfg)

        self.m1 = nn.MaxPool2d(kernel_size=2)
        self.d1 = nn.Dropout(p=0.5)

        # Stage 2: four 128-channel convolutions.
        self.c5 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(128, **bn_cfg)
        self.c6 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0)
        self.bn6 = nn.BatchNorm2d(128, **bn_cfg)
        self.c7 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.bn7 = nn.BatchNorm2d(128, **bn_cfg)
        self.c8 = nn.Conv2d(128, 128, kernel_size=5, stride=1, padding=2)
        self.bn8 = nn.BatchNorm2d(128, **bn_cfg)

        self.m2 = nn.MaxPool2d(kernel_size=2)
        self.d2 = nn.Dropout(p=0.5)

        # Stage 3: two 256-channel convolutions, then the SE gate.
        self.c9 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)
        self.bn9 = nn.BatchNorm2d(256, **bn_cfg)
        self.c10 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.bn10 = nn.BatchNorm2d(256, **bn_cfg)

        self.se1 = Sq_Ex_Block(in_ch=256, r=16)
        self.m3 = nn.MaxPool2d(kernel_size=2)
        self.d3 = nn.Dropout(p=0.5)

        # Classifier head on the flattened 256-vector.
        self.fc1 = nn.Linear(256, 256)
        self.bn11 = nn.BatchNorm1d(256, **bn_cfg)
        self.out = nn.Linear(256, 10)

        self.init_linear_weights()

    def forward(self,x):
        """Return the 10 class logits for a batch of images."""
        slope = 0.05

        stage1 = ((self.c1, self.bn1), (self.c2, self.bn2),
                  (self.c3, self.bn3), (self.c4, self.bn4))
        for conv, norm in stage1:
            x = norm(F.leaky_relu(conv(x), slope))
        x = self.d1(self.m1(x))

        stage2 = ((self.c5, self.bn5), (self.c6, self.bn6),
                  (self.c7, self.bn7), (self.c8, self.bn8))
        for conv, norm in stage2:
            x = norm(F.leaky_relu(conv(x), slope))
        x = self.d2(self.m2(x))

        stage3 = ((self.c9, self.bn9), (self.c10, self.bn10))
        for conv, norm in stage3:
            x = norm(F.leaky_relu(conv(x), slope))
        x = self.se1(x)
        x = self.d3(self.m3(x))

        # Flatten to (batch, 256); relies on the feature map being 1x1 here.
        x = x.view(-1, 256*1*1)
        x = self.bn11(F.leaky_relu(self.fc1(x), slope))
        return self.out(x)

    def init_linear_weights(self):
        """Re-initialise both fully connected weight matrices with Kaiming-normal (fan-in)."""
        for layer in (self.fc1, self.out):
            nn.init.kaiming_normal_(layer.weight, mode='fan_in')


# Resnet-pretrained

In [None]:
import torch.nn as nn
import torch.nn.functional as F
class Kmnist_resnet(nn.Module):
    """ResNet-18 preceded by a 1x1 convolution that maps 1 input channel to 3.

    Args:
        pretrained: forwarded unchanged to ``models.resnet18``.
    """

    def __init__(self,pretrained=False):
        super(Kmnist_resnet, self).__init__()
        # Channel adapter: single-channel images in, three channels out.
        self.docker = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=1)
        # NOTE(review): the ResNet's final layer is left exactly as torchvision
        # builds it - confirm its output size matches the 10 classes used elsewhere.
        self.resnet = models.resnet18(pretrained=pretrained)

    def forward(self,x):
        """Run the channel adapter, then the ResNet, and return the ResNet output."""
        return self.resnet(self.docker(x))

# Squeeze and Excitation Net

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Seq_Ex_Block(nn.Module):
    """Squeeze-and-excitation block: learns one multiplier per channel and applies it.

    Args:
        in_ch: number of channels of the incoming feature map.
        r: reduction ratio of the bottleneck (``in_ch // r`` hidden units).
    """

    def __init__(self, in_ch, r=16):
        super().__init__()
        bottleneck = in_ch // r
        squeeze = GlobalAvgPool()
        reduce_fc = nn.Linear(in_ch, bottleneck)
        expand_fc = nn.Linear(bottleneck, in_ch)
        # Sequential order: pool -> reduce -> ReLU -> expand -> sigmoid.
        self.se = nn.Sequential(
            squeeze,
            reduce_fc,
            nn.ReLU(inplace=True),
            expand_fc,
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Scale ``x`` by the gate computed from its own channel averages."""
        scale = self.se(x)
        # Reshape the gate with two trailing singleton axes so it broadcasts over x.
        scale = scale.view(*scale.shape, 1, 1)
        return x * scale

class GlobalAvgPool(nn.Module):
    """Parameter-free layer that averages over the two trailing dimensions."""

    def forward(self, x):
        # Collapse the last two axes into a single axis and take its mean;
        # every leading axis is kept as it is.
        collapsed = x.view(*x.shape[:-2], -1)
        return collapsed.mean(-1)

class SE_Net2(nn.Module):
    """Three-stage CNN with a squeeze-and-excitation block at the end of every stage.

    Each convolution is followed by LeakyReLU(0.1) and then batch norm. The
    classifier head expects a 1x1 feature map after the third pooling, which
    is what a 28x28 input produces with the kernel sizes and paddings below.

    Args:
        in_channels: number of channels of the input image.
    """

    def __init__(self,in_channels):
        super(SE_Net2,self).__init__()
        # Positional order used below:
        #   nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #   nn.BatchNorm2d / BatchNorm1d(num_features, eps, momentum)
        self.c1 = nn.Conv2d(in_channels=in_channels, out_channels=64,kernel_size=3,stride=1,padding=0)
        self.bn1 = nn.BatchNorm2d(num_features=64,eps=1e-3,momentum=0.01)
        self.c2 = nn.Conv2d(64,64,3,1,0)
        self.bn2 = nn.BatchNorm2d(64,1e-3,0.01)
        self.c3 = nn.Conv2d(64,64,5,1,2)
        self.bn3 = nn.BatchNorm2d(64,1e-3,0.01)
        self.se1 = Seq_Ex_Block(in_ch=64,r=8)
        self.m1 = nn.MaxPool2d(2)
        self.d1 = nn.Dropout(0.4)

        self.c4 = nn.Conv2d(64,128,3,1,0)
        self.bn4 = nn.BatchNorm2d(128,1e-3,0.01)
        self.c5 = nn.Conv2d(128,128,3,1,0)
        self.bn5 = nn.BatchNorm2d(128,1e-3,0.01)
        self.c6 = nn.Conv2d(128,128,5,1,2)
        self.bn6 = nn.BatchNorm2d(128,1e-3,0.01)
        self.se2 = Seq_Ex_Block(in_ch=128,r=8)
        self.m2 = nn.MaxPool2d(2)
        self.d2 = nn.Dropout(0.4)

        self.c7 = nn.Conv2d(128,256,3,1,0)
        self.bn7 = nn.BatchNorm2d(256,1e-3,0.01)
        self.se3 = Seq_Ex_Block(in_ch=256,r=8)
        self.m3 = nn.MaxPool2d(2)
        self.d3 = nn.Dropout(0.4)

        self.fc1 = nn.Linear(256*1*1,256)
        self.bn8 = nn.BatchNorm1d(256,1e-3,0.01)

        self.out = nn.Linear(256,10)

        self.init_linear_weights()

    def forward(self,x):
        """Return the 10 class logits for a batch of images."""
        x = self.bn1(F.leaky_relu(self.c1(x),0.1))
        x = self.bn2(F.leaky_relu(self.c2(x),0.1))
        x = self.bn3(F.leaky_relu(self.c3(x),0.1))
        x = self.se1(x)
        x = self.d1(self.m1(x))

        x = self.bn4(F.leaky_relu(self.c4(x),0.1))
        x = self.bn5(F.leaky_relu(self.c5(x),0.1))
        x = self.bn6(F.leaky_relu(self.c6(x),0.1))
        x = self.se2(x)
        x = self.d2(self.m2(x))

        x = self.bn7(F.leaky_relu(self.c7(x),0.1))
        x = self.se3(x)
        x = self.d3(self.m3(x))

        # Flatten to (batch, 256); relies on the feature map being 1x1 here.
        x = x.view(-1, 256*1*1)
        # This used to be F.relu(self.fc1(x), 0.1). The second positional argument
        # of F.relu is `inplace`, so 0.1 was read as a truthy flag and never acted
        # as a slope. Use the same LeakyReLU(0.1) as the rest of the network.
        # Checkpoints trained with the old head should be re-validated.
        x = self.bn8(F.leaky_relu(self.fc1(x),0.1))
        return self.out(x)

    def init_linear_weights(self):
        """Re-initialise the fully connected weights with Kaiming-normal (fan-in)."""
        nn.init.kaiming_normal_(self.fc1.weight, mode='fan_in')  #default mode: fan_in
        nn.init.kaiming_normal_(self.out.weight, mode='fan_in')


# SE_Resnet

In [None]:
import torch.nn as nn
import torch.nn.functional as F

def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride.
    """
    layer = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer

class SELayer(nn.Module):
    """Squeeze-and-excitation layer built from bias-free linear layers.

    Args:
        channel: number of channels of the 4-D input.
        reduction: bottleneck ratio; the hidden layer has ``channel // reduction`` units.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        squeezed = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, squeezed, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate (same shape as ``x``)."""
        batch, channels, _height, _width = x.size()
        pooled = self.avg_pool(x)                      # (batch, channels, 1, 1)
        gate = self.fc(pooled.view(batch, channels))   # (batch, channels)
        gate = gate.view(batch, channels, 1, 1).expand_as(x)
        return x * gate

class SEBasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions with an SE gate before the skip addition.

    Args:
        inplanes: input channels.
        planes: output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to form the shortcut.
        groups, base_width, dilation, norm_layer: accepted but not used by this block.
        reduction: keyword-only; bottleneck ratio passed to ``SELayer``.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None,
                 *, reduction=16):
        super().__init__()
        # First conv carries the stride; the second always uses stride 1.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes, 1)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SELayer(planes, reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """conv-bn-relu, conv-bn, SE gate, add the shortcut, final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.se(self.bn2(self.conv2(out)))

        # Identity shortcut unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

def se_resnet18(num_classes=10):
    """Build a ResNet with ``SEBasicBlock`` and the [2, 2, 2, 2] stage layout.

    Args:
        num_classes: size of the final classification layer.
    """
    net = models.ResNet(block=SEBasicBlock, layers=[2, 2, 2, 2], num_classes=num_classes)
    # Pool to 1x1 whatever the spatial size of the last feature map is.
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net

def se_resnet34(num_classes=10):
    """Build a ResNet with ``SEBasicBlock`` and the [3, 4, 6, 3] stage layout.

    Args:
        num_classes: size of the final classification layer.
    """
    stage_depths = [3, 4, 6, 3]
    net = models.ResNet(block=SEBasicBlock, layers=stage_depths, num_classes=num_classes)
    # Pool to 1x1 whatever the spatial size of the last feature map is.
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net


# Conv model definitions

In [None]:
import torch.nn as nn
import torch.nn.functional as F
class convNet(nn.Module):
    """Plain three-stage CNN (ReLU then batch norm) with a two-layer dense head.

    The head expects a 1x1 feature map after the third pooling, which is what
    a 28x28 input produces with the kernel sizes and paddings below.

    Args:
        in_channels: number of channels of the input image.
    """

    def __init__(self,in_channels):
        super().__init__()
        bn_cfg = dict(eps=1e-3, momentum=0.01)  # shared by every batch-norm layer

        # Stage 1: 64 channels.
        self.c1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(64, **bn_cfg)
        self.c2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(64, **bn_cfg)
        self.c3 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm2d(64, **bn_cfg)
        self.m1 = nn.MaxPool2d(kernel_size=2)
        self.d1 = nn.Dropout(p=0.2)

        # Stage 2: 128 channels.
        self.c4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(128, **bn_cfg)
        self.c5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(128, **bn_cfg)
        self.c6 = nn.Conv2d(128, 128, kernel_size=5, stride=1, padding=2)
        self.bn6 = nn.BatchNorm2d(128, **bn_cfg)
        self.m2 = nn.MaxPool2d(kernel_size=2)
        self.d2 = nn.Dropout(p=0.2)

        # Stage 3: 256 channels.
        self.c7 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)
        self.bn7 = nn.BatchNorm2d(256, **bn_cfg)
        self.m3 = nn.MaxPool2d(kernel_size=2)
        self.d3 = nn.Dropout(p=0.2)

        # Dense head: 256 -> 256 -> 128 -> 10.
        self.fc1 = nn.Linear(256, 256)
        self.bn8 = nn.BatchNorm1d(256, **bn_cfg)

        self.fc2 = nn.Linear(256, 128)
        self.bn9 = nn.BatchNorm1d(128, **bn_cfg)

        self.out = nn.Linear(128, 10)

        self.init_linear_weights()

    def forward(self,x):
        """Return the 10 class logits for a batch of images."""
        for conv, norm in ((self.c1, self.bn1), (self.c2, self.bn2), (self.c3, self.bn3)):
            x = norm(F.relu(conv(x)))
        x = self.d1(self.m1(x))

        for conv, norm in ((self.c4, self.bn4), (self.c5, self.bn5), (self.c6, self.bn6)):
            x = norm(F.relu(conv(x)))
        x = self.d2(self.m2(x))

        x = self.bn7(F.relu(self.c7(x)))
        x = self.d3(self.m3(x))

        # Flatten to (batch, 256); relies on the feature map being 1x1 here.
        x = x.view(-1, 256*1*1)

        # NOTE(review): there is no activation between fc1, fc2 and out - confirm this is intended.
        for dense, norm in ((self.fc1, self.bn8), (self.fc2, self.bn9)):
            x = norm(dense(x))

        return self.out(x)

    def init_linear_weights(self):
        """Re-initialise the three fully connected weight matrices with Kaiming-normal (fan-in)."""
        for layer in (self.fc1, self.fc2, self.out):
            nn.init.kaiming_normal_(layer.weight, mode='fan_in')

# Conv model by Chris Deotte (max pooling replaced by stride-2 convolutions)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
class convNet_avp(nn.Module):
    """CNN without pooling layers: stride-2 5x5 convolutions do the down-sampling.

    The head expects a 1x1 feature map after ``c7``, which is what a 28x28
    input produces with the kernel sizes, strides and paddings below.

    Args:
        in_channels: number of channels of the input image.
    """

    def __init__(self,in_channels):
        super().__init__()
        bn_cfg = dict(eps=1e-3, momentum=0.01)  # shared by every batch-norm layer

        # Stage 1: 64 channels; c3 halves the resolution with stride 2.
        self.c1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(64, **bn_cfg)
        self.c2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(64, **bn_cfg)
        self.c3 = nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=2)
        self.bn3 = nn.BatchNorm2d(64, **bn_cfg)
        self.d1 = nn.Dropout(p=0.2)

        # Stage 2: 128 channels; c6 halves the resolution with stride 2.
        self.c4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(128, **bn_cfg)
        self.c5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(128, **bn_cfg)
        self.c6 = nn.Conv2d(128, 128, kernel_size=5, stride=2, padding=2)
        self.bn6 = nn.BatchNorm2d(128, **bn_cfg)
        self.d2 = nn.Dropout(p=0.2)

        # Stage 3: one unpadded 4x4 convolution to 256 channels.
        self.c7 = nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=0)
        self.bn7 = nn.BatchNorm2d(256, **bn_cfg)
        self.d3 = nn.Dropout(p=0.2)

        self.fc1 = nn.Linear(256, 256)
        self.bn8 = nn.BatchNorm1d(256, **bn_cfg)

        # The linear layers keep PyTorch's default initialisation in this model.
        self.out = nn.Linear(256, 10)

    def forward(self,x):
        """Return the 10 class logits for a batch of images."""
        for conv, norm in ((self.c1, self.bn1), (self.c2, self.bn2), (self.c3, self.bn3)):
            x = norm(F.relu(conv(x)))
        x = self.d1(x)

        for conv, norm in ((self.c4, self.bn4), (self.c5, self.bn5), (self.c6, self.bn6)):
            x = norm(F.relu(conv(x)))
        x = self.d2(x)

        x = self.d3(self.bn7(F.relu(self.c7(x))))

        # Flatten to (batch, 256); relies on the feature map being 1x1 here.
        x = x.view(-1, 256*1*1)
        x = self.bn8(self.fc1(x))
        return self.out(x)
    
#     def init_linear_weights(self):
#         nn.init.kaiming_normal_(self.out.weight, mode='fan_in')

# Convnet native old

In [None]:
class convNet_native(nn.Module):
    """Three-stage CNN (conv -> batch norm -> LeakyReLU(0.1)) with a 2-way output.

    Every convolution is padded so that only the three 2x2 max-pools shrink
    the feature map; the head expects it to be 3x3 at the end, which is what
    a 28x28 input produces.

    Args:
        in_channels: number of channels of the input image.
    """

    def __init__(self,in_channels):
        super(convNet_native,self).__init__()
        self.c1 = nn.Conv2d(in_channels=in_channels, out_channels=64,kernel_size=5,stride=1,padding=2)
        self.bn1 = nn.BatchNorm2d(num_features=64,momentum=0.1)
        self.c2 = nn.Conv2d(64,64,5,1,2)
        self.bn2 = nn.BatchNorm2d(num_features=64,momentum=0.1)
        self.m1 = nn.MaxPool2d(2)
        self.d1 = nn.Dropout(0.2)

        # bn3-bn6 used to be written as nn.BatchNorm2d(C, 0.1). The second
        # positional argument of BatchNorm2d is `eps`, so those layers ran with
        # eps=0.1 instead of momentum=0.1. Pass momentum by keyword, as bn1/bn2 do.
        self.c3 = nn.Conv2d(64,128,5,1,2)
        self.bn3 = nn.BatchNorm2d(128,momentum=0.1)
        self.c4 = nn.Conv2d(128,128,5,1,2)
        self.bn4 = nn.BatchNorm2d(128,momentum=0.1)
        self.m2 = nn.MaxPool2d(2)
        self.d2 = nn.Dropout(0.2)

        self.c5 = nn.Conv2d(128,256,3,1,1)
        self.bn5 = nn.BatchNorm2d(256,momentum=0.1)
        self.c6 = nn.Conv2d(256,256,3,1,1)
        self.bn6 = nn.BatchNorm2d(256,momentum=0.1)
        self.m3 = nn.MaxPool2d(2)
        self.d3 = nn.Dropout(0.2)

        self.fc = nn.Linear(256*3*3,256)  #layer for binary entropy
        self.d4 = nn.Dropout(0.2)
        self.out = nn.Linear(256,2)

    def forward(self,x):
        """Return the 2 output logits for a batch of images."""
        x = F.leaky_relu(self.bn1(self.c1(x)),negative_slope=0.1)
        x = F.leaky_relu(self.bn2(self.c2(x)),0.1)
        x = self.m1(x)
        x = self.d1(x)

        x = F.leaky_relu(self.bn3(self.c3(x)),0.1)
        x = F.leaky_relu(self.bn4(self.c4(x)),0.1)
        x = self.m2(x)
        x = self.d2(x)

        x = F.leaky_relu(self.bn5(self.c5(x)),0.1)
        x = F.leaky_relu(self.bn6(self.c6(x)),0.1)
        x = self.m3(x)
        x = self.d3(x)

        # Flatten to (batch, 256*3*3); relies on the feature map being 3x3 here.
        x = x.view(-1, 256*3*3)
        x_b = F.leaky_relu(self.fc(x),0.1)
        x_b = self.d4(x_b)
        return self.out(x_b)

In [None]:
# Architectures used in earlier runs (swap one in to inspect it):
#   convNet(in_channels=1)
#   SE_Net2(in_channels=1)
#   convNet_avp(in_channels=1)
#   Kmnist_resnet(pretrained=True)
#   se_resnet34(num_classes=10), followed by
#       model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
model = SE_Net3(in_channels=1).cuda()  # Module.cuda() returns the module itself
summary(model, input_size=(1, 28, 28))

In [None]:
from imgaug import augmenters as iaa
# from imgaug.augmentables.segmaps import SegmentationMapOnImage

class ImgAugTransform:
    """Callable that runs an imgaug pipeline on a single image.

    Active steps, in order:
      1. affine transform, always applied: rotation in (-20, 20), translation of
         (-2, 2) pixels per axis, interpolation order chosen from [0, 1],
         'symmetric' border mode;
      2. sharpening, applied with probability 0.2;
      3. ``iaa.Invert(0.5)``.

    Steps that were tried and are switched off in this version: Scale, Fliplr,
    GaussianBlur, AverageBlur, SaltAndPepper, Add, AdditiveGaussianNoise,
    GammaContrast, AddToHueAndSaturation.
    """

    def __init__(self):
        always_affine = iaa.Sometimes(
            1,
            iaa.Affine(
                rotate=(-20, 20),
                order=[0, 1],
                translate_px={"x":(-2, 2),"y":(-2,2)},
                mode='symmetric',
            ),
        )
        maybe_sharpen = iaa.Sometimes(
            0.2,
            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.25)),
        )
        random_invert = iaa.Invert(0.5)
        self.aug = iaa.Sequential([always_affine, maybe_sharpen, random_invert])

    def __call__(self, img, mask=None):
        """Augment ``img`` and return the result; ``mask`` is accepted but ignored."""
        pixels = np.array(img)
        return self.aug.augment_image(image=pixels)

# Trans and Dataset definition

In [1]:
# ---------------------------------------------------------------------------
# Image pipelines
# ---------------------------------------------------------------------------
# Training: one random affine augmentation, then PIL image -> tensor in [0, 1].
#
# RandomAffine settings used in other experiments:
#   degrees=20, translate=(0.25,0.25), scale=[0.65,1.1], shear=15   after 60k
#   degrees=35, translate=(0.2,0.2),   scale=[0.6,1.1],  shear=15   try 1
#   degrees=15, translate=(0.1,0.1),   scale=(0.9,1.1),  shear=5    for Se_res18
#   degrees=2,  translate=(0.05,0.05)                               for 01 classifier
#   degrees=10, translate=(0.1,0.1),   scale=(0.9,1.1)              Chris Deotte avgpool
#   degrees=10, translate=(0.2,0.2),   scale=[0.9,1.1]              native distinguisher
# Other steps that are switched off:
#   transforms.ColorJitter(0.2,0.2,0.2,0.5)
#   transforms.Resize((224,224))                          for resnet
#   ImgAugTransform() followed by lambda x: Image.fromarray(x)
#   transforms.Normalize(...) with one of the statistics below
# Normalisation statistics measured per dataset:
#   train_large    mean=[0.08889289]  std=[0.24106446]
#   train          mean=[0.08229437]  std=[0.23876116]
#   dig_augmented  mean=[0.09549136]  std=[0.24336776]
#   dig            mean=[0.11156191]  std=[0.2794967]
trans = transforms.Compose([
    transforms.RandomAffine(degrees=15, translate=(0.25, 0.25), scale=[0.7, 1.1], shear=8),  # 60k baseline
    transforms.ToTensor(),
])

# Validation: no augmentation, only the tensor conversion.
trans_val = transforms.Compose([transforms.ToTensor()])

# Test: identical to the validation pipeline.
trans_test = transforms.Compose([transforms.ToTensor()])

# Source tables for the datasets defined below. Only Dig-MNIST is loaded here.
# NOTE(review): KMnistDataset reads `global_data` and KMnistDataset_binary_aid
# reads `global_data_large`, but every assignment of those two names in this
# cell is commented out. On a fresh kernel, building either dataset raises
# NameError until the matching read_csv line is enabled.
# NOTE(review): `indices_len = 74340` in the k-fold configuration matches the
# row count in the file name "train_pseu_dig_74340_s1.csv" - presumably that is
# the intended training table; confirm before enabling it.
# global_data = pd.read_csv("./dataset/train.csv")
# global_dig_aug_data = pd.read_csv("./dataset/Dig-Mnist-Augmented.csv")
global_dig_data = pd.read_csv("./dataset/Dig-MNIST.csv")
# global_data_large = pd.read_csv("./dataset/train_large.csv")
# global_data_train_test_psuedo = pd.read_csv("./dataset/train_test_psuedo_aug.csv")
# global_data_train_test_psuedo = pd.read_csv("./dataset/train_test_psuedo_65k.csv")
# global_critic01_data = pd.read_csv("./dataset/critic01_20k.csv")
# global_critic01_data = pd.read_csv("./dataset/critic01_20k_hard.csv")
# global_data = pd.read_csv("./dataset/train_digtop1_69548.csv")
# global_data = pd.read_csv("./dataset/train_psuedo_digtop1_74367.csv")
# global_data = pd.read_csv("./dataset/train_pseu_dig_75k_acc9906.csv")
# global_data = pd.read_csv("./dataset/train_pseu_dig_75k_acc9910.csv")

###Final submit
# global_data = pd.read_csv("./dataset_final/train.csv")
# global_data = pd.read_csv("./dataset_final/train_pseu_dig_74340_s1.csv")

class KMnistDataset(Dataset):
    """Train or validation view over a table with the label in column 0 and 784 pixel columns after it.

    Rows are addressed through ``indices``. Logical positions
    ``[0, int(data_len * (1 - validate_rate)))`` form the training view; the
    ``int(data_len * validate_rate)`` positions starting at that same boundary
    form the validation view.

    Args:
        data_len: number of rows to split; defaults to ``len(data)``.
        is_validate: select the validation view instead of the training view.
        validate_rate: fraction of ``data_len`` held out for validation;
            ``None`` is treated as ``0.0`` (nothing held out).
        indices: maps a logical position to a row number of the table;
            defaults to the identity mapping over ``data_len`` rows.
        data: table to read from; defaults to the notebook-level ``global_data``.
        transform: callable applied to the PIL image; defaults to ``trans_val``
            for the validation view and ``trans`` for the training view.

    Raises:
        ValueError: if ``validate_rate`` lies outside ``[0, 1]``.
    """

    def __init__(self,data_len=None, is_validate=False,validate_rate=None,indices=None,
                 data=None, transform=None):
        self.is_validate = is_validate
        # The notebook-level table is only looked up when no explicit one is
        # passed, so the class also works before `global_data` has been loaded.
        self.data = global_data if data is None else data
        if data_len is None:
            data_len = len(self.data)
        if validate_rate is None:
            validate_rate = 0.0
        if not 0.0 <= validate_rate <= 1.0:
            raise ValueError("validate_rate must be in [0, 1], got {}".format(validate_rate))

        self.indices = np.arange(data_len) if indices is None else indices
        if self.is_validate:
            self.len = int(data_len*validate_rate)
            self.offset = int(data_len*(1-validate_rate))
        else:
            self.len = int(data_len*(1-validate_rate))
            self.offset = 0

        if transform is not None:
            self.transform = transform
        else:
            self.transform = trans_val if self.is_validate else trans

    def __getitem__(self, idx):
        """Return ``(image, label)`` for logical position ``idx`` of this view."""
        # Without this check a training index past the end would silently
        # read rows that belong to the validation view.
        if not 0 <= idx < self.len:
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        row = self.indices[idx + self.offset]
        img = self.data.iloc[row, 1:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        label = self.data.iloc[row, 0]
        img = Image.fromarray(img)
        img = self.transform(img)     # with the default transforms: tensor of shape (1,28,28)
        # NOTE(review): labels are uint8; nn.CrossEntropyLoss expects int64 class
        # targets, so the training loop presumably casts them - confirm.
        label = torch.as_tensor(label, dtype=torch.uint8)
        return img, label

    def __len__(self):
        return self.len

class KMnistDataset_binary_aid(Dataset):
    """Train or validation view over a table with two label columns followed by 784 pixel columns.

    Column 0 holds ``native_label``, column 1 holds ``label`` and the pixels
    start at column 2. The split works as follows: logical positions
    ``[0, int(data_len * (1 - validate_rate)))`` form the training view, and the
    ``int(data_len * validate_rate)`` positions starting at that boundary form
    the validation view.

    Args:
        data_len: number of rows to split; defaults to ``len(data)``.
        is_validate: select the validation view instead of the training view.
        validate_rate: fraction of ``data_len`` held out for validation;
            ``None`` is treated as ``0.0`` (nothing held out).
        indices: maps a logical position to a row number of the table;
            defaults to the identity mapping over ``data_len`` rows.
        data: table to read from; defaults to the notebook-level ``global_data_large``.
        transform: callable applied to the PIL image; defaults to ``trans_val``
            for the validation view and ``trans`` for the training view.

    Raises:
        ValueError: if ``validate_rate`` lies outside ``[0, 1]``.
    """

    def __init__(self,data_len=None, is_validate=False,validate_rate=None,indices=None,
                 data=None, transform=None):
        self.is_validate = is_validate
        # The notebook-level table is only looked up when no explicit one is
        # passed, so the class also works before `global_data_large` has been loaded.
        self.data = global_data_large if data is None else data

        if data_len is None:
            data_len = len(self.data)
        if validate_rate is None:
            validate_rate = 0.0
        if not 0.0 <= validate_rate <= 1.0:
            raise ValueError("validate_rate must be in [0, 1], got {}".format(validate_rate))

        self.indices = np.arange(data_len) if indices is None else indices
        if self.is_validate:
            self.len = int(data_len*validate_rate)
            self.offset = int(data_len*(1-validate_rate))
        else:
            self.len = int(data_len*(1-validate_rate))
            self.offset = 0

        if transform is not None:
            self.transform = transform
        else:
            self.transform = trans_val if self.is_validate else trans

    def __getitem__(self, idx):
        """Return ``(image, native_label, label)`` for logical position ``idx`` of this view."""
        # Without this check a training index past the end would silently
        # read rows that belong to the validation view.
        if not 0 <= idx < self.len:
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        row = self.indices[idx + self.offset]
        img = self.data.iloc[row, 2:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        native_label = self.data.iloc[row, 0]
        label = self.data.iloc[row, 1]
        img = Image.fromarray(img)
        img = self.transform(img)     # with the default transforms: tensor of shape (1,28,28)
        # Scalar targets for a cross-entropy style loss. For a BCE loss the
        # earlier variant added a trailing axis with .unsqueeze(0).
        # NOTE(review): both labels are uint8; confirm the training loop casts
        # them to the dtype its loss expects.
        native_label = torch.as_tensor(native_label, dtype=torch.uint8)
        label = torch.as_tensor(label, dtype=torch.uint8)
        return img, native_label, label

    def __len__(self):
        return self.len

    
class TestDataset(Dataset):
    """Unlabelled images from a table whose pixel columns start at column 1.

    Args:
        data_len: number of leading rows to expose; defaults to all rows and
            is capped at the number of rows available.
        data: table to read from; defaults to reading "./dataset/test.csv".
        transform: callable applied to the PIL image; defaults to the
            notebook-level ``trans_test``.
    """

    def __init__(self,data_len=None, data=None, transform=None):
        self.data = pd.read_csv("./dataset/test.csv") if data is None else data
        print("data shape:", np.shape(self.data))
        self.transform = trans_test if transform is None else transform
        n_rows = len(self.data)
        # self.len used to be set only when data_len was None, so passing an
        # explicit data_len made __len__ fail with AttributeError.
        self.len = n_rows if data_len is None else min(data_len, n_rows)

    def __getitem__(self, idx):
        """Return ``(image, empty_tensor)``; the empty tensor stands in for the missing label."""
        img = self.data.iloc[idx, 1:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        img = Image.fromarray(img)
        img = self.transform(img)     # with the default transform: tensor of shape (1,28,28)
        return img, torch.Tensor([])

    def __len__(self):
        return self.len
    

NameError: name 'transforms' is not defined

# Get kfold dataset loader

In [5]:
def get_kfold_dataset_loader(k=5,val_rate=0.1,indices_len=None, batch_size=None,num_workers=None, binary_aid=False):
    """Build one (train, validation) DataLoader pair per fold.

    For fold ``i`` the index array is rearranged so that the ``i``-th block of
    ``indices_len // k`` rows sits at the end. The dataset classes take their
    validation view from the tail of that array, so ``val_rate`` should equal
    ``(indices_len // k) / indices_len`` for the validation view to coincide
    with the held-out fold.

    Args:
        k: number of folds.
        val_rate: validation fraction forwarded to the dataset classes.
        indices_len: number of rows taking part in the split (required).
        batch_size: forwarded to ``DataLoader``.
        num_workers: forwarded to ``DataLoader``; ``None`` means 0 (load in the
            main process).
        binary_aid: use ``KMnistDataset_binary_aid`` instead of ``KMnistDataset``.

    Returns:
        ``(train_loader_list, val_loader_list)``, each of length ``k``.

    Raises:
        ValueError: if ``indices_len`` is missing or ``k`` is smaller than 1.
    """
    if indices_len is None:
        raise ValueError("indices_len is required to build the fold indices")
    if k < 1:
        raise ValueError("k must be at least 1, got {}".format(k))
    if num_workers is None:
        num_workers = 0  # DataLoader rejects None for num_workers

    dataset_cls = KMnistDataset_binary_aid if binary_aid else KMnistDataset

    train_loader_list = []
    val_loader_list = []
    indices = np.arange(indices_len)
    val_len = indices_len//k

    for fold in range(k):
        start = fold*val_len
        stop = start + val_len
        # Held-out block goes last; the remaining rows keep their original order.
        ind = np.concatenate([indices[:start],indices[stop:],indices[start:stop]])

        # data_len must match len(ind). It used to default to the full table
        # length, which put the validation offset past the end of `ind`
        # whenever indices_len was smaller than the table.
        train_dataset = dataset_cls(data_len=indices_len,is_validate=False, validate_rate=val_rate,indices=ind)
        val_dataset = dataset_cls(data_len=indices_len,is_validate=True, validate_rate=val_rate, indices=ind)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

        train_loader_list.append(train_loader)
        val_loader_list.append(val_loader)

    return train_loader_list, val_loader_list


# Get models

In [6]:
def get_model(native_net=False):
    """Create a fresh model and move it to the notebook-level ``device``.

    Args:
        native_net: build ``convNet_native`` when truthy, otherwise ``SE_Net3``.

    Returns:
        The model, placed on ``device``.
    """
    if native_net:
        model = convNet_native(in_channels=1)
    else:
        model = SE_Net3(in_channels=1)
        # Alternatives used in other experiments:
        #   model = convNet(in_channels=1)
        #   model.out = nn.Linear(256,2)               # critic-case classification
        #   model = Kmnist_resnet(pretrained=True)     # remember to resize images to (224,224)
        #   model = se_resnet18(num_classes=10)
        #   model = se_resnet34(num_classes=10)
        #   model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,bias=False)
        #   summary(model, input_size=(1, 28, 28))

    # Module.to handles every device string. The previous `device == "cuda"`
    # check left the model on the CPU for values such as "cuda:0".
    return model.to(device)


# Get dataset distribution

In [None]:
# train distribution: mean=[0.08229437],std=[0.23876116]
# dig augmented distribution: mean=[0.09549136],std=[0.24336776]
# train large distribution: mean=[0.08889286],std=[0.24106438]

def get_dataset_mean_std(dataloader, compute_device=None):
    """Estimate the per-channel mean and std of the images in ``dataloader``.

    The first element of every batch is taken as the image tensor and flattened
    to (batch, channels, pixels). The per-image, per-channel mean and std are
    summed over all images and divided by the number of images, so the returned
    ``std`` is the average of the per-image stds, not the std of the pooled pixels.

    Args:
        dataloader: iterable of batches whose element ``[0]`` is the image tensor
            (indexed here as ``size(0)`` = batch, ``size(1)`` = channels).
        compute_device: device the reduction runs on. ``None`` (default) uses the
            notebook-level ``device``, which is the original behaviour.

    Returns:
        Tuple ``(mean, std)`` of numpy arrays with one entry per channel.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    if compute_device is None:
        compute_device = device

    print("Calculate distribution:")
    mean = 0.
    std = 0.
    nb_samples = 0.
    for data in dataloader:
        img = data[0].to(compute_device)
        batch_samples = img.size(0)
        # Collapse the spatial dimensions so the statistics are taken per image and channel.
        img = img.contiguous().view(batch_samples, img.size(1), -1)
        mean += img.mean(2).sum(0)
        std += img.std(2).sum(0)
        nb_samples += batch_samples
        if nb_samples%5120 == 0:
            print("Finished:", nb_samples)

    print("num of samples:",nb_samples)
    if nb_samples == 0:
        # Without this guard the division below fails with an opaque ZeroDivisionError.
        raise ValueError("get_dataset_mean_std: dataloader yielded no samples")

    mean /= nb_samples
    std /= nb_samples
    return mean.cpu().numpy(), std.cpu().numpy()

# Get train and val loaders

In [7]:
# Loader settings shared by every fold.
batch_size, num_workers, k = 1024, 8, 5

# Number of samples that are split into k folds.
# Values used in earlier runs:
#   75124 (from acc9910), 75035 (from acc9906), 74367, 60000, 10240, 120000
#   74268 (final submit)
indices_len = 74340

# Exactly one fold (indices_len//k samples) is held out for validation.
vr = (indices_len // k) / indices_len
print("validation rate:", vr)

### Single dataset: how the normalisation statistics were measured
# indices = np.arange(indices_len)
# train_dataset = KMnistDataset(data_len=None,is_validate=False,validate_rate=vr,indices=indices)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# mean, std = get_dataset_mean_std(train_loader)
# print("train distribution: mean={},std={}".format(mean, std))
#
# indices = np.arange(10240)
# dig_val_dataset = DigValDataset(data_len=None,indices=indices)
# dig_val_loader = DataLoader(dig_val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# mean, std = get_dataset_mean_std(dig_val_loader)
# print("validate distribution:",mean, std)
#
# test_dataset = TestDataset(data_len=None)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# mean, std = get_dataset_mean_std(test_loader)
# print("test distribution:",mean, std)

### K-fold dataset: one (train, validation) loader pair per fold
train_loaders, val_loaders = get_kfold_dataset_loader(
    k, vr, indices_len, batch_size, num_workers, binary_aid=False
)

validation rate: 0.2


# Train native classifier

In [None]:
if __name__ == "__main__":
    # Trains one binary ("native") classifier per fold and saves the weights of the
    # epoch with the best validation accuracy for every fold.
    # NOTE(review): batches are unpacked as (img, target_b, target), so the loaders
    # presumably must be built with binary_aid=True — confirm before running.
    epochs = 120
    period = 40
    ensemble_models = []
    lr = 1e-3
    val_period = 1

    criterion_b = torch.nn.CrossEntropyLoss()

    print("Fold:",len(train_loaders))

    for fold in range(len(train_loaders)):
        train_loader = train_loaders[fold]
        val_loader = val_loaders[fold]

        model = get_model(native_net=True)
        torch.cuda.empty_cache()    #Need further check

        max_acc_b = 0
        min_loss_b = 10000
        best_model_dict = None
        data_num = 0
        loss_avg_b = 0

        # Other optimizers tried: SGD(lr=lr), RMSprop(lr=lr,alpha=0.9), Adagrad(lr=lr)
        optimizer = torch.optim.Adam(model.parameters(),lr=lr,betas=(0.9,0.99))
        # Alternative schedule (stepped once per epoch, right after the training loop):
        #   CosineAnnealingWarmRestarts(optimizer,T_0=period,T_mult=1,eta_min=1e-5)
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=15)

        tmp_count = 0
        for ep in range(0,epochs+1):
            model.train()
            for idx, data in enumerate(train_loader):
                img, target_b, target = data
                img, target_b, target = img.to(device), target_b.to(device,dtype=torch.long), target.to(device,dtype=torch.long)
                pred_b = model(img)

                loss_b = criterion_b(pred_b,target_b)
                loss_avg_b += loss_b.item()

                data_num += img.size(0)
                optimizer.zero_grad()
                loss_b.backward()
                optimizer.step()

            ###Validation
            if ep!=0 and ep%val_period == 0:
                model.eval()
                acc_b = 0
                val_loss_b = 0
                data_num  = 0
                with torch.no_grad():
                    for idx, data in enumerate(val_loader):
                        img, target_b, target = data
                        img, target_b, target = img.to(device), target_b.to(device,dtype=torch.long), target.to(device,dtype=torch.long)
                        pred_b = model(img)

                        val_loss_b += criterion_b(pred_b,target_b).item()

                        _,pred_native_class = torch.max(pred_b.data, 1)

                        acc_b += (pred_native_class == target_b).sum().item()
                        data_num += img.size(0)

                acc_b /= data_num
                # NOTE: criterion_b already averages inside each batch, so this is the
                # sum of batch means divided by the sample count (about mean loss /
                # batch size). Left unchanged because the saved file names use it.
                val_loss_b /= data_num

                ###Plateau
                lr_scheduler.step(val_loss_b)
                if optimizer.param_groups[0]['lr'] < 1e-4:
                    break

                if acc_b >= max_acc_b:
                    max_acc_b = acc_b
                    # state_dict() hands out references to the live parameters; clone
                    # them so later optimizer steps cannot overwrite the snapshot.
                    best_model_dict = {name: tensor.detach().clone()
                                       for name, tensor in model.state_dict().items()}

                # Fixed: this compared against `min_loss`, which this cell never defines.
                if val_loss_b <= min_loss_b:
                    min_loss_b = val_loss_b

                print("Episode:{}, Validation Loss:{},Acc_b:{:.3f}%,lr:{}"
                      .format(ep,val_loss_b,acc_b*100,optimizer.param_groups[0]['lr']))

            # Periodic checkpoint of the best weights seen so far in this fold.
            if ep!=0 and ep%10 == 0:
                torch.save(best_model_dict, "./Kmnist_saved_model/tmp_Fold{}_acc_b{:.3f}".format(fold,max_acc_b*1e2))

        ###K-Fold ensemble: Saved k best model for k dataloader
        print("===================Best Fold:{} Saved, Acc:{}==================".format(fold,max_acc_b))
        torch.save(best_model_dict, "./Kmnist_saved_model/Fold{}_loss{:.4f}_acc_b{:.3f}".format(fold,min_loss_b*1e3,max_acc_b*1e2))
        print("======================================================")

# Train digit classifier

In [None]:
if __name__ == "__main__":
    # Trains one 10-class digit classifier per fold and saves the weights of the
    # epoch with the best validation accuracy for every fold.
    epochs = 300
    ensemble_models = []
    lr = 1e-3
    val_period = 1

    criterion = torch.nn.CrossEntropyLoss()
    # Binary variant: criterion_b = torch.nn.BCEWithLogitsLoss()

    # NOTE(review): this outer loop has no exit condition — every pass retrains all
    # folds and saves another set of models until the kernel is interrupted.
    # Presumably intentional for collecting ensemble members; confirm.
    while True:
        print("Fold:",len(train_loaders))

        for fold in range(0,len(train_loaders)):
            train_loader = train_loaders[fold]
            val_loader = val_loaders[fold]

            model = get_model()

            max_acc = 0
            min_loss = 10000
            best_model_dict = None
            data_num = 0
            loss_avg = 0

            # Other optimizers tried:
            #   Adamax(lr=0.002,weight_decay=0), SGD(lr=lr), RMSprop(lr=lr), Adagrad(lr=lr),
            #   adabound.AdaBound(lr=lr, final_lr=0.01,amsbound=True),
            #   SGD(lr=lr, momentum=0.9, weight_decay=5e-4, nesterov=True)
            optimizer = torch.optim.Adam(model.parameters(),lr=lr,betas=(0.9,0.99))
            # Alternative schedule (stepped once per epoch, right after the training loop):
            #   CosineAnnealingWarmRestarts(optimizer,T_0=period,T_mult=1,eta_min=1e-5)
            lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=20,factor=0.1)

            for ep in range(0,epochs+1):
                model.train()
                for idx, data in enumerate(train_loader):
                    img, target = data
                    img, target = img.to(device), target.to(device,dtype=torch.long)

                    pred = model(img)

                    # CrossEntropyLoss input: (batch_num,10) logits; target: (batch_num,) class indices
                    loss = criterion(pred,target)
                    loss_avg += loss.item()
                    data_num += img.size(0)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                ###Validation
                if ep!=0 and ep%val_period == 0:
                    model.eval()
                    acc = 0
                    val_loss = 0
                    data_num  = 0
                    with torch.no_grad():
                        for idx, data in enumerate(val_loader):
                            img, target = data
                            img, target = img.to(device), target.to(device,dtype=torch.long)
                            pred = model(img)

                            val_loss += criterion(pred, target).item()

                            _,pred_class = torch.max(pred.data, 1)
                            acc += (pred_class == target).sum().item()
                            data_num += img.size(0)

                    acc /= data_num
                    # NOTE: criterion already averages inside each batch, so this is the
                    # sum of batch means divided by the sample count. Left unchanged
                    # because the saved file names use it.
                    val_loss /= data_num

                    ###Plateau
                    lr_scheduler.step(val_loss)
                    if optimizer.param_groups[0]['lr'] < 1e-5:
                        break

                    # Model selection is by accuracy; min_loss records the loss of that epoch.
                    if acc >= max_acc:
                        max_acc = acc
                        min_loss = val_loss
                        # state_dict() hands out references to the live parameters; clone
                        # them so later optimizer steps cannot overwrite the snapshot.
                        best_model_dict = {name: tensor.detach().clone()
                                           for name, tensor in model.state_dict().items()}

                    print("Episode:{}, Validation Loss:{},Acc:{:.4f}%,lr:{}"
                          .format(ep,val_loss,acc*100,optimizer.param_groups[0]['lr']))

            ###K-Fold ensemble: Saved k best model for k dataloader
            print("===================Best Fold:{} Saved Loss:{} Acc:{}==================".format(fold,min_loss,max_acc))
            torch.save(best_model_dict, "./Kmnist_saved_model/74k_Fold{}_loss{:.4f}_acc{:.3f}".format(fold,min_loss*1e3,max_acc*1e2))
            print("======================================================")

            del model
            torch.cuda.empty_cache()


# Native classifier Ensemble inference

In [None]:
# Normalisation statistics of the available datasets, kept for reference:
#   train dataset        : mean=[0.08229437], std=[0.23876116]
#   dig_augmented        : mean=[0.09549136], std=[0.24336776]
#   train_large dataset  : mean=[0.08889289], std=[0.24106446]

ensemble_root = "./Kmnist_saved_model/senet2_5x2fold_65k"   #model-> 1 fc(512) + dropout(0.1)
ensemble_root_dig = "./Kmnist_saved_model/ensemble_dig/baseline_cnn_60k_3fold"    #model-> 1 fc(256) + dropout(0.2)

data_num = 0
acc = 0

# validate_rate=1 is passed so the dataset is used as a validation split.
# Plain-dataset variant:
#   indices = np.arange(60000)
#   test_dataset = KMnistDataset(data_len=None,is_validate=True, validate_rate=vr,indices=indices)
vr = 1
indices = np.arange(120000)
test_dataset = KMnistDataset_binary_aid(data_len=None,is_validate=True, validate_rate=vr,indices=indices)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=0)

# Binary classifier that routes every sample to one of the two ensembles.
native_model = convNet_native(in_channels=1)
native_model.cuda()
native_model.load_state_dict(torch.load("./Kmnist_saved_model/native_classifier/old/Fold0_loss0.0242_acc_b99.704_without_aug"))
native_model.eval()

# Ensemble used for samples flagged as native: every "Fold" checkpoint in ensemble_root.
ensemble_models = []
for file_name in os.listdir(ensemble_root):
    if "Fold" not in file_name:
        continue
    model = SE_Net2(in_channels=1)
    model.cuda()
    model.load_state_dict(torch.load(f"{ensemble_root}/{file_name}"))
    model.eval()
    ensemble_models.append(model)

# Ensemble used for the remaining samples: every "Fold" checkpoint in ensemble_root_dig.
ensemble_models_dig = []
for file_name in os.listdir(ensemble_root_dig):
    if "Fold" not in file_name:
        continue
    model = convNet(in_channels=1)
    model.cuda()
    model.load_state_dict(torch.load(f"{ensemble_root_dig}/{file_name}"))
    model.eval()
    ensemble_models_dig.append(model)

### Test Native Classifier
with torch.no_grad():
    for batch in test_loader:
        img, target_b, target = batch
        img = img.to(device)
        target = target.to(device,dtype=torch.long)

        # Routing decision per sample, (batch_num,)
        pred_native = torch.max(native_model(img),dim=1)[1]

        # Average ensemble of the regular models: (batch_num,10,model_num) -> (batch_num,10)
        avg_pred = torch.stack([member(img) for member in ensemble_models], dim=2).mean(dim=2)
        pred_class = torch.max(avg_pred, 1)[1]   #(batch_num,)

        # Average ensemble of the dig-augmented models
        avg_pred_dig = torch.stack([member(img) for member in ensemble_models_dig], dim=2).mean(dim=2)
        pred_class_dig = torch.max(avg_pred_dig, 1)[1]   #(batch_num,)

        # Blend: mask value 1 keeps the regular prediction, 0 keeps the dig prediction.
        native_mask = pred_native    #(batch_num,)  ex: ([1,0,0,1,0])
        nonnative_mask = 1 - native_mask   #(batch_num,) ex:([0,1,1,0,1])
        pred_final = (pred_class*native_mask + pred_class_dig*nonnative_mask).to(torch.long)  #(batch_num,)

        acc += (pred_final == target).sum().item()
        data_num += img.size(0)

acc /= data_num
print("Acc:{:.4f}%".format(acc*100))


# Ensemble critical digits inference

In [None]:
ensemble_root = "./Kmnist_saved_model/senet2_5x2fold_65k"
ensemble_critical_root = "./Kmnist_saved_model/critical_classifier/01"

# Leftover settings: model_num and model are reassigned further down.
epochs = 500
period = 100
model_num = epochs//period
model = 5

data_num = 0
acc = 0
acc_2step = 0

vr = 1
indices = np.arange(60000)
test_dataset = KMnistDataset(data_len=None,is_validate=True, validate_rate=vr,indices=indices)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=0)

# 10-class ensemble: every "Fold" checkpoint in ensemble_root.
ensemble_models = []
for file_name in os.listdir(ensemble_root):
    if "Fold" not in file_name:
        continue
    model = SE_Net2(in_channels=1)
    model.cuda()
    model.load_state_dict(torch.load(f"{ensemble_root}/{file_name}"))
    model.eval()
    ensemble_models.append(model)

# Critical-case ensemble: same backbone with a 2-way output layer.
ensemble_models_crit = []
for file_name in os.listdir(ensemble_critical_root):
    if "Fold" not in file_name:
        continue
    model_crit = SE_Net2(in_channels=1)
    model_crit.out = nn.Linear(256,2)   #For critic cases classification
    model_crit.cuda()
    model_crit.load_state_dict(torch.load(f"{ensemble_critical_root}/{file_name}"))
    model_crit.eval()
    ensemble_models_crit.append(model_crit)

model_num = len(ensemble_models)
crit_model_num = len(ensemble_models_crit)
print("len of models:",len(ensemble_models))
print("len of critical models:",len(ensemble_models_crit))

# Per-sample records consumed by the misclassification printout in a later cell.
result = np.array([])    # one-step predictions
result1 = np.array([])   # two-step predictions
label = np.array([])     # ground truth

with torch.no_grad():
    for batch in test_loader:
        img, target = batch
        img = img.to(device)
        target = target.to(device,dtype=torch.long)

        # Step 1: average ensemble over the 10-class models
        avg_pred = torch.stack([ensemble_models[i](img) for i in range(model_num)], dim=2).mean(dim=2)
        pred_class = torch.max(avg_pred, 1)[1]   #(batch_num,)

        # Step 2: average ensemble over the 2-way critical models
        avg_pred_crit = torch.stack([ensemble_models_crit[i](img) for i in range(crit_model_num)], dim=2).mean(dim=2)
        pred_class_crit = torch.max(avg_pred_crit, 1)[1]   #(batch_num,)

        # Samples predicted as class 1 in step 1 take the critical ensemble's answer;
        # all other samples keep the step-1 prediction.
        mask_1_0 = (pred_class == 1).to(torch.long)   #(batch_num,)  ex: ([1,0,0,1,0])
        mask_others = 1 - mask_1_0                    #(batch_num,)  ex: ([0,1,1,0,1])
        pred_final = (pred_class_crit*mask_1_0 + pred_class*mask_others).to(torch.long)  #(batch_num,)

        acc += (pred_class == target).sum().item()
        acc_2step += (pred_final == target).sum().item()
        data_num += img.size(0)

        result = np.concatenate((result, pred_class.cpu().numpy()))
        result1 = np.concatenate((result1, pred_final.cpu().numpy()))
        label = np.concatenate((label, target.cpu().numpy()))

acc /= data_num
acc_2step /= data_num
print("Acc:{:.4f}%".format(acc*100))
print("Acc 2step :{:.4f}%".format(acc_2step*100))


# Ensemble inference

In [None]:
# Majority-vote ensemble over two groups of SE_Net2 checkpoints.
ensemble_root = "./Kmnist_saved_model/ensemble_tuned_cnn/senet2_5x2fold_65k_tmp"
ensemble_root2 = "./Kmnist_saved_model/ensemble_tuned_cnn/senet2_5fold_strong_aug_b768"
ensemble_models = []
ensemble_models2 = []

data_num = 0
acc = 0

vr = 1
indices = np.arange(60000)
test_dataset = KMnistDataset(data_len=None,is_validate=True, validate_rate=vr,indices=indices)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=0)

# Only files whose name contains "Fold" are treated as checkpoints.
for file_name in os.listdir(ensemble_root):
    if file_name.find("Fold") == -1:
        continue
    model = SE_Net2(in_channels=1)
    model.cuda()
    model.load_state_dict(torch.load("{}/{}".format(ensemble_root,file_name)))
    model.eval()
    ensemble_models.append(model)

for file_name in os.listdir(ensemble_root2):
    if file_name.find("Fold") == -1:
        continue
    model2 = SE_Net2(in_channels=1)
    model2.cuda()
    model2.load_state_dict(torch.load("{}/{}".format(ensemble_root2,file_name)))
    model2.eval()
    ensemble_models2.append(model2)

model_num = len(ensemble_models)
model2_num = len(ensemble_models2)
print("len of models:",len(ensemble_models))
print("len of models2:",len(ensemble_models2))
# Reset here but not filled by this cell (the per-batch collection is disabled).
result = np.array([])
result1 =np.array([])
label = np.array([])
with torch.no_grad():
    for idx, data in enumerate(test_loader):
        img, target = data
        img, target = img.to(device), target.to(device,dtype=torch.long)

        ###Average Ensemble (alternative to voting):
        #   pred = torch.stack([m(img) for m in ensemble_models + ensemble_models2], dim=2).mean(dim=2)
        #   _,pred_class = torch.max(pred.data, 1)

        ###Voting Ensemble
        # One hard prediction per model, stacked to (batch_num, model_num + model2_num).
        votes = []
        for voter in ensemble_models + ensemble_models2:
            _,pred_class = torch.max(voter(img).data, 1)   #(batch_num,)
            votes.append(pred_class)
        pred_list = torch.stack(votes, dim=1)

        # Majority vote per sample; a single host transfer instead of one per row.
        # np.unique sorts the classes, so ties resolve to the smallest class index.
        voted = []
        for row in pred_list.cpu().numpy():
            unique_class,count = np.unique(row,return_counts=True)
            voted.append(unique_class[np.argmax(count)])
        pred_class_list = torch.as_tensor(np.array(voted), dtype=torch.long, device=device)   #(batch_num,)

        # Fixed: accuracy used to be measured on pred_class (the last model's
        # prediction), which ignored the vote computed above.
        acc += (pred_class_list == target).sum().item()
        data_num += img.size(0)

    acc /= data_num
    print("Acc:{:.4f}%".format(acc*100))


In [None]:

# Positions where the recorded predictions disagree with the labels:
# first for `result`, then for `result1`.
for recorded_pred in (result, result1):
    print(np.where(label != recorded_pred)[0])

In [None]:
import torch
import numpy as np

# Scratch check of the tensor ops used by the average ensemble: concatenate
# per-model outputs along a new last axis, then average over that axis.
t1 = torch.Tensor([[1,2,3,4],[4,3,2,1],[1,5,3,3]])  #(3,4)
t1 = t1.unsqueeze(2)   #(3,4,1)

t_list = torch.Tensor([])

for i in range(3):
    t_list = torch.cat((t_list,t1),dim=2)

print(t_list.size())   #(3,4,3)
print(t_list)
t_list = torch.mean(t_list,dim=2)
print(t_list.size())   #(3,4)
print(t_list)


# n1 = t1.cpu().numpy()

# n1, count = np.unique(n1,return_counts=True,axis=0)
# print(count)
# n1 = np.argmax(count)

