In [11]:
import os, time
import numpy as np
import torch
from torchvision import transforms, datasets
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from torchsummary import summary
device = "cuda"


# Squeeze and Excitation Net

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class Seq_Ex_Block(nn.Module):
    """Squeeze-and-excitation block: rescale every channel by a learned gate.

    The gate is produced by global average pooling over the two trailing
    (spatial) dims, a bottleneck of two linear layers with a ReLU in between,
    and a final sigmoid.

    Args:
        in_ch: number of channels of the incoming feature map.
        r: reduction ratio; the bottleneck width is ``in_ch // r``.
    """

    def __init__(self, in_ch, r=16):
        super().__init__()
        bottleneck = in_ch // r
        # Positions inside the Sequential define the state_dict keys
        # (se.1.*, se.3.*), so the layer order must stay as it is.
        stages = [
            GlobalAvgPool(),
            nn.Linear(in_ch, bottleneck),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, in_ch),
            nn.Sigmoid(),
        ]
        self.se = nn.Sequential(*stages)

    def forward(self, x):
        """Return ``x`` multiplied channel-wise by the excitation gate."""
        gate = self.se(x)
        # Two trailing singleton dims let the gate broadcast over H and W.
        return x * gate[..., None, None]

class GlobalAvgPool(nn.Module):
    """Average a tensor over its last two (spatial) dimensions.

    An input of shape ``(..., H, W)`` produces an output of shape ``(...)``.
    """

    def __init__(self):
        super(GlobalAvgPool, self).__init__()

    def forward(self, x):
        # ``reshape`` instead of ``view``: ``view`` raises a RuntimeError when the
        # two trailing dims cannot be merged without a copy (e.g. after a
        # transpose), whereas ``reshape`` copies only when it has to.
        return x.reshape(*(x.shape[:-2]), -1).mean(-1)

class SE_Net(nn.Module):
    """Convolutional classifier whose last conv stage is gated by an SE block.

    Layout: three conv stages (64, 128, 256 channels) in which every
    convolution is followed by ReLU and then batch norm. The first two stages
    end in 2x2 max-pooling and dropout; the third ends in a
    squeeze-and-excitation block and dropout. Two batch-normalised linear
    layers and a 10-way linear output follow.

    The flatten step hard-codes a 256x2x2 feature map, which is what a 28x28
    input yields with these kernel sizes and paddings.

    Args:
        in_channels: number of channels of the input images.
    """

    def __init__(self,in_channels):
        super().__init__()
        # Batch-norm hyper-parameters shared by every norm layer.
        bn_kwargs = {"eps": 1e-3, "momentum": 0.01}

        # Layer creation order determines how the RNG is consumed during
        # initialisation, so it is kept stable.
        # --- stage 1: 64 channels ---
        self.c1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(64, **bn_kwargs)
        self.c2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(64, **bn_kwargs)
        self.c3 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm2d(64, **bn_kwargs)
        self.m1 = nn.MaxPool2d(2)
        self.d1 = nn.Dropout(0.2)

        # --- stage 2: 128 channels ---
        self.c4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(128, **bn_kwargs)
        self.c5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(128, **bn_kwargs)
        self.c6 = nn.Conv2d(128, 128, kernel_size=5, stride=1, padding=2)
        self.bn6 = nn.BatchNorm2d(128, **bn_kwargs)
        self.m2 = nn.MaxPool2d(2)
        self.d2 = nn.Dropout(0.2)

        # --- stage 3: 256 channels with channel re-weighting ---
        self.c7 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)
        self.bn7 = nn.BatchNorm2d(256, **bn_kwargs)
        self.se1 = Seq_Ex_Block(in_ch=256, r=16)
        self.d3 = nn.Dropout(0.2)

        # --- classifier head ---
        self.fc1 = nn.Linear(256*2*2, 256)
        self.bn8 = nn.BatchNorm1d(256, **bn_kwargs)

        self.fc2 = nn.Linear(256, 128)
        self.bn9 = nn.BatchNorm1d(128, **bn_kwargs)

        self.out = nn.Linear(128, 10)

        self.init_linear_weights()

    def forward(self,x):
        """Return the raw 10-way scores (logits) for a batch of images."""
        stage1 = ((self.c1, self.bn1), (self.c2, self.bn2), (self.c3, self.bn3))
        for conv, norm in stage1:
            x = norm(F.relu(conv(x)))
        x = self.d1(self.m1(x))

        stage2 = ((self.c4, self.bn4), (self.c5, self.bn5), (self.c6, self.bn6))
        for conv, norm in stage2:
            x = norm(F.relu(conv(x)))
        x = self.d2(self.m2(x))

        x = self.bn7(F.relu(self.c7(x)))
        x = self.d3(self.se1(x))

        # Flatten to (batch, 1024) for the linear head.
        x = x.view(-1, 256*2*2)

        # No non-linearity between the linear layers, only batch norm.
        x = self.bn8(self.fc1(x))
        x = self.bn9(self.fc2(x))

        return self.out(x)

    def init_linear_weights(self):
        """Apply Kaiming-normal (fan_in) initialisation to the linear weights."""
        for layer in (self.fc1, self.fc2, self.out):
            nn.init.kaiming_normal_(layer.weight, mode='fan_in')
        
        

# Conv model definitions

In [12]:
import torch.nn as nn
import torch.nn.functional as F
class convNet(nn.Module):
    """Plain convolutional classifier with a 10-way output.

    Layout: three conv stages (64, 128, 256 channels) in which every
    convolution is followed by ReLU and then batch norm; each stage ends in
    2x2 max-pooling and dropout. Two batch-normalised linear layers and a
    10-way linear output follow.

    The flatten step hard-codes a 256x1x1 feature map, which is what a 28x28
    input yields with these kernel sizes and paddings.

    Args:
        in_channels: number of channels of the input images.
    """

    def __init__(self,in_channels):
        super().__init__()
        # Batch-norm hyper-parameters shared by every norm layer.
        bn_kwargs = {"eps": 1e-3, "momentum": 0.01}

        # Layer creation order determines how the RNG is consumed during
        # initialisation, so it is kept stable.
        # --- stage 1: 64 channels ---
        self.c1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(64, **bn_kwargs)
        self.c2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(64, **bn_kwargs)
        self.c3 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm2d(64, **bn_kwargs)
        self.m1 = nn.MaxPool2d(2)
        self.d1 = nn.Dropout(0.2)

        # --- stage 2: 128 channels ---
        self.c4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(128, **bn_kwargs)
        self.c5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(128, **bn_kwargs)
        self.c6 = nn.Conv2d(128, 128, kernel_size=5, stride=1, padding=2)
        self.bn6 = nn.BatchNorm2d(128, **bn_kwargs)
        self.m2 = nn.MaxPool2d(2)
        self.d2 = nn.Dropout(0.2)

        # --- stage 3: 256 channels ---
        self.c7 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)
        self.bn7 = nn.BatchNorm2d(256, **bn_kwargs)
        self.m3 = nn.MaxPool2d(2)
        self.d3 = nn.Dropout(0.2)

        # --- classifier head ---
        self.fc1 = nn.Linear(256*1*1, 256)
        self.bn8 = nn.BatchNorm1d(256, **bn_kwargs)

        self.fc2 = nn.Linear(256, 128)
        self.bn9 = nn.BatchNorm1d(128, **bn_kwargs)

        self.out = nn.Linear(128, 10)

        self.init_linear_weights()

    def forward(self,x):
        """Return the raw 10-way scores (logits) for a batch of images."""
        stage1 = ((self.c1, self.bn1), (self.c2, self.bn2), (self.c3, self.bn3))
        for conv, norm in stage1:
            x = norm(F.relu(conv(x)))
        x = self.d1(self.m1(x))

        stage2 = ((self.c4, self.bn4), (self.c5, self.bn5), (self.c6, self.bn6))
        for conv, norm in stage2:
            x = norm(F.relu(conv(x)))
        x = self.d2(self.m2(x))

        x = self.bn7(F.relu(self.c7(x)))
        x = self.d3(self.m3(x))

        # Flatten to (batch, 256) for the linear head.
        x = x.view(-1, 256*1*1)

        # No non-linearity between the linear layers, only batch norm.
        x = self.bn8(self.fc1(x))
        x = self.bn9(self.fc2(x))

        return self.out(x)

    def init_linear_weights(self):
        """Apply Kaiming-normal (fan_in) initialisation to the linear weights."""
        for layer in (self.fc1, self.fc2, self.out):
            nn.init.kaiming_normal_(layer.weight, mode='fan_in')

In [None]:
# Build the plain CNN (uncomment the next line to inspect SE_Net instead),
# move it to the GPU and print a per-layer summary for a 1x28x28 input.
model = convNet(in_channels=1)
# model = SE_Net(in_channels=1)
model.cuda()  # requires a CUDA device; the module-level `device` is not consulted here
summary(model, input_size=(1, 28, 28))

In [None]:
from imgaug import augmenters as iaa
# from imgaug.augmentables.segmaps import SegmentationMapOnImage

class ImgAugTransform:
    """Callable that runs a fixed imgaug pipeline on a single image.

    The pipeline is built once in ``__init__``; calling the instance converts
    the input to a NumPy array and returns the augmented array.
    """

    def __init__(self):
        # Augmenters are applied in list order by ``iaa.Sequential``.
        steps = [
            iaa.Sometimes(0.1, iaa.AverageBlur(1.2)),
            iaa.Sometimes(
                0.5,
                iaa.Affine(
                    rotate=(-35, 35),
                    order=[0, 1],
                    translate_px={"x":(-3, 3),"y":(-4,4)},
                    mode='symmetric',
                ),
            ),
            iaa.Sometimes(0.5, iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.25))),
            iaa.Sometimes(0.1, iaa.SaltAndPepper(0.05, False)),
            iaa.Invert(0.5),
            iaa.Sometimes(0.2, iaa.GammaContrast(2)),
        ]
        self.aug = iaa.Sequential(steps)

    def __call__(self, img, mask=None):
        """Augment ``img``; ``mask`` is accepted but not used."""
        as_array = np.array(img)
        return self.aug.augment_image(image=as_array)

# Transforms and dataset definitions

In [14]:
# Training-time pipeline: random affine augmentation, then conversion to a
# float tensor. The commented-out entries are alternatives kept for reference.
trans = transforms.Compose([
#         transforms.ColorJitter(0.9,0.2,0.2,0.5),
        transforms.RandomAffine(degrees=10,translate=(0.25,0.25),scale=(0.75,1.25),shear=5),
#         transforms.RandomAffine(degrees=10,translate=(0.2,0.2),scale=[0.9,1.1]), #For native distinguisher
#         ImgAugTransform(),
#         lambda x: Image.fromarray(x),
        transforms.ToTensor(),  # takes a PIL image and converts it to a tensor with values from 0 to 1
#         transforms.Normalize(mean=[0.08889289],std=[0.24106446])  #train_large dataset distribution
#         transforms.Normalize(mean=[0.08229437],std=[0.23876116]) #train dataset dist
#         transforms.Normalize(mean=[0.09549136],std=[0.24336776]) #dig_augmented distribution
    ])

# Validation-time pipeline: no augmentation, tensor conversion only.
trans_val = transforms.Compose([
        transforms.ToTensor(),  # takes a PIL image and converts it to a tensor with values from 0 to 1
#         transforms.Normalize(mean=[0.08889289],std=[0.24106446])  #train_large dataset distribution
#         transforms.Normalize(mean=[0.08229437],std=[0.23876116]) #train dataset dist
#         transforms.Normalize(mean=[0.09549136],std=[0.24336776]) #dig_augmented distribution
    ])

# Test-time pipeline: identical to the validation pipeline.
trans_test = transforms.Compose([
        transforms.ToTensor(),
#         transforms.Normalize(mean=[0.08889289],std=[0.24106446])  #train_large dataset distribution
#         transforms.Normalize(mean=[0.08229437],std=[0.23876116]) #train dataset dist
#         transforms.Normalize(mean=[0.09549136],std=[0.24336776]) #dig_augmented distribution
])

# CSV files are read once at module level and shared by the dataset classes:
# KMnistDataset reads `global_data`, KMnistDataset_binary_aid reads
# `global_data_large`. `global_dig_aug_data` is only referenced from a
# commented-out line in KMnistDataset.
global_data = pd.read_csv("./dataset/train.csv")
global_dig_aug_data = pd.read_csv("./dataset/Dig-Mnist-Augmented.csv")
# global_dig_data = pd.read_csv("./dataset/Dig-MNIST.csv")
global_data_large = pd.read_csv("./dataset/train_large.csv")

class KMnistDataset(Dataset):
    """Train or validation view over the module-level ``global_data`` frame.

    Each row holds the label in column 0 followed by the 784 pixel values of a
    28x28 image. Samples are addressed through ``indices``: the first
    ``int(data_len * (1 - validate_rate))`` entries form the training view and
    the remaining entries form the validation view.

    Args:
        data_len: number of entries of ``indices`` to split; defaults to the
            number of rows in the frame.
        is_validate: select the validation view (and ``trans_val``) instead of
            the training view (and ``trans``).
        validate_rate: fraction in ``[0, 1]`` held out for validation; ``None``
            means no hold-out (0.0).
        indices: sequence mapping positions to frame rows; ``None`` means the
            identity ordering.

    Raises:
        ValueError: if ``validate_rate`` is outside ``[0, 1]``.
    """
    def __init__(self,data_len=None, is_validate=False,validate_rate=None,indices=None):
        self.is_validate = is_validate
        self.data = global_data
#         self.data = global_dig_aug_data    ################Temp Revised Caution##############

        if data_len is None:
            data_len = len(self.data)

        # The signature defaults are None; give them usable meanings instead of
        # failing later with a TypeError in the arithmetic / indexing below.
        if validate_rate is None:
            validate_rate = 0.0
        if not 0.0 <= validate_rate <= 1.0:
            raise ValueError("validate_rate must be within [0, 1], got {}".format(validate_rate))
        if indices is None:
            indices = np.arange(data_len)
        self.indices = indices

        # One split point shared by both views: they can neither overlap nor
        # lose a row to two independent float truncations.
        split = int(data_len*(1-validate_rate))
        if self.is_validate:
            self.len = data_len - split
            self.offset = split
            self.transform = trans_val
        else:
            self.len = split
            self.offset = 0
            self.transform = trans

    def __getitem__(self, idx):
        """Return ``(img, label)``: a (1,28,28) float tensor and a uint8 label tensor."""
        # Without this check the training view would run on into the
        # validation rows when the dataset is iterated directly.
        if not 0 <= idx < self.len:
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        row = self.indices[idx + self.offset]
        img = self.data.iloc[row, 1:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        label = self.data.iloc[row, 0]  #(num,)
        img = Image.fromarray(img)
        img = self.transform(img)     #value: 0~1, shape:(1,28,28)
        label = torch.as_tensor(label, dtype=torch.uint8)    #value: 0~9
        return img, label

    def __len__(self):
        return self.len

class KMnistDataset_binary_aid(Dataset):
    """Train or validation view over the module-level ``global_data_large`` frame.

    Each row holds the "native" label in column 0, the digit label in column 1
    and the 784 pixel values of a 28x28 image after that. Samples are addressed
    through ``indices``: the first ``int(data_len * (1 - validate_rate))``
    entries form the training view and the remaining entries form the
    validation view.

    Args:
        data_len: number of entries of ``indices`` to split; defaults to the
            number of rows in the frame.
        is_validate: select the validation view (and ``trans_val``) instead of
            the training view (and ``trans``).
        validate_rate: fraction in ``[0, 1]`` held out for validation; ``None``
            means no hold-out (0.0).
        indices: sequence mapping positions to frame rows; ``None`` means the
            identity ordering.

    Raises:
        ValueError: if ``validate_rate`` is outside ``[0, 1]``.
    """
    def __init__(self,data_len=None, is_validate=False,validate_rate=None,indices=None):
        self.is_validate = is_validate
        self.data = global_data_large

        if data_len is None:
            data_len = len(self.data)

        # The signature defaults are None; give them usable meanings instead of
        # failing later with a TypeError in the arithmetic / indexing below.
        if validate_rate is None:
            validate_rate = 0.0
        if not 0.0 <= validate_rate <= 1.0:
            raise ValueError("validate_rate must be within [0, 1], got {}".format(validate_rate))
        if indices is None:
            indices = np.arange(data_len)
        self.indices = indices

        # One split point shared by both views: they can neither overlap nor
        # lose a row to two independent float truncations.
        split = int(data_len*(1-validate_rate))
        if self.is_validate:
            self.len = data_len - split
            self.offset = split
            self.transform = trans_val
        else:
            self.len = split
            self.offset = 0
            self.transform = trans

    def __getitem__(self, idx):
        """Return ``(img, native_label, label)`` for position ``idx`` of this view."""
        # Without this check the training view would run on into the
        # validation rows when the dataset is iterated directly.
        if not 0 <= idx < self.len:
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        row = self.indices[idx + self.offset]
        img = self.data.iloc[row, 2:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        native_label = self.data.iloc[row, 0]  #(num,)
        label = self.data.iloc[row, 1]  #(num,)
        img = Image.fromarray(img)
        img = self.transform(img)     #value: 0~1, shape:(1,28,28)
        # Scalar class index, as expected by CrossEntropyLoss (a BCE loss would
        # need an extra trailing dim). Presumably 0/1 -- confirm against the CSV.
        native_label = torch.as_tensor(native_label, dtype=torch.uint8)
        label = torch.as_tensor(label, dtype=torch.uint8)    #value: 0~9
        return img, native_label, label

    def __len__(self):
        return self.len

    
class TestDataset(Dataset):
    """Unlabelled test images read from ``./dataset/test.csv``.

    Column 0 of each row is skipped; the remaining 784 values are the pixels of
    a 28x28 image.

    Args:
        data_len: optional cap on the number of rows exposed; ``None`` exposes
            every row of the file.
    """
    def __init__(self,data_len=None):
        self.data = pd.read_csv("./dataset/test.csv")
        print("data shape:", np.shape(self.data))
        self.transform = trans_test
        total = len(self.data)
        # ``self.len`` must be set on every path: it used to be assigned only
        # when data_len was None, so passing a length made __len__ raise
        # AttributeError. The cap can never exceed the rows actually present.
        self.len = total if data_len is None else min(data_len, total)

    def __getitem__(self, idx):
        """Return ``(img, empty_tensor)``; the empty tensor stands in for a label."""
        if not 0 <= idx < self.len:
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        img = self.data.iloc[idx, 1:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        img = Image.fromarray(img)
        img = self.transform(img)     #value: 0~1, shape:(1,28,28)
        return img, torch.Tensor([])

    def __len__(self):
        return self.len
    

# Get kfold dataset loader

In [15]:
def get_kfold_dataset_loader(k=5,val_rate=0.1,indices_len=None, batch_size=None,num_workers=None, binary_aid=False):
    """Build one train loader and one validation loader per fold.

    For fold ``i`` the index array ``0..indices_len-1`` is reordered so that
    the ``i``-th contiguous block of ``indices_len // k`` entries sits at the
    end. The dataset classes then use the last ``val_rate`` fraction of that
    array for validation and the rest for training.

    NOTE: the held-out block and the validation view coincide only when
    ``val_rate == 1 / k`` (e.g. k=10, val_rate=0.1); other combinations still
    run but the folds no longer line up with the block boundaries.

    Args:
        k: number of folds.
        val_rate: fraction of the index array used for validation.
        indices_len: number of samples to split (required).
        batch_size: forwarded to ``DataLoader``.
        num_workers: forwarded to ``DataLoader``; ``None`` means 0.
        binary_aid: use ``KMnistDataset_binary_aid`` instead of ``KMnistDataset``.

    Returns:
        ``(train_loader_list, val_loader_list)``, each of length ``k``.

    Raises:
        TypeError: if ``indices_len`` is not provided.
    """
    if indices_len is None:
        raise TypeError("indices_len must be an integer, got None")
    # DataLoader rejects num_workers=None; fall back to in-process loading.
    if num_workers is None:
        num_workers = 0

    dataset_cls = KMnistDataset_binary_aid if binary_aid else KMnistDataset

    train_loader_list = []
    val_loader_list = []
    indices = np.arange(indices_len)
    val_len = indices_len//k

    for fold in range(k):
        start = fold*val_len
        stop = start+val_len
        # Move this fold's block to the tail, where the validation view reads.
        ind = np.concatenate([indices[:start],indices[stop:],indices[start:stop]])

        # Size the split from the index array itself; sizing it from the full
        # CSV (data_len=None) reads past the end of `ind` whenever indices_len
        # is smaller than the number of rows.
        train_dataset = dataset_cls(data_len=indices_len,is_validate=False, validate_rate=val_rate,indices=ind)
        val_dataset = dataset_cls(data_len=indices_len,is_validate=True, validate_rate=val_rate, indices=ind)

        train_loader_list.append(
            DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers))
        val_loader_list.append(
            DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers))

    return train_loader_list, val_loader_list


# Get models

In [16]:
def get_model(native_net=False):
    """Create a fresh classifier and place it on the module-level ``device``.

    Args:
        native_net: when truthy build ``convNet_native``, otherwise ``convNet``.
            NOTE(review): ``convNet_native`` is not defined in the visible part
            of this notebook; it must exist before this path is used.

    Returns:
        The newly constructed model, already moved to ``device``.
    """
    #Advance model
    # model_name = 'efficientnet-b0'
    # image_size = EfficientNet.get_image_size(model_name)
    # model = EfficientNet.from_pretrained(model_name, num_classes=10)
    # model = model.to(device)

    #Basic cnn
    if native_net:
        model = convNet_native(in_channels=1)
    else:
#         model = SE_Net(in_channels=1)
        model = convNet(in_channels=1)

    # model.load_state_dict(torch.load("./Kmnist_saved_model/ep20_acc0.9910"))

    #pretrained model
#     model = models.resnet18()
#     model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,bias=False)

    # `.to(device)` also handles values such as "cuda:0" or a torch.device,
    # which an equality test against the literal "cuda" silently skipped.
    return model.to(device)


# Get dataset distribution

In [None]:
# train distribution: mean=[0.08229437],std=[0.23876116]
# dig augmented distribution: mean=[0.09549136],std=[0.24336776]
# train large distribution: mean=[0.08889286],std=[0.24106438]

def get_dataset_mean_std(dataloader):
    """Average the per-image, per-channel mean and std over a dataloader.

    For every image the mean and standard deviation are taken over its
    flattened spatial dims; those per-image statistics are then averaged over
    all images seen. Progress is printed whenever the running sample count is
    a multiple of 5120.

    Args:
        dataloader: iterable of batches whose first element is an image tensor
            with batch and channel as the two leading dims.

    Returns:
        ``(mean, std)`` as NumPy arrays with one entry per channel.
    """
    print("Calculate distribution:")
    mean_total = 0.
    std_total = 0.
    seen = 0.
    for batch in dataloader:
        images = batch[0].to(device)
        count = images.size(0)
        # (N, C, H, W) -> (N, C, H*W) so statistics run over the pixels.
        pixels = images.contiguous().view(count, images.size(1), -1)
        mean_total = mean_total + pixels.mean(2).sum(0)
        std_total = std_total + pixels.std(2).sum(0)
        seen = seen + count
        if seen % 5120 == 0:
            print("Finished:", seen)

    print("num of samples:",seen)
    mean_total /= seen
    std_total /= seen
    return mean_total.cpu().numpy(), std_total.cpu().numpy()

# Get train and val loaders

In [17]:
# Loader configuration shared by the training cells below.
batch_size = 256
num_workers = 12
vr = 0.1          # validation fraction handed to the dataset classes
k = 10            # number of folds; vr == 1/k keeps the held-out block and the validation view aligned
indices_len = 60000   # number of samples split across the folds
# indices_len = 10240  ################Temp Revised Caution##############
# indices_len = 120000

###Single dataset
# The commented block below computes mean/std statistics for individual
# datasets with get_dataset_mean_std; it is kept for reference only.
# indices = np.arange(indices_len)
# train_dataset = KMnistDataset(data_len=None,is_validate=False,validate_rate=vr,indices=indices)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# mean, std = get_dataset_mean_std(train_loader)
# print("train distribution: mean={},std={}".format(mean, std))

# indices = np.arange(10240)
# dig_val_dataset = DigValDataset(data_len=None,indices=indices)
# dig_val_loader = DataLoader(dig_val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# mean, std = get_dataset_mean_std(dig_val_loader)
# print("validate distribution:",mean, std)

# test_dataset = TestDataset(data_len=None)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# mean, std = get_dataset_mean_std(test_loader)
# print("test distribution:",mean, std)

###K-fold dataset
# binary_aid=False yields (img, label) batches from KMnistDataset; a training
# loop that unpacks (img, native_label, label) needs loaders built with
# binary_aid=True instead.
train_loaders, val_loaders = get_kfold_dataset_loader(k, vr, indices_len, batch_size, num_workers, binary_aid=False)

# Train native classifier

In [None]:
if __name__ == "__main__":
    # Train one "native" classifier per fold and save the weights of the epoch
    # with the best validation accuracy.
    # NOTE: every batch is unpacked as (img, native_label, digit_label), which
    # is what KMnistDataset_binary_aid yields, so `train_loaders` and
    # `val_loaders` must have been built with binary_aid=True for this cell.
    epochs = 120
    period = 40
    ensemble_models = []
    lr = 1e-3
    val_period = 1

    criterion_b = torch.nn.CrossEntropyLoss()

    print("Fold:",len(train_loaders))

    for fold in range(len(train_loaders)):
        train_loader = train_loaders[fold]
        val_loader = val_loaders[fold]

        model = get_model(native_net=True)
        torch.cuda.empty_cache()    #Need further check

        max_acc_b = 0
        min_loss_b = 10000
        best_model_dict = None
        data_num = 0
        loss_avg_b = 0

        optimizer = torch.optim.Adam(model.parameters(),lr=lr,betas=(0.9,0.99))
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=15)

        for ep in range(0,epochs+1):
            model.train()
            for idx, data in enumerate(train_loader):
                img, target_b, target = data
                img, target_b, target = img.to(device), target_b.to(device,dtype=torch.long), target.to(device,dtype=torch.long)
                pred_b = model(img)

                loss_b = criterion_b(pred_b,target_b)
                loss_avg_b += loss_b.item()

                data_num += img.size(0)
                optimizer.zero_grad()
                loss_b.backward()
                optimizer.step()

            ###Validation
            if ep!=0 and ep%val_period == 0:
                model.eval()
                acc_b = 0
                val_loss_b = 0
                data_num  = 0
                with torch.no_grad():
                    for idx, data in enumerate(val_loader):
                        img, target_b, target = data
                        img, target_b, target = img.to(device), target_b.to(device,dtype=torch.long), target.to(device,dtype=torch.long)
                        pred_b = model(img)

                        val_loss_b += criterion_b(pred_b,target_b).item()

                        _,pred_native_class = torch.max(pred_b.data, 1)

                        acc_b += (pred_native_class == target_b).sum().item()
                        data_num += img.size(0)

                acc_b /= data_num
                # NOTE: this sums per-batch mean losses and divides by the
                # sample count, so the value is roughly (mean loss / batch
                # size). It is kept as is because the checkpoint file names
                # below format this scale.
                val_loss_b /= data_num

                ###Plateau
                lr_scheduler.step(val_loss_b)
                if optimizer.param_groups[0]['lr'] < 1e-4:
                    break

                if acc_b >= max_acc_b:
                    max_acc_b = acc_b
                    # state_dict() returns tensors that share storage with the
                    # live parameters; clone them so later optimizer steps do
                    # not overwrite the "best" snapshot.
                    best_model_dict = model.state_dict()
                    for key, value in best_model_dict.items():
                        best_model_dict[key] = value.clone()

                # Compare against this cell's own running minimum
                # (`min_loss` is not defined in this cell).
                if val_loss_b <= min_loss_b:
                    min_loss_b = val_loss_b

                print("Episode:{}, Validation Loss:{},Acc_b:{:.3f}%,lr:{}"
                      .format(ep,val_loss_b,acc_b*100,optimizer.param_groups[0]['lr']))

            # Periodic safety checkpoint of the best weights so far.
            if ep!=0 and ep%10 == 0:
                torch.save(best_model_dict, "./Kmnist_saved_model/tmp_Fold{}_acc_b{:.3f}".format(fold,max_acc_b*1e2))

        ###K-Fold ensemble: Saved k best model for k dataloader
        print("===================Best Fold:{} Saved, Acc:{}==================".format(fold,max_acc_b))
        torch.save(best_model_dict, "./Kmnist_saved_model/Fold{}_loss{:.4f}_acc_b{:.3f}".format(fold,min_loss_b*1e3,max_acc_b*1e2))
        print("======================================================")

# Train digit classifier

In [18]:
if __name__ == "__main__":
    # Train one digit classifier per fold and save the weights of the epoch
    # with the best validation accuracy. Batches are unpacked as (img, label),
    # i.e. loaders built with binary_aid=False.
    epochs = 200
    period = 40
    ensemble_models = []
    lr = 1e-3
    val_period = 1

    criterion = torch.nn.CrossEntropyLoss()
#     criterion_b = torch.nn.BCEWithLogitsLoss()

    print("Fold:",len(train_loaders))

    for fold in range(len(train_loaders)):
        train_loader = train_loaders[fold]
        val_loader = val_loaders[fold]

        model = get_model()

        max_acc = 0
        min_loss = 10000
        best_model_dict = None
        data_num = 0
        loss_avg = 0

        optimizer = torch.optim.Adam(model.parameters(),lr=lr)
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=15)

        for ep in range(0,epochs+1):
            model.train()
            for idx, data in enumerate(train_loader):
                img, target = data
                img, target = img.to(device), target.to(device,dtype=torch.long)

                pred = model(img)   # (batch, 10) class scores

                # CrossEntropyLoss: scores of shape (batch, 10), integer class
                # targets of shape (batch,).
                loss = criterion(pred,target)

                loss_avg += loss.item()
                data_num += img.size(0)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            ###Validation
            if ep!=0 and ep%val_period == 0:
                model.eval()
                acc = 0
                val_loss = 0
                data_num  = 0
                with torch.no_grad():
                    for idx, data in enumerate(val_loader):
                        img, target = data
                        img, target = img.to(device), target.to(device,dtype=torch.long)
                        pred = model(img)

                        val_loss += criterion(pred, target).item()

                        _,pred_class = torch.max(pred.data, 1)
                        acc += (pred_class == target).sum().item()
                        data_num += img.size(0)

                acc /= data_num
                # NOTE: this sums per-batch mean losses and divides by the
                # sample count, so the value is roughly (mean loss / batch
                # size). It is kept as is because the checkpoint file names
                # below format this scale.
                val_loss /= data_num

                ###Plateau
                lr_scheduler.step(val_loss)
                if optimizer.param_groups[0]['lr'] < 1e-5:
                    break

                if acc >= max_acc:
                    max_acc = acc
                    # state_dict() returns tensors that share storage with the
                    # live parameters; clone them so later optimizer steps do
                    # not overwrite the "best" snapshot.
                    best_model_dict = model.state_dict()
                    for key, value in best_model_dict.items():
                        best_model_dict[key] = value.clone()

                if val_loss <= min_loss:
                    min_loss = val_loss

                print("Episode:{}, Validation Loss:{},Acc:{:.4f}%,lr:{}"
                      .format(ep,val_loss,acc*100,optimizer.param_groups[0]['lr']))

        ###K-Fold ensemble: Saved k best model for k dataloader
        print("===================Best Fold:{} Saved, Acc:{}==================".format(fold,max_acc))
        torch.save(best_model_dict, "./Kmnist_saved_model/batch256_Fold{}_loss{:.4f}_acc{:.3f}".format(fold,min_loss*1e3,max_acc*1e2))
        print("======================================================")

        del model


Fold: 10
Episode:1, Validation Loss:0.0016770446275671323,Acc:87.1833%,lr:0.001
Episode:2, Validation Loss:0.0003218738387028376,Acc:97.4333%,lr:0.001
Episode:3, Validation Loss:0.0001683667277296384,Acc:99.0333%,lr:0.001
Episode:4, Validation Loss:0.000250692555680871,Acc:98.4000%,lr:0.001
Episode:5, Validation Loss:0.000272042660197864,Acc:98.2333%,lr:0.001
Episode:6, Validation Loss:0.0002583980684479078,Acc:98.1667%,lr:0.001
Episode:7, Validation Loss:0.0001725691071090599,Acc:98.8333%,lr:0.001
Episode:8, Validation Loss:0.0002699875749337177,Acc:98.2833%,lr:0.001
Episode:9, Validation Loss:0.00020472534745931626,Acc:98.9000%,lr:0.001
Episode:10, Validation Loss:0.00016478395167117318,Acc:99.1667%,lr:0.001
Episode:11, Validation Loss:0.00022754265430072944,Acc:98.8667%,lr:0.001
Episode:12, Validation Loss:0.00014300018595531582,Acc:99.0000%,lr:0.001
Episode:13, Validation Loss:0.00020462230856840808,Acc:98.7667%,lr:0.001
Episode:14, Validation Loss:0.00018684749165549875,Acc:98.683

# Native classifier ensemble inference

In [None]:
# Two-stage inference: a "native" classifier decides, per sample, which of two
# digit ensembles supplies the final prediction; accuracy is then measured
# against the digit labels.
#
# Normalisation statistics recorded for the individual datasets:
# transforms.Normalize(mean=[0.08229437],std=[0.23876116]) #train dataset dist
# transforms.Normalize(mean=[0.09549136],std=[0.24336776]) #dig_augmented distribution        
# transforms.Normalize(mean=[0.08889289],std=[0.24106446])  #train_large dataset distribution


ensemble_root = "./Kmnist_saved_model/emsemble/20_fold_simple_cnn_ensemble"   #model-> 1 fc(512) + dropout(0.1)
ensemble_models = []

ensemble_root_dig = "./Kmnist_saved_model/emsemble/dig_aug_ensemble"    #model-> 1 fc(256) + dropout(0.2)
ensemble_models_dig = []

data_num = 0
acc = 0

# NOTE(review): only mean_large/std_large are used below; mean/std and
# mean_dig/std_dig are defined but never read -- confirm that both ensembles
# are meant to receive the train_large normalisation.
mean,std = 0.08229437, 0.23876116
mean_dig,std_dig = 0.09549136, 0.24336776
mean_large,std_large = 0.08889289, 0.24106446

# validate_rate=1 with is_validate=True makes the "validation" view cover the
# whole index array (offset 0, length data_len).
vr = 1
# indices = np.arange(60000)
# test_dataset = KMnistDataset(data_len=None,is_validate=True, validate_rate=vr,indices=indices)
indices = np.arange(120000)  # presumably the row count of train_large.csv -- TODO confirm
test_dataset = KMnistDataset_binary_aid(data_len=None,is_validate=True, validate_rate=vr,indices=indices)

test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=12)

# NOTE(review): convNet_native and convNet_tmp are not defined in the visible
# part of this notebook; they must be defined before this cell runs.
native_model = convNet_native(in_channels=1)
native_model.cuda()
native_model.load_state_dict(torch.load("./Kmnist_saved_model/emsemble/native_classifier/Fold0_loss0.0242_acc_b99.704_without_aug"))
native_model.eval()


# Load only the checkpoints whose file name has "18" at characters 4-5.
for file_name in os.listdir(ensemble_root):
    if file_name[4:6] != "18":
        continue
    model = convNet_tmp(in_channels=1)
    model.cuda()
    model.load_state_dict(torch.load("{}/{}".format(ensemble_root,file_name)))
    model.eval()
    ensemble_models.append(model)

# Load only the checkpoints whose file name has "0" at character 4.
for file_name in os.listdir(ensemble_root_dig):
    if file_name[4] != "0":
        continue
    model = convNet(in_channels=1)
    model.cuda()
    model.load_state_dict(torch.load("{}/{}".format(ensemble_root_dig,file_name)))
    model.eval()
    ensemble_models_dig.append(model)

### Test Native Classifier
with torch.no_grad():
    for idx,data in enumerate(test_loader):
        ###Classify native or not
        img, target_b, target = data
        img, target = img.to(device), target.to(device,dtype=torch.long)
        # The native classifier sees the un-normalised image.
        _,pred_native = torch.max(native_model(img),dim=1)  #(batch_num,)

        norm_img = (img-mean_large)/std_large
        ###Classify by normal model, Average Ensemble
        pred_list = torch.Tensor([]).to(device)
        model_num = len(ensemble_models)
        for i in range(model_num):
            pred = ensemble_models[i](norm_img) #(batch_num,10)
            pred_list = torch.cat((pred_list,pred.unsqueeze(2)),dim=2) #pred_list: (batch_num,10,model_num)
        pred = torch.mean(pred_list,dim=2)   #(batch,10)
        _,pred_class = torch.max(pred.data, 1)   #(batch_num,)


        norm_img = (img-mean_large)/std_large
        ###Classify by dig_aug_model, Average Ensemble
        pred_list = torch.Tensor([]).to(device)
        model_num = len(ensemble_models_dig)
        for i in range(model_num):
            pred = ensemble_models_dig[i](norm_img) #(batch_num,10)
            pred_list = torch.cat((pred_list,pred.unsqueeze(2)),dim=2) #pred_list: (batch_num,10,model_num)
        pred = torch.mean(pred_list,dim=2)   #(batch,10)
        _,pred_class_dig = torch.max(pred.data, 1)   #(batch_num,)

        ###Make final result tensor
        # Where the native classifier predicts 1 the first ensemble's class is
        # used, otherwise the dig-augmented ensemble's class is used.
        native_mask = pred_native    #(batch_num,)  ex: ([1,0,0,1,0])
        nonnative_mask = torch.ones([img.size(0),], dtype=torch.long).to(device) - native_mask  #(batch_num,) ex:([0,1,1,0,1])

        r1 = (pred_class*native_mask)  #a*b = torch.mul(a,b)
        r2 = (pred_class_dig*nonnative_mask)
        pred_final = (r1+r2).to(torch.long)  #(batch_num,)

#         print("model1:",pred_class)
#         print("model2:",pred_class_dig)
#         print("mask:",native_mask)
#         print("non_mask:",nonnative_mask)
#         print("r1:",r1)
#         print("r2:",r2)
#         print("result:",result)
#         print("target:",target)
#         stop

#         acc += (pred_native).sum().item()
        acc += (pred_final == target).sum().item()
        data_num += img.size(0)

#     val_loss /= data_num
    acc /= data_num
    print("Acc:{:.4f}%".format(acc*100))


# Ensemble inference

In [None]:
# Majority-vote ensemble evaluation over every checkpoint in `ensemble_root`.
# NOTE(review): the absolute path below is machine-specific, and
# `validate_loader` is not defined anywhere in the visible notebook (the k-fold
# cell defines `val_loaders`); it must be created before this cell runs.
ensemble_root = "/home/ccchang/localization_net/Kmnist_saved_model/emsemble/5_fold_ep80_lr1e-2"
ensemble_models = []
data_num = 0
acc = 0

for file_name in os.listdir(ensemble_root):
    model = convNet(in_channels=1)
    model.cuda()
    model.load_state_dict(torch.load("{}/{}".format(ensemble_root,file_name)))
    model.eval()
    ensemble_models.append(model)

# Vote over the models that were actually loaded; deriving the count from
# unrelated training constants either indexed past the list or silently
# ignored checkpoints.
model_num = len(ensemble_models)
if model_num == 0:
    raise RuntimeError("No checkpoints found in {}".format(ensemble_root))

with torch.no_grad():
    for idx, data in enumerate(validate_loader):
        img, target = data
        img, target = img.to(device), target.to(device,dtype=torch.long)

        ###Voting Ensemble
        # Collect each model's predicted class: pred_list is (batch_num, model_num).
        pred_list = torch.LongTensor([]).to(device)
        for i in range(model_num):
            pred = ensemble_models[i](img) #(batch_num,10)
            _,pred_class = torch.max(pred.data, 1)   #(batch_num,)
            pred_list = torch.cat((pred_list,pred_class.unsqueeze(1)),dim=1)

        # Per sample, pick the most frequent class; np.unique sorts the
        # classes, so ties resolve to the smallest class index.
        pred_class_list = torch.LongTensor([]).to(device)
        for i in range(img.size(0)):
            pred_np = pred_list[i].cpu().numpy()
            unique_class,count = np.unique(pred_np,return_counts=True)
            unique_class = np.array(unique_class[np.argmax(count)]).reshape(-1)   #unique class shape(1,)
            class_voted= torch.from_numpy(unique_class).to(device)    #(1,)
            pred_class_list = torch.cat((pred_class_list,class_voted))

        acc += (pred_class_list == target).sum().item()
        data_num += img.size(0)

    acc /= data_num
    print("Acc:{:.4f}%".format(acc*100))


In [None]:
import torch
import numpy as np

# Scratch check of the tensor plumbing used by the average ensemble: stack
# three copies of a (3,4) tensor along a new last dim, then average them back.
t1 = torch.Tensor([[1,2,3,4],[4,3,2,1],[1,5,3,3]])  #(3,4)
t1 = t1.unsqueeze(2)  #(3,4,1)

# Start from an empty tensor; torch.cat accepts it as a no-op first operand.
t_list = torch.Tensor([])

for i in range(3):
    t_list = torch.cat((t_list,t1),dim=2)

print(t_list.size())  #(3,4,3)
print(t_list)
t_list = torch.mean(t_list,dim=2)
print(t_list.size())  #(3,4)
print(t_list)


# n1 = t1.cpu().numpy()

# n1, count = np.unique(n1,return_counts=True,axis=0)
# print(count)
# n1 = np.argmax(count)

