In [1]:
import os, time
import numpy as np
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from torchsummary import summary
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook can at least be imported and smoke-tested on a machine without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Sub: SE_Net3

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class Seq_Ex_Block(nn.Module):
    """Squeeze-and-excitation gate that rescales every channel of its input.

    The input is averaged over its last two dimensions, pushed through a
    two-layer bottleneck (``in_ch -> in_ch // r -> in_ch``) ending in a
    sigmoid, and the resulting per-channel weights multiply the input.

    Args:
        in_ch: Number of channels of the incoming feature map.
        r: Reduction ratio of the bottleneck.
    """

    def __init__(self, in_ch, r=16):
        super().__init__()
        bottleneck_ch = in_ch // r
        # Layer order (and therefore the state_dict keys se.1 / se.3) is fixed.
        gate_layers = [
            GlobalAvgPool(),
            nn.Linear(in_ch, bottleneck_ch),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck_ch, in_ch),
            nn.Sigmoid(),
        ]
        self.se = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate weights."""
        gate = self.se(x)
        # Append two singleton axes so the gate broadcasts over the last two dims.
        gate = gate[..., None, None]
        return x * gate

class GlobalAvgPool(nn.Module):
    """Average a tensor over its last two dimensions.

    An input of shape ``(..., H, W)`` becomes ``(...)``; every leading
    dimension is preserved.
    """
    def __init__(self):
        super(GlobalAvgPool, self).__init__()
    def forward(self, x):
        # reshape behaves like view on contiguous tensors but also accepts
        # non-contiguous ones (e.g. a transposed feature map), where view raises.
        return x.reshape(*(x.shape[:-2]),-1).mean(-1)

class SE_Net3(nn.Module):
    """Ten-class CNN with three conv stages and one squeeze-and-excitation block.

    Each stage applies ``conv -> leaky_relu(0.05) -> batch norm`` repeatedly and
    ends with 2x2 max pooling and dropout (p=0.5).  The third stage passes
    through ``Seq_Ex_Block`` before pooling.  With 28x28 inputs the unpadded
    convolutions and the three poolings shrink the feature map to 1x1, which is
    what ``fc1`` (256 input features) expects.

    Args:
        in_channels: Number of channels of the input images.
    """
    def __init__(self,in_channels):
        super(SE_Net3,self).__init__()
        # Positional arguments below follow
        #   nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #   nn.BatchNorm2d(num_features, eps, momentum)
        # Stage 1: 64 channels.
        self.c1 = nn.Conv2d(in_channels=in_channels, out_channels=64,kernel_size=3,stride=1,padding=0)
        self.bn1 = nn.BatchNorm2d(num_features=64,eps=1e-3,momentum=0.01)
        self.c2 = nn.Conv2d(64,64,3,1,0)
        self.bn2 = nn.BatchNorm2d(64,1e-3,0.01)
        self.c3 = nn.Conv2d(64,64,3,1,1)
        self.bn3 = nn.BatchNorm2d(64,1e-3,0.01)
        self.c4 = nn.Conv2d(64,64,5,1,2)
        self.bn4 = nn.BatchNorm2d(64,1e-3,0.01)        
        
        self.m1 = nn.MaxPool2d(2)
        self.d1 = nn.Dropout(0.5)
        
        # Stage 2: 128 channels.
        self.c5 = nn.Conv2d(64,128,3,1,0)
        self.bn5 = nn.BatchNorm2d(128,1e-3,0.01)
        self.c6 = nn.Conv2d(128,128,3,1,0)
        self.bn6 = nn.BatchNorm2d(128,1e-3,0.01)
        self.c7 = nn.Conv2d(128,128,3,1,1)
        self.bn7 = nn.BatchNorm2d(128,1e-3,0.01)
        self.c8 = nn.Conv2d(128,128,5,1,2)
        self.bn8 = nn.BatchNorm2d(128,1e-3,0.01)
        
        self.m2 = nn.MaxPool2d(2)
        self.d2 = nn.Dropout(0.5)
        
        # Stage 3: 256 channels followed by the squeeze-and-excitation gate.
        self.c9 = nn.Conv2d(128,256,3,1,0)
        self.bn9 = nn.BatchNorm2d(256,1e-3,0.01)
        self.c10 = nn.Conv2d(256,256,3,1,1)
        self.bn10 = nn.BatchNorm2d(256,1e-3,0.01)
        
        self.se1 = Seq_Ex_Block(in_ch=256,r=16)
        self.m3 = nn.MaxPool2d(2)
        self.d3 = nn.Dropout(0.5)

        # Classifier head.
        self.fc1 = nn.Linear(256*1*1,256)
        self.bn11 = nn.BatchNorm1d(256,1e-3,0.01)
        self.out = nn.Linear(256,10)
        
        self.init_linear_weights()
        
    def forward(self,x):
        """Return the (batch, 10) class scores for a batch of images."""
        x = self.bn1(F.leaky_relu(self.c1(x),0.05))
        x = self.bn2(F.leaky_relu(self.c2(x),0.05))
        x = self.bn3(F.leaky_relu(self.c3(x),0.05))
        x = self.bn4(F.leaky_relu(self.c4(x),0.05))
        x = self.d1(self.m1(x))
        
        x = self.bn5(F.leaky_relu(self.c5(x),0.05))
        x = self.bn6(F.leaky_relu(self.c6(x),0.05))
        x = self.bn7(F.leaky_relu(self.c7(x),0.05))
        x = self.bn8(F.leaky_relu(self.c8(x),0.05))
        x = self.d2(self.m2(x))
        
        x = self.bn9(F.leaky_relu(self.c9(x),0.05))
        x = self.bn10(F.leaky_relu(self.c10(x),0.05))
        x = self.se1(x)
        x = self.d3(self.m3(x))
        
        # Flatten per sample.  Keeping the batch dimension explicit means an
        # input of the wrong spatial size fails loudly in fc1 instead of being
        # silently folded into extra "samples" as view(-1, 256) would do.
        x = x.view(x.size(0), -1)
        x = self.bn11(F.leaky_relu(self.fc1(x),0.05))
        return self.out(x)
    
    def init_linear_weights(self):
        """Re-initialise the weights of both linear layers with Kaiming-normal (fan_in)."""
        nn.init.kaiming_normal_(self.fc1.weight, mode='fan_in')  #default mode: fan_in
        nn.init.kaiming_normal_(self.out.weight, mode='fan_in')


In [3]:
# Source CSVs.  `global_data` is only the starting point: the training cell
# replaces it with the combined CSV produced at every iteration.
global_data = pd.read_csv("./dataset/train.csv")
global_data_dig = pd.read_csv("./dataset/Dig-MNIST.csv")
global_data_test = pd.read_csv("./dataset/test.csv")

# Training pipeline: random affine augmentation (60k baseline settings),
# then PIL image -> tensor with values in 0..1.
trans = transforms.Compose(
    [
        transforms.RandomAffine(
            degrees=15,
            translate=(0.25, 0.25),
            scale=[0.7, 1.1],
            shear=8,
        ),
        transforms.ToTensor(),
    ]
)

# Validation and test pipelines: no augmentation, only PIL image -> tensor (0..1).
trans_val = transforms.Compose([transforms.ToTensor()])
trans_test = transforms.Compose([transforms.ToTensor()])


# KMnist Dataset Definition

In [4]:
class KMnistDataset(Dataset):
    """Train or validation view over a ``label,pixel0..pixel783`` DataFrame.

    Rows are addressed through ``indices``.  The first
    ``int(data_len * (1 - validate_rate))`` positions of ``indices`` form the
    training split; the validation split starts right after them and holds
    ``int(data_len * validate_rate)`` positions.

    Args:
        data_len: Number of rows to split; defaults to ``len(data)``.
        is_validate: Select the validation split (``trans_val``) instead of the
            training split (``trans``).
        validate_rate: Fraction of ``data_len`` reserved for validation.  It is
            required: both split sizes are computed from it.
        indices: Row order to read ``data`` in; defaults to the stored order.
        data: DataFrame whose first column is the label and whose remaining
            784 columns are the pixels of a 28x28 image.
    """
    def __init__(self,data_len=None, is_validate=False,validate_rate=None,indices=None, data=None):
        self.is_validate = is_validate
        self.data = data
        if data_len is None:
            data_len = len(self.data)
        
        # Without an explicit order, read the rows as stored.  Previously the
        # default indices=None crashed on the first __getitem__ call.
        if indices is None:
            indices = np.arange(data_len)
        self.indices = indices
        if self.is_validate:
            self.len = int(data_len*validate_rate)
            self.offset = int(data_len*(1-validate_rate))
            self.transform = trans_val
        else:
            self.len = int(data_len*(1-validate_rate))
            self.offset = 0
            self.transform = trans
        
    def __getitem__(self, idx):
        """Return ``(image tensor, label tensor)`` for position ``idx`` of this split."""
        # Stay inside this split: without the check an index past the end of
        # the training split would silently return a validation sample.
        if not 0 <= idx < self.len:
            raise IndexError("index {} is out of range for a split of size {}".format(idx, self.len))
        row = self.indices[idx + self.offset]
        img = self.data.iloc[row, 1:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        label = self.data.iloc[row, 0]  #(num,)
        img = Image.fromarray(img)
        img = self.transform(img)     #value: 0~1, shape:(1,28,28)
        label = torch.as_tensor(label, dtype=torch.uint8)    #value: 0~9, shape(1)
        return img, label

    def __len__(self):
        return self.len
    
class TestDataset(Dataset):
    """Unlabelled images read from ``./dataset/test.csv``.

    The first CSV column is skipped and the remaining 784 columns are treated
    as the pixels of a 28x28 image.

    Args:
        data_len: Optional cap on the number of rows exposed; defaults to every
            row of the CSV.
    """
    def __init__(self,data_len=None):
        self.data = pd.read_csv("./dataset/test.csv")
        self.transform = trans_test
        available = len(self.data)
        if data_len is None:
            self.len = available
        else:
            # Previously self.len was never assigned on this path, so len()
            # raised AttributeError.  Clamp so indexing cannot run past the CSV.
            self.len = min(int(data_len), available)
        
    def __getitem__(self, idx):
        """Return ``(image tensor, empty tensor)``; the empty tensor stands in for the missing label."""
        img = self.data.iloc[idx, 1:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
        img = Image.fromarray(img)
        img = self.transform(img)     #value: 0~1, shape:(1,28,28)
        return img, torch.Tensor([])

    def __len__(self):
        return self.len    

# Build K-fold dataset loaders & get model

In [5]:
def get_kfold_dataset_loader(k=5,val_rate=0.1,indices_len=None, batch_size=None,num_workers=None,data=None):
    """Build one (train, validation) DataLoader pair per fold.

    For fold ``i`` the ``i``-th slice of ``indices_len // k`` consecutive row
    indices is moved to the end of the index order; ``KMnistDataset`` takes its
    validation split from the tail of that order and its training split from
    the head, both sized by ``val_rate``.

    Returns:
        ``(train_loader_list, val_loader_list)``, each of length ``k``.
    """
    all_rows = np.arange(indices_len)
    fold_size = indices_len // k
    train_loader_list, val_loader_list = [], []

    for fold in range(k):
        fold_start = fold * fold_size
        fold_stop = fold_start + fold_size
        # Everything outside the fold first, the held-out fold last.
        order = np.concatenate([
            all_rows[:fold_start],
            all_rows[fold_stop:],
            all_rows[fold_start:fold_stop],
        ])

        train_dataset = KMnistDataset(data_len=None, is_validate=False,
                                      validate_rate=val_rate, indices=order, data=data)
        val_dataset = KMnistDataset(data_len=None, is_validate=True,
                                    validate_rate=val_rate, indices=order, data=data)

        # Only the training loader shuffles.
        train_loader_list.append(
            DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        )
        val_loader_list.append(
            DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        )

    return train_loader_list, val_loader_list

def get_model(native_net=False):
    """Create a fresh single-channel model and move it to ``device``.

    Args:
        native_net: Build ``convNet_native`` instead of ``SE_Net3``.
            NOTE(review): ``convNet_native`` is not defined in the visible part
            of this notebook — confirm it exists before passing True.

    Returns:
        The newly constructed model, placed on ``device``.
    """
    if native_net:
        model = convNet_native(in_channels=1)
    else:
        model = SE_Net3(in_channels=1)
    # Module.to handles every device string ("cuda", "cuda:1", "cpu", ...); the
    # previous `device == "cuda"` test left the model on the CPU for any other
    # CUDA spelling.
    return model.to(device)

# Run inference on the Dig-MNIST dataset and generate the top-1-correct subset

In [6]:
def Infer_and_gen_dig_dataset(step=None,ensemble_root=None):
    """Select the Dig-MNIST rows that the current ensemble classifies correctly.

    Every checkpoint in ``ensemble_root`` whose file name contains "Fold" is
    loaded into an ``SE_Net3``.  The models' outputs are averaged per batch and
    the three highest-scoring classes are kept.  Rows whose top-1 class equals
    the ground-truth label are written to disk twice: their row indices as a
    ``.npy`` file and the rows themselves as a ``label,pixel0..pixel783`` CSV.

    Args:
        step: Iteration number; only used in the output file names.
        ensemble_root: Directory holding the fold checkpoints.

    Returns:
        Path of the CSV containing the top-1-correct Dig-MNIST rows.

    Raises:
        RuntimeError: If ``ensemble_root`` contains no "Fold" checkpoint.
    """
    print("start inference & gernerate Dig dataset...")
    # Both output files go here; create it up front so saving cannot fail late.
    os.makedirs("./dataset_final/iterative_trained", exist_ok=True)

    vr = 1  # validate_rate=1 -> the validation view covers the whole Dig set
    indices=np.arange(len(global_data_dig))
    dataset = KMnistDataset(data_len=None,is_validate=True, validate_rate=vr,indices=indices,data=global_data_dig)
    loader = DataLoader(dataset, batch_size=128, shuffle=False, num_workers=8)

    ensemble_models = []
    # sorted(): os.listdir order is arbitrary; a fixed order keeps runs repeatable.
    for file_name in sorted(os.listdir(ensemble_root)):
        if "Fold" not in file_name:
            continue
        model = SE_Net3(in_channels=1)
        model.to(device)
        model.load_state_dict(torch.load("{}/{}".format(ensemble_root,file_name), map_location=device))
        model.eval()
        ensemble_models.append(model)

    model_num = len(ensemble_models)
    print("model num:",model_num)
    if model_num == 0:
        raise RuntimeError("no 'Fold' checkpoint found in {}".format(ensemble_root))

    ###Inference Dig dataset
    # Collect per-batch chunks and concatenate once.  The float seed arrays keep
    # the resulting dtypes the same as the former incremental concatenation.
    top3_chunks = [np.empty((0,3))]
    label_chunks = [np.array([])]
    data_num = 0
    with torch.no_grad():
        for img, label in loader:
            img = img.to(device)

            ###Average Ensemble
            pred_stack = torch.stack([m(img) for m in ensemble_models], dim=2)  #(batch_num,10,model_num)
            pred = torch.mean(pred_stack,dim=2)   #(batch,10)
            _,pred = torch.topk(pred,3)  #(batch_num,k), get topk result

            top3_chunks.append(pred.cpu().numpy())
            label_chunks.append(label.cpu().numpy())
            data_num += img.size(0)

    result = np.concatenate(top3_chunks,axis=0)
    labels = np.concatenate(label_chunks,axis=0)

    print("Inference finished:",data_num)
    print(np.shape(result),np.shape(labels))
    torch.cuda.empty_cache()

    ###Collect top-1 corrected data from dig dataset
    # np.where already yields sorted, unique row positions.
    dig_idx_list = np.where(result[:,0]==labels)[0].astype(int)
    data_len = len(dig_idx_list)
    npy_name = "./dataset_final/iterative_trained/digidx_{}_s{}.npy".format(data_len,step)
    print("Top1 data num:",data_len)
    print("Save npy as:",npy_name)
    np.save(npy_name,dig_idx_list)

    ### Extract the top-1 corrected rows of the dig dataset
    numpy_header = ",".join(["label"] + ["pixel" + str(i) for i in range(784)])

    # Reuse the frame that was just scored instead of re-reading the CSV.
    tmp_csv = np.array(global_data_dig).astype(int)
    top1_dig_csv = tmp_csv[dig_idx_list]
    print("shape of top1_dig_csv:", np.shape(top1_dig_csv))
    data_len = np.shape(top1_dig_csv)[0]
    top1_digcsv_name = "./dataset_final/iterative_trained/digtop1_{}_s{}.csv".format(data_len,step)
    print("Save dig csv as:",top1_digcsv_name)
    np.savetxt(top1_digcsv_name, top1_dig_csv, delimiter=",",fmt="%d",header=numpy_header,comments='')

    print("Dig csv finished")
    return top1_digcsv_name
    

# Pseudo labeling & combine with Dig csv

In [7]:
def pseudo_labeling(step=None,ensemble_root=None):
    """Label the test set with the ensemble's predictions and save it as a CSV.

    Every checkpoint in ``ensemble_root`` whose file name contains "Fold" is
    loaded into an ``SE_Net3``.  The models' outputs are averaged and the
    arg-max class becomes the pseudo label.  The first column of the test CSV
    is replaced by that label and the result is written with a
    ``label,pixel0..pixel783`` header.

    Args:
        step: Iteration number; only used in the output file name.
        ensemble_root: Directory holding the fold checkpoints.

    Returns:
        Path of the pseudo-labelled test CSV.

    Raises:
        RuntimeError: If ``ensemble_root`` contains no "Fold" checkpoint.
    """
    print("Start pseudo_labeling...")
    # Create the output directory up front so saving cannot fail after inference.
    os.makedirs("./dataset_final/iterative_trained", exist_ok=True)

    test_dataset = TestDataset(data_len=None)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=4)

    ### Inference models
    ensemble_models = []
    # sorted(): os.listdir order is arbitrary; a fixed order keeps runs repeatable.
    for file_name in sorted(os.listdir(ensemble_root)):
        if "Fold" not in file_name:
            continue
        model = SE_Net3(in_channels=1)
        model.to(device)
        model.load_state_dict(torch.load("{}/{}".format(ensemble_root,file_name), map_location=device))
        model.eval()
        ensemble_models.append(model)

    model_num = len(ensemble_models)
    print("model num:",model_num)
    if model_num == 0:
        raise RuntimeError("no 'Fold' checkpoint found in {}".format(ensemble_root))

    # Collect per-batch predictions and concatenate once after the loop.
    label_chunks = [np.array([])]
    with torch.no_grad():
        # The second item of each batch is only an empty placeholder label.
        for img, _ in test_loader:
            img = img.to(device)

            ###Average Ensemble
            pred_stack = torch.stack([m(img) for m in ensemble_models], dim=2)  #(batch_num,10,model_num)
            pred = torch.mean(pred_stack,dim=2)   #(batch,10)
            _,pred = torch.max(pred.data, 1)   #(batch_num,)
            label_chunks.append(pred.cpu().numpy())
    psuedo_labels = np.concatenate(label_chunks,axis=0)
    print("Inference complete:",np.shape(psuedo_labels))

    ###Generate test_csv
    numpy_header = ",".join(["label"] + ["pixel" + str(i) for i in range(784)])

    # Reuse the frame TestDataset already loaded; drop its first column, which
    # the pseudo label replaces.
    test_csv = np.array(test_dataset.data).astype(int)[:,1:]
    pseudo_labels = psuedo_labels.reshape(-1,1).astype(int)
    test_csv = np.concatenate([pseudo_labels,test_csv],axis=1)

    print("test csv shape:",np.shape(test_csv))
    test_csv_name = "./dataset_final/iterative_trained/test_pseu_s{}.csv".format(step)
    print("Save test csv as:",test_csv_name)
    np.savetxt(test_csv_name, test_csv, delimiter=",",fmt="%d",header=numpy_header,comments='')

    print("pseudo label finished")
    return test_csv_name

# Combine csv

In [8]:
def combine_csv(step,dig_csv_name, test_pseu_csv_name):
    """Merge the training CSV with the pseudo-labelled test CSV and the Dig top-1 CSV.

    The three tables are stacked (train, pseudo-labelled test, Dig top-1), the
    rows are shuffled, and the result is written with a
    ``label,pixel0..pixel783`` header.

    Args:
        step: Iteration number; only used in the output file name.
        dig_csv_name: Path of the Dig top-1 CSV.
        test_pseu_csv_name: Path of the pseudo-labelled test CSV.

    Returns:
        Path of the combined CSV.
    """
    print("start combining...")

    column_names = ["label"] + ["pixel" + str(col) for col in range(784)]
    numpy_header = ",".join(column_names)

    def _read_as_int(csv_path):
        # Whole table as an integer matrix.
        return np.array(pd.read_csv(csv_path)).astype(int)

    ### Combine train.csv with test_pseudo csv and digtop1 csv
    train_csv = _read_as_int("./dataset_final/train.csv")
    digtop1_csv = _read_as_int(dig_csv_name)
    test_csv = _read_as_int(test_pseu_csv_name)

    print("shape of digtop1_csv:",np.shape(digtop1_csv))
    print("shape of test_csv:",np.shape(test_csv))

    ###Combine train, pseudo and digtop1
    # (An earlier train + pseudo only variant used the same stack/shuffle/save steps.)
    new_csv = np.vstack([train_csv,test_csv,digtop1_csv])
    shuffle_rounds = 5
    for _ in range(shuffle_rounds):
        # In-place shuffle; only the first axis (rows) is permuted.
        np.random.shuffle(new_csv)
    print("shape of new csv:",np.shape(new_csv))

    data_len = new_csv.shape[0]
    new_csv_name = "./dataset_final/iterative_trained/train_pseu_dig_{}_s{}.csv".format(data_len,step)
    print("Save new csv as:",new_csv_name)
    np.savetxt(new_csv_name, new_csv, delimiter=",",fmt="%d",header=numpy_header,comments='')

    print("===================All finished===================")
    print("")
    return new_csv_name

# Train models

In [None]:
batch_size = 1024
num_workers = 8
k = 5

# Iterative self-training driver.  Every step
#   1. scores Dig-MNIST and the test set with the previous step's ensemble,
#   2. merges the train CSV, the pseudo-labelled test CSV and the Dig top-1 CSV,
#   3. trains k fresh fold models on the merged data and saves the best state of
#      each fold under ./Kmnist_saved_model/Step{step}, which becomes the
#      ensemble of the next step.
# NOTE(review): `while True` has no exit condition; the loop runs until the
# kernel is interrupted.
if __name__ == "__main__":
    import copy  # local import: only this cell needs it (best-state snapshot)

    epochs = 300
    ensemble_models = []
    lr = 1e-3
    val_period = 1
    criterion = torch.nn.CrossEntropyLoss()
    step = 0
    dir_name = "Kmnist_saved_model/final_submit/adam2/origin_60k_5fold" #temp used
    
    while True:
        step += 1
        ensemble_root = dir_name  # models saved by the previous step (or the seed ensemble)
        dig_csv_name = Infer_and_gen_dig_dataset(step,ensemble_root)
        pseu_csv_name = pseudo_labeling(step,ensemble_root)
        new_csv_name = combine_csv(step,dig_csv_name,pseu_csv_name)
        global_data = pd.read_csv(new_csv_name)
        print("Set global data to:",new_csv_name)
        print("Set ensemble root to:",ensemble_root)
        
        print("Step:",step)
        indices_len = len(global_data)
        # Validation fraction that corresponds to exactly one fold.
        vr = (indices_len//k)/indices_len
        print("global_data len:",indices_len)
        print("validation rate:",vr)
        train_loaders, val_loaders = get_kfold_dataset_loader(k, vr, indices_len, batch_size, num_workers, global_data)
        
        print("Fold:",len(train_loaders))
        dir_name = "./Kmnist_saved_model/Step{}".format(step)
        os.makedirs(dir_name, exist_ok=True)
        
        for fold in range(0,len(train_loaders)):
            train_loader = train_loaders[fold]
            val_loader = val_loaders[fold]
            model = get_model()
            max_acc = 0
            min_loss = 10000
            best_model_dict = None
            data_num = 0
            loss_avg = 0
            optimizer = torch.optim.Adam(model.parameters(),lr=lr,betas=(0.9,0.99))
            lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=20,factor=0.1)

            for ep in range(0,epochs+1):
                model.train()
                for idx, data in enumerate(train_loader):
                    img, target = data
                    img, target = img.to(device), target.to(device,dtype=torch.long)

                    pred = model(img)
                    loss = criterion(pred,target)
                    loss_avg += loss.item()
                    data_num += img.size(0)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                ###Validation
                if ep!=0 and ep%val_period == 0:
                    model.eval()
                    acc = 0
                    val_loss = 0
                    data_num  = 0
                    with torch.no_grad():
                        for idx, data in enumerate(val_loader):
                            img, target = data
                            img, target = img.to(device), target.to(device,dtype=torch.long)
                            pred = model(img)
                            val_loss += criterion(pred, target).item()
                            _,pred_class = torch.max(pred.data, 1)
                            acc += (pred_class == target).sum().item()
                            data_num += img.size(0)

                    acc /= data_num
                    # NOTE(review): this divides a sum of per-batch mean losses
                    # by the sample count, so the value is on a batch-size
                    # dependent scale.  Kept as is because the scheduler and
                    # the checkpoint file names already use this scale.
                    val_loss /= data_num

                    ###Plateau
                    lr_scheduler.step(val_loss)
                    if optimizer.param_groups[0]['lr'] < 1e-5:
                        break                    

                    if acc >= max_acc:
                        max_acc = acc
                        min_loss = val_loss
                        # state_dict() returns references to the live tensors,
                        # which later optimizer steps overwrite in place.
                        # Snapshot them, otherwise the "best" checkpoint is
                        # really the last epoch's weights.
                        best_model_dict = copy.deepcopy(model.state_dict())

                    print("Episode:{}, Validation Loss:{},Acc:{:.4f}%,lr:{}"
                          .format(ep,val_loss,acc*100,optimizer.param_groups[0]['lr']))
            print("===================Best Fold:{} Saved Loss:{} Acc:{}==================".format(fold,min_loss,max_acc))
            path = "{}/S{}_Fold{}_loss{:.4f}_acc{:.3f}".format(dir_name,step,fold,min_loss*1e3,max_acc*1e2)
            if best_model_dict is None:
                # No validation pass recorded a best state; save the current
                # weights rather than pickling None.
                best_model_dict = model.state_dict()
            torch.save(best_model_dict,path)
            print("======================================================")
            del model
            torch.cuda.empty_cache()

#         step += 1
#         ensemble_root = dir_name
#         dig_csv_name = Infer_and_gen_dig_dataset(step,ensemble_root)
#         pseu_csv_name = pseudo_labeling(step,ensemble_root)
#         new_csv_name = combine_csv(step,dig_csv_name,pseu_csv_name)
#         global_data = pd.read_csv(new_csv_name)
#         print("Set global data to:",new_csv_name)
#         print("Set ensemble root to:",ensemble_root)

start inference & gernerate Dig dataset...
model num: 5
Inference finished: 10240
(10240, 3) (10240,)
Top1 data num: 9340
Save npy as: ./dataset_final/iterative_trained/digidx_9340_s1.npy
shape of top1_dig_csv: (9340, 785)
Save dig csv as: ./dataset_final/iterative_trained/digtop1_9340_s1.csv
Dig csv finished
Start pseudo_labeling...
model num: 5
Inference complete: (5000,)
test csv shape: (5000, 785)
Save test csv as: ./dataset_final/iterative_trained/test_pseu_s1.csv
pseudo label finished
start combining...
shape of digtop1_csv: (9340, 785)
shape of test_csv: (5000, 785)
shape of new csv: (74340, 785)
Save new csv as: ./dataset_final/iterative_trained/train_pseu_dig_74340_s1.csv

Set global data to: ./dataset_final/iterative_trained/train_pseu_dig_74340_s1.csv
Set ensemble root to: Kmnist_saved_model/final_submit/adam2/origin_60k_5fold
Step: 1
global_data len: 74340
validation rate: 0.2
Fold: 5
Episode:1, Validation Loss:0.003666151201721799,Acc:27.7711%,lr:0.001
Episode:2, Validatio

# Confusion Matrix

In [None]:
import seaborn as sn
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

###Confusion matrix
# NOTE(review): `labels` and `result` are not assigned at notebook scope in the
# visible cells (they are locals of Infer_and_gen_dig_dataset) — presumably
# leftover kernel state; confirm where they come from before a Restart & Run All.
print(classification_report(labels, result,digits=4))

fig, ax = plt.subplots(figsize=(10,10))
confusion_mat = confusion_matrix(labels, result)
sn.heatmap(
    confusion_mat,
    annot=True,
    cmap='YlGnBu',
    fmt="d",
    linewidths=.5,
    linecolor='w',
    ax=ax,
)
ax.set_title('Confusion matrix of Real World validation result')
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')


# Show data distribution

In [None]:
global_data = pd.read_csv("./dataset/train.csv")
global_data_test = pd.read_csv("./dataset/train_test_psuedo_65k.csv")

train_digtop1= pd.read_csv("./dataset/digtop1_9548.csv")
# Plot the class distribution only.  Passing the whole frame made plt.hist
# treat every column (label and all pixels) as a separate data set.
# Assumes column 0 is the label, as in the other CSVs of this notebook — TODO confirm.
digit_labels = train_digtop1.iloc[:, 0]
# Bin edges at -0.5, 0.5, ..., 9.5 centre each bar on its integer class tick.
plt.hist(digit_labels, bins=np.arange(11) - 0.5, rwidth=0.3)
plt.xticks(range(0,10))
