In [1]:
import os, time
import numpy as np
import torch
from torchvision import transforms, datasets
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from torchsummary import summary
# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU so that
# cells which only move tensors with `.to(device)` still run on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"


In [2]:
import torch.nn as nn
import torch.nn.functional as F
class convNet(nn.Module):
    """Three-stage convolutional classifier producing 10 class logits.

    Each stage is (conv -> batch-norm -> leaky ReLU) x 2, followed by a 2x2
    max-pool and dropout (p=0.2). The channel widths are 64, 128 and 256.
    The classifier head expects 256*3*3 features, i.e. 28x28 inputs
    (28 -> 14 -> 7 -> 3 after the three pools).

    Args:
        in_channels: number of channels of the input images.

    Note:
        bn3..bn6 used to be built as ``nn.BatchNorm2d(C, 0.1)``. The second
        positional argument of ``BatchNorm2d`` is ``eps``, not ``momentum``,
        so those layers ran with eps=0.1. They now use the default eps and an
        explicit momentum of 0.1, like bn1/bn2. Parameter names are unchanged,
        so old checkpoints still load, but checkpoints trained with the old
        eps will not reproduce their scores exactly and should be retrained.
    """

    def __init__(self,in_channels):
        super(convNet,self).__init__()
        # Stage 1: 5x5 convolutions, padding=2 keeps the spatial size.
        self.c1 = nn.Conv2d(in_channels=in_channels, out_channels=64,kernel_size=5,stride=1,padding=2)
        self.bn1 = nn.BatchNorm2d(num_features=64,momentum=0.1)
        self.c2 = nn.Conv2d(64,64,5,1,2)
        self.bn2 = nn.BatchNorm2d(num_features=64,momentum=0.1)
        self.m1 = nn.MaxPool2d(2)
        self.d1 = nn.Dropout(0.2)

        # Stage 2: 5x5 convolutions, 128 channels.
        self.c3 = nn.Conv2d(64,128,5,1,2)
        self.bn3 = nn.BatchNorm2d(num_features=128,momentum=0.1)
        self.c4 = nn.Conv2d(128,128,5,1,2)
        self.bn4 = nn.BatchNorm2d(num_features=128,momentum=0.1)
        self.m2 = nn.MaxPool2d(2)
        self.d2 = nn.Dropout(0.2)

        # Stage 3: 3x3 convolutions, 256 channels.
        self.c5 = nn.Conv2d(128,256,3,1,1)
        self.bn5 = nn.BatchNorm2d(num_features=256,momentum=0.1)
        self.c6 = nn.Conv2d(256,256,3,1,1)
        self.bn6 = nn.BatchNorm2d(num_features=256,momentum=0.1)
        self.m3 = nn.MaxPool2d(2)
        self.d3 = nn.Dropout(0.2)

        # Classifier head.
        self.fc1 = nn.Linear(256*3*3,256)
        self.out = nn.Linear(256,10)

    def forward(self,x):
        """Return raw (un-normalised) class scores of shape (batch, 10)."""
        x = F.leaky_relu(self.bn1(self.c1(x)),negative_slope=0.1)
        x = F.leaky_relu(self.bn2(self.c2(x)),0.1)
        x = self.d1(self.m1(x))

        x = F.leaky_relu(self.bn3(self.c3(x)),0.1)
        x = F.leaky_relu(self.bn4(self.c4(x)),0.1)
        x = self.d2(self.m2(x))

        x = F.leaky_relu(self.bn5(self.c5(x)),0.1)
        x = F.leaky_relu(self.bn6(self.c6(x)),0.1)
        x = self.d3(self.m3(x))

        # Flatten per sample. Keeping the batch dimension fixed means an
        # unexpected input size fails loudly in fc1 instead of silently
        # regrouping samples, which view(-1, 256*3*3) could do.
        x = x.view(x.size(0), -1)
        x = F.leaky_relu(self.fc1(x),0.1)
        return self.out(x)

In [3]:
def get_model():
    """Build a fresh single-channel `convNet` and move it to the notebook-wide `device`.

    Returns:
        The newly constructed model, already placed on `device`.
    """
    # Alternatives that were tried here earlier:
    #   * EfficientNet-b0 via EfficientNet.from_pretrained('efficientnet-b0', num_classes=10)
    #   * torchvision resnet18 with conv1 replaced by
    #     torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    #   * resuming via model.load_state_dict(torch.load("./Kmnist_saved_model/ep20_acc0.9910"))
    model = convNet(in_channels=1)
    # The training loop moves every batch with `.to(device)`; placing the model
    # through the same variable keeps model and data on the same device.
    model.to(device)
    return model


In [46]:
from imgaug import augmenters as iaa
# from imgaug.augmentables.segmaps import SegmentationMapOnImage

class ImgAugTransform:
    """Callable that runs a fixed imgaug augmentation sequence on a single image."""

    def __init__(self):
        # The augmenters are handed to iaa.Sequential in exactly this order.
        steps = [
            iaa.Sometimes(0.1, iaa.AverageBlur(1.2)),
            iaa.Sometimes(
                0.5,
                iaa.Affine(
                    rotate=(-35, 35),
                    order=[0, 1],
                    translate_px={"x":(-3, 3),"y":(-4,4)},
                    mode='symmetric',
                ),
            ),
            iaa.Sometimes(0.5, iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.25))),
            iaa.Sometimes(0.1, iaa.SaltAndPepper(0.05, False)),
            iaa.Invert(0.5),
            iaa.Sometimes(0.2, iaa.GammaContrast(2)),
        ]
        self.aug = iaa.Sequential(steps)

    def __call__(self, img, mask=None):
        """Convert `img` to a numpy array and return its augmented version.

        `mask` is accepted but not used.
        """
        pixels = np.array(img)
        return self.aug.augment_image(image=pixels)

In [7]:
# Training pipeline: random photometric and geometric jitter, then tensor
# conversion (values scaled to 0..1) and normalisation.
# TEMP (original author's caution): normalisation uses the Dig-MNIST statistics;
# the train/test average is mean=[0.06525399], std=[0.20466233].
# Remove the Normalize step when measuring the dataset distribution.
trans = transforms.Compose([
    transforms.ColorJitter(brightness=0.9, contrast=0.2, saturation=0.2, hue=0.5),
    transforms.RandomAffine(degrees=10, translate=(0.2, 0.2), scale=[0.7, 1.1], shear=15),
    # Optional imgaug stage: ImgAugTransform(), followed by `lambda x: Image.fromarray(x)`.
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.1126489], std=[0.28132638]),
])

# Validation pipeline: no augmentation, Dig-MNIST statistics.
trans_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.1126489], std=[0.28132638]),
])

# Test pipeline: no augmentation, statistics averaged over train and test.
trans_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.06525399], std=[0.20466233]),
])

# Both CSVs are read once and shared by the dataset classes defined below.
global_data = pd.read_csv("./dataset/train.csv")
global_dig_val_data = pd.read_csv("./dataset/Dig-MNIST.csv")

class KMnistDataset(Dataset):
    """Training or validation view over a shared DataFrame of 28x28 digit images.

    Rows are addressed through `indices`. The training view covers the first
    ``int(data_len * (1 - validate_rate))`` entries of `indices`; the
    validation view covers ``int(data_len * validate_rate)`` entries starting
    at offset ``int(data_len * (1 - validate_rate))``.

    Args:
        data_len: number of rows to split; defaults to the DataFrame length.
        is_validate: True selects the validation part (and `trans_val`),
            False selects the training part (and `trans`).
        validate_rate: fraction of `data_len` held out for validation.
            None is treated as 0.0 (nothing held out).
        indices: row positions into the DataFrame. None means the identity
            order ``0 .. data_len-1``.
    """

    def __init__(self,data_len=None, is_validate=False,validate_rate=None,indices=None):
        self.is_validate = is_validate
        # TEMP (original author's caution): reads the Dig-MNIST frame instead of `global_data`.
        self.data = global_dig_val_data

        if data_len is None:
            data_len = len(self.data)
        if validate_rate is None:
            # Previously this default crashed with a TypeError in the arithmetic below.
            validate_rate = 0.0
        if indices is None:
            # Previously this default crashed on the first __getitem__.
            indices = np.arange(data_len)

        self.indices = indices
        if self.is_validate:
            self.len = int(data_len*validate_rate)
            self.offset = int(data_len*(1-validate_rate))
            self.transform = trans_val
        else:
            self.len = int(data_len*(1-validate_rate))
            self.offset = 0
            self.transform = trans

    def __getitem__(self, idx):
        """Return ``(img, label)``: the transformed image and its label as a uint8 tensor.

        Raises:
            IndexError: if `idx` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.len
        if not 0 <= idx < self.len:
            # Lets plain iteration terminate and stops one split from reading
            # rows that belong to the other one.
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        row = self.indices[idx + self.offset]
        img = self.data.iloc[row, 1:].values.astype(np.uint8).reshape((28, 28))  # values 0..255
        label = self.data.iloc[row, 0]  # column 0 holds the class label
        img = Image.fromarray(img)
        img = self.transform(img)
        label = torch.as_tensor(label, dtype=torch.uint8)
        return img, label

    def __len__(self):
        return self.len

class DigValDataset(Dataset):
    """Validation dataset over the shared Dig-MNIST DataFrame, using `trans_val`.

    Args:
        data_len: number of samples exposed; defaults to the DataFrame length.
        indices: row positions into the DataFrame. None means the identity
            order ``0 .. data_len-1``.
    """

    def __init__(self,data_len=None,indices=None):
        self.data = global_dig_val_data
        print("data shape:", np.shape(self.data))
        if data_len is None:
            data_len = len(self.data)
        if indices is None:
            # Previously this default crashed on the first __getitem__.
            indices = np.arange(int(data_len))
        self.indices = indices
        self.len = int(data_len)
        self.transform = trans_val

    def __getitem__(self, idx):
        """Return ``(img, label)``: the transformed image and its label as a uint8 tensor.

        Raises:
            IndexError: if `idx` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.len
        if not 0 <= idx < self.len:
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        row = self.indices[idx]
        img = self.data.iloc[row, 1:].values.astype(np.uint8).reshape((28, 28))  # values 0..255
        label = self.data.iloc[row, 0]  # column 0 holds the class label
        img = Image.fromarray(img)
        img = self.transform(img)
        label = torch.as_tensor(label, dtype=torch.uint8)
        return img, label

    def __len__(self):
        return self.len
    
    
class TestDataset(Dataset):
    """Unlabelled test images read from a CSV file, preprocessed with `trans_test`.

    Column 0 of the CSV is skipped; the remaining 784 columns are reshaped to
    a 28x28 image.

    Args:
        data_len: number of samples exposed; defaults to every row of the CSV.
            Values larger than the CSV are clamped to the CSV length.
        csv_path: location of the test CSV.
    """

    def __init__(self,data_len=None,csv_path="./dataset/test.csv"):
        self.data = pd.read_csv(csv_path)
        print("data shape:", np.shape(self.data))
        self.transform = trans_test
        if data_len is None:
            self.len = len(self.data)
        else:
            # Previously `self.len` was left unset on this path, so __len__
            # raised AttributeError whenever data_len was supplied.
            self.len = min(int(data_len), len(self.data))

    def __getitem__(self, idx):
        """Return ``(img, empty_tensor)``; the empty tensor stands in for the missing label.

        Raises:
            IndexError: if `idx` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.len
        if not 0 <= idx < self.len:
            raise IndexError("index {} out of range for dataset of length {}".format(idx, self.len))
        img = self.data.iloc[idx, 1:].values.astype(np.uint8).reshape((28, 28))  # values 0..255
        img = Image.fromarray(img)
        img = self.transform(img)
        return img, torch.Tensor([])

    def __len__(self):
        return self.len
    

In [12]:
# train distribution: [0.06464077] [0.20316151]
# test distribution: [0.0726126] [0.22267213]
# dist1 = np.array([0.06464077,0.20316151])
# dist2 = np.array([0.0726126,0.22267213])
# dist = (dist1*60000 + dist2*5000)/65000
# print(dist)

def get_dataset_mean_std(dataloader):
    """Compute the per-channel mean and standard deviation over all pixels of a dataset.

    The statistics are accumulated as running sums of x and x^2 in float64, so
    the result is the true (population) mean and std of every pixel seen. The
    earlier version averaged the per-image standard deviations, which
    under-estimates the dataset std whenever images differ in brightness.

    Batches are processed on whatever device they arrive on, so the function
    works without a GPU.

    Args:
        dataloader: iterable yielding ``(img, label)`` batches, where `img` is
            a tensor whose first two dimensions are (batch, channels).

    Returns:
        ``(mean, std)`` as two float32 numpy arrays of shape (channels,).

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    print("Calculate distribution:")
    pixel_sum = 0.
    pixel_sq_sum = 0.
    nb_pixels = 0
    nb_samples = 0
    for data in dataloader:
        img = data[0]
        batch_samples = img.size(0)
        # (batch, channels, pixels); float64 keeps the sum of squares accurate.
        flat = img.contiguous().view(batch_samples, img.size(1), -1).double()
        pixel_sum = pixel_sum + flat.sum(2).sum(0)
        pixel_sq_sum = pixel_sq_sum + (flat * flat).sum(2).sum(0)
        nb_pixels += batch_samples * flat.size(2)
        nb_samples += batch_samples
        if nb_samples%5120 == 0:
            print("Finished:", nb_samples)

    print("num of samples:",nb_samples)
    if nb_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute mean/std")

    mean = pixel_sum / nb_pixels
    # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative round-off.
    variance = (pixel_sq_sum / nb_pixels - mean * mean).clamp(min=0)
    std = variance.sqrt()
    return mean.float().cpu().numpy(), std.float().cpu().numpy()

In [8]:
def get_kfold_dataset_loader(k=5,val_rate=0.1,indices_len=None, batch_size=None,num_workers=None, dig_val=False):
    """Create `k` (train, validation) DataLoader pairs.

    Every round draws its own random permutation of ``0 .. indices_len-1``
    with ``np.random.shuffle``, so the held-out parts of different rounds are
    independent random subsets rather than disjoint folds.

    Args:
        k: number of loader pairs to build.
        val_rate: validation fraction forwarded to `KMnistDataset`.
        indices_len: number of row indices to permute.
        batch_size: batch size for every loader.
        num_workers: worker count for every loader.
        dig_val: when equal to True, validate on `DigValDataset` with the
            fixed indices ``0 .. 10239`` instead of the held-out split.

    Returns:
        ``(train_loader_list, val_loader_list)``, two lists of length `k`.
    """
    train_loader_list, val_loader_list = [], []

    for _ in range(k):
        shuffled = np.arange(indices_len)
        np.random.shuffle(shuffled)

        train_set = KMnistDataset(data_len=None, is_validate=False, validate_rate=val_rate, indices=shuffled)
        train_loader_list.append(
            DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        )

        if dig_val==True:
            val_set = DigValDataset(data_len=None, indices=np.arange(10240))
        else:
            val_set = KMnistDataset(data_len=None, is_validate=True, validate_rate=val_rate, indices=shuffled)
        val_loader_list.append(
            DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        )

    return train_loader_list, val_loader_list


In [11]:
# Loader configuration.
batch_size = 512
num_workers = 0
vr = 0.075   # validation fraction
k = 3        # number of (train, val) loader pairs
# TEMP (original author's caution): 10240 instead of the usual 60000.
indices_len = 10240

# To re-measure the normalisation statistics of one split, build a plain
# (shuffle=False) loader and pass it to get_dataset_mean_std, e.g.
#   indices = np.arange(10240)
#   dig_val_dataset = DigValDataset(data_len=None, indices=indices)
#   dig_val_loader = DataLoader(dig_val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
#   mean, std = get_dataset_mean_std(dig_val_loader)
#   print("validate distribution:", mean, std)
# The same recipe applies to KMnistDataset(..., validate_rate=vr, indices=np.arange(indices_len))
# and to TestDataset(data_len=None).

# Loader pairs consumed by the training cell.
train_loaders, val_loaders = get_kfold_dataset_loader(
    k=k,
    val_rate=vr,
    indices_len=indices_len,
    batch_size=batch_size,
    num_workers=num_workers,
    dig_val=True,
)


data shape: (10240, 785)
data shape: (10240, 785)
data shape: (10240, 785)


In [None]:
# Visual sanity check of the training transform on rows 1000..1049 of train.csv.
data = pd.read_csv("./dataset/train.csv")

# One figure per row: raw image on the left, the same image after `trans` on the right.
for i in range(1000,1050):
    fig, axes = plt.subplots(1,2,figsize=(8,4))
    img2 = data.iloc[i, 1:].values.astype(np.uint8).reshape((28, 28))  #value: 0~255
    img2 = Image.fromarray(img2)
    axes[0].imshow(img2,cmap="gray")
    img2 = trans(img2).cpu().numpy().reshape(28,28)
    axes[1].imshow(img2,cmap="gray")
    plt.pause(.1)

# NOTE(review): `kmnist_dataset` is not defined in any cell visible in this notebook;
# the lines below presumably relied on a since-deleted cell and will raise NameError
# on a fresh kernel. TODO confirm which dataset object was meant.
kmnist_dataset.data.head(100)

type(kmnist_dataset.data.iloc[20,1:].values)  #numpy ndarray
type(kmnist_dataset.data.iloc[20,0])  #numpy int64
kmnist_dataset.data.head(5)

In [None]:
# Preview of the cosine-annealing-with-warm-restarts learning-rate schedule.
lr = 1e-2
ep = 300
# A throwaway parameter is enough to drive the scheduler, so this cell no longer
# depends on a `model` created by a later cell.
dummy_param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([dummy_param],lr=lr)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=50,T_mult=1,eta_min=1e-6)
# Alternative tried earlier: CosineAnnealingWarmUpRestarts(optimizer,T_0=20,T_mult=3,eta_max=lr,T_up=10)

x = list(range(ep))
y = []
for epoch in range(ep):
    # Record the rate that epoch `epoch` would train with. It is read from the
    # optimizer rather than through scheduler.get_lr(), which is not meant to be
    # called outside step(). `lr` is left untouched (it used to be overwritten
    # with a list here).
    y.append(optimizer.param_groups[0]['lr'])
    # Step the optimizer before the scheduler, as PyTorch expects.
    optimizer.step()
    lr_scheduler.step()

plt.figure()
plt.plot(x, y)

In [None]:
# Train one model per (train, val) loader pair and save the most accurate
# weights of each run.
#
# Fixes compared with the earlier version of this cell:
#   * the best weights are deep-copied; `model.state_dict()` returns references
#     to the live parameters, so the stored "best" dict used to follow every
#     later optimizer step and the saved file was always the latest model;
#   * the validation loss is a Python float averaged per sample (it used to be
#     a tensor of summed batch means divided by the sample count);
#   * the checkpoint directory is created if missing, and nothing is saved
#     before a validation pass has produced a best model.
if __name__ == "__main__":
    import copy  # local import so the cell does not depend on an edited import cell

    epochs = 120
    period = 40
    ensemble_models = []
    lr = 1e-3
    val_period = 1

    os.makedirs("./Kmnist_saved_model", exist_ok=True)

    print("Fold:",len(train_loaders))

    for fold in range(len(train_loaders)):
        train_loader = train_loaders[fold]
        val_loader = val_loaders[fold]

        model = get_model()
        torch.cuda.empty_cache()

        max_acc = 0
        min_loss = 10000
        best_model_dict = None

        # Other optimizers tried earlier: SGD, RMSprop(alpha=0.9), Adagrad.
        optimizer = torch.optim.Adam(model.parameters(),lr=lr,betas=(0.9,0.99))
        # Alternative schedule tried earlier:
        # CosineAnnealingWarmRestarts(optimizer,T_0=period,T_mult=1,eta_min=1e-5), stepped once per epoch.
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=15)
        criterion = torch.nn.CrossEntropyLoss()

        for ep in range(0,epochs+1):
            ###Training
            model.train()
            for idx, data in enumerate(train_loader):
                img, target = data
                # CrossEntropyLoss needs int64 class indices; the datasets yield uint8 labels.
                img, target = img.to(device), target.to(device,dtype=torch.long)
                pred = model(img)               # (batch, 10)
                loss = criterion(pred,target)   # target: (batch,)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            ###Validation
            if ep!=0 and ep%val_period == 0:
                model.eval()
                correct = 0
                loss_sum = 0.0
                data_num = 0
                with torch.no_grad():
                    for idx, data in enumerate(val_loader):
                        img, target = data
                        img, target = img.to(device), target.to(device,dtype=torch.long)
                        pred = model(img)
                        # criterion returns the batch mean; weight it by the batch
                        # size so the final value is a true per-sample average.
                        loss_sum += criterion(pred, target).item() * img.size(0)
                        _,pred_class = torch.max(pred, 1)
                        correct += (pred_class == target).sum().item()
                        data_num += img.size(0)

                acc = correct / data_num
                val_loss = loss_sum / data_num

                ###Plateau
                lr_scheduler.step(val_loss)
                # Stop this fold once the plateau scheduler has decayed the rate below 1e-5.
                if optimizer.param_groups[0]['lr'] < 1e-5:
                    break

                if acc >= max_acc:
                    max_acc = acc
                    # Snapshot, not a reference: see the note at the top of the cell.
                    best_model_dict = copy.deepcopy(model.state_dict())

                if val_loss <= min_loss:
                    min_loss = val_loss

                print("Episode:{}, Validation Loss:{}, Acc:{:.4f}% lr:{:.4f}".format(ep,val_loss,acc*100,optimizer.param_groups[0]['lr']))

            # Periodic checkpoint of the best weights seen so far.
            if ep!=0 and ep%10 == 0 and best_model_dict is not None:
                torch.save(best_model_dict, "./Kmnist_saved_model/Fold{}_current_acc{:.4f}".format(fold,max_acc))

        ###K-Fold ensemble: save the best model of this fold
        print("===================Best Fold:{} Saved, Acc:{}==================".format(fold,max_acc))
        if best_model_dict is not None:
            torch.save(best_model_dict, "./Kmnist_saved_model/Fold{}_loss{:.3f}_acc{:.3f}".format(fold,min_loss*1e3,max_acc*1e2))
        print("======================================================")

        # Snapshot-ensemble variant tried earlier: every `period` epochs, save
        # best_model_dict as "./Kmnist_saved_model/model{}_ep{}_acc{:.4f}" and reset max_acc.
    

# Ensemble inference

In [None]:
# Majority-vote ensemble over every checkpoint stored in `ensemble_root`.
# NOTE(review): the absolute path below is machine-specific; point it at your own checkpoint folder.
ensemble_root = "/home/ccchang/localization_net/Kmnist_saved_model/emsemble/5_fold_ep80_lr1e-2"
ensemble_models = []
data_num = 0
acc = 0

# sorted() makes the load order reproducible; os.listdir returns entries in arbitrary order.
for file_name in sorted(os.listdir(ensemble_root)):
    model = convNet(in_channels=1)
    model.to(device)
    model.load_state_dict(torch.load("{}/{}".format(ensemble_root,file_name), map_location=device))
    model.eval()
    ensemble_models.append(model)

# Vote with exactly the models that were loaded. The count used to be hard-coded
# as epochs//period == 5, which raised IndexError with fewer checkpoints and
# silently ignored any extra ones.
model_num = len(ensemble_models)
if model_num == 0:
    raise RuntimeError("no checkpoints found in {}".format(ensemble_root))

with torch.no_grad():
    # NOTE(review): `validate_loader` is not defined in any cell visible in this
    # notebook; presumably one of the `val_loaders` was intended. TODO confirm.
    for idx, data in enumerate(validate_loader):
        img, target = data
        img, target = img.to(device), target.to(device,dtype=torch.long)

        # Alternative tried earlier (average ensemble): stack the raw outputs of all
        # models along a new last dim, take torch.mean over it, then argmax per sample.

        ###Voting Ensemble
        # votes[i, j] = class predicted for sample i by model j -> (batch, model_num)
        votes = torch.stack(
            [torch.max(member(img), 1)[1] for member in ensemble_models], dim=1
        ).cpu().numpy()

        # Most frequent class per sample; ties go to the smallest class index,
        # which matches the earlier np.unique + argmax behaviour.
        voted = np.array([np.bincount(row).argmax() for row in votes], dtype=np.int64)
        pred_class_list = torch.from_numpy(voted).to(device)   # (batch,)

        acc += (pred_class_list == target).sum().item()
        data_num += img.size(0)

    acc /= data_num
    print("Acc:{:.4f}%".format(acc*100))


In [None]:
# Scratch check of the tensor ops behind the "average ensemble": stack three
# copies of a (3,4) tensor along a new last dim, then average that dim away.
# The first line of this cell used to be a fused paste
# ("self.data = global_dataimport torch"), which is a SyntaxError.
import torch
import numpy as np

t1 = torch.Tensor([[1,2,3,4],[4,3,2,1],[1,5,3,3]])  #(3,4)
t1 = t1.unsqueeze(2)  #(3,4,1)

t_list = torch.Tensor([])

for i in range(3):
    t_list = torch.cat((t_list,t1),dim=2)

print(t_list.size())  #(3,4,3)
print(t_list)
t_list = torch.mean(t_list,dim=2)
print(t_list.size())  #(3,4)
print(t_list)


# n1 = t1.cpu().numpy()

# n1, count = np.unique(n1,return_counts=True,axis=0)
# print(count)
# n1 = np.argmax(count)

