In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np

from matplotlib import pyplot as plt
%matplotlib inline
import albumentations as A
import albumentations.pytorch
import random

In [29]:
class EarlyStopping:
    """Track the validation loss, checkpoint improvements and flag when to stop.

    Call the instance once per epoch with the current validation loss and the
    model. Whenever the loss improves by more than ``delta`` the model's
    ``state_dict`` is written to ``path``; after ``patience`` consecutive
    epochs without such an improvement ``early_stop`` becomes ``True``.
    """
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0            # epochs since the last improvement
        self.best_score = None      # best (negated) validation loss seen so far
        self.early_stop = False
        # `np.Inf` was removed in NumPy 2.0; `np.inf` works on every NumPy version.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and update the stopping state.

        Args:
            val_loss (float): Validation loss of the epoch that just finished.
            model (torch.nn.Module): Model to checkpoint when the loss improved.
        """
        # Work with a score where "higher is better" so the comparisons read naturally.
        score = -val_loss

        if self.best_score is None:
            # First call: whatever we got is the best so far.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # Not better than the best score by more than `delta`: count a stale epoch.
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [30]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('device: {}'.format(device))

device: cuda


In [31]:
# Seed every random number generator this notebook can draw from so that a
# "Restart & Run All" is repeatable. NumPy's global generator was previously
# left unseeded (NOTE(review): some albumentations versions sample from it).
SEED = 777
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    # Covers every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(SEED)

## EDA

In [32]:
# Load the Kaggle "digit-recognizer" tables. The training file carries a `label`
# column next to the 784 pixel columns; the test file has the pixel columns only.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
    )
)

In [33]:
# Peek at the first five training rows (label + pixel columns).
train_data.head(n=5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
# Peek at the first five test rows (pixel columns only, no label).
test_data.head(n=5)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Separate the target column from the pixel features.
Y_train = train_data['label']
X_train = train_data.drop(columns=['label'])
for frame in (Y_train, X_train):
    print(frame)

In [None]:
# One boolean per pixel column (True if the column has any missing value),
# summarised with describe() so a single look shows whether any column is affected.
X_train.isna().any().describe()

## Data Preprocessing

In [None]:
# Pixel values are deliberately left on their raw scale here: the dataset classes
# cast every image to uint8 and the albumentations pipelines apply A.Normalize later.
# (The former `X_train = X_train` / `test_data = test_data` self-assignments did nothing
# and were removed.)
print(X_train.max())  # per-column maximum

In [None]:
# Per-column extremes of the test pixels: maximum first, then minimum.
for column_extremes in (test_data.max(), test_data.min()):
    print(column_extremes)

Convert the DataFrames into NumPy arrays.

In [None]:
# Swap the pandas objects for their underlying NumPy arrays.
# NOTE: the names are rebound in place, so this cell cannot be re-run on its own.
X_train = X_train.to_numpy()
Y_train = Y_train.to_numpy()
test_data = test_data.to_numpy()

In [None]:
# Turn each flat row of 784 pixels into a 28x28 image with one trailing channel axis.
image_shape = (-1, 28, 28, 1)
X_train = X_train.reshape(image_shape)
test_data = test_data.reshape(image_shape)

for pixel_array in (X_train, test_data):
    print(pixel_array.shape)

In [None]:
def visualize(image):
    """Draw ``image`` on a fresh 10x10-inch figure with the axes hidden."""
    side_inches = 10
    plt.figure(figsize=(side_inches, side_inches))
    plt.axis('off')
    plt.imshow(image)

In [None]:
# First training image, still in its single-channel (28, 28, 1) layout.
raw_image = X_train[0]
print(raw_image.shape)
plt.imshow(raw_image)

In [None]:
# Copy the single channel three times along the last axis to get an RGB-shaped image.
channel3_image = np.repeat(X_train[0], repeats=3, axis=2)
print(channel3_image.shape)
plt.imshow(channel3_image)

In [None]:
# Side-by-side comparison of the single-channel image and its 3-channel copy.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 16))
# `ax` is an array holding one Axes per panel; print what it looks like.
for axes_detail in (type(ax), ax.shape, ax):
    print(axes_detail)
for panel, picture in zip(ax, (raw_image, channel3_image)):
    panel.imshow(picture)
ax[0].set_title('initial')
ax[1].set_title('transformed')

In [None]:
# Try an augmentation pipeline on one sample. The image is cast to uint8 before it is
# handed to albumentations. Tensor conversion is left out so the result stays a NumPy
# array that imshow can draw.
image = X_train[0].astype(np.uint8)
demo_steps = [
    A.ShiftScaleRotate(p=0.5),                  # applied with probability 0.5
    A.Normalize(mean=(0.485,), std=(0.229,)),
]
transform = A.Compose(demo_steps)
plt.imshow(transform(image=image)['image'])

In [None]:
# Shape and value range of the uint8 image before any transform.
for image_stat in (image.shape, image.min(), image.max()):
    print(image_stat)

In [None]:
# Run the pipeline once more and inspect value range, type and shape of its output.
transformed_image = transform(image=image)['image']
for output_detail in (
    transformed_image.min(),
    transformed_image.max(),
    type(transformed_image),
    transformed_image.shape,
):
    print(output_detail)

In [None]:
class MNIST_dataset(Dataset):
    """Labelled digit images served one ``(image, label)`` pair at a time.

    Args:
        x_train: Indexable collection of images; ``x_train[idx]`` must support
            ``.astype`` (e.g. a NumPy array).
        y_train: Indexable collection of labels aligned with ``x_train``.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with the result under the ``'image'`` key
            (the albumentations calling convention).
    """
    def __init__(self, x_train, y_train, transform=None):
        # Bug fix: the body used to read the undefined names `x` and `y`,
        # so constructing the dataset raised NameError.
        self.x = x_train
        self.y = y_train
        self.transform = transform

    def __len__(self):
        """Number of samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(image, label)``; the label is wrapped in a tensor."""
        # The image is cast to uint8 before being handed to the transform.
        x = self.x[idx].astype(dtype=np.uint8)
        y = torch.tensor(self.y[idx])
        if self.transform:
            x = self.transform(image = x)['image']
        return x, y
    
class MNIST_test_dataset(Dataset):
    """Unlabelled digit images; each item is just the (optionally transformed) image."""
    def __init__(self, x, transform=None):
        self.x, self.transform = x, transform

    def __len__(self):
        """Number of images."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return image ``idx`` cast to uint8 and passed through the transform, if any."""
        sample = self.x[idx].astype(dtype=np.uint8)
        if not self.transform:
            return sample
        # The transform is called as transform(image=...) and returns a mapping
        # whose 'image' entry is the result.
        return self.transform(image=sample)['image']
        

In [None]:
# Every pipeline normalises with mean 0.5 / std 0.5 and converts to a tensor;
# only the training pipeline additionally applies random shift/scale/rotate.
normalize_kwargs = dict(mean=(0.5,), std=(0.5,))

train_transform = A.Compose([
    A.ShiftScaleRotate(p=0.5),
    A.Normalize(**normalize_kwargs),
    A.pytorch.ToTensorV2(),
])
valid_transform = A.Compose([
    A.Normalize(**normalize_kwargs),
    A.pytorch.ToTensorV2(),
])
test_transform = A.Compose([
    A.Normalize(**normalize_kwargs),
    A.pytorch.ToTensorV2(),
])

# The train and validation datasets wrap the same arrays; which rows each one
# actually serves is decided later by the samplers built from the fold indices.
train_dataset = MNIST_dataset(X_train, Y_train, transform=train_transform)
valid_dataset = MNIST_dataset(X_train, Y_train, transform=valid_transform)
test_dataset = MNIST_test_dataset(test_data, transform=test_transform)

test_dataloader = DataLoader(dataset=test_dataset, batch_size=64)

## Model

In [None]:
class CNN(nn.Module):
    """Convolutional classifier for single-channel 28x28 images.

    Layout: two conv + batch-norm + ReLU stages, one 2x2 max-pool, two more
    conv + batch-norm + ReLU stages, then three fully connected layers. The
    first two fully connected layers are followed by ReLU and dropout; the
    last one returns 10 raw class scores (no softmax is applied).
    """
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1,  out_channels=16, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1)

        self.fc1 = nn.Linear(128*8*8,1024)
        self.fc2 = nn.Linear(1024,512)
        self.fc3 = nn.Linear(512,10)

        self.batchnorm1 = nn.BatchNorm2d(16)
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.batchnorm4 = nn.BatchNorm2d(128)
        # Plain element-wise dropout: it is only ever applied to the flattened
        # (batch, features) activations of the fully connected layers.
        # nn.Dropout2d targets (N, C, H, W) feature maps and is deprecated for 2-D input.
        self.drop = nn.Dropout(p=0.5)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

        # Kaiming-normal initialisation of every conv / linear weight (biases keep their defaults).
        for layer in (self.conv1, self.conv2, self.conv3, self.conv4,
                      self.fc1, self.fc2, self.fc3):
            nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')

    def forward(self,x):
        """Map a batch of shape (N, 1, 28, 28) to class scores of shape (N, 10).

        The shape comments below use a batch size of 64 as an example. Each
        unpadded 3x3 convolution shrinks height and width by 2.
        """
        #x shape: 64*1*28*28
        out = self.conv1(x)
        out = self.batchnorm1(out)
        out = self.relu(out)
        #64*16*26*26

        out = self.conv2(out)
        out = self.batchnorm2(out)
        out = self.relu(out)
        #64*32*24*24

        out = self.max_pool(out)
        #64*32*12*12

        out = self.conv3(out)
        out = self.batchnorm3(out)
        out = self.relu(out)
        #64*64*10*10

        out = self.conv4(out)
        out = self.batchnorm4(out)
        out = self.relu(out)
        #64*128*8*8
        #flatten
        out = out.view(out.size(0),-1)
        #64*8192  (= 128*8*8 features per sample)

        out = self.fc1(out)
        out = self.relu(out)
        out = self.drop(out)

        out = self.fc2(out)
        out = self.relu(out)
        out = self.drop(out)

        out = self.fc3(out)

        return out
        

In [None]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [None]:
# Cross-entropy on the raw class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
)

In [None]:
def train(model, device, criterion, dataloader, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; it is switched to training mode.
        device: Device the batches are moved to before the forward pass.
        criterion: Loss called as ``criterion(output, labels)``.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer stepping the model's parameters.

    Returns:
        tuple: ``(loss_sum, correct)``. ``loss_sum`` adds up
        ``batch loss * batch size`` and ``correct`` counts right predictions.
        Both are totals, not averages; the caller normalises them.
    """
    model.train()
    running_loss = 0
    num_correct = 0
    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.float().to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so a short last batch counts proportionally.
        running_loss += batch_loss.item() * batch_images.size(0)
        num_correct += (torch.argmax(logits, dim=1) == batch_labels).sum().item()
    return running_loss, num_correct

def valid(model, device, criterion, dataloader):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        device: Device the batches are moved to before the forward pass.
        criterion: Loss called as ``criterion(output, labels)``.
        dataloader: Iterable of ``(images, labels)`` batches.

    Returns:
        tuple: ``(loss_sum, correct)``. ``loss_sum`` adds up
        ``batch loss * batch size`` and ``correct`` counts right predictions.
        Both are totals, not averages; the caller normalises them.
    """
    valid_acc, valid_loss = 0, 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.float().to(device), labels.to(device)
            output = model(images)
            loss = criterion(output, labels)

            valid_loss += loss.item() * images.size(0)

            prediction = torch.argmax(output,dim=1)
            valid_acc += (prediction==labels).sum().item()
    # Bug fix: this used to return the undefined name `valid_correct` (NameError).
    return valid_loss, valid_acc

In [None]:
# Ten shuffled, label-stratified folds with a fixed seed so the split is repeatable.
splits = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=777,
)

In [None]:
# Stratified k-fold cross-validation.
#
# Changes compared with the earlier version of this cell:
#   * Metrics are averaged over the number of samples each sampler actually draws
#     (len(train_idx) / len(val_idx)), not over the full dataset length.
#   * Every fold starts from a freshly initialised model and optimizer, so a fold's
#     validation rows have never been trained on.
#   * Every fold gets its own EarlyStopping state and its own history dict
#     (previously all fold_performance entries pointed at one shared dict).
#   * The best checkpoint over all folds is still written to 'checkpoint.pt'.
fold_performance = {}
# Concatenation of all folds' per-epoch metrics; the plotting cells further down read it.
history = {'train_loss': [], 'valid_loss': [],'train_acc':[],'valid_acc':[]}

total_epochs = 100
best_valid_loss = float('inf')   # lowest validation loss seen in any fold

for fold,(train_idx, val_idx) in enumerate(splits.split(np.arange(len(train_dataset)),train_dataset.y)):
    train_sampler=SubsetRandomSampler(train_idx)
    valid_sampler=SubsetRandomSampler(val_idx)
    train_dataloader = DataLoader(dataset = train_dataset, batch_size = 64, sampler = train_sampler)
    valid_dataloader = DataLoader(dataset = valid_dataset, batch_size = 64, sampler = valid_sampler)

    # Fresh weights and optimizer state for this fold.
    model = CNN().to(device)
    optimizer = optim.Adam(model.parameters(),lr=1e-3,betas=(0.9,0.999))

    fold_checkpoint = 'checkpoint_fold{}.pt'.format(fold+1)
    # NOTE(review): patience (200) exceeds total_epochs (100), so early stopping can
    # never trigger; the object is effectively used only for best-epoch checkpointing.
    early_stopping = EarlyStopping(patience=200, verbose=True, path=fold_checkpoint)
    fold_history = {key: [] for key in history}

    for epoch in range(total_epochs):
        train_loss, train_acc = train(model, device, criterion, train_dataloader, optimizer)
        valid_loss, valid_acc = valid(model, device, criterion, valid_dataloader)

        # train()/valid() return totals; normalise by the size of the subset that was iterated.
        train_loss = train_loss / len(train_idx)
        train_acc = train_acc / len(train_idx) * 100
        valid_loss = valid_loss / len(val_idx)
        valid_acc = valid_acc / len(val_idx) * 100

        print("Epoch:{}/{} AVG Training Loss: {:.4f} AVG Test Loss: {:.3f} AVG Training ACC: {:.3f}% AVG Test ACC: {:.3f}%".format(
           epoch+1, total_epochs, train_loss, valid_loss, train_acc, valid_acc
        ))
        for key, value in (('train_loss', train_loss), ('valid_loss', valid_loss),
                           ('train_acc', train_acc), ('valid_acc', valid_acc)):
            fold_history[key].append(value)
            history[key].append(value)
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    fold_performance['fold{}'.format(fold+1)] = fold_history

    # Promote this fold's best checkpoint if it beats every previous fold.
    if early_stopping.val_loss_min < best_valid_loss:
        best_valid_loss = early_stopping.val_loss_min
        torch.save(torch.load(fold_checkpoint), 'checkpoint.pt')

In [None]:
# Persist the weights the model holds right now, i.e. after the last executed epoch.
final_weights = model.state_dict()
torch.save(final_weights, 'total_epoch_model.pt')

In [None]:
# Restore the weights stored in the early-stopping checkpoint file.
best_weights = torch.load('checkpoint.pt')
model.load_state_dict(best_weights)

In [None]:
# Predict a label for every test image and assemble the submission table.
model.eval()
# Start with an empty LongTensor so the final cat also works for an empty dataloader.
batch_predictions = [torch.LongTensor()]
# Inference only: disabling autograd avoids building graphs for each batch.
with torch.no_grad():
    for data in test_dataloader:
        data = data.float().to(device)
        output=model(data)
        pred = torch.argmax(output,dim=1)
        batch_predictions.append(pred.cpu())
# One concatenation at the end instead of re-allocating the result on every batch.
test_pred = torch.cat(batch_predictions, dim=0)


# ImageId is 1-based; both columns are shaped (n, 1) so they can be joined side by side.
first_column = np.arange(1,len(test_dataset)+1)
first_column = first_column.reshape(-1,1)
test_pred = test_pred.reshape(-1,1)
print(first_column)
print(test_pred)
test_df = pd.DataFrame(np.concatenate((first_column,test_pred.numpy()),axis=1), columns=['ImageId', 'Label'])
print(test_df)

In [None]:
# Training vs. validation loss over every recorded epoch.
# (`plt` and the inline backend are already set up by the import cell at the top,
# so the duplicate magic/import that used to sit here was dropped.)
epochs = np.arange(1, len(history['train_loss'])+1)
plt.figure()
plt.plot(epochs, history['train_loss'], label = 'train_loss')
plt.plot(epochs, history['valid_loss'], label = 'valid_loss')
plt.legend(loc = "upper left")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

In [None]:
# Training vs. validation accuracy over every recorded epoch.
epochs = np.arange(1, len(history['train_loss'])+1)
plt.figure()
plt.plot(epochs, history['train_acc'], label = 'train_acc')
plt.plot(epochs, history['valid_acc'], label = 'valid_acc')
plt.legend(loc = "upper left")
plt.xlabel("epoch")
# Fixed axis label: this figure shows accuracy (stored as a percentage), not loss.
plt.ylabel("accuracy (%)")
plt.show()

In [None]:
# Write the ImageId/Label table without the DataFrame index column.
test_df.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Read the file back as a sanity check; the largest label should be a valid digit.
df = pd.read_csv(filepath_or_buffer='./submission.csv')
print(df)
print(df.loc[:, 'Label'].max())