In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and echo its full path.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
from tensorflow import keras
import tensorflow as tf
import skimage

In [10]:
import numpy as np
import pandas as pd

# Parse the whole training CSV into one float array.
# NOTE(review): the header line is not skipped here; the following cell drops row 0.
TRAIN_CSV = '/kaggle/input/digit-recognizer/train.csv'
train_data = np.genfromtxt(TRAIN_CSV, delimiter=',')

In [11]:
# Discard row 0 (presumably the CSV header), then split column 0 (labels) from
# the remaining columns (pixels).
# NOTE(review): `train_data` is rebound to its own slice, so re-running this
# cell strips one more row and one more column each time.
_body_rows = train_data[1:]
train_labels, train_data = _body_rows[:, 0], _body_rows[:, 1:]

In [12]:
# Sanity check: (number of samples, pixels per sample).
print(np.shape(train_data))

(42000, 784)


In [13]:
# Turn each flat pixel row into a 28x28 image with an explicit single channel axis.
train_data = np.reshape(train_data, (-1, 28, 28, 1))
print(np.shape(train_data))

(42000, 28, 28, 1)


In [19]:
# One-hot encode the digit labels into 10 class columns.
training_array = keras.utils.to_categorical(train_labels, num_classes=10)

In [22]:
# Show the one-hot matrix shape, then the raw label vector it was built from.
print(training_array.shape, train_labels, sep='\n')

(42000, 10)
[1. 0. 1. ... 7. 6. 9.]


In [50]:
class DataGen(keras.utils.Sequence):
    """Batch generator yielding (optionally augmented) min/max-normalized images.

    Each item is one mini-batch ``(images, labels)``. Every image is squeezed,
    optionally flipped and/or rotated at random, and rescaled to ``[0, 1]``.

    Args:
        x_arr: Image array indexed by sample on axis 0 with height and width
            on axes 1 and 2 (this notebook passes ``(N, 28, 28, 1)``).
        y: 2-D label array aligned with ``x_arr`` on axis 0 (this notebook
            passes one-hot rows of length 10).
        batch_size: Maximum number of samples per batch; must be positive.
        augment: When True (the default, matching the earlier behavior) the
            random flips/rotations are applied; pass False to only normalize,
            e.g. for a validation set.

    Raises:
        ValueError: If ``batch_size`` is not positive.

    NOTE(review): flipping digit images can produce shapes that no longer
    match their label — confirm this augmentation is intended.
    """

    def __init__(self, x_arr, y, batch_size, augment=True):
        super().__init__()
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        self.x = x_arr
        self.y = y
        self.batch_size = batch_size
        self.augment = augment

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Integer ceiling division: avoids depending on `math`, which this
        # notebook only imports in a later cell.
        return (len(self.x) + self.batch_size - 1) // self.batch_size

    @staticmethod
    def _augment(img_array):
        """Randomly flip (p=0.5) and rotate by +/-30 degrees (p=0.5) one image."""
        rand_nums = np.random.rand(2, 2)
        if rand_nums[0, 0] > 0.5:
            # flip: left/right or up/down with equal probability
            if rand_nums[0, 1] > 0.5:
                img_array = np.fliplr(img_array)
            else:
                img_array = np.flipud(img_array)
        if rand_nums[1, 0] > 0.5:
            # rotate: 30 or 330 degrees with equal probability
            angle = 30 if rand_nums[1, 1] > 0.5 else 330
            img_array = skimage.transform.rotate(img_array, angle)
        return img_array

    @staticmethod
    def _min_max_normalize(img_array):
        """Rescale an image to [0, 1]; a constant image maps to all zeros."""
        lowest = np.min(img_array)
        span = np.max(img_array) - lowest
        if span == 0:
            # Guard against 0/0, which would otherwise fill the batch with NaNs.
            return np.zeros_like(img_array, dtype="float32")
        return (img_array - lowest) / span

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Returns:
            A tuple ``(im, la)``: ``im`` is a float32 array of shape
            ``(n, height, width)`` and ``la`` a float64 array holding the
            matching label rows, where ``n`` is the number of samples actually
            present in this batch (smaller than ``batch_size`` for the last
            batch when the data does not divide evenly).
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_imgs = self.x[start:stop]
        batch_labs = self.y[start:stop]

        # Size the output by the real sample count so the last batch is not
        # padded with all-zero images carrying all-zero label rows.
        n_samples = batch_imgs.shape[0]
        im = np.zeros((n_samples,) + tuple(batch_imgs.shape[1:3]), dtype="float32")
        for j in range(n_samples):
            img_array = np.squeeze(batch_imgs[j])
            if self.augment:
                img_array = self._augment(img_array)
            im[j] = self._min_max_normalize(img_array)

        # Copy the labels once instead of row by row.
        la = np.array(batch_labs, dtype="float64")
        return im, la

In [51]:
# Hold out the trailing 30% of the samples for validation.
# NOTE(review): the split is positional (no shuffling happens before it).
n_total = train_data.shape[0]
val_samples = int(np.floor(n_total * 0.3))

# Leading rows -> training set, trailing rows -> validation set.
train_imgs, val_imgs = train_data[:-val_samples], train_data[-val_samples:]
train_labs, val_labs = training_array[:-val_samples], training_array[-val_samples:]

# Both generators serve batches of 64.
# NOTE(review): as instantiated here, DataGen applies its random augmentation
# to the validation batches as well.
train_gen = DataGen(train_imgs, train_labs, 64)
valid_gen = DataGen(val_imgs, val_labs, 64)

In [34]:
from keras import Input, layers, Model

def _conv_bn_relu(x, filters):
    """Apply a 3x3 same-padded convolution, batch normalization and ReLU to ``x``."""
    x = layers.Conv2D(filters, 3, strides=1, padding="same")(x)
    x = layers.BatchNormalization()(x)
    return layers.Activation("relu")(x)


def get_model(img_size):
    """Build a small U-Net-style encoder/decoder with a 10-way softmax head.

    The network downsamples twice with max pooling, upsamples twice with skip
    connections back to the matching encoder stage, reduces to one channel with
    a 1x1 convolution, flattens, and classifies with a dense softmax layer.

    Args:
        img_size: ``(height, width)`` tuple of the single-channel input images.
            Both sizes should be divisible by 4 so the upsampled feature maps
            line up with the skip connections (e.g. ``(28, 28)``).

    Returns:
        An uncompiled Keras ``Model`` mapping ``(batch, height, width, 1)``
        inputs to ``(batch, 10)`` class probabilities.
    """
    # NOTE(review): the input is declared float16 while the layers below use
    # their default dtype — confirm the reduced input precision is intended.
    inputs = Input(shape=img_size + (1,), dtype=tf.float16)
    print(inputs.shape)

    # [First half of the network: downsampling inputs]

    # Entry block. The first convolution used to carry an
    # `input_shape=(1, 28, 28, 1)` argument; it is dropped because in the
    # functional API the shape comes from `inputs`.
    e6 = _conv_bn_relu(_conv_bn_relu(inputs, 16), 16)
    pool_e = layers.MaxPool2D(pool_size=2, strides=2, padding='same')(e6)

    # Down Block 1
    db1act2 = _conv_bn_relu(_conv_bn_relu(pool_e, 32), 32)
    pool_1 = layers.MaxPool2D(pool_size=2, strides=2, padding='same')(db1act2)

    # Down Block 2 (bottleneck, no pooling afterwards)
    db2act2 = _conv_bn_relu(_conv_bn_relu(pool_1, 64), 64)

    # Upsampling Block 1: upsample, then merge with the Down Block 1 features
    up1up = layers.UpSampling2D(size=2)(db2act2)
    up1conc = layers.concatenate([up1up, db1act2], axis=-1)
    up1act2 = _conv_bn_relu(_conv_bn_relu(up1conc, 32), 32)

    # Upsampling Block 0: upsample, then merge with the entry block features
    up0up = layers.UpSampling2D(size=2)(up1act2)
    up0conc = layers.concatenate([up0up, e6], axis=-1)
    up0act2 = _conv_bn_relu(_conv_bn_relu(up0conc, 16), 16)

    # Exit Layer: 1x1 convolution down to a single channel
    econv = layers.Conv2D(1, 1, data_format="channels_last")(up0act2)

    # Classification head
    flat = layers.Flatten()(econv)
    outputs = layers.Dense(10, activation='softmax')(flat)

    return Model(inputs, outputs)

In [35]:
# Instantiate the network for 28x28 inputs.
model = get_model((28, 28))
# Print the layer table; whatever summary() returns (presumably None) is kept in `a`.
a = model.summary(line_length=150)

(None, 28, 28, 1)
Model: "model_1"
______________________________________________________________________________________________________________________________________________________
Layer (type)                                     Output Shape                     Param #           Connected to                                      
input_7 (InputLayer)                             [(None, 28, 28, 1)]              0                                                                   
______________________________________________________________________________________________________________________________________________________
conv2d_31 (Conv2D)                               (None, 28, 28, 16)               160               input_7[0][0]                                     
______________________________________________________________________________________________________________________________________________________
batch_normalization_35 (BatchNormalization)      (None, 28,

In [52]:
import time, math
# Compile with Adam and categorical cross-entropy; track categorical accuracy.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, epsilon=0.01)
model.compile(
    optimizer=optimizer,
    loss=['categorical_crossentropy'],
    metrics=['categorical_accuracy'],
)
epochs = 300

# Stop after 20 epochs without improvement, cut the learning rate tenfold
# after 10, and keep only the best checkpoint under the name "test1".
early_stopping = keras.callbacks.EarlyStopping(patience=20, verbose=1)
reduce_lr = keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=10, min_lr=1e-8, verbose=1)
checkpoint = keras.callbacks.ModelCheckpoint("test1", verbose=1, save_best_only=True)
callbacks = [early_stopping, reduce_lr, checkpoint]

# Time the whole training run with wall-clock timestamps.
start = time.time()
history = model.fit(
    train_gen,
    epochs=epochs,
    validation_data=valid_gen,
    callbacks=callbacks,
    shuffle=True,
)
end = time.time()
print('Training time: ', end - start)

2023-01-05 21:28:54.035208: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/300

Epoch 00001: val_loss improved from inf to 0.87445, saving model to test1


2023-01-05 21:30:09.406278: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


Epoch 2/300

Epoch 00002: val_loss improved from 0.87445 to 0.32509, saving model to test1
Epoch 3/300

Epoch 00003: val_loss did not improve from 0.32509
Epoch 4/300

Epoch 00004: val_loss improved from 0.32509 to 0.22493, saving model to test1
Epoch 5/300

Epoch 00005: val_loss improved from 0.22493 to 0.18098, saving model to test1
Epoch 6/300

Epoch 00006: val_loss did not improve from 0.18098
Epoch 7/300

Epoch 00007: val_loss improved from 0.18098 to 0.17281, saving model to test1
Epoch 8/300

Epoch 00008: val_loss improved from 0.17281 to 0.14969, saving model to test1
Epoch 9/300

Epoch 00009: val_loss did not improve from 0.14969
Epoch 10/300

Epoch 00010: val_loss did not improve from 0.14969
Epoch 11/300

Epoch 00011: val_loss improved from 0.14969 to 0.12580, saving model to test1
Epoch 12/300

KeyboardInterrupt: 