In [1]:
import numpy as np
from my_cnn.CNN import Classifier
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the MNIST training CSV: one 'label' column plus 28*28 pixel columns per row.
train_data = pd.read_csv('data/mnist_train.csv')
# Labels are stored compactly as int8.
y_train = train_data['label'].astype(np.int8)
# Take the sample count from the frame itself instead of hard-coding 42000,
# so the cell keeps working when the CSV has a different number of rows
# (same approach as the test-set cell).
X_train = train_data.drop(['label'], axis=1).to_numpy().reshape((len(train_data), 28, 28))
# Divide by 255 and store as float32 (presumably pixels are 0..255, giving values in [0, 1] -- TODO confirm).
X_train = (X_train / 255).astype(np.float32)

In [5]:
# Layer sequence handed to Classifier; how the string is parsed is defined in my_cnn (not visible here).
architecture = 'conv pool lin'
# Number of distinct labels present in the training set.
n_classes = len(set(y_train))
# One parameter entry per layer named in `architecture`.
# NOTE(review): `(n_classes)` is a plain int, not a 1-tuple like `(4,)` -- confirm which form Classifier expects.
layer_params = [(6, 7), (4,), (n_classes)]
# Shape of a single input sample.
input_size = X_train[0].shape

cfr = Classifier(architecture, layer_params, n_classes, input_size)

In [None]:
# Train on the full training set with the given learning rate and epoch count.
cfr.fit(X_train, y_train, learning_rate=1e-3, n_epochs=10)

In [6]:
import pickle

# Persist the classifier object `cfr` to disk so a later cell can reload it.
with open('model3.pkl', 'wb') as file:
    pickle.dump(cfr, file)

In [7]:
# Evaluate the classifier one sample at a time on (a slice of) the training set,
# showing a running accuracy on a single console line.
n_samples = len(X_train)
start = 0
# Clamp the upper bound so a non-zero `start` can never index past the data.
samples = range(start, min(start + n_samples, len(X_train)))
correct = 0
for n_done, sample in enumerate(samples):
    # Running accuracy over the samples already evaluated. The denominator is the
    # number processed so far (not the absolute index), so it stays correct when
    # `start != 0`; the 1e-6 avoids a division by zero on the first iteration.
    print(f'Sample {sample}, accuracy {correct / (n_done + 1e-6)}', end='\r')
    out = cfr.predict(X_train[sample])
    true_label = y_train[sample]
    correct += out == true_label

# The in-loop print runs before each prediction, so the last sample would never
# be reflected; report the final figure once the loop has finished.
print(f'Evaluated {len(samples)} samples, accuracy {correct / max(len(samples), 1)}')

Sample 9881, accuracy 0.9755085516672899

KeyboardInterrupt: 

In [3]:
import pickle
# Reload the saved classifier into `cfr2`.
# NOTE: pickle.load executes code embedded in the file -- only load model files you created yourself.
with open('model3.pkl', 'rb') as file:
    cfr2 = pickle.load(file)

In [4]:
# Run the reloaded model on the whole training array in one call; the result is displayed, not stored.
cfr2.predict(X_train)

KeyboardInterrupt: 

In [8]:
# Load the test CSV and display its row count.
test_data = pd.read_csv('data/mnist_test.csv')
len(test_data)

28000

In [9]:
# Every column is reshaped into 28x28 images (no column is dropped here, unlike the training set).
X_test = test_data.to_numpy().reshape((len(test_data), 28, 28))
# Same scaling and dtype as the training data: divide by 255, store as float32.
X_test = (X_test / 255).astype(np.float32)

In [12]:
# Predict the whole test array in one call with the reloaded model.
prediction = cfr2.predict(X_test)

In [13]:
# Wrap the predictions in a single-column frame.
y_pred = pd.DataFrame(prediction, columns=['Label'])
# Shift the default 0-based index so the written ImageId column starts at 1.
y_pred.index += 1
# Write two columns: the index (labelled 'ImageId') and 'Label'.
y_pred['Label'].to_csv('data/submission.csv', index_label='ImageId')

In [43]:
def asStride(arr,sub_shape,stride):
    '''Get a strided sub-matrices view of an ndarray.
    See also skimage.util.shape.view_as_windows()

    <arr>: ndarray with at least 2 dimensions; windows slide over the
           first two axes, any further axes are carried through unchanged.
    <sub_shape>: tuple of 2, window size (rows, cols).
    <stride>: tuple of 2, step between consecutive windows (rows, cols).

    Return <subs>: view of shape
           (1+(m1-m2)//stride[0], 1+(n1-n2)//stride[1], m2, n2) + arr.shape[2:]
           built with as_strided, i.e. it shares memory with <arr>.
    '''
    s0,s1=arr.strides[:2]
    m1,n1=arr.shape[:2]
    m2,n2=sub_shape
    # Number of window positions along each axis, followed by the window itself.
    view_shape=(1+(m1-m2)//stride[0],1+(n1-n2)//stride[1],m2,n2)+arr.shape[2:]
    # Moving one window = stride * element step; moving inside a window = element step.
    strides=(stride[0]*s0,stride[1]*s1,s0,s1)+arr.strides[2:]
    subs=np.lib.stride_tricks.as_strided(arr,view_shape,strides=strides)
    return subs

def poolingOverlap(mat,ksize,stride=None,method='max',pad=False):
    '''Overlapping pooling on 2D or 3D data.

    <mat>: ndarray, input array to pool. Pooling runs over the first two axes.
    <ksize>: tuple of 2, kernel size in (ky, kx).
    <stride>: tuple of 2 or None, stride of pooling window.
              If None, same as <ksize> (non-overlapping pooling).
    <method>: str, 'max' for max-pooling,
                   'mean' for mean-pooling.
              Any other value is treated as 'mean'.
    <pad>: bool, pad <mat> or not. If no pad, output has size
           (n-f)//s+1, n being <mat> size, f being kernel size, s stride.
           if pad, output has size ceil(n/s). Padding cells are NaN, so the
           padded result is float64.

    NaN values (padding, or already present in <mat>) are ignored by the
    reduction.

    Return <result>: pooled matrix.
    '''

    m, n = mat.shape[:2]
    ky,kx=ksize
    if stride is None:
        stride=(ky,kx)
    sy,sx=stride

    _ceil=lambda x,y: int(np.ceil(x/float(y)))

    if pad:
        ny=_ceil(m,sy)
        nx=_ceil(n,sx)
        # Canvas just large enough to hold ny x nx windows.
        size=((ny-1)*sy+ky, (nx-1)*sx+kx) + mat.shape[2:]
        mat_pad=np.full(size,np.nan)
        # When the kernel is smaller than the stride the canvas can be SMALLER
        # than <mat> (trailing rows/cols are never covered by any window).
        # Copy only the overlapping region; assigning all of <mat> would raise
        # a broadcast error in that case.
        rows=min(m,size[0])
        cols=min(n,size[1])
        mat_pad[:rows,:cols,...]=mat[:rows,:cols,...]
    else:
        # Drop trailing rows/cols that do not fit a whole window.
        mat_pad=mat[:(m-ky)//sy*sy+ky, :(n-kx)//sx*sx+kx, ...]

    view=asStride(mat_pad,ksize,stride)

    # Reduce over the two window axes.
    if method=='max':
        result=np.nanmax(view,axis=(2,3))
    else:
        result=np.nanmean(view,axis=(2,3))

    return result