In [None]:
# installations
!pip install kaggle
!pip install keras
!pip install tensorflow
!pip install matplotlib
!pip install pandas

In [None]:
# imports
import zipfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.utils import to_categorical

In [None]:
# download dataset
# Runs the Kaggle CLI for the 'mais202-fall2019' competition. The extraction
# cell opens 'mais202-fall2019.zip' from the working directory, so the
# download is expected to land there under that name.
!kaggle competitions download -c mais202-fall2019

In [None]:
# extract the files from the zip folder into an unzipped folder
# The context manager closes the archive's file handle as soon as extraction
# finishes (or fails) instead of leaving it open for the life of the kernel.
with zipfile.ZipFile('mais202-fall2019.zip', 'r') as archive:
    archive.extractall('./mais202-fall2019/')

In [None]:
# load the training and test image arrays from the extracted .npy files
train_images, test_images = (
    np.load(npy_path)
    for npy_path in (
        './mais202-fall2019/train_images.npy',
        './mais202-fall2019/test_images.npy',
    )
)

In [None]:
# read the train labels
labels = pd.read_csv('./mais202-fall2019/train_labels.csv')
# One-hot encode the second CSV column. num_classes is pinned to 10 so the
# encoding width always matches the 10-unit softmax output layer of the model,
# rather than being inferred from the largest label that happens to be present.
train_labels = to_categorical(np.array(labels.iloc[:, 1]), num_classes=10)

# test: show the one-hot vector of the first training example
print(train_labels[0])

In [None]:
# load the label-to-string mapping CSV
labels_to_string = pd.read_csv(
    './mais202-fall2019/label_int_to_str_mapping.csv'
)
# keep only the first two columns, as a NumPy array
label_mapping = np.array(labels_to_string.iloc[:, :2])

# sanity check: show the first row of the mapping
print(label_mapping[0])

In [None]:
# function to visualize an image
def show_image(arr):
    """Display a single 28x28 image with matplotlib.

    Parameters
    ----------
    arr : array-like
        Any array holding exactly 784 values (for example shape (28, 28),
        (784,) or (28, 28, 1)). Values may be on the unit scale [0, 1] or
        already on the 0-255 byte scale.

    Returns
    -------
    None
        The image is rendered through ``plt.imshow`` / ``plt.show``.

    Raises
    ------
    ValueError
        Raised by ``np.reshape`` when ``arr`` does not hold 784 values.
    """
    pixels = np.reshape(np.asarray(arr, dtype=np.float64), (28, 28))
    # Only unit-range data is scaled up. Multiplying data that is already on
    # the 0-255 scale by 255 would wrap around when cast to uint8 and
    # scramble the displayed intensities.
    if pixels.max() <= 1.0:
        pixels = pixels * 255
    # Clip before the cast so out-of-range values saturate instead of wrapping.
    two_d = np.clip(pixels, 0, 255).astype(np.uint8)
    plt.imshow(two_d, interpolation='nearest')
    plt.show()
    
# visualize one image: the first training image, shown before the
# normalization cell runs, so the pixel values are still as loaded from disk
show_image(train_images[0])

In [None]:
# test: print the shape of the training array and the contents of its first
# image, as loaded (this cell runs before normalization and reshaping)
print(train_images.shape, '\n\n', train_images[0])

In [None]:
# rescale both image sets: divide by 255, then shift down by 0.5
# (assuming raw pixels span 0-255, this yields values in [-0.5, 0.5]).
# NOTE: the result is assigned back to the same names, so running this cell
# a second time rescales the already-normalized data again.
train_images, test_images = (
    images / 255 - 0.5 for images in (train_images, test_images)
)

In [None]:
# insert a length-1 axis at position 3 of each image array; the model below
# declares input_shape=(28, 28, 1), i.e. it expects a trailing channel axis
train_images, test_images = (
    np.expand_dims(images, 3) for images in (train_images, test_images)
)

In [None]:
# test: print the training array's shape and first image again, now after
# the normalization and reshaping cells
print(train_images.shape, '\n\n', train_images[0])

In [None]:
# test: same check for the test array (shape and first image after
# normalization and reshaping)
print(test_images.shape, '\n\n', test_images[0])

In [None]:
# hyperparameters of the convolution and pooling layers
num_filters = 8
filter_size = 3
pool_size = 2

# build the model layer by layer:
# convolution -> max pooling -> flatten -> 10-way softmax classifier
model = Sequential()
model.add(Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))

In [None]:
# configure training: Adam optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded), and accuracy reported as a metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# fit the model on the full training set for 50 epochs
model.fit(x=train_images, y=train_labels, epochs=50)

In [None]:
# predict: run the trained model on the test images. The helper functions
# further down treat the result as a 2-D array with one row of class scores
# per test image.
test_predictions = model.predict(test_images)
# show the shape of the prediction array
print(test_predictions.shape)

In [None]:
# function that takes one categorical (one-hot or class-score) vector and
# puts it back in integer label form
def to_uncategorical(arr):
    """Return the class index encoded by a categorical vector.

    The class is the position of the largest entry. This inverts a one-hot
    encoding and, for a softmax output, picks the most probable class.
    A probability-weighted average of the indices must not be used for this:
    for scores such as [0.4, 0.0, 0.6] it rounds to class 1, which the model
    considers impossible.

    Parameters
    ----------
    arr : array-like
        1-D vector with one score per class.

    Returns
    -------
    int
        Index of the largest entry (the first one if there are ties).

    Raises
    ------
    ValueError
        If ``arr`` is empty.
    """
    return int(np.argmax(arr))
    
# test: print the class scores predicted for the first test image, then the
# single integer label that to_uncategorical derives from them
print(test_predictions[0])
print(to_uncategorical(test_predictions[0]))

In [None]:
# function that converts every row of a prediction array to its integer label
def to_uncategorical_arr(arr):
    """Convert a batch of categorical vectors to a list of class indices.

    Each row is decoded to the position of its largest entry (the most
    probable class for softmax output), in one vectorized call.

    Parameters
    ----------
    arr : array-like
        2-D array of shape (number of samples, number of classes).

    Returns
    -------
    list of int
        One class index per row, in row order. Empty if there are no rows.

    Raises
    ------
    ValueError
        If ``arr`` is not 2-D, or has rows but zero classes.
    """
    scores = np.asarray(arr)
    if scores.ndim != 2:
        raise ValueError(
            'expected a 2-D array of shape (samples, classes), got %d-D' % scores.ndim
        )
    # argmax along axis 1 picks the winning class per row; tolist() yields
    # plain Python ints.
    return np.argmax(scores, axis=1).tolist()
    
# test: show only the first few predicted labels; printing the label of
# every test image would flood the cell output
print(to_uncategorical_arr(test_predictions)[:10])

In [None]:
# build the submission table: one row per test image, with a running ID and
# the predicted integer label
n, m = test_predictions.shape
d = dict(ID=range(n), label=to_uncategorical_arr(test_predictions))
data_frame = pd.DataFrame(d)
print(data_frame)

# write the table next to the extracted data, without the pandas index column
data_frame.to_csv('./mais202-fall2019/test_predictions.csv', index=False)