# CNN on Griddified MNIST

### Load and flatten data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras

# Load the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Keep the sample axis and collapse all remaining axes into one feature axis.
x_train_flatten = x_train.reshape(len(x_train), -1)
x_test_flatten = x_test.reshape(len(x_test), -1)

### Apply Griddify to the train and test sets

In [2]:
from griddify import Griddify
import pickle
from pathlib import Path

# Cached Griddify outputs. The transform is recomputed only when at least one
# of the two cache files is missing.
path_train = Path("../../files/MNIST/Xi_train.pkl")
path_test = Path("../../files/MNIST/Xi_test.pkl")

if path_train.is_file() and path_test.is_file():
    # Both caches exist: load them instead of recomputing.
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only load cache files that were written by this notebook.
    with open(path_train, "rb") as f:
        Xi_train = pickle.load(f)
    with open(path_test, "rb") as f:
        Xi_test = pickle.load(f)
else:
    # Fit Griddify on the training features only, then apply the fitted
    # transform to both splits.
    gf = Griddify(preprocess=False)
    gf.fit(x_train_flatten)
    Xi_train = gf.transform(x_train_flatten)
    Xi_test = gf.transform(x_test_flatten)

    # Persist both results. open() does not create missing parent folders, so
    # make sure the cache directory exists before writing.
    for cache_path, grid in ((path_train, Xi_train), (path_test, Xi_test)):
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(cache_path, "wb") as f:
            pickle.dump(grid, f, pickle.HIGHEST_PROTOCOL)

### Preprocessing

In [3]:
# Divide by 255 (intended to bring the images into the [0, 1] range) and append
# a trailing channel axis so each image has shape (28, 28, 1).
# The rank check keeps the cell safe to re-run: once an array is 4-D it has
# already been processed, and repeating the step would divide by 255 a second
# time and add another axis.
if Xi_train.ndim == 3:
    Xi_train = np.expand_dims(Xi_train.astype("float32") / 255, -1)
if Xi_test.ndim == 3:
    Xi_test = np.expand_dims(Xi_test.astype("float32") / 255, -1)

# Fail loudly if an input had an unexpected rank and was therefore skipped.
assert Xi_train.ndim == 4 and Xi_test.ndim == 4, "expected (samples, height, width, 1) arrays"

print("Xi_train shape:", Xi_train.shape)
print(Xi_train.shape[0], "train samples")
print(Xi_test.shape[0], "test samples")

Xi_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


### Model

In [4]:
# Model parameters
num_classes = 10
input_shape = (28, 28, 1)

# Convert class vectors to binary (one-hot) class matrices.
# Only 1-D label vectors are encoded, so re-running this cell does not encode
# labels that were already converted (which would add an extra axis).
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

In [5]:
# Build model
from tensorflow.keras import layers

# Layer stack: two Conv2D + MaxPooling2D stages, followed by flatten, dropout
# and a softmax output with one unit per class.
cnn_layers = [
    keras.Input(shape=input_shape),
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="softmax"),
]
model = keras.Sequential(cnn_layers)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1600)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                1

### Train model

In [6]:
# Training hyper-parameters.
batch_size = 128
epochs = 15

# Categorical cross-entropy loss, Adam optimizer, accuracy as the tracked metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the Griddified images; 10% of the training data is held out for validation.
model.fit(
    Xi_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/15
  128/54000 [..............................] - ETA: 6:47

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-37978336b86e>", line 1, in <module>
    model.fit(Xi_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 728, in fit
    use_multiprocessing=use_multiprocessing)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 324, in fit
    total_epochs=epochs)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 123, in run_one_epoch
    batch_outs = execution_function(iterator)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/

In [None]:
# Training vs. validation accuracy recorded by the most recent fit() call.
history_df = pd.DataFrame(model.history.history)
ax = history_df[['accuracy', 'val_accuracy']].plot(title="Performance Learning Curve")
# Label the axes so the figure can be read on its own; the trailing semicolon
# keeps the notebook from echoing the matplotlib return value.
ax.set(xlabel="Epoch", ylabel="Accuracy");

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-14857be67987>", line 1, in <module>
    pd.DataFrame(model.history.history)[['accuracy', 'val_accuracy']].plot(title="Performance Learning Curve")
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/pandas/core/frame.py", line 2912, in __getitem__
    indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
    self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/pandas/core/indexing.py", line 1298, in _validate_read_indexer
    raise KeyError(f"None of [{key}] are in the [{axis_name}]"

In [None]:
# Training vs. validation loss recorded by the most recent fit() call.
history_df = pd.DataFrame(model.history.history)
ax = history_df[['loss', 'val_loss']].plot(title="Optimization Learning Curve")
# Label the axes so the figure can be read on its own; the trailing semicolon
# keeps the notebook from echoing the matplotlib return value.
ax.set(xlabel="Epoch", ylabel="Loss");

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-7acfba1bff6c>", line 1, in <module>
    pd.DataFrame(model.history.history)[['loss', 'val_loss']].plot(title="Optimization Learning Curve")
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/pandas/core/frame.py", line 2912, in __getitem__
    indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
    self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/pandas/core/indexing.py", line 1298, in _validate_read_indexer
    raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyEr

### Evaluation on test set: Loss and Accuracy

In [None]:
# Evaluate on the Griddified test split; index 0 is the loss and index 1 the
# accuracy metric configured at compile time.
score_griddify = model.evaluate(Xi_test, y_test, verbose=0)
griddify_loss, griddify_accuracy = score_griddify[0], score_griddify[1]

print("Test loss:", griddify_loss)
print("Test accuracy:", griddify_accuracy)

ERROR! Session/line number was not unique in database. History logging moved to new session 3339


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-6e7ee147df37>", line 1, in <module>
    score_griddify = model.evaluate(Xi_test, y_test, verbose=0)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 833, in evaluate
    use_multiprocessing=use_multiprocessing)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 456, in evaluate
    sample_weight=sample_weight, steps=steps, callbacks=callbacks, **kwargs)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 444, in _model_iteration
    total_epochs=1)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensor

-----

# Performance comparison between the original MNIST and the Griddified one

### Preprocess, fit and evaluate on the original MNIST dataset

Preprocessing

In [7]:
# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

ERROR! Session/line number was not unique in database. History logging moved to new session 3340
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


Fitting

In [None]:
# Start from a freshly initialised copy of the same architecture. Calling
# fit() on the existing model would continue from the weights (and optimizer
# state) already trained on the Griddified images, so the "original MNIST"
# score would not be an independent baseline.
# Note: after this cell `model` refers to the new network; the Griddified
# scores must already have been computed.
model = keras.models.clone_model(model)

# clone_model does not carry over the compile configuration, so compile again
# with the same settings used for the Griddified run.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Train on 54000 samples, validate on 6000 samples
Epoch 1/15
  128/54000 [..............................] - ETA: 13s

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-6df7dee5b35a>", line 1, in <module>
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 728, in fit
    use_multiprocessing=use_multiprocessing)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 324, in fit
    total_epochs=epochs)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 123, in run_one_epoch
    batch_outs = execution_function(iterator)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/p

Evaluating

In [None]:
# Evaluate on the original test split; index 0 is the loss and index 1 the
# accuracy metric configured at compile time.
score_original = model.evaluate(x_test, y_test, verbose=0)
original_loss, original_accuracy = score_original[0], score_original[1]

print("Test loss:", original_loss)
print("Test accuracy:", original_accuracy)

ERROR! Session/line number was not unique in database. History logging moved to new session 3341


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-0e503aa7ecaa>", line 1, in <module>
    score_original = model.evaluate(x_test, y_test, verbose=0)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 833, in evaluate
    use_multiprocessing=use_multiprocessing)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 456, in evaluate
    sample_weight=sample_weight, steps=steps, callbacks=callbacks, **kwargs)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 444, in _model_iteration
    total_epochs=1)
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/tensorf

### Comparison of test accuracies

In [None]:
# Report both test accuracies and their absolute gap, rounded to 3 decimals.
print("Accuracy of the original MNIST:", score_original[1])
print("Accuracy of the Griddified MNIST:", score_griddify[1])
accuracy_gap = abs(score_original[1] - score_griddify[1])
print("Difference:", round(accuracy_gap, 3))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-93da28ccb55f>", line 1, in <module>
    print("Accuracy of the original MNIST:", score_original[1])
NameError: name 'score_original' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/ultratb.py", line 1169, in get_records
    return _fixed_getinnerframes(et

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-93da28ccb55f>", line 1, in <module>
    print("Accuracy of the original MNIST:", score_original[1])
NameError: name 'score_original' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hodei/anaconda3/envs/molmap/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3263, in run_ast_nodes
    if (await self.run_c