# Experiment with the mnist fashion data set

It is very similar to the mnist digit data set.  It has 10 categories of black and white images. 

* 0 = t-shirt
* 1 = trouser
* 2 = pullover
* 3 = dress
* 4 = coat
* 5 = sandal
* 6 = shirt
* 7 = sneaker
* 8 = bag
* 9 = ankle boot

In [2]:
from aitk.utils import gallery, array_to_image
from aitk.networks import Network

import tensorflow
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical

# Get the data
* Download the data
* Explore what you have

In [3]:
(train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()

In [4]:
train_x.shape

(60000, 28, 28)

In [5]:
test_x.shape

(10000, 28, 28)

In [6]:
images = [array_to_image(train_x[i]) for i in range(10)]
bigger = [image.resize((100,100)) for image in images]

In [7]:
gallery(bigger, gallery_shape=(5,2), labels=train_y[:10])

0,1,2,3,4
9,0,0,3,0
2,7,2,5,5


In [8]:
train_y[:20]

array([9, 0, 0, 3, 0, 2, 7, 2, 5, 5, 0, 9, 5, 5, 7, 9, 1, 0, 6, 4],
      dtype=uint8)

# Prepare the data for the network
* You may need to normalize the inputs so that they are in the range [0,1].
* You may need to convert the targets so that they are represented as one-hot vectors when you are doing categorization.

### Input data

In [9]:
min_input = train_x.min()
max_input = train_x.max()
print("range of input values is:", min_input, max_input)

range of input values is: 0 255


In [10]:
train_x_normalized = train_x/255

In [11]:
test_x_normalized = test_x/255

### Data sent into a Conv2D layer must have a depth
* This may require you to do a reshape command.
* For these black and white images there is only one channel of information.
* For color images there are typically 3 channels (Red, Green, Blue)

In [15]:
train_x.shape

(60000, 28, 28)

In [12]:
train_x_normalized = train_x_normalized.reshape(60000,28,28,1)
test_x_normalized = test_x_normalized.reshape(10000,28,28,1)

In [13]:
test_x_normalized.shape

(10000, 28, 28, 1)

In [14]:
test_y.shape

(10000,)

### Target data

In [12]:
num_categories = len(set(train_y))
print(num_categories)

10


In [13]:
train_y_category = to_categorical(train_y, num_categories)
print(train_y_category)

[[0. 0. 0. ... 0. 0. 1.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [14]:
test_y_category = to_categorical(test_y, num_categories)

In [15]:
train_y_category[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

# Construct the model

This is just one possible configuration of layers to learn the data set.  You should experiment with the number of filters, the filter size, and the layers themselves. However, be sure to leave the output layer's activation function as `softmax`.

You should explicitly name each layer so that it will be easier to inspect them later. Use a clear naming convention.  For example the first convolution layer might be "conv1", the next "conv2", and so on.

# Compile the model

For categorical data like handwritten digits we typically use a loss function called `categorical_crossentropy` and should be used in conjunction with the `softmax` activation function at the output layer.  This interprets the outputs as representing probabilities and forces them to sum to 1.0.  Thus the output from the network will reflect how likely it considers a particular input to be a member of one of the output categories.

In [16]:
neural_net = Sequential()
neural_net.add(Conv2D(32,(3,3),padding="same", name="conv1", activation="relu",input_shape=(28,28,1)))
neural_net.add(Conv2D(32,(3,3),padding="same", name="conv2", activation="relu"))
neural_net.add(MaxPooling2D(pool_size=(2,2),name="pool1"))
neural_net.add(Dropout(0.25))
neural_net.add(Conv2D(64,(3,3),padding="same", name="conv3", activation="relu"))
neural_net.add(Conv2D(64,(3,3),padding="same", name="conv4", activation="relu"))
neural_net.add(MaxPooling2D(pool_size=(2,2),name="pool2"))
neural_net.add(Dropout(0.25))
neural_net.add(Flatten(name="flatten"))
neural_net.add(Dense(50, name="hidden", activation='relu'))
neural_net.add(Dense(10, name="output",activation='softmax'))
neural_net.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1 (Conv2D)              (None, 28, 28, 32)        320       
                                                                 
 conv2 (Conv2D)              (None, 28, 28, 32)        9248      
                                                                 
 pool1 (MaxPooling2D)        (None, 14, 14, 32)        0         
                                                                 
 dropout (Dropout)           (None, 14, 14, 32)        0         
                                                                 
 conv3 (Conv2D)              (None, 14, 14, 64)        18496     
                                                                 
 conv4 (Conv2D)              (None, 14, 14, 64)        36928     
                                                                 
 pool2 (MaxPooling2D)        (None, 7, 7, 64)          0

2022-11-03 21:09:23.562703: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-03 21:09:23.576894: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-03 21:09:23.577206: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-03 21:09:23.579511: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [17]:
neural_net.compile(optimizer="Adam", loss="categorical_crossentropy",
                   metrics=['accuracy'])

# Create an aitk Network
This allows us to do more visualization of what is happening inside the network.

In [18]:
net = Network(neural_net)

In [19]:
from time import sleep
for i in range(10):
    net.propagate(train_x_normalized[i])
    net.display(train_x_normalized[i])
    sleep(1.0)

# Train the model

In [None]:
history = net.fit(train_x_normalized, 
                  train_y_category, 
                  verbose=1,
                  validation_data=(test_x_normalized, 
                                   test_y_category),
                  epochs=25)

# Inspect the feature maps

We can ask the network to propagate to specific layers and inspect the representations created there to try to understand how it has solved the problem. First we need to set up the color maps to display properly by finding the maximum possible value using a particular test image.

In [75]:
test_image = 100 # change this to see a different image 

In [76]:
from math import ceil
for layer in ["conv1", "conv2", "pool1", "conv3", "conv4", "pool2"]:
    data = [net.propagate_to(test_x_normalized[test_image], layer, channel=channel)
            for channel in range(8)]
    largest = max([sublist.max() for sublist in data])
    net.set_config_layer(layer, colormap=("gray", 0, ceil(largest)))

In [85]:
images = [net.propagate_to(test_x_normalized[test_image], "conv1", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


In [78]:
images = [net.propagate_to(test_x_normalized[test_image], "conv2", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


In [79]:
images = [net.propagate_to(test_x_normalized[test_image], "pool1", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


In [80]:
images = [net.propagate_to(test_x_normalized[test_image], "conv3", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


In [81]:
images = [net.propagate_to(test_x_normalized[test_image], "conv4", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


In [82]:
images = [net.propagate_to(test_x_normalized[test_image], "pool2", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


# Examine the results
Check which inputs the network is getting wrong. Recall that there are 10 thousand test images.

In [83]:
from numpy import argmax
outputs = net.predict(test_x_normalized)
answers = [argmax(output) for output in outputs]
targets = [argmax(target) for target in test_y_category]

2022-11-03 18:13:06.544707: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 957.03MiB (rounded to 1003520000)requested by op Conv2D
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-11-03 18:13:06.544790: I tensorflow/core/common_runtime/bfc_allocator.cc:1027] BFCAllocator dump for GPU_0_bfc
2022-11-03 18:13:06.544823: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (256): 	Total Chunks: 90, Chunks in use: 90. 22.5KiB allocated for chunks. 22.5KiB in use in bin. 6.4KiB client-requested in use in bin.
2022-11-03 18:13:06.544845: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (512): 	Total Chunks: 1, Chunks in use: 0. 768B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2022-11-03 18:13:06.544868: I tensorflow/cor

Exception: You must supply the inputs for these banks in order and in the right shape: conv1_input: [(None, 28, 28, 1)]

In [44]:
incorrect = [i for i in range(len(answers)) if answers[i] != targets[i]]
len(incorrect)

NameError: name 'answers' is not defined

In [45]:
missed_target = [targets[i] for i in incorrect]
wrong_answer = [answers[i] for i in incorrect]

NameError: name 'incorrect' is not defined

Let's find out which target classifications are most frequently missed.

In [29]:
from collections import Counter

In [30]:
t_ctr = Counter(missed_target)
t_ctr.most_common()

NameError: name 'missed_target' is not defined

Let's find out which wrong classification is most frequently given.

In [47]:
a_ctr = Counter(wrong_answer)
a_ctr.most_common()

[(6, 265),
 (2, 154),
 (3, 135),
 (4, 126),
 (0, 109),
 (7, 50),
 (9, 31),
 (8, 26),
 (5, 21),
 (1, 9)]

In [48]:
images = [array_to_image(test_x[index]) for index in incorrect]
label = [str(missed_target[i]) + "," + str(wrong_answer[i]) for i in range(len(wrong_answer))]
gallery(images, labels=label)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
75,95,42,60,36,26,26,46,34,97,24,42,26,46,6,63,43,43,2,42,26,13,62,42,36,60,46,97.0,46.0,62.0,80.0
26,6,43,26,6,20,46,97,24,36,46,79,46,80,42,6,46,79,63,36,6,64,84,42,13,26,36,86.0,42.0,63.0,57.0
20,3,23,13,79,41,46,24,62,26,6,3,24,3,59,46,62,64,8,46,6,62,43,34,6,60,30,42.0,42.0,4.0,62.0
42,46,64,43,86,46,46,3,64,6,46,60,43,64,62,26,64,60,3,46,6,42,60,23,24,6,42,85.0,6.0,42.0,24.0
30,42,36,23,42,2,46,26,79,24,6,46,62,13,62,57,3,20,57,23,24,83,6,6,26,57,60,26.0,97.0,64.0,68.0
60,34,6,46,60,6,6,24,62,26,60,34,43,24,6,80,38,42,6,6,26,46,60,62,97,60,3,57.0,97.0,24.0,36.0
83,24,60,57,46,79,57,23,46,26,42,43,26,26,46,64,46,62,34,6,3,24,24,6,64,32,42,62.0,62.0,6.0,62.0
60,64,60,60,32,87,46,62,83,42,62,63,26,6,34,42,97,79,6,64,26,63,6,60,62,6,26,8.0,62.0,6.0,6.0
6,42,46,46,43,2,36,6,26,6,46,43,43,6,34,30,6,60,60,64,62,60,64,42,24,8,34,32.0,6.0,79.0,46.0
24,6,97,64,13,2,26,79,62,62,60,79,75,83,36,8,24,63,36,12,42,6,60,24,43,2,26,62.0,46.0,46.0,62.0
