# Fashion-MNIST Self-Trainer

## Setup Python

In [None]:
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, MaxPool2D
from keras.layers import BatchNormalization, LeakyReLU
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, BatchNormalization, Activation, Embedding, ZeroPadding2D, LeakyReLU, UpSampling2D, Conv2D, Conv2DTranspose
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.metrics import f1_score
import numpy as np
import random
import matplotlib.pyplot as plt


## Setup Fashion-MNIST dataset

In [None]:
# Read the pre-exported Fashion-MNIST arrays (train images/labels, test images/labels)
x_train, y_train = np.load("fashion-x.npy"), np.load("fashion-y.npy")
x_test, y_test = np.load("fashion-tx.npy"), np.load("fashion-ty.npy")

# Convert images to float32 and divide by 255; only the test images are
# reshaped to (N, 28, 28, 1) here, the training images keep their stored shape.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.reshape((len(x_test), 28, 28, 1)).astype(np.float32) / 255.0

# Replace the integer test labels with 10-wide one-hot rows
one_hot = np.zeros((len(y_test), 10))
for row, label in zip(one_hot, y_test):
    row[label] = 1.0  # `row` is a view, so this writes into `one_hot`
y_test = one_hot

## Setup Convolutional Network

In [None]:
class CNN(object):
    """Convolutional image classifier with a 10-way softmax output.

    The compiled Keras model is exposed as ``self.acnn``; it is compiled with
    Adam (lr=0.0001), categorical cross-entropy and an accuracy metric.
    """

    def __init__(self, width=28, height=28, channels=1):
        self.width, self.height, self.channels = width, height, channels
        self.shape = (self.width, self.height, self.channels)

        self.opt = Adam(lr=0.0001)

        self.acnn = self.ACNN()
        self.acnn.compile(optimizer=self.opt, loss='categorical_crossentropy', metrics=['accuracy'])

    def ACNN(self):
        """Build and return the (uncompiled) Sequential classifier."""
        # Two strided convolutions first, with batch normalisation after the first one.
        stack = [
            Conv2D(filters=32, kernel_size=7, strides=(3, 3), padding='same', input_shape=self.shape),
            Dropout(rate=0.2),
            Activation('relu'),
            BatchNormalization(momentum=0.99),
            Conv2D(filters=32, kernel_size=4, strides=(2, 2), padding='same'),
            Dropout(rate=0.2),
            Activation('relu'),
        ]

        # Five identical 3x3, stride-1 stages: convolution -> dropout -> ReLU.
        for _ in range(5):
            stack.append(Conv2D(filters=32, kernel_size=3, strides=(1, 1), padding='same'))
            stack.append(Dropout(rate=0.1))
            stack.append(Activation('relu'))

        # Classification head: flatten, wide tanh layer, softmax over 10 classes.
        stack.append(Flatten())
        stack.append(Dense(4096, activation='tanh'))
        stack.append(Dense(units=10, activation='softmax'))

        model = Sequential()
        for layer in stack:
            model.add(layer)
        return model

## Setup Generative Adversarial Network

In [None]:
class GAN(object):
    """Class-conditional GAN whose discriminator is an 11-way classifier.

    The generator maps a latent vector to an image. The first ``NUM_CLASSES``
    entries of the latent vector are a one-hot class code, the remaining
    entries are Gaussian noise. The discriminator outputs a softmax over the
    ``NUM_CLASSES`` real classes plus one extra "synthetic" class (the last
    index). ``self.stack`` chains generator and (frozen) discriminator so the
    generator can be trained to make the discriminator predict the requested
    class.
    """

    NUM_CLASSES = 10  # real classes; index NUM_CLASSES is the "synthetic" class

    def __init__(self, width=28, height=28, channels=1):
        self.width = width
        self.height = height
        self.channels = channels

        # One-hot class code followed by 192 noise dimensions.
        self.latent_dim = 192 + self.NUM_CLASSES

        self.shape = (self.width, self.height, self.channels)

        self.opt = Adam(lr=0.0002, beta_1=0.5, decay=8e-8)

        self.gen = self.generator()
        self.gen.compile(loss='binary_crossentropy', optimizer=self.opt)

        # The discriminator ends in a softmax over mutually exclusive classes
        # and is trained on one-hot targets, so categorical cross-entropy is
        # the matching loss (binary cross-entropy treats each output as an
        # independent yes/no decision).
        self.disc = self.discriminator()
        self.disc.compile(loss='categorical_crossentropy', optimizer=self.opt, metrics=['accuracy'])

        # Freeze the discriminator inside the stacked model so that training
        # the stack only updates the generator.
        self.disc.trainable = False
        self.stack = Sequential()
        self.stack.add(self.gen)
        self.stack.add(self.disc)
        self.stack.compile(loss='categorical_crossentropy', optimizer=self.opt)

    def discriminator(self):
        """Build the (uncompiled) discriminator: conv stack + (NUM_CLASSES + 1)-way softmax."""
        model = Sequential()
        model.add(Conv2D(64, kernel_size=5, strides=(2, 2), padding='same', input_shape=self.shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(64, kernel_size=5, strides=(2, 2), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(64, kernel_size=3, strides=(1, 1), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(32, kernel_size=3, strides=(1, 1), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(32, kernel_size=3, strides=(1, 1), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Flatten())
        model.add(Dense(self.NUM_CLASSES + 1, activation='softmax'))

        return model

    def generator(self):
        """Build the (uncompiled) generator: dense projection to 7x7x384, then transposed convs."""
        model = Sequential()
        model.add(Dense(7 * 7 * 384, input_shape=(self.latent_dim,)))
        model.add(Reshape((7, 7, 384)))
        model.add(Conv2DTranspose(64, kernel_size=3, strides=(2, 2), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.9))
        model.add(Conv2DTranspose(64, kernel_size=3, strides=(2, 2), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.9))
        model.add(Conv2DTranspose(32, kernel_size=5, strides=(1, 1), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.9))
        model.add(Conv2DTranspose(32, kernel_size=5, strides=(1, 1), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2DTranspose(self.channels, kernel_size=1, strides=(1, 1), padding='same'))
        model.add(Activation('sigmoid'))

        return model

    @staticmethod
    def _one_hot(labels, width):
        """Return a (len(labels), width) float array with a 1.0 at each label index."""
        return np.eye(width)[np.asarray(labels, dtype=int)]

    def _conditioned_noise(self, size, labels=None):
        """Return ``(noise, labels)`` for ``size`` class-conditioned latent vectors.

        ``noise`` has shape (size, latent_dim); its first NUM_CLASSES columns
        hold the one-hot class code and the rest are N(0, 1) samples.
        ``labels`` are drawn uniformly from the real classes when not given.
        """
        if labels is None:
            labels = [random.randint(0, self.NUM_CLASSES - 1) for _ in range(size)]
        labels = np.asarray(labels, dtype=int).reshape(-1)

        noise = np.random.normal(0, 1, (size, self.latent_dim))
        noise[:, :self.NUM_CLASSES] = 0.0
        noise[np.arange(size), labels] = 1.0
        return noise, labels

    def _save_samples(self, epoch):
        """Generate one image per class and save the grid as Samples/gan_<epoch>.png."""
        import os  # local import: only needed to create the output directory

        samples = self.NUM_CLASSES
        noise, _ = self._conditioned_noise(samples, labels=np.arange(samples))
        images = self.gen.predict(noise)

        # savefig does not create missing directories.
        os.makedirs("Samples", exist_ok=True)

        plt.figure(figsize=(10, 10))
        for i, image in enumerate(images):
            plt.subplot(4, 4, i + 1)
            plt.imshow(np.reshape(image * 255, [self.height, self.width]), cmap='gray')
            plt.axis('off')
        plt.tight_layout()
        plt.savefig("Samples/gan_%d.png" % epoch)
        plt.close('all')

    def train(self, x_train, y_train, epochs=20000, batch = 32):
        """Alternate discriminator and generator updates for ``epochs`` steps.

        Each step trains the discriminator on ``batch // 2`` consecutive real
        samples (starting at a random offset) plus ``batch // 2`` generated
        images labelled as the synthetic class, then trains the generator
        through the stacked model on ``batch`` conditioned latent vectors
        whose target is the requested class. Every 1000 steps the losses are
        printed and a sample grid is saved.

        Args:
            x_train: training images; each window of ``batch // 2`` images is
                reshaped to (batch // 2, width, height, channels).
            y_train: integer class labels aligned with ``x_train``.
            epochs: number of training steps.
            batch: generator batch size; the discriminator sees two halves of
                ``batch // 2`` samples each.

        Raises:
            ValueError: if ``batch`` is smaller than 2 or ``x_train`` holds
                fewer than ``batch // 2`` samples.
        """
        half = batch // 2
        if half < 1:
            raise ValueError("batch must be at least 2")
        if len(x_train) < half:
            raise ValueError("x_train must contain at least batch // 2 samples")

        n_outputs = self.NUM_CLASSES + 1
        fake_class = self.NUM_CLASSES

        for epoch in range(epochs):
            ## Train discriminator
            # randint's upper bound is exclusive; +1 lets the final window be chosen.
            start = np.random.randint(0, len(x_train) - half + 1)
            real_x = x_train[start:start + half].reshape(half, self.width, self.height, self.channels)
            real_y = np.asarray(y_train[start:start + half]).astype(int).reshape(-1)

            noise, _ = self._conditioned_noise(half)
            synthetic = self.gen.predict(noise)

            x = np.concatenate((real_x, synthetic))
            # Size the targets from the two halves so an odd `batch` still
            # yields as many label rows as images.
            y = np.zeros((2 * half, n_outputs))
            y[np.arange(half), real_y] = 1.0
            y[half:, fake_class] = 1.0

            d_loss = self.disc.train_on_batch(x, y)

            ## Train generator
            # Target for each latent vector is the class encoded in it.
            noise, labels = self._conditioned_noise(batch)
            y = self._one_hot(labels, n_outputs)

            g_loss = self.stack.train_on_batch(noise, y)

            if epoch % 1000 == 0:
                print(f'[Epoch: #{epoch}] Discriminator loss: {d_loss[0]}, Generator loss: {g_loss}')
                self._save_samples(epoch)

    def genDataset(self, size):
        """Generate ``size`` labelled images.

        Returns:
            ``(x, y)`` where ``x`` is the generator output for ``size``
            randomly conditioned latent vectors and ``y`` is the matching
            (size, NUM_CLASSES) one-hot label array.
        """
        noise, labels = self._conditioned_noise(size)
        y = self._one_hot(labels, self.NUM_CLASSES)
        x = self.gen.predict(noise)
        return x, y

## Train both CNN and GAN

### Setup GAN and CNN

In [None]:
# Build the GAN; GAN.__init__ creates and compiles the generator,
# the discriminator and the stacked generator+discriminator model.
gan = GAN()

In [None]:
# Build the CNN classifier; CNN.__init__ creates the model and compiles it
# with Adam, categorical cross-entropy and an accuracy metric.
cnn = CNN()

In [None]:
# Restore previously saved weights: the three GAN models first
# (generator, discriminator, stacked), then the CNN classifier.
for model, path in (
    (gan.gen, "generator.h5"),
    (gan.disc, "discriminator.h5"),
    (gan.stack, "stacked.h5"),
    (cnn.acnn, "CNN.h5"),
):
    model.load_weights(path)

### SAVE CNN WEIGHTS

In [None]:
# Write the current CNN weights to CNN.h5, the same file the load cell reads.
cnn.acnn.save_weights("CNN.h5")

### SAVE GAN WEIGHTS

In [None]:
# Write the weights of each GAN model (generator, discriminator, stacked)
# to its own .h5 file.
for model, path in (
    (gan.gen, "generator.h5"),
    (gan.disc, "discriminator.h5"),
    (gan.stack, "stacked.h5"),
):
    model.save_weights(path)

### TRAIN GAN WITH FASHION-MNIST DATASET

In [None]:
# Train the GAN on the first 2000 training samples. GAN.train reports losses
# and saves a sample grid whenever epoch % 1000 == 0, so 5001 epochs
# (0..5000) also covers the report at epoch 5000.
gan.train(x_train[:2000],y_train[:2000], epochs=5001, batch=32)

### TRAIN CNN WITH GAN GENERATED IMAGES

In [None]:
# Alternate between refining the GAN and fitting the CNN purely on
# GAN-generated images, scoring the CNN on the real test set after each round.

# Class indices of the one-hot test labels; constant across rounds.
y_true = np.argmax(y_test, axis=1)

for e in range(10):
    gan.train(x_train[:2000], y_train[:2000], epochs=500, batch=32)
    for i in range(10):
        # A fresh synthetic dataset for every CNN epoch.
        x, y = gan.genDataset(2000)
        cnn.acnn.fit(x, y, batch_size=32, epochs=1, validation_split=0.2, shuffle=True)
    score = cnn.acnn.evaluate(x=x_test, y=y_test)
    print(f"{e} score: {score}")

    y_pred = np.argmax(cnn.acnn.predict(x=x_test), axis=1)
    # f1_score takes (y_true, y_pred); with average='weighted' the per-class
    # scores are weighted by the class support in y_true, so the order matters.
    print(f"F1val: {f1_score(y_true, y_pred, average='weighted')}")

# Weighted F1 of the final model.
y_pred = np.argmax(cnn.acnn.predict(x=x_test), axis=1)
print(f"F1val: {f1_score(y_true, y_pred, average='weighted')}")

### TRAIN CNN WITH FASHION-MNIST DATASET AND GAN GENERATED IMAGES

In [None]:
# Train the CNN alternately on real images and GAN-generated images,
# refining the GAN before every round.
x_train = x_train.reshape((len(x_train), 28, 28, 1))

# One-hot encode the integer training labels (10 classes).
y = np.eye(10)[np.asarray(y_train).astype(int).reshape(-1)]

# Class indices of the one-hot test labels; constant across rounds.
y_true = np.argmax(y_test, axis=1)

for e in range(10):  # epochs
    # Train GAN
    gan.train(x_train[:2000], y_train[:2000], epochs=500, batch=32)
    # Train CNN: one pass over real data, then one pass over fresh synthetic data
    for i in range(10):
        cnn.acnn.fit(x_train[:2000], y[:2000], batch_size=32, epochs=1, validation_split=0.2, shuffle=True)

        gan_x, gan_y = gan.genDataset(2000)
        cnn.acnn.fit(gan_x, gan_y, batch_size=32, epochs=1, validation_split=0.2, shuffle=True)
    # Test CNN
    y_pred = np.argmax(cnn.acnn.predict(x=x_test), axis=1)
    # f1_score takes (y_true, y_pred); with average='weighted' the per-class
    # scores are weighted by the class support in y_true, so the order matters.
    print(f"F1val: {f1_score(y_true, y_pred, average='weighted')}")

# Final test on CNN
score = cnn.acnn.evaluate(x=x_test, y=y_test)
print(f"Final score: {score}")

y_pred = np.argmax(cnn.acnn.predict(x=x_test), axis=1)
print(f"F1val: {f1_score(y_true, y_pred, average='weighted')}")

### TRAIN CNN WITH FASHION-MNIST DATASET ONLY

In [None]:
# Baseline: train the CNN on real images only, then score it on the test set.
x_train = x_train.reshape((len(x_train), 28, 28, 1))

# One-hot encode the integer training labels (10 classes).
y = np.eye(10)[np.asarray(y_train).astype(int).reshape(-1)]

cnn.acnn.fit(x_train[:2000], y[:2000], batch_size=32, epochs=200, validation_split=0.2, shuffle=True)
score = cnn.acnn.evaluate(x=x_test, y=y_test)
print(f"Final score: {score}")

y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(cnn.acnn.predict(x=x_test), axis=1)
# f1_score takes (y_true, y_pred); with average='weighted' the per-class
# scores are weighted by the class support in y_true, so the order matters.
print(f"F1val: {f1_score(y_true, y_pred, average='weighted')}")

### TEST CNN SCORE

In [None]:
# Score the CNN in its current state. The score is evaluated here rather than
# reused from another cell, so this cell works on its own (for example right
# after loading weights) and always matches the F1 value printed below.
score = cnn.acnn.evaluate(x=x_test, y=y_test)
print(f"Start score: {score}")

y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(cnn.acnn.predict(x=x_test), axis=1)
# f1_score takes (y_true, y_pred); with average='weighted' the per-class
# scores are weighted by the class support in y_true, so the order matters.
print(f"F1val: {f1_score(y_true, y_pred, average='weighted')}")

Results with the 2000-sample Fashion-MNIST training subset (format: [loss, accuracy], weighted F1):
1. [0.7500727490186692, 0.7989], 0.8022347200020112: 100 epochs (normal)
2. [1.3559218359351157, 0.8267], 0.830564980430131: 100 epochs (GAN)