In [29]:
from __future__ import print_function

import sys
import json
import keras
import numpy as np
from keras.datasets import mnist, cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD

__使用keras中的mnist及cifar10。__

In [31]:
# Upper bound on the number of training epochs passed to model.fit().
epochs = 1000
# Name of the active dataset; also used as the output sub-directory name.
data_set = ''
# Dataset geometry / class count. These are placeholders that set_mnist()
# and set_cifar10() overwrite through `global` statements.
num_classes = image_size = image_channel = 0

__全域變數們，所有訓練最多到1000epochs，並在training step設定early stopping。__

In [32]:
def set_cifar10(crop_size=None):
    """Load the CIFAR-10 training split as cropped, flattened, scaled vectors.

    Side effects: sets the module globals ``data_set``, ``image_channel``,
    ``num_classes`` and ``image_size`` (the latter to the side length that
    remains after cropping).

    Args:
        crop_size: number of pixels removed from every border by
            ``crop_image``. When omitted, a module-level ``crop_size`` is used
            if one exists, otherwise no cropping is done (0).

    Returns:
        ``(x_train, y_train)``: the images reshaped to one row per image and
        divided by 255, and the labels exactly as returned by
        ``cifar10.load_data()``.
    """
    global image_size, image_channel, num_classes, data_set
    if crop_size is None:
        # Earlier revisions read an undeclared global here; honour it when it
        # exists so existing notebooks keep their setting.
        crop_size = globals().get('crop_size', 0)

    data_set = 'cifar10'
    image_size = 32
    image_channel = 3
    num_classes = 10
    (x_train, y_train), _ = cifar10.load_data()

    # crop_image() strips `crop_size` pixels from *both* ends of each spatial
    # axis, so the remaining side is image_size - 2 * crop_size.
    cropped_size = image_size - 2 * crop_size
    cropped = crop_image(x_train, crop_size)
    x_train = scale_pixel_value(
        np.reshape(cropped, [-1, cropped_size**2 * image_channel]))

    # Publish the post-crop side length only after cropping is done.
    image_size = cropped_size
    return x_train, y_train

def set_mnist():
    """Load the MNIST training split as flattened float32 vectors in [0, 1].

    Side effects: sets the module globals ``data_set``, ``image_size``,
    ``image_channel`` and ``num_classes`` to the MNIST values.

    Returns:
        ``(x_train, y_train)``: images reshaped to one row of
        ``image_size**2 * image_channel`` values each and divided by 255, and
        the labels exactly as returned by ``mnist.load_data()``.
    """
    global image_size, image_channel, num_classes, data_set
    data_set, image_size, image_channel, num_classes = 'mnist', 28, 1, 10

    (train_images, train_labels), _ = mnist.load_data()
    flat_dim = image_size**2 * image_channel
    train_images = train_images.reshape(train_images.shape[0], flat_dim).astype('float32')
    train_images /= 255
    return train_images, train_labels
    
def crop_image(images, c):
    """Remove ``c`` pixels from every border of each image in a batch.

    The crop bounds are taken from the batch itself (axes 1 and 2) instead of
    the mutable ``image_size`` global, so the result does not depend on which
    ``set_*`` function ran last.

    Args:
        images: array indexed as ``[sample, row, column, ...]``.
        c: border width to strip from both ends of the row and column axes.

    Returns:
        The sliced batch; any trailing axes are left untouched.
    """
    height, width = images.shape[1], images.shape[2]
    return images[:, c:height - c, c:width - c]

def scale_pixel_value(images):
    """Return ``images`` divided by 255 (floating-point division)."""
    max_pixel_value = 255.
    scaled = images / max_pixel_value
    return scaled

def partially_corrupted_label(label, probability):
    """Replace each label, independently, by a uniformly random class.

    For every entry one uniform sample is drawn; when it is ``<= probability``
    the entry is replaced by ``np.random.randint(0, num_classes)``, otherwise
    the original value is kept. ``num_classes`` is read from the module global.

    Args:
        label: iterable of labels.
        probability: per-label corruption threshold.

    Returns:
        A new numpy array built from the (possibly replaced) labels.
    """
    def _maybe_corrupt(original):
        # Draw order matters for reproducibility: the uniform sample first,
        # the replacement class only when corruption is triggered.
        if np.random.random_sample() <= probability:
            return np.random.randint(0, num_classes)
        return original

    return np.array([_maybe_corrupt(original) for original in label])

def random_label(label):
    """Return ``len(label)`` class ids drawn uniformly from ``[0, num_classes)``.

    The input values are ignored; only the length is used. ``num_classes`` is
    read from the module global. The result is a flat 1-D array.
    """
    count = len(label)
    drawn = np.random.randint(num_classes, size=count)
    return drawn.reshape(-1)

def label_similarity(label1, label2):
    """Return the fraction of positions at which two label arrays agree.

    Prints both arrays' shapes, then compares the labels pairwise (``zip``
    stops at the shorter input) and divides the number of non-mismatching
    positions by ``len(label1)``.
    """
    print(label1.shape, label2.shape)
    total = len(label1)
    mismatches = sum(1 for first, second in zip(label1, label2) if first != second)
    return (total - float(mismatches)) / total

def shuffled_pixel(images):
    """Apply one random pixel permutation to every flattened image.

    A single permutation of ``image_size**2 * image_channel`` indices (sizes
    read from the module globals) is drawn and used to reorder the columns of
    every row, so all images are scrambled the same way.

    Args:
        images: 2-D array-like with one flattened image per row.

    Returns:
        A new array with the columns reordered; the input is not modified.
    """
    permutation = np.random.permutation(image_size**2 * image_channel)
    # One fancy-indexing operation replaces the per-pixel Python loop and
    # yields the same values (and a fresh array) far faster.
    return np.asarray(images)[:, permutation]

def gaussian(images, proportion):
    """Replace the trailing ``proportion`` of a batch with Gaussian noise.

    The noise is drawn from a normal distribution whose mean and (population)
    standard deviation are those of all pixels in ``images``, then clipped to
    ``[0, 1]`` (this presumes the inputs are already scaled to that range).
    The leading rows are kept unchanged and the noise rows are appended after
    them, so the output always has exactly ``len(images)`` rows.

    Args:
        images: array with one sample per row along axis 0.
        proportion: fraction of samples, in ``[0, 1]``, to replace by noise.

    Returns:
        A new array: the kept samples followed by the noise samples.

    Raises:
        ValueError: if ``proportion`` is outside ``[0, 1]``.
    """
    if not 0.0 <= proportion <= 1.0:
        raise ValueError('proportion must be within [0, 1], got %r' % (proportion,))

    images = np.asarray(images)
    total = len(images)
    # Derive the kept count from the noise count. Truncating n*p and n*(1-p)
    # separately can lose a row (e.g. n=10, p=.8 -> 8 + 1), which would leave
    # the data out of step with its labels.
    noise_count = int(round(total * proportion))
    kept_count = total - noise_count

    mean = images.mean()
    standard_deviation = images.std()
    noise = np.random.normal(loc=mean, scale=standard_deviation,
                             size=(noise_count,) + images.shape[1:])
    noise = noise.clip(0.0, 1.0)
    return np.concatenate((images[:kept_count], noise))
    
def random_pixel(images):
    """Shuffle the pixels of every image independently.

    Each row gets its own ``np.random.shuffle`` call, so every image receives
    a different permutation. The shuffling is done on a copy: the caller's
    array is left untouched and can be reused for further experiments.

    Args:
        images: array-like with one image per entry along axis 0.

    Returns:
        A new array of the same shape with each image shuffled in turn.
    """
    # np.random.shuffle works in place and iterating an ndarray yields views,
    # so shuffle rows of a private copy rather than the caller's data.
    shuffled = np.array(images)
    for row in shuffled:
        np.random.shuffle(row)
    return shuffled

In [38]:
def train(x_train, y_train, batch_size, hidden_size, layer, learning_rate):
    """Train a fully connected relu network and return its Keras history.

    The model has ``layer`` hidden Dense layers of ``hidden_size`` units and a
    softmax output with ``num_classes`` units. It is optimised with plain SGD
    on categorical cross-entropy for at most ``epochs`` epochs (module
    global), stopping early once ``val_loss`` has not improved for 20 epochs.
    The validation set is deliberately the training set itself, so the early
    stopping criterion tracks how well the training data is being fitted.

    Side effects: prints the model summary and the Keras progress log, makes
    sure the ``./<data_set>/`` directory exists and writes the model summary
    to ``./<data_set>/batch_size_<b>_layer_<l>_hidden_size_<h>_summary``.

    Args:
        x_train: 2-D array with ``image_size**2 * image_channel`` features
            per row.
        y_train: integer class labels; one-hot encoded here.
        batch_size: mini-batch size for ``model.fit``.
        hidden_size: units per hidden layer.
        layer: number of hidden layers (at least 1).
        learning_rate: SGD learning rate.

    Returns:
        ``history.history`` as produced by ``model.fit``.
    """
    import os  # local import keeps this cell self-contained

    # Fail before the (potentially very long) training run, not after it,
    # if the output directory cannot be created.
    output_dir = './%s' % data_set
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    y_train = keras.utils.to_categorical(y_train, num_classes)

    model = Sequential()
    model.add(Dense(hidden_size, activation='relu', input_shape=(image_size**2 * image_channel,)))
    for _ in range(layer - 1):
        model.add(Dense(hidden_size, activation='relu'))
    # Output width must match the one-hot encoding above.
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=learning_rate),
                  metrics=['accuracy'])

    callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, verbose=0)]
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_train, y_train),
                        callbacks=callbacks)

    with open('./%s/batch_size_%d_layer_%d_hidden_size_%d_summary' % (data_set, batch_size, layer, hidden_size), 'w') as fout:
        orig_stdout = sys.stdout
        sys.stdout = fout
        try:
            # summary() prints by itself and returns None, so it is called
            # directly instead of print()-ing its return value.
            model.summary()
        finally:
            # Always hand stdout back, even if writing the summary fails.
            sys.stdout = orig_stdout

    return history.history

In [39]:
# MNIST sweep: every hidden size x depth combination is trained on
# (1) the true data, (2) random labels and (3) inputs partly replaced by
# Gaussian noise. Each run's history is dumped as JSON under ./<data_set>/.
x_train, y_train = set_mnist()
batch_size = 1024
learning_rate = 0.01
hidden_size = [16, 128, 512, 1024]
layer = [2, 4, 6, 8, 10]
gaussian_proportion = [.2, .4, .6, .8, 1.]
# Number of repetitions of each sweep; the run index is the file-name prefix.
# 1 keeps the single pass this cell performed; raise it for repeated trials.
num_runs = 1


def run_grid(run, log_prefix, file_tag, make_data):
    """Train one model per (hidden size, depth) pair and save its history.

    Args:
        run: 1-based repetition index used as the file-name prefix.
        log_prefix: text printed in front of the per-model progress line.
        file_tag: experiment tag inserted into the output file name.
        make_data: zero-argument callable returning ``(x, y)``; it is called
            once per model so randomised data is redrawn for every model.
    """
    for size in hidden_size:
        for depth in layer:
            print('%shidden_size: %d layer: %d' % (log_prefix, size, depth))
            x, y = make_data()
            history = train(x, y, batch_size=batch_size, hidden_size=size, layer=depth, learning_rate=learning_rate)
            with open('./%s/%d_%sbatch_size_%d_layer_%d_hidden_size_%d' % (data_set, run, file_tag, batch_size, depth, size), 'w') as fout:
                json.dump(history, fout)


# (1) true inputs, true labels
for run in range(1, num_runs + 1):
    run_grid(run, '', '', lambda: (x_train, y_train))

# (2) true inputs, uniformly random labels
for run in range(1, num_runs + 1):
    run_grid(run, 'random label ', 'random_label_',
             lambda: (x_train, random_label(y_train)))

# (3) a growing share of the inputs replaced by Gaussian noise
for run in range(1, num_runs + 1):
    for proportion in gaussian_proportion:
        run_grid(run, 'gausson: %f ' % proportion, 'gaussian_%f_' % proportion,
                 lambda p=proportion: (gaussian(x_train, p), y_train))

hidden_size: 16 layer: 2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_55 (Dense)             (None, 16)                12560     
_________________________________________________________________
dense_56 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_57 (Dense)             (None, 10)                170       
Total params: 13,002
Trainable params: 13,002
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 60000 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000


Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000

KeyboardInterrupt: 