In [11]:
from keras.layers import Lambda, Input, Dense
from keras.models import Model
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras import utils
from keras import backend as K
from keras import layers
from keras import objectives
from keras import optimizers
from keras.callbacks import EarlyStopping

from sklearn.feature_selection import mutual_info_classif

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import argparse
import os

from IPython.display import clear_output

from sklearn.metrics import classification_report
from skimage import data, img_as_float
from skimage.util import random_noise

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

from sklearn.feature_selection import SelectKBest

In [38]:
################# methods for shaping our data ##############
def reshape_x(X, input_shape):
    X = np.reshape(X, [-1, input_shape])
    X = X.astype('float32') / 255
    return X
def reshape_y(y, num_categories):
    y = utils.to_categorical(y, num_categories)
    return y
###########################################################

sub = pd.read_csv('sub_best.csv', index_col=0).values
sup = pd.read_csv('sup_best.csv', index_col=0).values

X = np.concatenate((sub, sup))

y = [0] * 30 + [1] * 30
y = np.asarray(y)

X_new = SelectKBest(mutual_info_classif, k=200).fit_transform(X, y)

X_new = reshape_x(X_new, 200)




In [64]:
#########################VAE Model##############################
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

#VAE loss is sum of reconstruction loss and KL divergence

#The optimizer used in VAE functions below.
op = optimizers.RMSprop(lr=0.0001, rho=0.9, epsilon=None, decay=0.0)

#Used to create new models
def reset_weights(model):
    session = K.get_session()
    for layer in model.layers: 
        if hasattr(layer, 'kernel_initializer'):
            layer.kernel.initializer.run(session=session)


def create_vae(input_size, batch_size=6):
    intermediate_dim=100
    latent_dim = 2
    vae_inputs = Input(shape=(200,)) #shape is batchsize * image size
    X = Dense(intermediate_dim, activation='relu')(vae_inputs)
    z_mean = Dense(latent_dim, name='z_mean')(X)
    z_log_var = Dense(latent_dim, name='z_log_var')(X)
    #Use the parameter trick
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    #Decoder
    decoder_X = Dense(intermediate_dim, activation='relu')
    decoder_mean = Dense(input_size, activation='sigmoid')
    X_decoded = decoder_X(z)
    X_decoded_mean = decoder_mean(X_decoded)

    #Create the end-to-end VAE
    vae = Model(vae_inputs, X_decoded_mean)
    #create encoder
    encoder = Model(vae_inputs, z_mean)
    #create generator
    decoder_input = Input(shape=(latent_dim,))
    _X_decoded = decoder_X(decoder_input)
    _X_decoded_mean = decoder_mean(_X_decoded)
    generator = Model(decoder_input, _X_decoded_mean)
    
    def vae_loss(x, x_decoded_mean):
        xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
        kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                            axis=-1)
        return xent_loss + kl_loss
    return {'vae':vae, 'encoder':encoder, 'generator':generator, 'loss': vae_loss}
            


#in run_vae, vae_models param should be a dict containing the vae, encoder, and generator
def run_vae(vae_models, X, y, batch_size=6, epochs=30, manifold_std=5, 
               num_samples=50):
    
    for model in ('vae', 'encoder', 'generator'):
        reset_weights(vae_models[model])
    
    vae_models['vae'].compile(optimizer=op, loss=vae_models['loss'])
    vae_models['vae'].fit(X,X, shuffle=True, epochs=epochs, 
            batch_size=batch_size, validation_data=(y,y),
            callbacks = [EarlyStopping('val_loss', patience=4, restore_best_weights=True)])
    
    #clear_output()
    
    synthetic_data = np.empty([num_samples, 200])
    dev=[-manifold_std, manifold_std]
    for i in range (num_zeros):
        xi = np.random.random_sample() * np.random.choice(dev)
        yi = np.random.random_sample() * np.random.choice(dev)
        z_sample = np.array([[xi,yi]])
        x_decoded = vae_models['generator'].predict(z_sample)
        synthetic_data[i] = x_decoded
    return synthetic_data

In [39]:
def gaussian_expansion(x_data,y, magnitude, sigma, k):
    mu = 0.0
    num_samples = x_data.shape[0]
    x_noised = []
    for i in range(magnitude):
        noise = np.random.normal(mu, sigma, x_data.shape)
        x_noised.append(x_data + noise)
        
    y_new = [y] * (magnitude +1)
    y_new = np.asarray(y_new).flatten()
    print(y_new.shape)
    x_noised = np.asarray(x_noised)
    x_noised = x_noised.reshape(magnitude*x_data.shape[0],k)
    print(x_noised.shape)
    print(x_data.shape)
    y_new = enc.transform(np.reshape(y_new, (-1,1)))
    new_x = np.concatenate((x_data, x_noised))
    
    print(y_new.shape)
    print(new_x.shape)
    return(new_x, y_new)

In [65]:
vae = create_vae(200, 1)

In [62]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)
labels = np.arange(2)
labels = np.reshape(labels, (len(labels),1))
enc.fit(labels)

OneHotEncoder(categorical_features=None, categories=None,
       dtype=<class 'numpy.float64'>, handle_unknown='error',
       n_values=None, sparse=False)

In [54]:
x_tr, y_tr = gaussian_expansion(X_new, y, 25, 0.005,200)

(1560,)
(1500, 200)
(60, 200)
(1560, 2)
(1560, 200)


In [50]:
test = np.zeros((500, 28,28))
print(test.shape)
test = reshape_x(test, 28*28)
print(test.shape)

(500, 28, 28)
(500, 784)


In [66]:
print(x_tr.shape)
syn_data = run_vae(vae, X=x_tr, y=y_tr, num_samples=50)

(1560, 200)


ValueError: Error when checking input: expected input_15 to have shape (200,) but got array with shape (2,)