In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf
import seaborn as sns
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Column headers passed to read_csv via names=; 'class' is the target and
# the remaining 22 entries are the feature columns.
column_names = [
    'class',
    'cap-shape', 'cap-surface', 'cap-color',
    'bruises', 'odor',
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-root',
    'stalk-surface-above-ring', 'stalk-surface-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring',
    'veil-type', 'veil-color',
    'ring-number', 'ring-type',
    'spore-print-color', 'population', 'habitat',
]
# Cells holding '?' are parsed as NaN.
mushroom_df = pd.read_csv(
    './uci_repos/mushroom/agaricus-lepiota.data',
    names=column_names,
    na_values=['?'],
)
mushroom_df

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


In [3]:
# Separate the feature columns from the 'class' target column.
x_train = mushroom_df.drop('class', axis=1)
y_train = mushroom_df['class']
x_train

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,x,s,n,t,p,f,c,n,k,e,...,s,w,w,p,w,o,p,k,s,u
1,x,s,y,t,a,f,c,b,k,e,...,s,w,w,p,w,o,p,n,n,g
2,b,s,w,t,l,f,c,b,n,e,...,s,w,w,p,w,o,p,n,n,m
3,x,y,w,t,p,f,c,n,n,e,...,s,w,w,p,w,o,p,k,s,u
4,x,s,g,f,n,f,w,b,k,t,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,k,s,n,f,n,a,c,b,y,e,...,s,o,o,p,o,o,p,b,c,l
8120,x,s,n,f,n,a,c,b,y,e,...,s,o,o,p,n,o,p,b,v,l
8121,f,s,n,f,n,a,c,b,n,e,...,s,o,o,p,o,o,p,b,c,l
8122,k,y,n,f,y,f,c,n,b,t,...,k,w,w,p,w,o,e,w,v,l


In [4]:
# Replace every feature column with integer codes, one column at a time.
# The same encoder object is re-fitted for each column, so once the loop is
# done it only holds the classes of the last column it saw.
label_encoder = LabelEncoder()
for column in x_train:
    encoded_column = label_encoder.fit_transform(x_train[column])
    x_train[column] = encoded_column
x_train

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,5,2,4,1,6,1,0,1,4,0,...,2,7,7,0,2,1,4,2,3,5
1,5,2,9,1,0,1,0,0,4,0,...,2,7,7,0,2,1,4,3,2,1
2,0,2,8,1,3,1,0,0,5,0,...,2,7,7,0,2,1,4,3,2,3
3,5,3,8,1,6,1,0,1,5,0,...,2,7,7,0,2,1,4,2,3,5
4,5,2,3,0,5,1,1,0,4,1,...,2,7,7,0,2,1,0,3,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,3,2,4,0,5,0,0,0,11,0,...,2,5,5,0,1,1,4,0,1,2
8120,5,2,4,0,5,0,0,0,11,0,...,2,5,5,0,0,1,4,0,4,2
8121,2,2,4,0,5,0,0,0,5,0,...,2,5,5,0,1,1,4,0,1,2
8122,3,3,4,0,8,1,0,1,0,1,...,1,7,7,0,2,1,0,7,4,2


In [5]:
# Summary statistics (count, mean, std, quartiles) of the integer-encoded features.
x_train.describe()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
count,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,...,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0,8124.0
mean,3.348104,1.827671,4.504677,0.415559,4.144756,0.974151,0.161497,0.309207,4.810684,0.567208,...,1.603644,5.816347,5.794682,0.0,1.965534,1.069424,2.291974,3.59675,3.644018,1.508616
std,1.604329,1.229873,2.545821,0.492848,2.103729,0.158695,0.368011,0.462195,3.540359,0.495493,...,0.675974,1.901747,1.907291,0.0,0.242669,0.271064,1.801672,2.382663,1.252082,1.719975
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,3.0,0.0,2.0,1.0,0.0,0.0,2.0,0.0,...,1.0,6.0,6.0,0.0,2.0,1.0,0.0,2.0,3.0,0.0
50%,3.0,2.0,4.0,0.0,5.0,1.0,0.0,0.0,5.0,1.0,...,2.0,7.0,7.0,0.0,2.0,1.0,2.0,3.0,4.0,1.0
75%,5.0,3.0,8.0,1.0,5.0,1.0,0.0,1.0,7.0,1.0,...,2.0,7.0,7.0,0.0,2.0,1.0,4.0,7.0,4.0,2.0
max,5.0,3.0,9.0,1.0,8.0,1.0,1.0,1.0,11.0,1.0,...,3.0,8.0,8.0,0.0,3.0,2.0,4.0,8.0,5.0,6.0


In [6]:
# Encode the class column as integers and turn the feature frame into a
# plain ndarray; the last line shows which label codes exist.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
x_train = np.array(x_train)
np.unique(y_train)

array([0, 1])

In [7]:
# Number of samples per encoded class, as (count of 0, count of 1).
tuple(int(np.sum(y_train == label)) for label in (0, 1))

(4208, 3916)

In [8]:
def group_all_labels(data, num=100, minor=None):
    """
    Group sample indexes by label, optionally limiting some labels to ``num``
    randomly chosen indexes.

    :param data: array-like of labels, one entry per sample
    :param num: number of indexes to draw for each label listed in ``minor``
    :param minor: labels to limit; ``None`` or an empty sequence keeps every
                  label in full
    :return: tuple ``(co_l, min_col)``. ``co_l`` has one index array per label,
             in ``np.unique(data)`` order; ``min_col`` has only the index
             arrays that were drawn for the ``minor`` labels
    """
    data = np.asarray(data)
    # None replaces the former mutable default; list() also lets callers pass
    # a tuple or an ndarray without tripping over ambiguous truthiness.
    minor = [] if minor is None else list(minor)

    co_l = []
    min_col = []
    for label in np.unique(data):
        members = np.where(data == label)[0]
        if label in minor:
            # Draw distinct indexes whenever the label has enough samples.
            # Sampling with replacement produced duplicate rows that could
            # then end up in both the train and the test split. Replacement
            # is only used when more indexes are requested than exist.
            members = np.random.choice(members, num, replace=num > members.size)
            min_col.append(members)
        co_l.append(members)
    return co_l, min_col

In [9]:
# Build the imbalanced data set: label 1 is limited to 300 randomly drawn
# indexes, every other label keeps all of its samples.
grouped_labels, min_label = group_all_labels(y_train, 300, [1])
for class_id, members in enumerate(grouped_labels):
    print('class {} : number of samples : {}'.format(class_id, len(members)))

# Gather rows and labels group by group, in the order the groups were returned.
gr_data = [x_train[row] for members in grouped_labels for row in members]
gr_labels = [y_train[row] for members in grouped_labels for row in members]

x_train = np.array(gr_data)
y_train = np.array(gr_labels)

class 0 : number of samples : 4208
class 1 : number of samples : 300


# VAE Model

In [10]:
import tensorflow.compat.v1  as tf
tf.disable_v2_behavior()
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
import os
import numpy as np

# Sizes and settings shared by the conditional VAE cells.
lr = 1e-3                         # presumably the optimiser learning rate
mb_size = 32                      # rows per mini-batch
h_dim = 3                         # width of the hidden layers
z_dim = 3                         # size of the latent code
X_dim = x_train.shape[1]          # number of feature columns
y_dim = np.unique(y_train).size   # number of distinct class labels

    
def xavier_init(size):
    """
    Draw a random-normal tensor for weight initialisation.

    :param size: shape of the tensor; ``size[0]`` is used as the input dimension
    :return: tensor of shape ``size`` with stddev ``1 / sqrt(size[0] / 2)``
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random.normal(shape=size, stddev=stddev)

# Graph inputs: feature rows, class condition and latent code.
X = tf.keras.Input(shape=(X_dim,))
c = tf.keras.Input(shape=(y_dim,))
z = tf.keras.Input(shape=(z_dim,))

# Encoder weights: (features + condition) -> hidden layer.
Q_W1 = tf.Variable(xavier_init([X_dim + y_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros([h_dim]))

# Hidden layer -> latent mean head.
Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_mu = tf.Variable(tf.zeros([z_dim]))

# Hidden layer -> latent log-variance head.
Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_sigma = tf.Variable(tf.zeros([z_dim]))


def Q(X, c):
    """
    Encoder of the conditional VAE.

    :param X: batch of feature rows
    :param c: batch of class conditions, concatenated to ``X`` along axis 1
    :return: tuple ``(z_mu, z_logvar)`` produced by the two linear heads
    """
    joined = tf.concat([X, c], axis=1)
    hidden = tf.nn.relu(tf.matmul(joined, Q_W1) + Q_b1)
    z_mu = tf.matmul(hidden, Q_W2_mu) + Q_b2_mu
    z_logvar = tf.matmul(hidden, Q_W2_sigma) + Q_b2_sigma
    return z_mu, z_logvar


def sample_z(mu, log_var):
    """
    Reparameterised latent draw: ``mu + exp(log_var / 2) * eps``.

    :param mu: latent means
    :param log_var: latent log-variances
    :return: sampled latent tensor with the same shape as ``mu``
    """
    noise = tf.random.normal(shape=tf.shape(mu))
    std = tf.exp(log_var / 2)
    return mu + std * noise

# Decoder weights: (latent code + condition) -> hidden layer.
P_W1 = tf.Variable(xavier_init([z_dim + y_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros([h_dim]))

# Hidden layer -> one logit per feature column.
P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros([X_dim]))


def P(z, c):
    """
    Decoder of the conditional VAE.

    :param z: batch of latent codes
    :param c: batch of class conditions, concatenated to ``z`` along axis 1
    :return: tuple ``(prob, logits)`` where ``prob`` is the sigmoid of ``logits``
    """
    joined = tf.concat([z, c], axis=1)
    hidden = tf.nn.relu(tf.matmul(joined, P_W1) + P_b1)
    logits = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(logits), logits

# Training path: encode (X, c), draw a latent sample, decode it to logits.
z_mu, z_logvar = Q(X, c)
z_sample = sample_z(z_mu, z_logvar)
_, logits = P(z_sample, c)

# Generation path: decode an externally fed latent code z under condition c.
X_samples, _ = P(z, c)

# Per-row reconstruction term plus KL term, averaged over the batch.
# NOTE(review): sigmoid cross-entropy is documented for labels in [0, 1], but
# X is fed the integer-encoded features (codes above 1), and the decoder's
# sigmoid output can only produce values in (0, 1). Consider scaling or
# one-hot encoding the features before training the VAE.
recon_loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits),
    1,
)
kl_loss = 0.5 * tf.reduce_sum(
    tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar,
    1,
)
vae_loss = tf.reduce_mean(recon_loss + kl_loss)

Instructions for updating:
non-resource variables are not supported in the long term


In [11]:
def generate_sample():
    """
    Decode synthetic rows for every class with the session in ``sess``.

    Runs 100 rounds. In each round, and for each class, ``mb_size`` latent
    vectors are drawn from a standard normal and decoded under that class's
    one-hot condition. The shapes of both results are printed.

    :return: tuple ``(gen_samples, gen_labels)``: the decoded rows rounded to
             two decimals and the matching integer class labels
    """
    decoded_rows = []
    labels = []
    for _ in range(100):
        for class_id in range(y_dim):
            labels.extend([class_id] * mb_size)
            # One-hot condition selecting the current class for the whole batch.
            one_hot = np.zeros([mb_size, y_dim])
            one_hot[:, class_id] = 1
            latent = np.random.randn(mb_size, z_dim)
            decoded_rows.extend(sess.run(X_samples, feed_dict={z: latent, c: one_hot}))

    gen_samples = np.array(decoded_rows).round(decimals=2)
    gen_labels = np.array(labels)
    print(gen_samples.shape)
    print(gen_labels.shape)

    return gen_samples, gen_labels

# MLP Model

In [12]:
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Input, Flatten
from tensorflow.keras.models import Sequential


def build_model(input_shape=(12,), num_classes=2):
    """
    Build, compile and summarise the MLP classifier.

    :param input_shape: shape of one input sample
    :param num_classes: number of output classes (width of the softmax layer)
    :return: Sequential model compiled with categorical cross-entropy loss,
             the adam optimizer and the accuracy metric
    """
    layers = [Input(shape=input_shape)]
    # Two hidden blocks: Dense(relu) followed by batch normalisation.
    for units in (32, 64):
        layers.append(Dense(units, activation="relu"))
        layers.append(BatchNormalization())
    layers.extend([
        Flatten(),
        Dropout(0.5),
        Dense(num_classes, activation="softmax"),
    ])

    model = Sequential(layers)
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    model.summary()
    return model

# Baseline

In [13]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Baseline: train the MLP on the imbalanced data only, 10 times, each time
# with a fresh model and a fresh random 80/20 split, and keep every report.
baseline_list = []
batch_size = 32
epochs = 2
for i in range(10):
    baseline_model = build_model(input_shape=(x_train.shape[1],), num_classes=y_dim)

    X_train, X_test, y_train1, y_test = train_test_split(x_train, y_train, test_size=0.2)
    # One-hot targets for the categorical cross-entropy loss.
    y_train_oh = np.array(tf.keras.utils.to_categorical(y_train1, num_classes=y_dim, dtype='float32'))
    test_y = np.array(tf.keras.utils.to_categorical(y_test, num_classes=y_dim, dtype='float32'))

    history_baseline = baseline_model.fit(
        X_train, y_train_oh,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_test, test_y),
    )
    score_baseline = baseline_model.evaluate(X_test, test_y, verbose=0)
    print('baseline test loss: ', score_baseline[0])
    print('baseline test accuracy: ', score_baseline[1])

    # Class predictions from the softmax output, scored against the integer labels.
    y_pred_baseline_oh = baseline_model.predict(X_test)
    y_pred_baseline = y_pred_baseline_oh.argmax(axis=-1)
    baseline_list.append(classification_report(y_test, y_pred_baseline, output_dict=True))

Instructions for updating:
Colocations handled automatically by placer.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                736       
_________________________________________________________________
batch_normalization (BatchNo (None, 32)                128       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
flatten (Flatten)            (None, 64)                0         
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
__________________________________________________

2021-10-24 19:17:31.234049: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2021-10-24 19:17:31.234089: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: rockbook
2021-10-24 19:17:31.234097: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: rockbook
2021-10-24 19:17:31.234216: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.74.0
2021-10-24 19:17:31.234256: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.63.1
2021-10-24 19:17:31.234270: E tensorflow/stream_executor/cuda/cuda_diagnostics.cc:313] kernel version 470.63.1 does not match DSO version 470.74.0 -- cannot find working devices in this configuration
2021-10-24 19:17:31.234614: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow

Epoch 2/2
 768/3606 [=====>........................] - ETA: 0s - loss: 0.4503 - acc: 0.8294



baseline test loss:  0.07601392850643252
baseline test accuracy:  0.9911308
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 32)                736       
_________________________________________________________________
batch_normalization_2 (Batch (None, 32)                128       
_________________________________________________________________
dense_4 (Dense)              (None, 64)                2112      
_________________________________________________________________
batch_normalization_3 (Batch (None, 64)                256       
_________________________________________________________________
flatten_1 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
____________________________________________



baseline test loss:  0.11181713644819884
baseline test accuracy:  0.98447895
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 32)                736       
_________________________________________________________________
batch_normalization_4 (Batch (None, 32)                128       
_________________________________________________________________
dense_7 (Dense)              (None, 64)                2112      
_________________________________________________________________
batch_normalization_5 (Batch (None, 64)                256       
_________________________________________________________________
flatten_2 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
___________________________________________



baseline test loss:  0.14446693377986453
baseline test accuracy:  0.9600887
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 32)                736       
_________________________________________________________________
batch_normalization_6 (Batch (None, 32)                128       
_________________________________________________________________
dense_10 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_7 (Batch (None, 64)                256       
_________________________________________________________________
flatten_3 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
____________________________________________



baseline test loss:  0.10998074126382361
baseline test accuracy:  0.9778271
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_8 (Batch (None, 32)                128       
_________________________________________________________________
dense_13 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_9 (Batch (None, 64)                256       
_________________________________________________________________
flatten_4 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 64)                0         
____________________________________________



baseline test loss:  0.11718661531376469
baseline test accuracy:  0.9689579
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_10 (Batc (None, 32)                128       
_________________________________________________________________
dense_16 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_11 (Batc (None, 64)                256       
_________________________________________________________________
flatten_5 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 64)                0         
____________________________________________



baseline test loss:  0.1094041842876411
baseline test accuracy:  0.981153
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_18 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_12 (Batc (None, 32)                128       
_________________________________________________________________
dense_19 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_13 (Batc (None, 64)                256       
_________________________________________________________________
flatten_6 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 64)                0         
______________________________________________



Epoch 2/2
baseline test loss:  0.10819255258318061
baseline test accuracy:  0.9711752
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_14 (Batc (None, 32)                128       
_________________________________________________________________
dense_22 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_15 (Batc (None, 64)                256       
_________________________________________________________________
flatten_7 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 64)                0         
__________________________________



Epoch 2/2
baseline test loss:  0.13740935906800886
baseline test accuracy:  0.9789357
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_16 (Batc (None, 32)                128       
_________________________________________________________________
dense_25 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_17 (Batc (None, 64)                256       
_________________________________________________________________
flatten_8 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 64)                0         
__________________________________



Epoch 2/2
baseline test loss:  0.1407912797779836
baseline test accuracy:  0.96674055
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_27 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_18 (Batc (None, 32)                128       
_________________________________________________________________
dense_28 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_19 (Batc (None, 64)                256       
_________________________________________________________________
flatten_9 (Flatten)          (None, 64)                0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 64)                0         
__________________________________



Epoch 2/2
baseline test loss:  0.11150894459427857
baseline test accuracy:  0.9767184


In [14]:
# Average the per-run classification reports cell by cell.
n_runs = len(baseline_list)
print('number of runs: {}'.format(n_runs))

total_df = pd.DataFrame(baseline_list[0]).transpose()
for r_dict in baseline_list[1:]:
    total_df = total_df.add(pd.DataFrame(r_dict).transpose())

# Divide by the number of reports actually collected rather than a literal
# 10.0, so the mean stays correct when the repetition count changes.
average_10x = total_df / n_runs
average_10x

number of runs: 10


Unnamed: 0,precision,recall,f1-score,support
0,0.974654,1.0,0.987143,841.3
1,1.0,0.644718,0.778854,60.7
accuracy,0.975721,0.975721,0.975721,0.975721
macro avg,0.987327,0.822359,0.882999,902.0
weighted avg,0.976414,0.975721,0.972893,902.0


# Augmentation experiment

In [16]:
# Build the optimiser op once. Creating it inside the loop added another set
# of Adam ops and slot variables to the default graph on every repetition.
# The per-run global_variables_initializer() below still resets the VAE
# weights and the optimiser state, so every repetition starts from scratch.
solver = tf.compat.v1.train.AdamOptimizer(learning_rate=lr).minimize(vae_loss)

augment_list = []
for i in range(10):
    # NOTE(review): random_state=40 makes every repetition use the same
    # split, whereas the baseline loop draws a new split per run. Confirm
    # this is intended before comparing the two averages.
    X_train, X_test, y_train1, y_test = train_test_split(x_train, y_train, test_size=0.2, random_state=40)
    y_train_oh = np.array(tf.keras.utils.to_categorical(y_train1, num_classes=y_dim, dtype='float32'))
    test_y = np.array(tf.keras.utils.to_categorical(y_test, num_classes=y_dim, dtype='float32'))
    X_train = np.array(X_train)

    # generate_sample() reads the module-level name `sess`, so the session is
    # bound to that name. It is closed as soon as the synthetic rows exist;
    # previously one session per repetition was left open.
    sess = tf.compat.v1.Session()
    try:
        sess.run(tf.compat.v1.global_variables_initializer())

        # Train the conditional VAE on random mini-batches of the training split.
        for it in tqdm(range(50000)):
            ind = np.random.choice(X_train.shape[0], mb_size)
            X_mb = np.array(X_train[ind])
            y_mb = np.array(y_train_oh[ind])

            _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb, c: y_mb})

        gen_samples, gen_labels = generate_sample()
    finally:
        sess.close()

    # Augmented training set: real training rows followed by the generated rows.
    x = np.concatenate([X_train, gen_samples])
    y = np.concatenate([y_train1, gen_labels])
    x = np.array(x)
    y_oh = np.array(tf.keras.utils.to_categorical(y, num_classes=y_dim, dtype='float32'))

    aug_model = build_model(input_shape=(x_train.shape[1],), num_classes=y_dim)
    batch_size = 32
    epochs = 2
    hist = aug_model.fit(x, y_oh, batch_size=batch_size, epochs=epochs,
                         validation_data=(X_test, test_y))

    # Score on the untouched test split only.
    y_pred_aug_oh = aug_model.predict(X_test)
    y_pred_aug = y_pred_aug_oh.argmax(axis=-1)
    augment_list.append(classification_report(y_test, y_pred_aug, output_dict=True))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:48<00:00, 1038.79it/s]


(6400, 22)
(6400,)
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_33 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_22 (Batc (None, 32)                128       
_________________________________________________________________
dense_34 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_23 (Batc (None, 64)                256       
_________________________________________________________________
flatten_11 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_35 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:52<00:00, 953.16it/s]


(6400, 22)
(6400,)
Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_36 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_24 (Batc (None, 32)                128       
_________________________________________________________________
dense_37 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_25 (Batc (None, 64)                256       
_________________________________________________________________
flatten_12 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_12 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_38 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:51<00:00, 973.72it/s]


(6400, 22)
(6400,)
Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_39 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_26 (Batc (None, 32)                128       
_________________________________________________________________
dense_40 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_27 (Batc (None, 64)                256       
_________________________________________________________________
flatten_13 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_41 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:52<00:00, 946.87it/s]


(6400, 22)
(6400,)
Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_42 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_28 (Batc (None, 32)                128       
_________________________________________________________________
dense_43 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_29 (Batc (None, 64)                256       
_________________________________________________________________
flatten_14 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_14 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_44 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:53<00:00, 928.05it/s]


(6400, 22)
(6400,)
Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_45 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_30 (Batc (None, 32)                128       
_________________________________________________________________
dense_46 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_31 (Batc (None, 64)                256       
_________________________________________________________________
flatten_15 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_15 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_47 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:51<00:00, 964.40it/s]


(6400, 22)
(6400,)
Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_48 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_32 (Batc (None, 32)                128       
_________________________________________________________________
dense_49 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_33 (Batc (None, 64)                256       
_________________________________________________________________
flatten_16 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_16 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_50 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:52<00:00, 954.95it/s]


(6400, 22)
(6400,)
Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_51 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_34 (Batc (None, 32)                128       
_________________________________________________________________
dense_52 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_35 (Batc (None, 64)                256       
_________________________________________________________________
flatten_17 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_17 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_53 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:54<00:00, 922.38it/s]


(6400, 22)
(6400,)
Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_54 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_36 (Batc (None, 32)                128       
_________________________________________________________________
dense_55 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_37 (Batc (None, 64)                256       
_________________________________________________________________
flatten_18 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_18 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_56 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:57<00:00, 875.06it/s]


(6400, 22)
(6400,)
Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_57 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_38 (Batc (None, 32)                128       
_________________________________________________________________
dense_58 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_39 (Batc (None, 64)                256       
_________________________________________________________________
flatten_19 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_19 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_59 (Dense)             (None



Epoch 2/2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [00:56<00:00, 891.68it/s]


(6400, 22)
(6400,)
Model: "sequential_20"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_60 (Dense)             (None, 32)                736       
_________________________________________________________________
batch_normalization_40 (Batc (None, 32)                128       
_________________________________________________________________
dense_61 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_41 (Batc (None, 64)                256       
_________________________________________________________________
flatten_20 (Flatten)         (None, 64)                0         
_________________________________________________________________
dropout_20 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_62 (Dense)             (None



Epoch 2/2


In [17]:
# Average the per-run metric tables stored in `augment_list` element-wise.
# Each entry is turned into a DataFrame and transposed so that the entry's
# top-level keys become rows; presumably these are
# classification_report(..., output_dict=True) results -- confirm against
# the cell that fills `augment_list`.
# (`pd` is already imported in the notebook's first cell.)
total_df = pd.DataFrame(augment_list[0]).transpose()
n_runs = len(augment_list)
print(n_runs)

# Accumulate the element-wise sum of the remaining runs onto the first one.
for r_dict in augment_list[1:]:
    temp = pd.DataFrame(r_dict).transpose()
    total_df = total_df.add(temp)

# Divide by the actual number of runs rather than a hard-coded 10, so the
# average stays correct if the number of repetitions changes.
average_10x = total_df / n_runs
average_10x

10


Unnamed: 0,precision,recall,f1-score,support
0,0.975787,0.995699,0.985634,837.0
1,0.927333,0.681538,0.783574,65.0
accuracy,0.97306,0.97306,0.97306,0.97306
macro avg,0.95156,0.838619,0.884604,902.0
weighted avg,0.972295,0.97306,0.971073,902.0
