# Loading Data & Preprocessing

In [22]:
# Import Basic Libraries and Packages
from tensorflow.keras.layers import Dense, Conv2D, Flatten, InputLayer, MaxPool2D
from tensorflow.keras.layers import BatchNormalization
from sklearn.model_selection import train_test_split
from tensorflow.keras.initializers import Constant
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from tensorflow.keras.layers import Dense
from skimage.feature import hog
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
import lightgbm as lgb
import pandas as pd
import numpy as np
import np_utils
import random

# Import Datasets and segment them into components
cifar = np.load('/Users/joshhuang/Desktop/University/Master of Data Science/COMP5328/cifar_dataset.npz')
cifar_Xtr = cifar['Xtr']
cifar_Str = cifar['Str'] 
cifar_Xts = cifar['Xts']
cifar_Yts = cifar['Yts']

mnist = np.load('/Users/joshhuang/Desktop/University/Master of Data Science/COMP5328/mnist_dataset.npz')
mnist_Xtr = mnist['Xtr']
mnist_Str = mnist['Str'] 
mnist_Xts = mnist['Xts']
mnist_Yts = mnist['Yts']

# Remove a dimension from the labels
cifar_Str = cifar_Str[:,0]
cifar_Yts = cifar_Yts[:,0]
mnist_Str = mnist_Str[:,0]
mnist_Yts = mnist_Yts[:,0]

# Data exploration
print('Cifar XTR Dimensions: ',cifar_Xtr.shape)
print('Cifar STR Dimensions: ',cifar_Str.shape)
print('Cifar XTS Dimensions: ',cifar_Xts.shape)
print('Cifar YTS Dimensions: ',cifar_Yts.shape)
print()
print('Mnist XTR Dimensions: ',mnist_Xtr.shape)
print('Mnist STR Dimensions: ',mnist_Str.shape)
print('Mnist XTS Dimensions: ',mnist_Xts.shape)
print('Mnist YTS Dimensions: ',mnist_Yts.shape)

# View what the first 10 MNIST train labels look like
mnist_df_train = pd.get_dummies(mnist_Str)
mnist_df_test = pd.get_dummies(mnist_Yts)
cifar_df_train = pd.get_dummies(cifar_Str)
cifar_df_test = pd.get_dummies(cifar_Yts)

# Reshape data into proper images
cifar_train = cifar_Xtr.reshape(10000,32,32,3)
cifar_test = cifar_Xts.reshape(2000,32,32,3)

mnist_train = mnist_Xtr.reshape(10000,28,28)
mnist_test = mnist_Xts.reshape(2000,28,28)

print()
mnist_df_train.head(10)

Cifar XTR Dimensions:  (10000, 3072)
Cifar STR Dimensions:  (10000,)
Cifar XTS Dimensions:  (2000, 3072)
Cifar YTS Dimensions:  (2000,)

Mnist XTR Dimensions:  (10000, 784)
Mnist STR Dimensions:  (10000,)
Mnist XTS Dimensions:  (2000, 784)
Mnist YTS Dimensions:  (2000,)



Unnamed: 0,0,1
0,1,0
1,1,0
2,0,1
3,1,0
4,1,0
5,1,0
6,1,0
7,1,0
8,0,1
9,1,0


In [128]:
# View what the first 10 MNIST test labels look like
mnist_df_test.head(10)

Unnamed: 0,0,1
0,0,1
1,1,0
2,0,1
3,1,0
4,1,0
5,0,1
6,1,0
7,1,0
8,0,1
9,1,0


In [129]:
# View what the first 10 CIFAR training labels look like
cifar_df_train.head(10)

Unnamed: 0,0,1
0,1,0
1,1,0
2,0,1
3,0,1
4,1,0
5,1,0
6,1,0
7,1,0
8,1,0
9,0,1


In [130]:
# View what the first 10 CIFAR test labels look like
cifar_df_test.head(10)

Unnamed: 0,0,1
0,0,1
1,0,1
2,0,1
3,0,1
4,0,1
5,1,0
6,0,1
7,0,1
8,1,0
9,1,0


# Functions

In [23]:
# Initialize histogram of oriented gradients function
def transform(image, active=False):
    return hog(image, 
               orientations = 9, 
               pixels_per_cell = (4, 4), 
               cells_per_block = (1, 1), 
               visualise = active, 
               transform_sqrt = True,
               block_norm='L2-Hys')

# Create labels out of the probabilities given
def probConvert(predictedProb):
    for i in range(0, predictedProb.shape[0]):
        if predictedProb[i] >= 0.5:
            predictedProb[i] = 1
        else:
            predictedProb[i] = 0
    return predictedProb

# Assign labels depending on the value
def assignLabel(XLabel, YLabel, TransformedLabel):
    for i in range(0, XLabel.shape[0]):
        if XLabel[i] == YLabel[i]:
            TransformedLabel[i] = XLabel[i]
        else:
            TransformedLabel[i] = random.randint(0,1)
    return TransformedLabel

# Use this function to obtain the beta coefficient 
def estimateBeta(train_labels,prob,rho0,rho1):
    n = len(train_labels)
    beta = np.zeros((n,1))
    for i in range(n):
        if train_labels[i].any()==1:
            beta[i] = (prob[i][1]-rho0)/((1-rho0-rho1)*prob[i][1]+1e-5)
        else:
            beta[i] = (prob[i][0]-rho1)/((1-rho0-rho1)*(prob[i][0])+1e-5)
    return beta

# Normalize weights
def normalise(x):
    z = (x - min(x)) / (max(x) - min(x))
    return z

# Model Parameters Light Gradient Boosting

In [24]:
# Prepare parameters for light gradient boosting
params = {}
params['learning_rate'] = 0.003
params['boosting_type'] = 'gbdt'
params['objective'] = 'binary'
params['metric'] = 'binary_logloss'
params['sub_feature'] = 0.5
params['num_leaves'] = 10
params['min_data'] = 50
params['max_depth'] = 10

# Data Transformation

In [25]:
# Transform mnist train data into HOG features
mnist_hog_train = []
for i in range(mnist_train.shape[0]): 
    features_train = transform(mnist_train[i,:,:], active=False)
    mnist_hog_train.append(np.array(features_train).flatten())
mnist_hog_train = np.array(mnist_hog_train)
print("MNIST Features Train Dimension:", mnist_hog_train.shape)

# Transform mnist test data into HOG features
mnist_hog_test = []
for i in range(mnist_test.shape[0]): 
    features_test = transform(mnist_test[i,:,:], active=False)
    mnist_hog_test.append(np.array(features_test).flatten())
mnist_hog_test = np.array(mnist_hog_test)
print("MNIST Features Test Dimension:", mnist_hog_test.shape)

# Transform cifar train data into HOG features
cifar_hog_train = []
for i in range(cifar_train.shape[0]): 
    features_train = transform(cifar_train[i,:,:], active=False)
    cifar_hog_train.append(np.array(features_train).flatten())
cifar_hog_train = np.array(cifar_hog_train)
print("CIFAR Features Train Dimension:", cifar_hog_train.shape)

# Transform cifar test data into HOG features
cifar_hog_test = []
for i in range(cifar_test.shape[0]): 
    features_test = transform(cifar_test[i,:,:], active=False)
    cifar_hog_test.append(np.array(features_test).flatten())
cifar_hog_test = np.array(cifar_hog_test)
print("CIFAR Features Test Dimension:", cifar_hog_test.shape)

/Users/joshhuang/ve/ve_01/lib/python3.6/site-packages/skimage/feature/_hog.py:248: skimage_deprecation: Argument `visualise` is deprecated and will be changed to `visualize` in v0.16
  'be changed to `visualize` in v0.16', skimage_deprecation)


MNIST Features Train Dimension: (10000, 441)
MNIST Features Test Dimension: (2000, 441)
CIFAR Features Train Dimension: (10000, 576)
CIFAR Features Test Dimension: (2000, 576)


# Initial Classifiers

In [57]:
# Train initial classifier on MNIST to compare performance accuracy at the end of the algorithm
initLabel = mnist_Str
initTrain = mnist_Xtr
firstTrain = lgb.Dataset(initTrain, label=initLabel)
initClf = lgb.train(params, firstTrain, 1000)
initPredict = initClf.predict(mnist_Xts)
initConvert = probConvert(initPredict)
initAcc = accuracy_score(initConvert, mnist_Yts)
print('MNIST LGBM Initial Accuracy: ',initAcc*100)

initLabel = cifar_Str
initTrain = cifar_Xtr
firstTrain = lgb.Dataset(initTrain, label=initLabel)
initClf = lgb.train(params, firstTrain, 1000)
initPredict = initClf.predict(cifar_Xts)
initConvert = probConvert(initPredict)
initAcc = accuracy_score(initConvert, cifar_Yts)
print('CIFAR LGBM Initial Accuracy: ',initAcc*100)

MNIST LGBM Initial Accuracy:  88.9
CIFAR LGBM Initial Accuracy:  72.45


## MNIST Iterative Cross Learning LGBM

In [29]:
# Set the number of epochs
epochs = 10
clfX_accuracy_average = []
clfY_accuracy_average = []

# Implement the iterative cross-learning method
for i in range(epochs):
    
    # Extract 80% of the data
    initial80, initial20, initial80Label, initial20Label = train_test_split(mnist_hog_train, mnist_Str, test_size=0.2, shuffle = True)
    
    # Split into another two sets of training data
    modelXTrain, modelYTrain, modelXLabel, modelYLabel = train_test_split(initial80, initial80Label, test_size=0.5, shuffle = True)
    
    # Prepare data for light gradient boosting classification
    Xtrain = lgb.Dataset(modelXTrain, label=modelXLabel)
    Ytrain = lgb.Dataset(modelYTrain, label=modelYLabel)
    
    # Train the lgb classifier
    clfX = lgb.train(params, Xtrain, 1000)
    clfY = lgb.train(params, Ytrain, 1000)
    
    # Produce a list of probabilities
    predictedX = clfX.predict(mnist_hog_train)
    predictedY = clfY.predict(mnist_hog_train)
    
    # Convert the probabilities to a list of labels
    XLabel = probConvert(predictedX)
    YLabel = probConvert(predictedY)
    
    # Print the accuracy for each classifier on the test set
    testPredictX = clfX.predict(mnist_hog_test)
    testXLabel = probConvert(testPredictX)
    testPredictY = clfY.predict(mnist_hog_test)
    textYLabel = probConvert(testPredictY)
    testAccX = accuracy_score(testPredictX, mnist_Yts)
    testAccY = accuracy_score(testPredictY, mnist_Yts)
    clfX_accuracy_average.append(testAccX)
    clfY_accuracy_average.append(testAccY)
    
    # Assign the labels
    mnist_Str = assignLabel(XLabel, YLabel, mnist_Str)
    
    print('Epoch', i+1, 'Classifier X Accuracy: ', np.round(testAccX*100,2))
    print('Epoch', i+1, 'Classifier Y Accuracy: ', np.round(testAccY*100,2))  
    print()
    
    if i == 9:
        print('Final Classifier X Average Accuracy: ', np.mean(clfX_accuracy_average))
        print('Final Classifier X Standard Deviation: ', np.std(clfX_accuracy_average))
        print('Final Classifier Y Average Accuracy: ', np.mean(clfY_accuracy_average))
        print('Final Classifier Y Standard Deviation ', np.std(clfY_accuracy_average))

Epoch 1 Classifier X Accuracy:  86.3
Epoch 1 Classifier Y Accuracy:  82.35

Epoch 2 Classifier X Accuracy:  87.2
Epoch 2 Classifier Y Accuracy:  86.55

Epoch 3 Classifier X Accuracy:  87.05
Epoch 3 Classifier Y Accuracy:  87.25

Epoch 4 Classifier X Accuracy:  88.35
Epoch 4 Classifier Y Accuracy:  87.0

Epoch 5 Classifier X Accuracy:  87.2
Epoch 5 Classifier Y Accuracy:  87.6

Epoch 6 Classifier X Accuracy:  86.2
Epoch 6 Classifier Y Accuracy:  87.6

Epoch 7 Classifier X Accuracy:  85.85
Epoch 7 Classifier Y Accuracy:  86.95

Epoch 8 Classifier X Accuracy:  85.85
Epoch 8 Classifier Y Accuracy:  86.2

Epoch 9 Classifier X Accuracy:  85.7
Epoch 9 Classifier Y Accuracy:  86.05

Epoch 10 Classifier X Accuracy:  86.05
Epoch 10 Classifier Y Accuracy:  86.15

Final Classifier X Average Accuracy:  0.8657499999999999
Final Classifier X Standard Deviation:  0.008041299646201462
Final Classifier Y Average Accuracy:  0.8637
Final Classifier Y Standard Deviation  0.014458907289280199


## CIFAR Iterative Cross Learning LGBM

In [28]:
# Set the number of epochs
epochs = 10
clfX_accuracy_average = []
clfY_accuracy_average = []

# Implement the iterative cross-learning method
for i in range(epochs):
    
    # Extract 80% of the data
    initial80, initial20, initial80Label, initial20Label = train_test_split(cifar_hog_train, cifar_Str, test_size=0.2, shuffle = True)
    
    # Split into another two sets of training data
    modelXTrain, modelYTrain, modelXLabel, modelYLabel = train_test_split(initial80, initial80Label, test_size=0.5, shuffle = True)
    
    # Prepare data for light gradient boosting classification
    Xtrain = lgb.Dataset(modelXTrain, label=modelXLabel)
    Ytrain = lgb.Dataset(modelYTrain, label=modelYLabel)
    
    # Train the lgb classifier
    clfX = lgb.train(params, Xtrain, 1000)
    clfY = lgb.train(params, Ytrain, 1000)
    
    # Produce a list of probabilities
    predictedX = clfX.predict(cifar_hog_train)
    predictedY = clfY.predict(cifar_hog_train)
    
    # Convert the probabilities to a list of labels
    XLabel = probConvert(predictedX)
    YLabel = probConvert(predictedY)
    
    # Print the accuracy for each classifier on the test set
    testPredictX = clfX.predict(cifar_hog_test)
    testXLabel = probConvert(testPredictX)
    testPredictY = clfY.predict(cifar_hog_test)
    textYLabel = probConvert(testPredictY)
    testAccX = accuracy_score(testPredictX, cifar_Yts)
    testAccY = accuracy_score(testPredictY, cifar_Yts)
    clfX_accuracy_average.append(testAccX)
    clfY_accuracy_average.append(testAccY)
    
    # Assign the labels
    cifar_Str = assignLabel(XLabel, YLabel, cifar_Str)
    print('Epoch', i+1, 'Classifier X Accuracy: ', np.round(testAccX*100,2))
    print('Epoch', i+1, 'Classifier Y Accuracy: ', np.round(testAccY*100,2))  
    print()
    
    if i == 9:
        print('Final Classifier X Average Accuracy: ', np.mean(clfX_accuracy_average))
        print('Final Classifier X Standard Deviation', np.std(clfX_accuracy_average))
        print('Final Classifier Y Average Accuracy: ', np.mean(clfY_accuracy_average))
        print('Final Classifier Y Standard Deviation', np.std(clfY_accuracy_average))

Epoch 1 Classifier X Accuracy:  70.15
Epoch 1 Classifier Y Accuracy:  66.85

Epoch 2 Classifier X Accuracy:  62.15
Epoch 2 Classifier Y Accuracy:  61.85

Epoch 3 Classifier X Accuracy:  58.95
Epoch 3 Classifier Y Accuracy:  59.15

Epoch 4 Classifier X Accuracy:  57.1
Epoch 4 Classifier Y Accuracy:  57.8

Epoch 5 Classifier X Accuracy:  56.45
Epoch 5 Classifier Y Accuracy:  56.65

Epoch 6 Classifier X Accuracy:  56.0
Epoch 6 Classifier Y Accuracy:  55.9

Epoch 7 Classifier X Accuracy:  55.1
Epoch 7 Classifier Y Accuracy:  55.6

Epoch 8 Classifier X Accuracy:  54.75
Epoch 8 Classifier Y Accuracy:  54.8

Epoch 9 Classifier X Accuracy:  54.8
Epoch 9 Classifier Y Accuracy:  54.2

Epoch 10 Classifier X Accuracy:  53.7
Epoch 10 Classifier Y Accuracy:  54.2

Final Classifier X Average Accuracy:  0.57915
Final Classifier X Standard Deviation 0.046967036312716176
Final Classifier Y Average Accuracy:  0.577
Final Classifier Y Standard Deviation 0.03804208196195364


## MNIST Importance Reweighting LGBM

In [27]:
# Set the number of epochs
epochs = 10
rho1 = 0.2
rho2 = 0.4
initial_accuracy_average = []
final_accuracy_average = []


for i in range(epochs):

    # Split into two sets of training data
    modelXTrain, modelYTrain, modelXLabel, modelYLabel = train_test_split(mnist_hog_train, mnist_Str, test_size=0.2, shuffle = True)

    # Prepare data for light gradient boosting classification
    XTrain = lgb.Dataset(modelXTrain, label=modelXLabel)

    # Train the lgb classifier
    clfX = lgb.train(params, XTrain, 1000)

    # Produce a list of probabilities: one on the training set the other on the test
    predictedX = clfX.predict(mnist_hog_train)
    predictedY = clfX.predict(mnist_hog_test)

    # Convert the probabilities to a list of labels
    YLabel = probConvert(predictedY)

    # Calculate the accuracy on test
    initial_accuracy = accuracy_score(YLabel, mnist_Yts)
    initial_accuracy_average.append(initial_accuracy)
    print('Epoch',i+1,'Classifier Accuracy Before IR: ',initial_accuracy)

    # Create an array to feed in for estimateBeta
    probX = np.expand_dims(predictedX, axis=1)
    inverseX = np.expand_dims(1-predictedX, axis=1)
    totalprobX = np.concatenate((inverseX, probX), axis=1)

    # Obtain weights for importance reweighting
    weights = estimateBeta(modelXLabel, totalprobX, rho1, rho2)

    # Pass the weights through a non-linear activation function
    for j in range(len(weights)):
        if weights[j] < 0:
            weights[j] = 0.0

    # Normalize the weights           
    weights = normalise(weights)

    # Remove a dimension from the weight array
    weights = weights[:,0]

    # Retrain the model with the new weights
    newTrain = lgb.Dataset(modelXTrain, label=modelXLabel, weight=weights)
    clfZ = lgb.train(params, newTrain, 1000)
    predictedZ = clfZ.predict(mnist_hog_test)
    Zlabel = probConvert(predictedZ)

    # Print the final accuracy in each epoch
    final_accuracy = accuracy_score(Zlabel, mnist_Yts)
    final_accuracy_average.append(final_accuracy)
    print('Epoch',i+1,'Classifier Accuracy After IR: ',final_accuracy)
    print()
    
    # Print the average scores
    if i == 9:
        print('Before IR Average Accuracy', np.mean(initial_accuracy_average))
        print('Before IR Standard Deviation', np.std(initial_accuracy_average))
        print('After IR Average Accuracy', np.mean(final_accuracy_average))
        print('After IR Average Accuracy', np.std(final_accuracy_average))

Epoch 1 Classifier Accuracy Before IR:  0.864
Epoch 1 Classifier Accuracy After IR:  0.9235

Epoch 2 Classifier Accuracy Before IR:  0.858
Epoch 2 Classifier Accuracy After IR:  0.9275

Epoch 3 Classifier Accuracy Before IR:  0.8575
Epoch 3 Classifier Accuracy After IR:  0.917

Epoch 4 Classifier Accuracy Before IR:  0.859
Epoch 4 Classifier Accuracy After IR:  0.9175

Epoch 5 Classifier Accuracy Before IR:  0.846
Epoch 5 Classifier Accuracy After IR:  0.9145

Epoch 6 Classifier Accuracy Before IR:  0.866
Epoch 6 Classifier Accuracy After IR:  0.927

Epoch 7 Classifier Accuracy Before IR:  0.856
Epoch 7 Classifier Accuracy After IR:  0.9205

Epoch 8 Classifier Accuracy Before IR:  0.8705
Epoch 8 Classifier Accuracy After IR:  0.91

Epoch 9 Classifier Accuracy Before IR:  0.861
Epoch 9 Classifier Accuracy After IR:  0.9225

Epoch 10 Classifier Accuracy Before IR:  0.864
Epoch 10 Classifier Accuracy After IR:  0.93

Before IR Average Accuracy 0.8602000000000001
Before IR Standard Deviati

## CIFAR Importance Reweighting LGBM

In [26]:
# Set the number of epochs
epochs = 10
rho1 = 0.2
rho2 = 0.4
initial_accuracy_average = []
final_accuracy_average = []

for i in range(epochs):

    # Split into two sets of training data
    modelXTrain, modelYTrain, modelXLabel, modelYLabel = train_test_split(cifar_hog_train, cifar_Str, test_size=0.2, shuffle = True)

    # Prepare data for light gradient boosting classification
    XTrain = lgb.Dataset(modelXTrain, label=modelXLabel)

    # Train the lgb classifier
    clfX = lgb.train(params, XTrain, 1000)

    # Produce a list of probabilities: one on the training set the other on the test
    predictedX = clfX.predict(cifar_hog_train)
    predictedY = clfX.predict(cifar_hog_test)

    # Convert the probabilities to a list of labels
    YLabel = probConvert(predictedY)

    # Calculate the accuracy on test
    initial_accuracy = accuracy_score(YLabel, cifar_Yts)
    initial_accuracy_average.append(initial_accuracy)
    print('Epoch',i+1,'Classifier Accuracy Before IR: ',initial_accuracy)

    # Create an array to feed in for estimateBeta
    probX = np.expand_dims(predictedX, axis=1)
    inverseX = np.expand_dims(1-predictedX, axis=1)
    totalprobX = np.concatenate((inverseX, probX), axis=1)

    # Obtain weights for importance reweighting
    weights = estimateBeta(modelXLabel, totalprobX, rho1, rho2)

    # Pass the weights through a non-linear activation function
    for j in range(len(weights)):
        if weights[j] < 0:
            weights[j] = 0.0

    # Normalize the weights           
    weights = normalise(weights)

    # Remove a dimension from the weight array
    weights = weights[:,0]

    # Retrain the model with the new weights
    newTrain = lgb.Dataset(modelXTrain, label=modelXLabel, weight=weights)
    clfZ = lgb.train(params, newTrain, 1000)
    predictedZ = clfZ.predict(cifar_hog_test)
    Zlabel = probConvert(predictedZ)

    # Print the final accuracy in each epoch
    final_accuracy = accuracy_score(Zlabel, cifar_Yts)
    final_accuracy_average.append(final_accuracy)
    print('Epoch',i+1,'Classifier Accuracy After IR: ',final_accuracy)
    print()
    
    # Print the average scores
    if i == 9:
        print('Before IR Average Accuracy', np.mean(initial_accuracy_average))
        print('Before IR Standard Deviation', np.std(initial_accuracy_average))
        print('After IR Average Accuracy', np.mean(final_accuracy_average))
        print('After IR Standard Deviation', np.std(final_accuracy_average))

Epoch 1 Classifier Accuracy Before IR:  0.6835
Epoch 1 Classifier Accuracy After IR:  0.823

Epoch 2 Classifier Accuracy Before IR:  0.6775
Epoch 2 Classifier Accuracy After IR:  0.8235

Epoch 3 Classifier Accuracy Before IR:  0.663
Epoch 3 Classifier Accuracy After IR:  0.8115

Epoch 4 Classifier Accuracy Before IR:  0.665
Epoch 4 Classifier Accuracy After IR:  0.8235

Epoch 5 Classifier Accuracy Before IR:  0.671
Epoch 5 Classifier Accuracy After IR:  0.8195

Epoch 6 Classifier Accuracy Before IR:  0.683
Epoch 6 Classifier Accuracy After IR:  0.83

Epoch 7 Classifier Accuracy Before IR:  0.691
Epoch 7 Classifier Accuracy After IR:  0.8275

Epoch 8 Classifier Accuracy Before IR:  0.674
Epoch 8 Classifier Accuracy After IR:  0.8345

Epoch 9 Classifier Accuracy Before IR:  0.679
Epoch 9 Classifier Accuracy After IR:  0.836

Epoch 10 Classifier Accuracy Before IR:  0.69
Epoch 10 Classifier Accuracy After IR:  0.8325

Before IR Average Accuracy 0.6777000000000001
Before IR Standard Deviat

# Convolutional Neural Network

## MNIST Iterative Cross Learning CNN

In [19]:
# Prepare the dataset
dataset = np.load('/Users/joshhuang/Desktop/University/Master of Data Science/COMP5328/mnist_dataset.npz') 
Xtr = dataset["Xtr"] 
Str = dataset["Str"] 
Xts = dataset["Xts"] 
Yts = dataset["Yts"] 

# Create initial model
modelZ = Sequential()
modelZ.add(InputLayer(input_shape=(28, 28, 1)))
modelZ.add(BatchNormalization())
modelZ.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
modelZ.add(MaxPool2D(padding='same'))
modelZ.add(Flatten())
modelZ.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
modelZ.add(Dense(2, activation='softmax'))
modelZ.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create model 1
modelX = Sequential()
modelX.add(InputLayer(input_shape=(28, 28, 1)))
modelX.add(BatchNormalization())
modelX.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
modelX.add(MaxPool2D(padding='same'))
modelX.add(Flatten())
modelX.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
modelX.add(Dense(2, activation='softmax'))
modelX.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create model 2
modelY = Sequential()
modelY.add(InputLayer(input_shape=(28, 28, 1)))
modelY.add(BatchNormalization())
modelY.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
modelY.add(MaxPool2D(padding='same'))
modelY.add(Flatten())
modelY.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
modelY.add(Dense(2, activation='softmax'))
modelY.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Sample randomly and average results 10 times
classifier_X_results = []
classifier_Y_results = []
repeat_experiment = 10
batch_size = 75

for i in range(repeat_experiment):
    
    # Perform first initial split of 80% training data to 20% validation data
    initial80, initial20, initial80Label, initial20Label = train_test_split(Xtr, Str, test_size = 0.2)
    
    # Perform the next split so that each classifier receives half of the training data
    X_train, X_val, Y_train, Y_val = train_test_split(initial80, initial80Label, test_size = 0.5)   

    # Encode class values as integers
    y_train = to_categorical(Y_train)
    y_val = to_categorical(Y_val)
    y_test = to_categorical(Yts)
    i80label = to_categorical(initial80Label)
    i20label = to_categorical(initial20Label)

    # Reshape data to fit model
    i80train = initial80.reshape(8000,28,28,1)
    i20train = initial20.reshape(2000,28,28,1)
    x_train = X_train.reshape(4000,28,28,1)
    x_val = X_val.reshape(4000,28,28,1)
    x_test = Xts.reshape(2000,28,28,1)
    
    # Train comparison model
    if i == 0:
        modelZ.fit(i80train,i80label,epochs=3,batch_size=batch_size,validation_data=(i20train, i20label))
        results_classesZ = modelZ.predict_classes(x_test)
        Z_accuracy = accuracy_score(y_test[:,1].flatten(), results_classesZ)
        print()
        print('Initial Classifier Accuracy', np.round(Z_accuracy*100,2))
        print()

    # Train 2 classifiers, each with a different partiion of training data    
    modelX.fit(x_train,y_train,epochs=3,batch_size=batch_size,validation_data=(i20train, i20label))
    modelY.fit(x_val,y_val,epochs=3,batch_size=batch_size,validation_data=(i20train, i20label))

    # Predict classes using the 2 classifiers
    results_classesX = modelX.predict_classes(x_test)
    results_classesY = modelY.predict_classes(x_test)
    
    # Get a list of accuracies by comparing with test data
    X_accuracy = accuracy_score(y_test[:,1].flatten(), results_classesX)
    Y_accuracy = accuracy_score(y_test[:,1].flatten(), results_classesY)
    
    # Append the list of accuracies
    classifier_X_results.append(X_accuracy)
    classifier_Y_results.append(Y_accuracy)
    
    # Reassign labels of the training data
    Str = assignLabel(results_classesX, results_classesY, Str)
    
    print()
    print('Epoch', i+1, 'Classifier X Accuracy: ', np.round(X_accuracy*100,2))
    print('Epoch', i+1, 'Classifier Y Accuracy: ', np.round(Y_accuracy*100,2))  
    print()
    
    if i == 9:
        print('Final Classifier X Average Accuracy: ', np.mean(classifier_X_results))
        print('Final Classifier X Standard Deviation', np.std(classifier_X_results))
        print('Final Classifier Y Average Accuracy: ', np.mean(classifier_Y_results))
        print('Final Classifier Y Standard Deviation', np.std(classifier_Y_results))

Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Initial Classifier Accuracy 90.7

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 1 Classifier X Accuracy:  91.5
Epoch 1 Classifier Y Accuracy:  93.15

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 2 Classifier X Accuracy:  91.25
Epoch 2 Classifier Y Accuracy:  65.9

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 3 Classifier X Accuracy:  73.05
Epoch 3 Classifier Y Accuracy:  88.65

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 4 Classifier X Accuracy:

## CIFAR Iterative Cross Learning CNN

In [20]:
# Prepare the dataset
dataset = np.load('/Users/joshhuang/Desktop/University/Master of Data Science/COMP5328/cifar_dataset.npz') 
Xtr = dataset["Xtr"] 
Str = dataset["Str"] 
Xts = dataset["Xts"] 
Yts = dataset["Yts"] 

# Create initial model
modelZ = Sequential()
modelZ.add(InputLayer(input_shape=(32, 32, 3)))
modelZ.add(BatchNormalization())
modelZ.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
modelZ.add(MaxPool2D(padding='same'))
modelZ.add(Flatten())
modelZ.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
modelZ.add(Dense(2, activation='softmax'))
modelZ.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create model 1
modelX = Sequential()
modelX.add(InputLayer(input_shape=(32, 32, 3)))
modelX.add(BatchNormalization())
modelX.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
modelX.add(MaxPool2D(padding='same'))
modelX.add(Flatten())
modelX.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
modelX.add(Dense(2, activation='softmax'))
modelX.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create model 2
modelY = Sequential()
modelY.add(InputLayer(input_shape=(32, 32, 3)))
modelY.add(BatchNormalization())
modelY.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
modelY.add(MaxPool2D(padding='same'))
modelY.add(Flatten())
modelY.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
modelY.add(Dense(2, activation='softmax'))
modelY.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Sample randomly and average results 10 times
classifier_X_results = []
classifier_Y_results = []
repeat_experiment = 10
batch_size = 75

for i in range(repeat_experiment):
    
    # Perform first initial split of 80% training data to 20% validation data
    initial80, initial20, initial80Label, initial20Label = train_test_split(Xtr, Str, test_size = 0.2)
    
    # Perform the next split so that each classifier receives half of the training data
    X_train, X_val, Y_train, Y_val = train_test_split(initial80, initial80Label, test_size = 0.5)   

    # Encode class values as integers
    y_train = to_categorical(Y_train)
    y_val = to_categorical(Y_val)
    y_test = to_categorical(Yts)
    i80label = to_categorical(initial80Label)
    i20label = to_categorical(initial20Label)

    # Reshape data to fit model
    i80train = initial80.reshape(8000,32,32,3)
    i20train = initial20.reshape(2000,32,32,3)
    x_train = X_train.reshape(4000,32,32,3)
    x_val = X_val.reshape(4000,32,32,3)
    x_test = Xts.reshape(2000,32,32,3)
    
    # Train comparison model
    if i == 0:
        modelZ.fit(i80train,i80label,epochs=3,batch_size=batch_size,validation_data=(i20train, i20label))
        results_classesZ = modelZ.predict_classes(x_test)
        Z_accuracy = accuracy_score(y_test[:,1].flatten(), results_classesZ)
        print()
        print('Initial Classifier Accuracy', np.round(Z_accuracy*100,2))
        print()

    # Train 2 classifiers, each with a different partiion of training data    
    modelX.fit(x_train,y_train,epochs=3,batch_size=batch_size,validation_data=(i20train, i20label))
    modelY.fit(x_val,y_val,epochs=3,batch_size=batch_size,validation_data=(i20train, i20label))

    # Predict classes using the 2 classifiers
    results_classesX = modelX.predict_classes(x_test)
    results_classesY = modelY.predict_classes(x_test)
    
    # Get a list of accuracies by comparing with test data
    X_accuracy = accuracy_score(y_test[:,1].flatten(), results_classesX)
    Y_accuracy = accuracy_score(y_test[:,1].flatten(), results_classesY)
    
    # Append the list of accuracies
    classifier_X_results.append(X_accuracy)
    classifier_Y_results.append(Y_accuracy)
    
    # Reassign labels of the training data
    Str = assignLabel(results_classesX, results_classesY, Str)
    
    print()
    print('Epoch', i+1, 'Classifier X Accuracy: ', np.round(X_accuracy*100,2))
    print('Epoch', i+1, 'Classifier Y Accuracy: ', np.round(Y_accuracy*100,2))  
    print()
    
    if i == 9:
        print('Final Classifier X Average Accuracy: ', np.mean(classifier_X_results))
        print('Final Classifier X Standard Deviation', np.std(classifier_X_results))
        print('Final Classifier Y Average Accuracy: ', np.mean(classifier_Y_results))
        print('Final Classifier Y Standard Deviation', np.std(classifier_Y_results))

Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Initial Classifier Accuracy 82.95

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 1 Classifier X Accuracy:  71.15
Epoch 1 Classifier Y Accuracy:  81.3

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 2 Classifier X Accuracy:  79.25
Epoch 2 Classifier Y Accuracy:  67.0

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 3 Classifier X Accuracy:  78.3
Epoch 3 Classifier Y Accuracy:  65.75

Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 4000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Epoch 4 Classifier X Accuracy:

## MNIST Importance Reweighting CNN

In [69]:
def estimateBeta(train_labels,prob,rho0,rho1):
    n = len(train_labels)
    beta = np.zeros((n,1))
    for i in range(n):
        if train_labels[:,1][i].any()==1:
            beta[i] = (prob[i][1]-rho0)/((1-rho0-rho1)*prob[i][1]+1e-5)
        else:
            beta[i] = (prob[i][0]-rho1)/((1-rho0-rho1)*(prob[i][0])+1e-5)
    return beta

# Prepare the dataset
dataset = np.load('/Users/joshhuang/Desktop/University/Master of Data Science/COMP5328/mnist_dataset.npz') 

Xtr = dataset["Xtr"] 
Str = dataset["Str"] 
Xts = dataset["Xts"] 
Yts = dataset["Yts"] 

# Create CNN model
model = Sequential()

# Add model layers
model.add(InputLayer(input_shape=(28, 28, 1)))
model.add(BatchNormalization())
model.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
model.add(MaxPool2D(padding='same'))
model.add(Flatten())
model.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Sample randomly and average results 10 times
accuracy_results = []
accuracy_IR = []
repeat_experiment = 10

for i in range(repeat_experiment):

    # Partition data into 80% and 20% training to validation split
    X_train, X_val, Y_train, Y_val = train_test_split(Xtr, Str, test_size = 0.2)
    batch_size = 75

    # Encode class values as integers
    y_train = to_categorical(Y_train)
    y_val = to_categorical(Y_val)
    y_test = to_categorical(Yts)

    # Reshape data to fit model
    x_train = X_train.reshape(8000,28,28,1)
    x_val = X_val.reshape(2000,28,28,1)
    x_test = Xts.reshape(2000,28,28,1)

    # Train initial model and produce an initial set of predictions
    model.fit(x_train,y_train,epochs=3,batch_size=batch_size,validation_data=(x_val, y_val))
    results_classes = model.predict_classes(x_test)
    accuracy = accuracy_score(y_test[:,1].flatten(), results_classes)
    accuracy_results.append(accuracy)
    print('Epoch',i+1,'Classifier Accuracy Before IR: ', accuracy)
    
    # Apply importance reweighting
    rho0 = 0.2
    rho1 = 0.4
    prob = model.predict(x_train)
    weights = estimateBeta(y_train, prob, rho0, rho1)

    # Apply non-linear activation function and normalize the weights
    for j in range(len(weights)):
        if weights[j] < 0:
            weights[j] = 0.0

    weights = normalise(weights)

    # Retrain model with new IR weights produce a new set of predictions
    model.fit(x_train,y_train,epochs=3,batch_size=batch_size,validation_data=(x_val, y_val), sample_weight = weights.flatten())    
    results_final_importance_weighting = model.predict_classes(x_test)
    accuracy_final_importance_weighting = accuracy_score(y_test[:,1].flatten(), results_final_importance_weighting)
    accuracy_IR.append(accuracy_final_importance_weighting)
    print('Epoch',i+1,'Classifier Accuracy After IR: ',accuracy_final_importance_weighting)

# Prediction Results
accuracy_without_IR = np.mean(accuracy_results)
std_without_IR = np.std(accuracy_results)

accuracy_with_IR = np.mean(accuracy_IR)
std_with_IR = np.std(accuracy_IR)

print()
print('Average Accuracy No IR: ',accuracy_without_IR)
print('Average Standard Deviation No IR: ',std_without_IR)
print('Average Accuracy With IR: ',accuracy_with_IR)
print('Average Standard Deviation With IR: ',std_with_IR)

Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 

Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Average Accuracy No IR:  0.7798
Average Standard Deviation No IR:  0.07421866342100214
Average Accuracy With IR:  0.8033000000000001
Average Standard Deviation With IR:  0.08321844747410276


## CIFAR Importance Reweighting CNN

In [70]:
def estimateBeta(train_labels,prob,rho0,rho1):
    n = len(train_labels)
    beta = np.zeros((n,1))
    for i in range(n):
        if train_labels[:,1][i].any()==1:
            beta[i] = (prob[i][1]-rho0)/((1-rho0-rho1)*prob[i][1]+1e-5)
        else:
            beta[i] = (prob[i][0]-rho1)/((1-rho0-rho1)*(prob[i][0])+1e-5)
    return beta

# Prepare the dataset
dataset = np.load('/Users/joshhuang/Desktop/University/Master of Data Science/COMP5328/cifar_dataset.npz') 
Xtr = dataset["Xtr"] 
Str = dataset["Str"] 
Xts = dataset["Xts"] 
Yts = dataset["Yts"] 

# Create CNN model
model = Sequential()

# Add model layers
model.add(InputLayer(input_shape=(32, 32, 3)))
model.add(BatchNormalization())
model.add(Conv2D(32, (2, 2), padding='same', bias_initializer=Constant(0.01), kernel_initializer='random_uniform'))
model.add(MaxPool2D(padding='same'))
model.add(Flatten())
model.add(Dense(128,activation='relu',bias_initializer=Constant(0.01), kernel_initializer='random_uniform',))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Sample randomly and average results 10 times
accuracy_results = []
accuracy_IR = []
repeat_experiment = 10

for i in range(repeat_experiment):

    # Partition data into 80% and 20% training to validation split
    X_train, X_val, Y_train, Y_val = train_test_split(Xtr, Str, test_size = 0.2)
    batch_size = 75

    # Encode class values as integers
    y_train = to_categorical(Y_train)
    y_val = to_categorical(Y_val)
    y_test = to_categorical(Yts)

    # Reshape data to fit model
    x_train = X_train.reshape(8000,32,32,3)
    x_val = X_val.reshape(2000,32,32,3)
    x_test = Xts.reshape(2000,32,32,3)

    # Train initial model and produce initial set of predictions
    model.fit(x_train,y_train,epochs=3,batch_size=batch_size,validation_data=(x_val, y_val))
    results_classes = model.predict_classes(x_test)
    accuracy = accuracy_score(y_test[:,1].flatten(), results_classes)
    accuracy_results.append(accuracy)
    print('Epoch',i+1,'Classifier Accuracy Before IR: ', accuracy)
    
    # Apply importance reweighting
    rho0 = 0.2
    rho1 = 0.4
    prob = model.predict(x_train)
    weights = estimateBeta(y_train, prob, rho0, rho1)

    # Apply non-linear activation function and normalize the weights
    for j in range(len(weights)):
        if weights[j] < 0:
            weights[j] = 0.0
        
    weights = normalise(weights)
    
    # Retrain model with new IR weights and create a new set of predictions
    model.fit(x_train,y_train,epochs=3,batch_size=batch_size,validation_data=(x_val, y_val), sample_weight = weights.flatten())    
    results_final_importance_weighting = model.predict_classes(x_test)
    accuracy_final_importance_weighting = accuracy_score(y_test[:,1].flatten(), results_final_importance_weighting)
    accuracy_IR.append(accuracy_final_importance_weighting)
    print('Epoch',i+1,'Classifier Accuracy After IR: ',accuracy_final_importance_weighting)

# Prediction results    
accuracy_without_IR = np.mean(accuracy_results)
std_without_IR = np.std(accuracy_results)

accuracy_with_IR = np.mean(accuracy_IR)
std_with_IR = np.std(accuracy_IR)

print()
print('Average Accuracy No IR: ',accuracy_without_IR)
print('Average Standard Deviation No IR: ',std_without_IR)
print('Average Accuracy With IR: ',accuracy_with_IR)
print('Average Standard Deviation With IR: ',std_with_IR)

Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 

Epoch 2/3
Epoch 3/3
Train on 8000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

Average Accuracy No IR:  0.7222
Average Standard Deviation No IR:  0.02620038167660922
Average Accuracy With IR:  0.7356
Average Standard Deviation With IR:  0.04546141220859731


## Noise Rate Estimation

This was our attempt to estimate the noise rate using the paper. Although unsucessful, we have outlined our method here.

In [None]:
pd.to_numeric(pd.DataFrame(prob*100).iloc[:,0])
counts = pd.DataFrame(y_train).iloc[:,1].value_counts()
counts[0]/(counts[0]+counts[1])
counts[1]/(counts[0]+counts[1])

left = pd.DataFrame(prob)
right_ytrain = pd.DataFrame(y_train).iloc[:,1]
right_predicted = pd.DataFrame(model.predict_classes(x_train))
table = pd.concat((left, right_predicted), axis = 1)
table.columns = ('0','1', 'labels')

ro0 = table[table.iloc[:,2]==0].iloc[:,1].min()
ro1 = table[table.iloc[:,2]==1].iloc[:,0].min()
print(ro0)
print(ro1)
#plt.hist(table[table.iloc[:,2]==1].iloc[:,1], bins = 5)