In [8]:
#Multi Layer Perceptron for Binary Classification
import tensorflow as tf
from matplotlib import pyplot
import numpy as np
import pandas as pd
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
import time
import os
import tempfile

In [9]:
#Load the Datasets
#Main table: read with header=None, so every column is addressed by integer position.
path = 'raw_mosaic_pixeldata_NN.csv'
df = read_csv(path, header=None)

#Each CSV below holds one candidate list of column numbers and is converted to a 2-D
#NumPy array (the printed arrays consist of a single row).
#feature_path / feature_df are scratch names that are overwritten for every file; only the
#feature_array_* variables keep the loaded values.
feature_path = 'ETC_RFE_ANOVA.csv'
feature_df = read_csv(feature_path, header=None)
feature_array_triple = feature_df.to_numpy()
print(feature_array_triple)

#NOTE(review): feature_array_twelve is not referenced by any later cell visible here.
feature_path = '12_features.csv'
feature_df = read_csv(feature_path, header=None)
feature_array_twelve = feature_df.to_numpy()
print(feature_array_twelve)

feature_path = 'LIT_Bands_Intersection_Feature_Selection.csv'
feature_df = read_csv(feature_path, header=None)
feature_array_LIT_intersect = feature_df.to_numpy()
print(feature_array_LIT_intersect)

#Only the element count is printed for this list, not the values themselves.
feature_path = 'Literature_Bands_61.csv'
feature_df = read_csv(feature_path, header=None)
feature_array_LIT_61 = feature_df.to_numpy()
print(feature_array_LIT_61.size)

#Function used to return just the values in the list, can be used for 104 and 12 feature lists
def array_values(list):
    """Return the first item obtained by iterating over ``list``.

    For a 2-D array with a single row this yields that row as a 1-D array,
    which is how the feature-index arrays in this notebook are unwrapped.

    Parameters
    ----------
    list : iterable
        Any iterable. The name shadows the builtin ``list``; it is kept so
        that existing keyword calls keep working.

    Returns
    -------
    The first element of the iterable, or ``None`` when it is empty.
    """
    return next(iter(list), None)

[[ 24  25  26  27  28  29  30  31  32  33  34  35  78  79  81  82  84  86
   87  88  89  91  92  93 155 156 157 158 159 160 161 162 163 164 165 166
  167 168 169 170 171 172 173 186 187 188 189 191 192 193 194 196 197 198
  199 200 201 203 204 205 206 207 208 210 281 283 284 290 292 293 306 309
  313 319 321 327 330 331 336 337 338 619 620 621 622 623 624 710 714 718
  719 721 727 732 735 738 743 744 747 749 751 758 759 766]]
[[164 165 166 189 191 192 193 196 199 200 203 204]]
[[ 79  82  87  88  89 167 168 169 170 171 196 197 620 622 624 727]]
61


In [10]:
#Split into input/output columns: every column except the last is a predictor,
#the last column is the class label.
raw = df.values
X = raw[:, :-1].astype('float32')
#Encode the string labels as integers
y = LabelEncoder().fit_transform(raw[:, -1])
del raw  #the full table copy is not needed once X and y exist

#Hold out 30% of the rows as the test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=17)
n_features = X_train.shape[1]
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(25247, 767) (10821, 767) (25247,) (10821,)


In [None]:
#Create the model first time

#Define Model
#767 Bands
#N is the base layer width; the hidden layers are N, 2N, 3N and 2N units wide and feed a
#single sigmoid output unit. The same N is reused by the later reduced-feature models.
N = 100
model_1 = Sequential([
    Dense(N, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)),
    Dense(2*N, activation='relu', kernel_initializer='he_normal'),
    Dense(3*N, activation='relu', kernel_initializer='he_normal'),
    Dense(2*N, activation='relu', kernel_initializer='he_normal'),
    Dense(1, activation='sigmoid'),
])

In [None]:
#Compile Model
#The order of the metrics matters: the evaluation cell unpacks evaluate()'s result positionally.
model_1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)
model_1.summary()

In [None]:
#Save the initial (untrained) weights into a fresh temporary directory so they can be
#restored with load_weights(initial_weights) later on
weights_dir = tempfile.mkdtemp()
initial_weights = os.path.join(weights_dir, 'initial_weights')
model_1.save_weights(initial_weights)

In [None]:
##DO ALL YOUR TRAINING ON THE FULL BAND SET USING MODEL_1##
#The fit is timed with time.perf_counter(): it is monotonic, so the measured duration
#cannot be distorted by system clock adjustments the way a time.time() difference can.
#The last 30% of the training rows are held back by Keras as the validation set.
start_time = time.perf_counter()
history_1 = model_1.fit(X_train, y_train, epochs=100, batch_size=300, validation_split=0.3)
end_time = time.perf_counter()
training_time = end_time - start_time  #seconds

In [None]:
#Save Model
#Writes model_1 to an .h5 file in the current working directory
model_1.save('767Bands_100200300_model.h5')

In [None]:
#Evaluate on the held-out test split. The result is unpacked positionally: loss first,
#then the metrics in the order they were listed in compile().
loss, accuracy, auc, precision, recall = model_1.evaluate(X_test, y_test, verbose=0)
print('Test Loss: %.3f' % loss)
print('Test Accuracy: %.3f' % accuracy)
print('Test AUC: %.3f' % auc)
print('Test Precision: %.3f' % precision)
print('Test Recall: %.3f' % recall)
print("Training time:", training_time, "seconds")

#Round the sigmoid outputs to hard 0/1 labels and flatten them to 1-D, so the confusion
#matrix receives one label per sample instead of a nested list of one-element lists.
y_pred = np.round(model_1.predict(X_test)).ravel()
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

hist_df = pd.DataFrame(history_1.history)
cm_df = pd.DataFrame(cm)

#Write the per-epoch training history followed by the confusion matrix into one CSV file.
#newline='' is what pandas expects for file objects handed to to_csv(); without it, text
#mode on Windows translates the line endings a second time and inserts blank rows.
output_csv_file = '767Bands_100200300_Output.csv'
with open(output_csv_file, mode='w', newline='') as f:
    hist_df.to_csv(f)
    f.write(os.linesep)  #blank separator row, same line ending that to_csv uses by default
    cm_df.to_csv(f, index=False, header=False)

In [None]:
#Show the path under which the initial weights were saved (inside a temporary directory)
print(initial_weights)

In [None]:
##NOW IT'S TIME TO RELOAD THE INITIAL (UNTRAINED) WEIGHTS INTO MODEL_1##

In [None]:
#This is where the initial weights, saved before any training, are loaded back into model_1
model_1.load_weights(initial_weights)

In [None]:
model_1.summary()
#The summary is the same as before: reloading weights does not change the architecture.
#Our next goal is to change the input shape.
#To do this we will create a new model whose first layer matches the input dimensions
#that we need, followed by the remaining layers taken from model_1.

In [None]:
#Model 2 -- 3 Feature Selection Algorithms
#Keep only the columns listed in feature_array_triple. 1 is subtracted from every listed
#number before indexing (presumably the list is 1-based -- TODO confirm).
raw = df.values
band_columns = array_values(feature_array_triple) - 1
X = raw[:, band_columns].astype('float32')
y = LabelEncoder().fit_transform(raw[:, -1])
del raw  #the full table copy is not needed once X and y exist

#Same 70/30 split and random_state as for the full-band model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=17)
n_features = X_train.shape[1]

In [None]:
#model_1 currently holds the reloaded initial weights.
#model_2 gets a new first layer sized for the reduced feature set, followed by independent
#copies of model_1's remaining layers that start from model_1's current weights.
model_2 = Sequential()
#create a new first layer with the desired input dimensions
model_2.add(Dense(N, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)))
#go through all layers of model_1 but the first one
for source_layer in model_1.layers[1:]:
    #Re-create the layer from its config instead of adding the same object. A shared layer
    #would be trained through model_2 as well as model_1, and any later
    #model_1.load_weights(...) would silently overwrite model_2's trained weights.
    layer_copy = type(source_layer).from_config(source_layer.get_config())
    model_2.add(layer_copy)
    #set_weights needs the layer's variables to exist, so it is called after add()
    layer_copy.set_weights(source_layer.get_weights())

In [None]:
#Same optimizer, loss and metric order as model_1; the evaluation cell unpacks
#evaluate()'s result positionally.
model_2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)
model_2.summary()

In [None]:
#Train model_2 and time the fit. time.perf_counter() is monotonic, so the measured duration
#cannot be distorted by system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
history_2 = model_2.fit(X_train, y_train, epochs=100, batch_size=300, validation_split=0.3)
end_time = time.perf_counter()
training_time = end_time - start_time  #seconds

In [None]:
#Writes model_2 to an .h5 file in the current working directory
model_2.save('3Feature_100200300_model.h5')

In [None]:
#Evaluate on the held-out test split. The result is unpacked positionally: loss first,
#then the metrics in the order they were listed in compile().
loss, accuracy, auc, precision, recall = model_2.evaluate(X_test, y_test, verbose=0)
print('Test Loss: %.3f' % loss)
print('Test Accuracy: %.3f' % accuracy)
print('Test AUC: %.3f' % auc)
print('Test Precision: %.3f' % precision)
print('Test Recall: %.3f' % recall)
print("Training time:", training_time, "seconds")

#Round the model outputs to hard 0/1 labels and flatten them to 1-D, so the confusion
#matrix receives one label per sample instead of a nested list of one-element lists.
y_pred = np.round(model_2.predict(X_test)).ravel()
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

hist_df = pd.DataFrame(history_2.history)
cm_df = pd.DataFrame(cm)

#Write the per-epoch training history followed by the confusion matrix into one CSV file.
#newline='' is what pandas expects for file objects handed to to_csv(); without it, text
#mode on Windows translates the line endings a second time and inserts blank rows.
output_csv_file = '3Feature_100200300_Output.csv'
with open(output_csv_file, mode='w', newline='') as f:
    hist_df.to_csv(f)
    f.write(os.linesep)  #blank separator row, same line ending that to_csv uses by default
    cm_df.to_csv(f, index=False, header=False)

In [None]:
#Reset model_1 to the saved initial weights before the next model is assembled from its layers
model_1.load_weights(initial_weights)
model_1.summary()

In [None]:
#Model 3 -- Literature Band Intersection
#Keep only the columns listed in feature_array_LIT_intersect. 1 is subtracted from every
#listed number before indexing (presumably the list is 1-based -- TODO confirm).
raw = df.values
band_columns = array_values(feature_array_LIT_intersect) - 1
X = raw[:, band_columns].astype('float32')
y = LabelEncoder().fit_transform(raw[:, -1])
del raw  #the full table copy is not needed once X and y exist

#Same 70/30 split and random_state as for the full-band model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=17)
n_features = X_train.shape[1]

In [None]:
#model_1 currently holds the reloaded initial weights.
#model_3 gets a new first layer sized for the reduced feature set, followed by independent
#copies of model_1's remaining layers that start from model_1's current weights.
model_3 = Sequential()
#create a new first layer with the desired input dimensions
model_3.add(Dense(N, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)))
#go through all layers of model_1 but the first one
for source_layer in model_1.layers[1:]:
    #Re-create the layer from its config instead of adding the same object. A shared layer
    #would be trained through model_3 as well as model_1, and any later
    #model_1.load_weights(...) would silently overwrite model_3's trained weights.
    layer_copy = type(source_layer).from_config(source_layer.get_config())
    model_3.add(layer_copy)
    #set_weights needs the layer's variables to exist, so it is called after add()
    layer_copy.set_weights(source_layer.get_weights())

In [None]:
#Same optimizer, loss and metric order as model_1; the evaluation cell unpacks
#evaluate()'s result positionally.
model_3.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)
model_3.summary()

In [None]:
#Train model_3 and time the fit. time.perf_counter() is monotonic, so the measured duration
#cannot be distorted by system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
history_3 = model_3.fit(X_train, y_train, epochs=100, batch_size=300, validation_split=0.3)
end_time = time.perf_counter()
training_time = end_time - start_time  #seconds

In [None]:
#Writes model_3 to an .h5 file in the current working directory
model_3.save('LitIntersection_100200300_model.h5')

In [None]:
#Evaluate on the held-out test split. The result is unpacked positionally: loss first,
#then the metrics in the order they were listed in compile().
loss, accuracy, auc, precision, recall = model_3.evaluate(X_test, y_test, verbose=0)
print('Test Loss: %.3f' % loss)
print('Test Accuracy: %.3f' % accuracy)
print('Test AUC: %.3f' % auc)
print('Test Precision: %.3f' % precision)
print('Test Recall: %.3f' % recall)
print("Training time:", training_time, "seconds")

#Round the model outputs to hard 0/1 labels and flatten them to 1-D, so the confusion
#matrix receives one label per sample instead of a nested list of one-element lists.
y_pred = np.round(model_3.predict(X_test)).ravel()
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

hist_df = pd.DataFrame(history_3.history)
cm_df = pd.DataFrame(cm)

#Write the per-epoch training history followed by the confusion matrix into one CSV file.
#newline='' is what pandas expects for file objects handed to to_csv(); without it, text
#mode on Windows translates the line endings a second time and inserts blank rows.
output_csv_file = 'LitIntersection_100200300_Output.csv'
with open(output_csv_file, mode='w', newline='') as f:
    hist_df.to_csv(f)
    f.write(os.linesep)  #blank separator row, same line ending that to_csv uses by default
    cm_df.to_csv(f, index=False, header=False)

In [None]:
#Reset model_1 to the saved initial weights before the next model is assembled from its layers
model_1.load_weights(initial_weights)
model_1.summary()

In [None]:
#Model 4 -- Literature Bands 61
#Keep only the columns listed in feature_array_LIT_61. 1 is subtracted from every listed
#number before indexing (presumably the list is 1-based -- TODO confirm).
raw = df.values
band_columns = array_values(feature_array_LIT_61) - 1
X = raw[:, band_columns].astype('float32')
y = LabelEncoder().fit_transform(raw[:, -1])
del raw  #the full table copy is not needed once X and y exist

#Same 70/30 split and random_state as for the full-band model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=17)
n_features = X_train.shape[1]

In [None]:
#model_1 currently holds the reloaded initial weights.
#model_4 gets a new first layer sized for the reduced feature set, followed by independent
#copies of model_1's remaining layers that start from model_1's current weights.
model_4 = Sequential()
#create a new first layer with the desired input dimensions
model_4.add(Dense(N, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)))
#go through all layers of model_1 but the first one
for source_layer in model_1.layers[1:]:
    #Re-create the layer from its config instead of adding the same object. A shared layer
    #would be trained through model_4 as well as model_1, and any later
    #model_1.load_weights(...) would silently overwrite model_4's trained weights.
    layer_copy = type(source_layer).from_config(source_layer.get_config())
    model_4.add(layer_copy)
    #set_weights needs the layer's variables to exist, so it is called after add()
    layer_copy.set_weights(source_layer.get_weights())

In [None]:
#Same optimizer, loss and metric order as model_1; the evaluation cell unpacks
#evaluate()'s result positionally.
model_4.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)
model_4.summary()

In [None]:
#Train model_4 and time the fit. time.perf_counter() is monotonic, so the measured duration
#cannot be distorted by system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
history_4 = model_4.fit(X_train, y_train, epochs=100, batch_size=300, validation_split=0.3)
end_time = time.perf_counter()
training_time = end_time - start_time  #seconds

In [None]:
#Writes model_4 to an .h5 file in the current working directory
model_4.save('LitBands61_100200300_model.h5')

In [None]:
#Evaluate on the held-out test split. The result is unpacked positionally: loss first,
#then the metrics in the order they were listed in compile().
loss, accuracy, auc, precision, recall = model_4.evaluate(X_test, y_test, verbose=0)
print('Test Loss: %.3f' % loss)
print('Test Accuracy: %.3f' % accuracy)
print('Test AUC: %.3f' % auc)
print('Test Precision: %.3f' % precision)
print('Test Recall: %.3f' % recall)
print("Training time:", training_time, "seconds")

#Round the model outputs to hard 0/1 labels and flatten them to 1-D, so the confusion
#matrix receives one label per sample instead of a nested list of one-element lists.
y_pred = np.round(model_4.predict(X_test)).ravel()
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

hist_df = pd.DataFrame(history_4.history)
cm_df = pd.DataFrame(cm)

#Write the per-epoch training history followed by the confusion matrix into one CSV file.
#newline='' is what pandas expects for file objects handed to to_csv(); without it, text
#mode on Windows translates the line endings a second time and inserts blank rows.
output_csv_file = 'LitBands61_100200300_Output.csv'
with open(output_csv_file, mode='w', newline='') as f:
    hist_df.to_csv(f)
    f.write(os.linesep)  #blank separator row, same line ending that to_csv uses by default
    cm_df.to_csv(f, index=False, header=False)