# Library

In [None]:
# Numpy, pandas, time
import numpy as np
import pandas as pd
import time

# Iterative Stratification untuk cross validation multilabel
from skmultilearn.model_selection import IterativeStratification

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

#Import Tensorflow dan extension
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.callbacks import  EarlyStopping
from tensorflow.keras import backend as K
from tensorflow.keras.backend import sigmoid
from tensorflow.keras.optimizers import Adam, Nadam, Adagrad, SGD, RMSprop, Adadelta

#Import keras tuner dan metrics untuk tuning parameter
import kerastuner as kt
from kerastuner.tuners import RandomSearch, BayesianOptimization, Sklearn
from sklearn import metrics
import tensorflow_addons as tfa

# Function

In [None]:
#Modifikasi IterativeStratification agar hasil random data tetap sama

def new_init(self, n_splits=3, order=1, sample_distribution_per_fold = None, random_state=None):
    """Replacement ``__init__`` for ``IterativeStratification``.

    Shuffling is switched on exactly when a ``random_state`` is supplied.
    When ``sample_distribution_per_fold`` is falsy, every fold is given the
    same share of ``1 / n_splits``.
    """
    self.order = order

    # Two-argument super() is required: this function lives outside the
    # class body, so the zero-argument form has no class cell to use.
    super(IterativeStratification, self).__init__(
        n_splits,
        shuffle=random_state is not None,
        random_state=random_state,
    )

    # self.n_splits is read only after the parent initialiser has run.
    if not sample_distribution_per_fold:
        fold_count = self.n_splits
        self.percentage_per_fold = [1 / float(fold_count)] * fold_count
    else:
        self.percentage_per_fold = sample_distribution_per_fold
    
IterativeStratification.__init__ = new_init
# cv = IterativeStratification(n_splits=5, random_state = 123)

In [None]:
#Fungsi model SAE
def sae_model(xt, xv= None, EPOCHS= 100,BATCH_SIZE= 32, opt= "adam",
              hl_node= 1024, lr= 0.01, af= "relu", num_layers= 3, do= 0.5, fr_node= 0.5,
              verbose = 0,return_fe = False):
  """Train a stack of single-hidden-layer autoencoders, one layer at a time.

  Every round builds ``Input -> Dropout(do) -> Dense(encoder, af) ->
  Dense(decoder, linear)``, fits it with mean-squared-error loss to
  reconstruct its own input, and then replaces ``xt``/``xv`` with the encoder
  output so that the next round is trained on the encoded features.

  Args:
    xt: Training matrix; ``xt.shape[1]`` sets the input width of the first layer.
    xv: Validation matrix. When ``None`` a copy of ``xt`` is used instead.
    EPOCHS: Maximum epochs per autoencoder (early stopping may end sooner).
    BATCH_SIZE: Batch size passed to ``fit``.
    opt: Optimizer identifier handed to ``tf.keras.optimizers.get``.
    hl_node: Width of the first encoder layer.
    lr: Learning rate written onto the optimizer.
    af: Activation of the encoder layers.
    num_layers: Number of autoencoders to stack.
    do: Dropout rate applied to the input of each autoencoder.
    fr_node: Shrink factor; encoder ``k`` has ``int(hl_node * fr_node**k)`` units.
    verbose: Verbosity passed to ``fit``/``EarlyStopping``; when truthy a line
      is printed after each layer.
    return_fe: When true, also return the final encoding of ``xv``.

  Returns:
    A list holding the ``get_weights()`` result of each encoder layer, in
    stacking order; or ``(that_list, encoded_xv)`` when ``return_fe`` is true.
  """
  w_ae = []
  # Without a validation set, monitor reconstruction on the training data.
  if xv is None :
    xv = xt.copy()

  for n_layers in range(num_layers):
    # Resolve the optimizer once per autoencoder. For a string/config spec
    # this yields a fresh instance, so the independently trained models do
    # not share optimizer state (step counter, slot variables). An optimizer
    # *instance* passed by the caller is returned unchanged by `get` and is
    # therefore still reused, as before.
    layer_opt = tf.keras.optimizers.get(opt)
    K.set_value(layer_opt.learning_rate, lr)

    # Autoencoder for the current representation of the data.
    inp = Input(shape=(xt.shape[1],))
    hidden_layer = Dropout(do)(inp)
    # Keep a handle on the encoder layer so its weights can be read directly
    # instead of searching the model's layers by name.
    encoder_layer = Dense(int(hl_node*(fr_node**n_layers)), activation = af)
    enc = encoder_layer(hidden_layer)
    dec = Dense(xt.shape[1],activation="linear")(enc)
    ae = Model(inp, dec)
    ae.compile(optimizer=layer_opt, loss='mean_squared_error')

    # Stop once the validation loss has not improved for 15 epochs.
    es = EarlyStopping(monitor='val_loss', patience=15, verbose=verbose)
    ae.fit(xt, xt,
           epochs=EPOCHS,batch_size=BATCH_SIZE,
           shuffle=True, callbacks = [es] , verbose = verbose,
           validation_data = (xv,xv))

    # Encoder-only model; its output becomes the next layer's input.
    fe = Model(ae.input, enc)
    xt = fe.predict(xt)
    xv = fe.predict(xv)

    w_ae.append(encoder_layer.get_weights())
    if verbose:
      print("Layer {} trained".format(n_layers+1))

  return (w_ae,xv) if return_fe else w_ae

In [None]:
#Fungsi DNN

# kelas output 7
# n_outputs = 7

def dnn_model(xt, n_outputs= 7, sae_weights= None, EPOCHS= 100,BATCH_SIZE= 32, opt= "adam",
              hl_node= 1024, lr= 0.01, af= "relu", num_layers= 3, do= 0.5, fr_node= 0.5):
  """Build and compile the multi-label feed-forward classifier.

  Architecture: ``Input -> BatchNorm -> Dropout`` followed by ``num_layers``
  blocks of ``Dense(af) -> BatchNorm -> Dropout`` and a sigmoid output layer
  with ``n_outputs`` units. The model is compiled with binary cross-entropy
  and is returned untrained.

  Args:
    xt: Training matrix; only ``xt.shape[1]`` is used (input width).
    n_outputs: Number of sigmoid output units.
    sae_weights: Optional list of per-layer weights as returned by
      ``sae_model``; entry ``k`` is loaded into hidden Dense layer ``k``.
    EPOCHS, BATCH_SIZE: Not used by this function; accepted so callers can
      pass the same keyword set they pass to ``sae_model``.
    opt: Optimizer identifier handed to ``tf.keras.optimizers.get``.
    hl_node: Width of the first hidden layer.
    lr: Learning rate written onto the optimizer.
    af: Activation of the hidden Dense layers.
    num_layers: Number of hidden Dense blocks.
    do: Dropout rate used after every batch normalisation.
    fr_node: Shrink factor; hidden layer ``k`` has ``int(hl_node * fr_node**k)`` units.

  Returns:
    The compiled Keras ``Model``.
  """
  opt = tf.keras.optimizers.get(opt)
  K.set_value(opt.learning_rate, lr)

  input_layer = Input(shape=(xt.shape[1],))
  hidden_layer = BatchNormalization()(input_layer)
  hidden_layer = Dropout(do)(hidden_layer)

  # Keep references to the hidden Dense layers so the pre-trained weights
  # are matched by position, not by searching for "dense" in layer names.
  hidden_dense_layers = []
  for n_layers in range(num_layers):
    dense = Dense(int(hl_node*(fr_node**n_layers)), activation = af)
    hidden_layer = dense(hidden_layer)
    hidden_layer = BatchNormalization()(hidden_layer)
    hidden_layer = Dropout(do)(hidden_layer)
    hidden_dense_layers.append(dense)
  output_layer = Dense(n_outputs, activation = 'sigmoid')(hidden_layer)

  dnn = Model(input_layer, output_layer)

  # Initialise the hidden layers from the autoencoder weights when given.
  # Only hidden layers are candidates; the output layer is never overwritten.
  if sae_weights is not None:
    for weight_from, weight_to in zip(sae_weights, hidden_dense_layers):
      weight_to.set_weights(weight_from)

  # Name the metrics explicitly: unnamed metric objects can be auto-renamed
  # (e.g. "recall_1") when several models are built in one session, which
  # would break callbacks/objectives that monitor "val_recall".
  dnn.compile(optimizer=opt, loss='binary_crossentropy',
              metrics = [tf.keras.metrics.BinaryAccuracy(name="binary_accuracy"),
                         tf.keras.metrics.Precision(name="precision"),
                         tf.keras.metrics.Recall(name="recall")],
              )
  return dnn

In [None]:
#Fungsi model untuk tuning
def build_model(hp):
    """Model-building function handed to the Keras Tuner.

    Draws one value per hyperparameter from the module-level ``para_*``
    candidate lists, pre-trains the stacked autoencoder with those settings
    and returns the compiled DNN initialised from the autoencoder weights.
    Reads the globals ``X`` and ``X_train``.
    """
    # (keyword for sae_model/dnn_model, name registered with the tuner, candidates)
    search_space = (
        ('hl_node', 'units', para_hl_node),
        ('af', 'activation', para_af),
        ('lr', 'learning_rate', para_lr),
        ('opt', 'optimizer', para_opt),
        ('num_layers', 'num_layers', para_num_layers),
        ('do', 'dropout_rate', para_do),
        ('fr_node', 'fraction_node', para_fr_node),
    )
    params = {
        keyword: hp.Choice(tuner_name, values= candidates)
        for keyword, tuner_name, candidates in search_space
    }

    # Pre-train the SAE on X, validating on X_train.
    pretrained_weights = sae_model(xt = X, xv = X_train, EPOCHS= 100, **params)
    # Build the DNN starting from the SAE weights.
    return dnn_model(X_train, sae_weights=pretrained_weights, EPOCHS= 100, **params)

# SAE-DNN & DNN FUNCTION


In [None]:
def res_sae_dnn(X, hl_node, lr, opt, num_layers, do, fr_node):
  """Cross-validate the DNN initialised from SAE pre-trained weights.

  The autoencoder stack is trained once on the full ``X``; then, for every
  fold produced by the global splitter ``cv`` on ``(X, Y)``, a fresh DNN is
  built from those weights, fitted for 100 epochs and scored on the held-out
  fold. Progress and per-fold scores are printed.

  NOTE(review): the SAE sees all rows of ``X``, including rows that later
  serve as test folds — confirm this is intended.

  Args:
    X: Feature table; indexed with ``.iloc``.
    hl_node, lr, opt, num_layers, do, fr_node: Forwarded to ``sae_model`` and
      ``dnn_model`` (the activation is fixed to ``"relu"``).

  Returns:
    ``[accuracies, f1s, precisions, recalls, last_model, seconds]`` where the
    first four are per-fold lists, ``last_model`` is the model fitted on the
    final fold (``None`` if the splitter yields no folds) and ``seconds`` is
    the wall-clock time of the cross-validation loop.
  """
  acc_results_tuned = []
  f1_results_tuned = []
  prec_results_tuned = []
  rec_results_tuned = []

  print("finding sae weights....")
  ti0 = time.time()
  sae_weights_tuned = sae_model(xt = X, hl_node = hl_node, af = "relu", lr = lr, opt= opt,
                                num_layers = num_layers, do = do, fr_node= fr_node)
  ti1 = time.time()
  print('done, processing time:', ti1-ti0)

  t0 = time.time()
  np.random.seed(123)

  model_tuned = None
  for i, (train_ix, test_ix) in enumerate(cv.split(X,Y)):
    X_train, X_test = X.iloc[train_ix,:], X.iloc[test_ix,:]
    y_train, y_test = Y[train_ix], Y[test_ix]

    # New model per fold, always starting from the same SAE weights.
    model_tuned = dnn_model(xt = X_train, sae_weights = sae_weights_tuned, hl_node = hl_node,
                            af = "relu", lr = lr, opt= opt, num_layers = num_layers,
                            do = do, fr_node= fr_node)
    model_tuned.fit(X_train, y_train, verbose=False, epochs=100)

    # Round the sigmoid outputs to 0/1 label indicators.
    yhat = model_tuned.predict(X_test).round()

    # zero_division=0 yields the same 0 score scikit-learn already falls
    # back to for undefined samples, without emitting a warning per call.
    acc = accuracy_score(y_test, yhat)
    f1 = f1_score(y_test, yhat, average='samples', zero_division=0)
    prec = precision_score(y_test, yhat, average='samples', zero_division=0)
    rec = recall_score(y_test, yhat, average='samples', zero_division=0)

    print("CV number: ", i)
    print('accuracy of :>%.3f' % acc)
    print('F1 of :>%.3f' % f1)
    print('Precision of :>%.3f' % prec)
    print('Recall of :>%.3f' % rec)
    acc_results_tuned.append(acc)
    f1_results_tuned.append(f1)
    prec_results_tuned.append(prec)
    rec_results_tuned.append(rec)

  t1 = time.time()
  total_waktu = t1-t0

  print("waktu proses: ", total_waktu)
  print("Accuracy array:", acc_results_tuned)
  print("F1 array:", f1_results_tuned)
  print("Precision array:", prec_results_tuned)
  print("Recall array:", rec_results_tuned)

  return [acc_results_tuned, f1_results_tuned, prec_results_tuned, rec_results_tuned, model_tuned, total_waktu]

In [None]:
def dnn_saja(X, hl_node, lr, opt, num_layers, do, fr_node):
  """Cross-validate the plain DNN (no SAE pre-training).

  For every fold produced by the global splitter ``cv`` on ``(X, Y)``, a
  fresh DNN with default weight initialisation is built, fitted for 100
  epochs and scored on the held-out fold. Per-fold scores are printed.

  Args:
    X: Feature table; indexed with ``.iloc``.
    hl_node, lr, opt, num_layers, do, fr_node: Forwarded to ``dnn_model``
      (the activation is fixed to ``"relu"``).

  Returns:
    ``[accuracies, f1s, precisions, recalls, last_model, seconds]`` where the
    first four are per-fold lists, ``last_model`` is the model fitted on the
    final fold (``None`` if the splitter yields no folds) and ``seconds`` is
    the wall-clock time of the cross-validation loop.
  """
  acc_results2 = []
  f1_results2 = []
  prec_results2 = []
  rec_results2 = []

  t0 = time.time()

  model = None
  for i, (train_ix, test_ix) in enumerate(cv.split(X,Y)):
    X_train, X_test = X.iloc[train_ix,:], X.iloc[test_ix,:]
    y_train, y_test = Y[train_ix], Y[test_ix]

    # New model per fold, without SAE weights.
    model = dnn_model(xt= X_train, sae_weights= None, hl_node= hl_node, af= "relu", lr= lr,
                      opt= opt, num_layers= num_layers, do= do, fr_node= fr_node)
    model.fit(X_train, y_train, verbose=False, epochs=100)

    # Round the sigmoid outputs to 0/1 label indicators.
    yhat = model.predict(X_test).round()

    # zero_division=0 yields the same 0 score scikit-learn already falls
    # back to for undefined samples, without emitting a warning per call.
    acc = accuracy_score(y_test, yhat)
    f1 = f1_score(y_test, yhat, average='samples', zero_division=0)
    prec = precision_score(y_test, yhat, average='samples', zero_division=0)
    rec = recall_score(y_test, yhat, average='samples', zero_division=0)

    print("CV number: ", i)
    print('accuracy of :>%.3f' % acc)
    print('F1 of :>%.3f' % f1)
    print('Precision of :>%.3f' % prec)
    print('Recall of :>%.3f' % rec)
    acc_results2.append(acc)
    f1_results2.append(f1)
    prec_results2.append(prec)
    rec_results2.append(rec)

  t1 = time.time()
  total_waktu = t1-t0
  print("waktu proses", total_waktu)
  print("Accuracy array:", acc_results2)
  print("F1 array:", f1_results2)
  print("Precision array:", prec_results2)
  print("Recall array:", rec_results2)

  return [acc_results2, f1_results2, prec_results2, rec_results2, model, total_waktu]

        

# MAIN

In [None]:
# Mount Google Drive at /content/drive; the CSV files read below live under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the three feature tables (named pubchem / maccs / klekota after their files).
df_pubchem = pd.read_csv('/content/drive/MyDrive/skripsi/df_pubchem_rapi.csv')
df_maccs = pd.read_csv('/content/drive/MyDrive/skripsi/df_maccs_rapi.csv')
df_klekota = pd.read_csv('/content/drive/MyDrive/skripsi/df_klekota_rapi.csv')

# Feature matrices: every column except `CID_senyawa`
# (presumably the compound identifier - verify against the data).
X_pubchem = df_pubchem.drop(columns=['CID_senyawa'])
X_maccs = df_maccs.drop(columns=['CID_senyawa'])
X_klekota = df_klekota.drop(columns=['CID_senyawa'])

# Label table, converted to a NumPy array so it can be indexed by fold indices.
Y = np.array(pd.read_csv('/content/drive/MyDrive/skripsi/kelas_data.csv'))

In [None]:
# Define the evaluation procedure: seed NumPy's global RNG first.
np.random.seed(123)
# Initialise the cross-validation splitter (5 folds, fixed random_state);
# res_sae_dnn and dnn_saja read this global `cv`.
cv = IterativeStratification(n_splits=5, random_state = 123)

In [None]:
# Select X_maccs as the active feature matrix; build_model reads this global `X`.
X = X_maccs

In [None]:
# Candidate values offered to the tuner; build_model reads these globals.
para_hl_node = [320, 640, 1280, 1500, 1600, 1700, 1800]  # first hidden-layer width
para_af = ["relu"]                                       # activation
para_lr = list(np.linspace(0.01, 0.1))                   # evenly spaced learning rates
para_opt = ["adam", "adagrad"]                           # optimizer identifiers
para_num_layers = [2, 3, 4, 5]                           # number of hidden layers
para_do = [0.5, 0.6, 0.7, 0.8]                           # dropout rate
para_fr_node = [0.5, 0.66, 0.75]                         # layer-to-layer shrink factor

In [None]:
# Hold out 20% of the rows; the tuner scores every trial on this hold-out.
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.2)
# Hyperparameter tuning.
# mode='max' is set explicitly: recall should be maximised, and with the
# default mode='auto' tf.keras 2.x only infers "max" for monitor names ending
# in acc/accuracy/auc, so 'val_recall' would be treated as a quantity to minimise.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_recall', mode='max', patience = 25)
# Bayesian optimisation search from Keras Tuner.
tuner = BayesianOptimization(build_model,
    # Metric to optimise
    objective= kt.Objective("val_recall", direction="max"),
    # Number of trials
    max_trials=20,
    executions_per_trial=2,
    # Folder where the tuning results are stored
    directory='/home/fadilrisdian/skripsi-fadil',
    project_name='sae_dnn_tuning_bayesFiltered40k', overwrite = True)
# Run the Keras Tuner search
tuner.search(X_train, y_train, epochs=100, validation_data=(X_test, y_test),callbacks=[stop_early])
# Show the best results
tuner.results_summary()

Trial 20 Complete [00h 02m 56s]
val_recall: 0.8700819611549377

Best val_recall So Far: 0.9409835934638977
Total elapsed time: 01h 12m 43s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in /home/fadilrisdian/skripsi-fadil/sae_dnn_tuning_bayesFiltered40k
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7f5a00661610>
Trial summary
Hyperparameters:
units: 1280
activation: relu
learning_rate: 0.1
optimizer: adam
num_layers: 2
dropout_rate: 0.5
fraction_node: 0.66
Score: 0.9409835934638977
Trial summary
Hyperparameters:
units: 1280
activation: relu
learning_rate: 0.1
optimizer: adam
num_layers: 2
dropout_rate: 0.5
fraction_node: 0.5
Score: 0.9000000059604645
Trial summary
Hyperparameters:
units: 1280
activation: relu
learning_rate: 0.1
optimizer: adam
num_layers: 2
dropout_rate: 0.5
fraction_node: 0.66
Score: 0.8913934230804443
Trial summary
Hyperparameters:
units: 1280
activation: relu
learning_rate: 0.1
optimizer: adam
num_layers: 2
dropout_rate: 0

In [None]:
# Hyperparameters used for the cross-validation runs below.
# NOTE(review): the tuner summary captured above reports units: 1280 for its
# best trials, while hl_node is set to 1800 here - confirm this is intended.
X = X_maccs
hl_node = 1800
fr_node = 0.66
num_layers = 2
opt = "adam"
lr = 0.1
do = 0.5


In [None]:
# One cross-validated run of each variant with identical hyperparameters:
# SAE-initialised DNN first, then the DNN without pre-training.
hasil_saednn = res_sae_dnn(X, hl_node, lr, opt, num_layers, do, fr_node)
dnn_aja = dnn_saja(X, hl_node, lr, opt, num_layers, do, fr_node)

finding sae weights....
done, processing time: 41.210047245025635


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.704
F1 of :>0.707
Precision of :>0.709
Recall of :>0.707


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.839
F1 of :>0.860
Precision of :>0.872
Recall of :>0.858


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.842
F1 of :>0.857
Precision of :>0.870
Recall of :>0.853
CV number:  3
accuracy of :>0.689
F1 of :>0.847
Precision of :>0.819
Recall of :>0.917
CV number:  4
accuracy of :>0.812
F1 of :>0.883
Precision of :>0.878
Recall of :>0.908
waktu proses:  371.9011871814728
Accuracy array: [0.7037037037037037, 0.8390501319261213, 0.8415929203539823, 0.6892857142857143, 0.812223206377325]
F1 array: [0.7071428571428572, 0.8597646270469489, 0.8571849978929624, 0.8465745464852606, 0.883447214138091]
Precision array: [0.708994708994709, 0.8720316622691293, 0.8699115044247787, 0.8186011904761904, 0.8782108060230293]
Recall array: [0.7070399764844209, 0.8577543242450894, 0.8526401179941003, 0.9173086734693876, 0.9076616474756422]
CV number:  0
accuracy of :>0.828
F1 of :>0.891
Precision of :>0.891
Recall of :>0.913
CV number:  1
accuracy of :>0.771
F1 of :>0.851
Precision of :>0.838
Recall of :>0.890
CV number:  2
accuracy of :>0.795
F1 of :>0.884
Precision of :>0.871
Recal

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:

# Report mean and np.std of the per-fold scores for both variants.
rule = '==================================='

print(rule)
print(rule)
print('SAE-DNN TUNED PERFORMANCE')
# Entries 0-3 of the result list are the per-fold accuracy, F1, precision and recall.
sae_acc, sae_f1, sae_prec, sae_rec = hasil_saednn[:4]
print('Accuracy    : {0:.5f}±{1:.3f}'.format(np.mean(sae_acc), np.std(sae_acc)))
print('F1 Score    : {0:.5f}±{1:.3f}'.format(np.mean(sae_f1), np.std(sae_f1)))
print('Precision   : {0:.5f}±{1:.3f}'.format(np.mean(sae_prec), np.std(sae_prec)))
print('Recall      : {0:.5f}±{1:.3f}'.format(np.mean(sae_rec), np.std(sae_rec)))

print(rule)
print(rule)
print('DNN ONLY PERFORMANCE')
dnn_acc, dnn_f1, dnn_prec, dnn_rec = dnn_aja[:4]
print('Accuracy    : {0:.5f}±{1:.3f}'.format(np.mean(dnn_acc), np.std(dnn_acc)))
print('F1 Score    : {0:.5f}±{1:.3f}'.format(np.mean(dnn_f1), np.std(dnn_f1)))
print('Precision   : {0:.5f}±{1:.3f}'.format(np.mean(dnn_prec), np.std(dnn_prec)))
print('Recall       : {0:.5f}±{1:.3f}'.format(np.mean(dnn_rec), np.std(dnn_rec)))


SAE-DNN TUNED PERFORMANCE
Accuracy    : 0.77717±0.067
F1 Score    : 0.83082±0.063
Precision   : 0.82955±0.064
Recall      : 0.84848±0.075
DNN ONLY PERFORMANCE
Accuracy    : 0.80166±0.028
F1 Score    : 0.85476±0.037
Precision   : 0.85373±0.036
Recall       : 0.87507±0.049


In [None]:
# Index 4 of each result list is the model object returned by the run
# (the one fitted on the last cross-validation fold).
sae_dnn_maccs_tuned = hasil_saednn[4]
dnn_maccs_tuned = dnn_aja[4]

# Save both models in HDF5 format in the current working directory.
sae_dnn_maccs_tuned.save("sae_dnn_maccs_tuned.h5")
dnn_maccs_tuned.save("dnn_maccs_tuned.h5")

In [None]:
# Collectors for the repeated runs: one entry (a per-fold score list) per run.
# SD_* receive the SAE-DNN results, S_* the DNN-only results.
SD_ACC, SD_F1, SD_PRE, SD_RE = [], [], [], []
S_ACC, S_F1, S_PRE, S_RE = [], [], [], []

In [None]:
# Repeat the full SAE-DNN vs. DNN comparison 8 times and keep every run's
# per-fold accuracy / F1 / precision / recall lists.
t_awal = time.time()
for i in range(8):
  hasil_saednn = res_sae_dnn(X, hl_node, lr, opt, num_layers, do, fr_node)
  dnn_aja = dnn_saja(X, hl_node, lr, opt, num_layers, do, fr_node)

  # zip stops after the four collectors, so only result entries 0-3 are stored.
  for collector, fold_scores in zip((SD_ACC, SD_F1, SD_PRE, SD_RE), hasil_saednn):
    collector.append(fold_scores)
  for collector, fold_scores in zip((S_ACC, S_F1, S_PRE, S_RE), dnn_aja):
    collector.append(fold_scores)

t_akhir = time.time()
total_waktu = t_akhir - t_awal
print("total waktu adalah" + str(total_waktu))

finding sae weights....
done, processing time: 88.39249634742737


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.434
F1 of :>0.751
Precision of :>0.680
Recall of :>0.901


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.674
F1 of :>0.809
Precision of :>0.776
Recall of :>0.910


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.846
F1 of :>0.876
Precision of :>0.885
Recall of :>0.882


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.741
F1 of :>0.752
Precision of :>0.756
Recall of :>0.755


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.815
F1 of :>0.854
Precision of :>0.856
Recall of :>0.868
waktu proses:  373.58583092689514
Accuracy array: [0.43386243386243384, 0.673702726473175, 0.8460176991150442, 0.7410714285714286, 0.8148804251550045]
F1 array: [0.7505416981607458, 0.8093011405676312, 0.8756300042140751, 0.7521230158730159, 0.8539077987262221]
Precision array: [0.6796296296296297, 0.775784227499267, 0.8847935103244837, 0.7563392857142858, 0.8558754059639799]
Recall array: [0.9013815402704293, 0.9096892406918792, 0.8820206489675517, 0.7554464285714286, 0.8683348095659876]


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.857
F1 of :>0.880
Precision of :>0.891
Recall of :>0.882


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.811
F1 of :>0.826
Precision of :>0.828
Recall of :>0.836


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.821
F1 of :>0.861
Precision of :>0.862
Recall of :>0.881


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.829
F1 of :>0.854
Precision of :>0.858
Recall of :>0.863


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.772
F1 of :>0.791
Precision of :>0.791
Recall of :>0.803
waktu proses 356.7619755268097
Accuracy array: [0.8571428571428571, 0.8109318996415771, 0.8213660245183888, 0.8288770053475936, 0.7716535433070866]
F1 array: [0.8799721130688299, 0.826137139443591, 0.8610249353681929, 0.8543969102792632, 0.790851143607049]
Precision array: [0.8914374445430345, 0.82807859703021, 0.8622300058377117, 0.8579322638146166, 0.7911490230387868]
Recall array: [0.8824460218870157, 0.8359468339307049, 0.8810128429655575, 0.8626559714795009, 0.8031787693205016]
finding sae weights....
done, processing time: 55.448458194732666


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.828
F1 of :>0.882
Precision of :>0.880
Recall of :>0.905


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.776
F1 of :>0.818
Precision of :>0.814
Recall of :>0.837


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.757
F1 of :>0.776
Precision of :>0.778
Recall of :>0.784


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.787
F1 of :>0.802
Precision of :>0.808
Recall of :>0.806


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.705
F1 of :>0.829
Precision of :>0.808
Recall of :>0.883
waktu proses:  356.4778447151184
Accuracy array: [0.828042328042328, 0.7757255936675461, 0.7566371681415929, 0.7866071428571428, 0.7050487156775908]
F1 array: [0.8822373393801964, 0.8177451103572475, 0.7763492063492063, 0.8020195578231293, 0.829140832595217]
Precision array: [0.8795414462081128, 0.8143066549399002, 0.7782153392330383, 0.8079761904761905, 0.8077945084145262]
Recall array: [0.9048206937095825, 0.8374963353855175, 0.7844690265486726, 0.8060416666666667, 0.8831118984351933]


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.846
F1 of :>0.870
Precision of :>0.881
Recall of :>0.877


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.744
F1 of :>0.792
Precision of :>0.789
Recall of :>0.817


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.675
F1 of :>0.816
Precision of :>0.786
Recall of :>0.885


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.848
F1 of :>0.887
Precision of :>0.895
Recall of :>0.895
CV number:  4
accuracy of :>0.831
F1 of :>0.862
Precision of :>0.869
Recall of :>0.873
waktu proses 337.66386461257935
Accuracy array: [0.8456078083407276, 0.7437275985663082, 0.6751313485113836, 0.8484848484848485, 0.8311461067366579]
F1 array: [0.8699729581273503, 0.7918757467144564, 0.8162663664414977, 0.8867901706137, 0.8624380285797608]
Precision array: [0.8807833692483205, 0.7887246117084827, 0.7864711033274956, 0.8952614379084968, 0.8690455359746698]
Recall array: [0.8771221532091097, 0.8172341696535245, 0.884748978400467, 0.8954481792717086, 0.8726013414989793]
finding sae weights....
done, processing time: 64.54549169540405


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.766
F1 of :>0.792
Precision of :>0.791
Recall of :>0.806


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.741
F1 of :>0.844
Precision of :>0.826
Recall of :>0.893


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.852
F1 of :>0.883
Precision of :>0.892
Recall of :>0.891


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.856
F1 of :>0.890
Precision of :>0.898
Recall of :>0.897


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.843
F1 of :>0.872
Precision of :>0.883
Recall of :>0.877
waktu proses:  340.03900480270386
Accuracy array: [0.7663139329805997, 0.7405452946350044, 0.852212389380531, 0.85625, 0.8432240921169176]
F1 array: [0.7918850256151844, 0.8440361016878167, 0.8829667088074168, 0.890376275510204, 0.8720612425661141]
Precision array: [0.7905496766607878, 0.8262470159567785, 0.8918289085545723, 0.8984970238095239, 0.8834071449660466]
Recall array: [0.8062316284538507, 0.8931398416886543, 0.8914011799410029, 0.8974638605442177, 0.8771183938588721]


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.783
F1 of :>0.798
Precision of :>0.803
Recall of :>0.802


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.830
F1 of :>0.853
Precision of :>0.861
Recall of :>0.863


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.827
F1 of :>0.854
Precision of :>0.858
Recall of :>0.868


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.779
F1 of :>0.810
Precision of :>0.810
Recall of :>0.821


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.821
F1 of :>0.848
Precision of :>0.852
Recall of :>0.858
waktu proses 342.50080370903015
Accuracy array: [0.7834960070984915, 0.8297491039426523, 0.8266199649737302, 0.7789661319073083, 0.820647419072616]
F1 array: [0.7979732679821411, 0.8533851055356432, 0.8542281711283463, 0.809899414311179, 0.8478148564762739]
Precision array: [0.8032091097308488, 0.8614695340501792, 0.8578371278458843, 0.809863339275104, 0.8515456401283172]
Recall array: [0.802159124519373, 0.8631571087216249, 0.8681260945709282, 0.8213988625753331, 0.8579615048118985]
finding sae weights....
done, processing time: 67.71778225898743


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.767
F1 of :>0.856
Precision of :>0.843
Recall of :>0.894


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.840
F1 of :>0.866
Precision of :>0.873
Recall of :>0.872


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.842
F1 of :>0.866
Precision of :>0.878
Recall of :>0.870
CV number:  3
accuracy of :>0.838
F1 of :>0.878
Precision of :>0.884
Recall of :>0.891


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.846
F1 of :>0.870
Precision of :>0.882
Recall of :>0.875
waktu proses:  358.9281961917877
Accuracy array: [0.7671957671957672, 0.8399296394019349, 0.8424778761061947, 0.8383928571428572, 0.845881310894597]
F1 array: [0.8562778197698833, 0.8657955354525276, 0.8664201432785502, 0.8783985260770975, 0.8703635750137079]
Precision array: [0.8432539682539683, 0.8733216065669891, 0.8775811209439528, 0.8838988095238095, 0.881724239740183]
Recall array: [0.894429747207525, 0.8717824684843155, 0.8700442477876106, 0.8907461734693877, 0.8753469146737526]
CV number:  0
accuracy of :>0.834
F1 of :>0.876
Precision of :>0.883
Recall of :>0.891


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.863
F1 of :>0.885
Precision of :>0.894
Recall of :>0.891
CV number:  2
accuracy of :>0.820
F1 of :>0.865
Precision of :>0.869
Recall of :>0.888


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.814
F1 of :>0.827
Precision of :>0.831
Recall of :>0.832


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.795
F1 of :>0.829
Precision of :>0.831
Recall of :>0.845
waktu proses 374.1422555446625
Accuracy array: [0.834072759538598, 0.8629032258064516, 0.8204903677758318, 0.8137254901960784, 0.7952755905511811]
F1 array: [0.8762982211518148, 0.885347328895716, 0.8654428321240931, 0.8270944741532977, 0.8286339207599049]
Precision array: [0.8827713694173323, 0.8939217443249702, 0.8689954966224668, 0.8305407011289364, 0.8307669874599007]
Recall array: [0.8908015380065069, 0.8911589008363202, 0.8876970227670753, 0.8322786690433749, 0.8453047535724701]
finding sae weights....
done, processing time: 65.6452374458313


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.738
F1 of :>0.749
Precision of :>0.752
Recall of :>0.755


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.847
F1 of :>0.875
Precision of :>0.882
Recall of :>0.884


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.852
F1 of :>0.878
Precision of :>0.885
Recall of :>0.886


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.860
F1 of :>0.878
Precision of :>0.890
Recall of :>0.879


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.278
F1 of :>0.299
Precision of :>0.300
Recall of :>0.308
waktu proses:  343.64476919174194
Accuracy array: [0.7380952380952381, 0.8469656992084432, 0.852212389380531, 0.8598214285714286, 0.27812223206377323]
F1 array: [0.7493743176282859, 0.8752942161913138, 0.8780200870908835, 0.877999574829932, 0.29933569530558013]
Precision array: [0.75163139329806, 0.8822925828202873, 0.8852886641382216, 0.8899553571428571, 0.3001180986123413]
Recall array: [0.754673721340388, 0.8838463793608913, 0.8855752212389381, 0.8789370748299319, 0.30776498376144085]


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.744
F1 of :>0.847
Precision of :>0.828
Recall of :>0.900


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.757
F1 of :>0.780
Precision of :>0.784
Recall of :>0.793


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.822
F1 of :>0.843
Precision of :>0.849
Recall of :>0.847


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.846
F1 of :>0.880
Precision of :>0.890
Recall of :>0.889


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.844
F1 of :>0.866
Precision of :>0.873
Recall of :>0.870
waktu proses 340.20745635032654
Accuracy array: [0.743566992014197, 0.757168458781362, 0.8222416812609457, 0.8458110516934046, 0.8442694663167104]
F1 array: [0.8471345474007408, 0.7802540251464983, 0.8426674172295889, 0.879860934272699, 0.8655334749822938]
Precision array: [0.8279207335107955, 0.783621351766513, 0.8492556917688266, 0.8903446226975639, 0.8730387868183143]
Recall array: [0.8997338065661047, 0.793384109916368, 0.8469497956800934, 0.8892687377981495, 0.8698600174978127]
finding sae weights....
done, processing time: 63.458539962768555


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.841
F1 of :>0.868
Precision of :>0.878
Recall of :>0.872
CV number:  1
accuracy of :>0.771
F1 of :>0.840
Precision of :>0.830
Recall of :>0.872


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.760
F1 of :>0.800
Precision of :>0.797
Recall of :>0.816
CV number:  3
accuracy of :>0.868
F1 of :>0.894
Precision of :>0.906
Recall of :>0.896


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.828
F1 of :>0.859
Precision of :>0.863
Recall of :>0.864
waktu proses:  342.18513798713684
Accuracy array: [0.8412698412698413, 0.7713280562884784, 0.7601769911504425, 0.8678571428571429, 0.8281665190434012]
F1 array: [0.867909632988998, 0.8399798969719814, 0.7996642786908273, 0.8935735544217687, 0.8588257623687207]
Precision array: [0.8777042915931805, 0.8299032541776604, 0.7969026548672566, 0.90625, 0.8634484794803661]
Recall array: [0.8715167548500882, 0.8723834652594546, 0.8162536873156342, 0.8956781462585034, 0.8642308827871272]


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.851
F1 of :>0.871
Precision of :>0.883
Recall of :>0.872


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.777
F1 of :>0.796
Precision of :>0.801
Recall of :>0.802


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.795
F1 of :>0.839
Precision of :>0.837
Recall of :>0.854


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.832
F1 of :>0.862
Precision of :>0.868
Recall of :>0.871


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.774
F1 of :>0.844
Precision of :>0.835
Recall of :>0.875
waktu proses 340.19295358657837
Accuracy array: [0.8509316770186336, 0.7768817204301075, 0.7950963222416813, 0.8315508021390374, 0.7742782152230971]
F1 array: [0.8713482908691427, 0.7956007851169141, 0.8385288966725044, 0.861531279178338, 0.8440986543348749]
Precision array: [0.882860100561964, 0.8011947431302271, 0.8370548744892002, 0.8678698752228164, 0.8349810440361621]
Recall array: [0.8724637681159421, 0.8018219832735962, 0.8540280210157618, 0.8711824123588829, 0.8749781277340333]
finding sae weights....
done, processing time: 65.66404271125793


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.764
F1 of :>0.781
Precision of :>0.781
Recall of :>0.797


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.813
F1 of :>0.865
Precision of :>0.862
Recall of :>0.886


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.851
F1 of :>0.877
Precision of :>0.886
Recall of :>0.881


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.870
F1 of :>0.891
Precision of :>0.902
Recall of :>0.894


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.848
F1 of :>0.872
Precision of :>0.885
Recall of :>0.874
waktu proses:  356.5378873348236
Accuracy array: [0.763668430335097, 0.8126649076517151, 0.8513274336283185, 0.8696428571428572, 0.8476527900797166]
F1 array: [0.7812225301643291, 0.8645809775097374, 0.8771344289928361, 0.8911429988662132, 0.871941035049981]
Precision array: [0.780912488452171, 0.8621811785400175, 0.8864159292035397, 0.9019515306122449, 0.8850014762326543]
Recall array: [0.7973838918283362, 0.8862943418352389, 0.8813716814159293, 0.8940858843537415, 0.8741659285503396]


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.834
F1 of :>0.863
Precision of :>0.871
Recall of :>0.869


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.849
F1 of :>0.875
Precision of :>0.881
Recall of :>0.888


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.850
F1 of :>0.869
Precision of :>0.877
Recall of :>0.872


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.766
F1 of :>0.819
Precision of :>0.815
Recall of :>0.844


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.821
F1 of :>0.868
Precision of :>0.866
Recall of :>0.890
waktu proses 342.90056920051575
Accuracy array: [0.834072759538598, 0.8485663082437276, 0.850262697022767, 0.7655971479500892, 0.820647419072616]
F1 array: [0.8628892550809144, 0.8748783922171018, 0.869118505545826, 0.8190617661205896, 0.8680789901262342]
Precision array: [0.871413782904466, 0.881400409626216, 0.87689725627554, 0.8151069518716577, 0.8658292713410823]
Recall array: [0.8687666370896183, 0.8881720430107527, 0.872343841214244, 0.8440009337068161, 0.8902887139107611]
finding sae weights....
done, processing time: 59.11115860939026


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  0
accuracy of :>0.844
F1 of :>0.867
Precision of :>0.878
Recall of :>0.871


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.742
F1 of :>0.845
Precision of :>0.826
Recall of :>0.903


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.834
F1 of :>0.854
Precision of :>0.860
Recall of :>0.862


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.846
F1 of :>0.873
Precision of :>0.885
Recall of :>0.881


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  4
accuracy of :>0.778
F1 of :>0.802
Precision of :>0.809
Recall of :>0.809
waktu proses:  344.7183675765991
Accuracy array: [0.843915343915344, 0.7423043095866315, 0.8336283185840708, 0.8455357142857143, 0.7776793622674933]
F1 array: [0.8672629545645417, 0.8447397076684675, 0.8542941424357354, 0.8733949829931973, 0.801733518916867]
Precision array: [0.8779247501469724, 0.8259308120785692, 0.8595132743362832, 0.8852529761904762, 0.8091969294360791]
Recall array: [0.8706202233980012, 0.9026238639695103, 0.8615339233038348, 0.8807971938775511, 0.8085621493947446]
CV number:  0
accuracy of :>0.754
F1 of :>0.850
Precision of :>0.830
Recall of :>0.917


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  1
accuracy of :>0.805
F1 of :>0.853
Precision of :>0.855
Recall of :>0.870


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  2
accuracy of :>0.751
F1 of :>0.766
Precision of :>0.767
Recall of :>0.776


  _warn_prf(average, modifier, msg_start, len(result))


CV number:  3
accuracy of :>0.775
F1 of :>0.791
Precision of :>0.792
Recall of :>0.804
CV number:  4
accuracy of :>0.777
F1 of :>0.791
Precision of :>0.795
Recall of :>0.796
waktu proses 346.57118797302246
Accuracy array: [0.7542147293700089, 0.8046594982078853, 0.7513134851138353, 0.7745098039215687, 0.7769028871391076]
F1 array: [0.8501499978873538, 0.8531874039938556, 0.7656054540905679, 0.7906692980222392, 0.7914489855434738]
Precision array: [0.8298728186926945, 0.8545101553166068, 0.7667688266199649, 0.7922650878533232, 0.7954359871682706]
Recall array: [0.9168884945282461, 0.8698775388291518, 0.7758464681844717, 0.8039894745777099, 0.7961213181685622]
total waktu adalah6127.071844339371


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:

# Final comparison report: mean ± standard deviation of each metric,
# first for the tuned SAE-DNN results, then for the DNN-only results.
# Entries 0..3 of each results container are printed under the labels
# accuracy, F1, precision and recall respectively.
print('===================================')
print('===================================')
print('SAE-DNN TUNED PERFORMANCE')
print(f'Accuracy    : {np.mean(hasil_saednn[0]):.5f}±{np.std(hasil_saednn[0]):.3f}')
print(f'F1 Score    : {np.mean(hasil_saednn[1]):.5f}±{np.std(hasil_saednn[1]):.3f}')
print(f'Precision   : {np.mean(hasil_saednn[2]):.5f}±{np.std(hasil_saednn[2]):.3f}')
print(f'Recall      : {np.mean(hasil_saednn[3]):.5f}±{np.std(hasil_saednn[3]):.3f}')
print('===================================')
print('===================================')
print('DNN ONLY PERFORMANCE')
print(f'Accuracy    : {np.mean(dnn_aja[0]):.5f}±{np.std(dnn_aja[0]):.3f}')
print(f'F1 Score    : {np.mean(dnn_aja[1]):.5f}±{np.std(dnn_aja[1]):.3f}')
print(f'Precision   : {np.mean(dnn_aja[2]):.5f}±{np.std(dnn_aja[2]):.3f}')
# This label carries one more space than the others; kept so the printed
# text stays exactly as before.
print(f'Recall       : {np.mean(dnn_aja[3]):.5f}±{np.std(dnn_aja[3]):.3f}')


SAE-DNN TUNED PERFORMANCE
Accuracy    : 0.83898±0.019
F1 Score    : 0.87019±0.016
Precision   : 0.87679±0.018
Recall      : 0.87866±0.015
DNN ONLY PERFORMANCE
Accuracy    : 0.81722±0.029
F1 Score    : 0.84732±0.033
Precision   : 0.85317±0.036
Recall       : 0.85633±0.033


In [None]:
# Score collectors, all starting out empty: one list each for accuracy,
# F1, precision and recall, in two separate families (SD_* and S_*).
# NOTE(review): presumably one family per model variant — confirm against
# the cells that append to them.
SD_ACC, SD_F1, SD_PRE, SD_RE = [], [], [], []

S_ACC, S_F1, S_PRE, S_RE = [], [], [], []

In [None]:
import csv

In [None]:
# Four header strings; nothing in the visible part of the notebook reads
# this list.
fields = [
    'Name',
    'Branch',
    'Year',
    'CGPA',
]

In [None]:
# Scratch cell: print the integers 0 through 7, one per line.
for i in range(0, 8, 1):
    print(str(i))

0
1
2
3
4
5
6
7


In [None]:
# 10 repeated experiments: reduce the scores stored for each run to a
# mean and a standard deviation.
# NOTE(review): S_ACC[i] / S_F1[i] / S_PRE[i] / S_RE[i] are presumably the
# per-fold scores of run i — confirm against the cell that fills them.
# NOTE: `f1_score` is rebound to a list below, which hides the
# sklearn.metrics.f1_score function imported at the top of the notebook
# from every cell executed after this one.
akurasi, f1_score, precision, recall = [], [], [], []

akurasi_std, f1_score_std, precision_std, recall_std = [], [], [], []

akur_std = []  # stays empty: nothing in this cell appends to it
for i in range(10):
    # Means first, then standard deviations, in the order
    # accuracy, F1, precision, recall.
    for _out, _runs in (
        (akurasi, S_ACC),
        (f1_score, S_F1),
        (precision, S_PRE),
        (recall, S_RE),
    ):
        _out.append(np.mean(_runs[i]))

    for _out, _runs in (
        (akurasi_std, S_ACC),
        (f1_score_std, S_F1),
        (precision_std, S_PRE),
        (recall_std, S_RE),
    ):
        _out.append(np.std(_runs[i]))

# Bundle of all eight lists: the four means followed by the four stds.
akhir_saednn = [
    akurasi,
    f1_score,
    precision,
    recall,
    akurasi_std,
    f1_score_std,
    precision_std,
    recall_std,
]

In [None]:
import pandas as pd


In [None]:
# Start the summary table with a single 'Accuracy' column holding the
# values of `akurasi`, one row per entry.
df = pd.DataFrame({'Accuracy': akurasi})

In [None]:
# Append the remaining mean columns and then the standard-deviation
# columns to the summary table, in this fixed order. Here `f1_score`
# must be the list of per-run means, not the sklearn function of the
# same name.
for _column, _values in (
    ('f1_score', f1_score),
    ('precision', precision),
    ('recall', recall),
    ('akurasi_std', akurasi_std),
    ('f1_score_std', f1_score_std),
    ('precision_std', precision_std),
    ('recall_std', recall_std),
):
    df[_column] = _values

# Bare expression so the notebook renders the finished table.
df

In [None]:
# Write the summary table to disk without the row index.
# NOTE(review): the file name says "DNN" while the list bundling the same
# values is called `akhir_saednn` — confirm which model these numbers
# belong to.
df.to_csv(
    path_or_buf='Pubchem_1-7_DNN.csv',
    index=False,
)

In [None]:
# Persist both models to HDF5 (.h5) files in the working directory.
# NOTE(review): these file names say "maccs" while the CSV written by this
# notebook is named "Pubchem" — confirm the intended fingerprint label.
model_tuned.save(filepath="sae_dnn_maccs_final.h5")
model.save(filepath="dnn_maccs_final.h5")