In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import random
import math
import itertools
import pprint

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin

# Absolute root of the shared project folder; every other path below is relative.
# NOTE(review): looks like a Google Drive mount inside Colab -- confirm the drive is mounted before running.
drive_path = '/content/drive/MyDrive/Kuliah/Tugas Akhir/Final Project Shared Folder'
# Folder names for datasets and saved models (not referenced in the cells visible here).
data_path = "Dataset/Data Versioning/"
model_path = "Model/ML Model/"
# Folder (relative to drive_path) holding the merged hyperparameter-tuning logs.
hyperparameter_log_path = "Notebook/Arif's Workspace/Hyperparameter Tuning/Log/Merged"
# File names: dataset version (unused in the visible cells) and the tuning log that is loaded below.
data_version = "Trained_V2-2.csv"
hyperparameter_log = "V2.csv"
# NHANES data page URL and component names; presumably used by data-download code elsewhere -- TODO confirm.
base_url = "https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?CycleBeginYear=2017"
dataset_names = ['Demographics', 'Dietary', 'Examination', 'Laboratory', 'Questionnaire']

# Model Builder

In [2]:
class CNNModel(BaseEstimator, ClassifierMixin):
  """Keras ``Sequential`` CNN exposed through a scikit-learn style estimator.

  The network is built and compiled inside ``__init__`` and kept in
  ``self.model``.  Its layers are, in order: an ``Input`` layer, the layers
  named in ``convolution_part``, those in ``intermediate_part``, those in
  ``dense_part`` and a final ``Dense(output_shape, activation='sigmoid')``.

  Parameters
  ----------
  input_shape
      Shape handed to ``keras.layers.Input``.
  output_shape
      Number of units of the final sigmoid ``Dense`` layer.
  layer_metadata : list of dict
      One entry per known layer, shaped like
      ``{'layer': <name>, 'hyperparameter': [{'param':..., 'type':..., 'default':...}, ...]}``.
      Used to validate each layer's hyperparameters and to fill in defaults.
  convolution_part, intermediate_part, dense_part : list of str
      Layer names per stage.  Accepted names are ``'conv1d'``,
      ``'maxpooling1d'``, ``'conv2d'``, ``'maxpooling2d'`` (convolution),
      ``'flatten'`` (intermediate) and ``'dense'`` (dense).
  convolution_hyperparameter, intermediate_hyperparameter, dense_hyperparameter : list of dict, optional
      One hyperparameter dict per layer of the matching stage.  ``None``
      selects the built-in default (one Conv1D with 16 filters / kernel 3 /
      relu, one empty dict, one Dense with 32 units / relu respectively).
  optimizer, loss, metrics
      Forwarded unchanged to ``model.compile``.

  Notes
  -----
  * Every constructor argument is stored on the instance under its own name,
    which is what ``BaseEstimator.get_params`` (and therefore ``clone`` and
    ``repr``) reads.
  * The hyperparameter dicts passed in are normalised IN PLACE by
    ``build_model``: missing parameters are filled with the metadata default
    and wrongly typed ones are replaced by it.
  * Layer names and hyperparameter dicts are paired with ``zip``, so surplus
    entries on either side are silently ignored.
  * An unknown layer name makes ``build_model`` print a message and return
    ``None``; ``self.model`` is then ``None``.
  """

  def __init__(
      self,
      input_shape,
      output_shape,
      layer_metadata,
      convolution_part=['conv1d'],
      intermediate_part=['flatten'],
      dense_part=['dense'],
      convolution_hyperparameter=None,
      intermediate_hyperparameter=None,
      dense_hyperparameter=None,
      optimizer='adam',
      loss='binary_crossentropy',
      metrics=['accuracy']
  ):
    # The hyperparameter dicts are rewritten in place while the model is built,
    # so the defaults are created per instance instead of being shared literal
    # defaults that would carry changes over to every later instance.
    if convolution_hyperparameter is None:
      convolution_hyperparameter = [{'filters':16, 'kernel_size':3, 'activation':'relu'}]
    if intermediate_hyperparameter is None:
      intermediate_hyperparameter = [{}]
    if dense_hyperparameter is None:
      dense_hyperparameter = [{'units':32, 'activation':'relu'}]

    # Keep every constructor argument as an attribute of the same name;
    # BaseEstimator.get_params() looks each of them up with getattr().
    self.input_shape = input_shape
    self.output_shape = output_shape
    self.layer_metadata = layer_metadata
    self.convolution_part = convolution_part
    self.intermediate_part = intermediate_part
    self.dense_part = dense_part
    self.convolution_hyperparameter = convolution_hyperparameter
    self.intermediate_hyperparameter = intermediate_hyperparameter
    self.dense_hyperparameter = dense_hyperparameter
    self.optimizer = optimizer
    self.loss = loss
    self.metrics = metrics
    self.model = self.build_model(input_shape, output_shape, convolution_part, intermediate_part, dense_part, convolution_hyperparameter, intermediate_hyperparameter, dense_hyperparameter, optimizer, loss, metrics)

  def build_model(
      self,
      input_shape,
      output_shape,
      convolution_part,
      intermediate_part,
      dense_part,
      convolution_hyperparameter,
      intermediate_hyperparameter,
      dense_hyperparameter,
      optimizer,
      loss,
      metrics
  ):
    """Assemble and compile the Keras model described by the arguments.

    Returns the compiled ``keras.models.Sequential`` model, or ``None`` (after
    printing which stage failed) when a stage contains an unknown layer name.
    Reads ``self.layer_metadata`` to validate the hyperparameters.
    """

    def fix_hyperparameter_type(layer, hyperparameter, layer_metadata, warnings=True):
      """Validate ``hyperparameter`` for ``layer`` against ``layer_metadata``.

      For every parameter listed for the layer, a missing value is replaced
      by the metadata default, and a value whose exact type differs from the
      declared one is replaced by the default too (with a printed notice when
      ``warnings`` is true).  The dict is modified in place and returned.
      """
      for layer_info in layer_metadata:
        if layer != layer_info['layer']:
          continue
        for params in layer_info['hyperparameter']:
          val = hyperparameter.get(params['param'], params['default'])
          if(type(val) != params['type']):
            if(warnings): print("Hyperparameter ", params['param'], " Have Invalid Data Type! Using Default..")
            hyperparameter[params['param']] = params['default']
          else:
            hyperparameter[params['param']] = val
      return hyperparameter

    model = keras.models.Sequential()
    model.add(keras.layers.Input(input_shape))

    # Convolution Part
    for layer, hyperparameter in zip(convolution_part, convolution_hyperparameter):
      hyperparameter = fix_hyperparameter_type(layer, hyperparameter, self.layer_metadata)

      if(layer == 'conv1d'):
        model.add(keras.layers.Conv1D(filters=hyperparameter['filters'], kernel_size=hyperparameter['kernel_size'], activation=hyperparameter['activation']))
      elif(layer == 'maxpooling1d'):
        model.add(keras.layers.MaxPooling1D(pool_size=hyperparameter['pool_size']))
      elif(layer == 'conv2d'):
        model.add(keras.layers.Conv2D(filters=hyperparameter['filters'], kernel_size=hyperparameter['kernel_size'], activation=hyperparameter['activation']))
      elif(layer == 'maxpooling2d'):
        model.add(keras.layers.MaxPooling2D(pool_size=hyperparameter['pool_size']))
      else:
        print("'Convolution Part' Layer Invalid!")
        return None

    # Intermediate Part
    for layer, hyperparameter in zip(intermediate_part, intermediate_hyperparameter):
      hyperparameter = fix_hyperparameter_type(layer, hyperparameter, self.layer_metadata)

      if(layer == 'flatten'):
        model.add(keras.layers.Flatten())
      else:
        print("'Intermediate Part' Layer Invalid!")
        return None

    # Dense Part
    for layer, hyperparameter in zip(dense_part, dense_hyperparameter):
      hyperparameter = fix_hyperparameter_type(layer, hyperparameter, self.layer_metadata)

      if(layer == 'dense'):
        model.add(keras.layers.Dense(units=hyperparameter['units'], activation=hyperparameter['activation']))
      else:
        print("'Dense Part' Layer Invalid!")
        return None

    # Output layer: always a sigmoid Dense layer with `output_shape` units.
    model.add(keras.layers.Dense(output_shape, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer=optimizer,
                loss=loss,
                metrics=metrics)

    return model

  def fit(self, X_train, y_train, X_test, y_test, epochs, verbose=1):
    """Train the wrapped model, using ``(X_test, y_test)`` as validation data.

    The Keras ``History`` object is kept in ``self.history_`` and the
    estimator itself is returned (scikit-learn convention), so the call can
    be chained.
    """
    self.history_ = self.model.fit(X_train, y_train, epochs=epochs, validation_data=(X_test, y_test), verbose=verbose)
    return self

  def score(self, X, y):
    """Return ``1 / (loss + 1e-20)`` on ``(X, y)``, so a higher score means a lower loss.

    This replaces the ``score`` inherited from ``ClassifierMixin``.
    """
    results = self.model.evaluate(X, y, verbose=0)
    # evaluate() yields a bare scalar when no metrics were compiled and a list
    # whose first element is the loss otherwise.
    loss = results[0] if isinstance(results, (list, tuple)) else results
    loss_inverse = 1/(loss+1e-20)

    return loss_inverse

  def evaluate(self, X, y):
    """Evaluate on ``(X, y)``, print loss and accuracy, and return ``(loss, accuracy)``.

    Both values are printed multiplied by 100 with a trailing percent sign.
    Expects the model to have been compiled with exactly one metric.
    """
    # Evaluate the model on the test set
    loss, accuracy = self.model.evaluate(X, y, verbose=0)

    print("Loss: %.3f%%" % (loss*100) )
    print("Accuracy: %.3f%%" % (accuracy*100) )

    return loss, accuracy

  def summary(self):
    """Print the Keras summary of the wrapped model."""
    self.model.summary()

  def predict(self, X):
    """Return the wrapped model's predictions for ``X`` (sigmoid outputs, not class labels)."""
    return self.model.predict(X, verbose=0)

In [3]:
# Registry of the layers the model builder can validate.  Each entry lists the
# layer's hyperparameters together with the exact Python type a value must have
# and the default used when the value is missing or of another type.
layer_metadata = [
  {
    'layer': layer_name,
    'hyperparameter': [
      {'param': param_name, 'type': param_type, 'default': param_default}
      for param_name, param_type, param_default in param_specs
    ],
  }
  for layer_name, param_specs in (
    ('conv1d', (('filters', int, 16), ('kernel_size', int, 3), ('activation', str, 'linear'))),
    ('maxpooling1d', (('pool_size', int, 2),)),
    ('flatten', ()),
    ('dense', (('units', int, 8), ('activation', str, 'linear'))),
  )
]

# Check Hyperparameter Log

In [4]:
# Load the merged hyperparameter-tuning log from the shared project folder.
hp_log = pd.read_csv(os.path.join(drive_path, hyperparameter_log_path, hyperparameter_log))

# Series.replace returns a new Series instead of editing the column, so the
# result has to be assigned back for the clean-up to take effect.  The pattern
# turns type reprs such as "<class 'int'>" into the bare type name ("int").
hp_log['convolution_hyperparameter'] = hp_log['convolution_hyperparameter'].replace(r"<class '(.*?)'>", r"\1", regex=True)
hp_log['dense_hyperparameter'] = hp_log['dense_hyperparameter'].replace(r"<class '(.*?)'>", r"\1", regex=True)

hp_log

Unnamed: 0,convolution_part,convolution_hyperparameter,dense_part,dense_hyperparameter,input_shape,layer_metadata,loss,accuracy,train_loss,train_accuracy
0,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",[],[],"(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.075551,0.976226,0.068099,0.978856
1,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 2, 'activation': 'linear'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.075768,0.976767,0.067970,0.978640
2,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 2, 'activation': 'relu'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.079794,0.977415,0.075291,0.978784
3,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 16, 'activation': 'linear'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.078772,0.976335,0.068539,0.978496
4,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 16, 'activation': 'relu'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.079255,0.976983,0.054908,0.981233
...,...,...,...,...,...,...,...,...,...,...
1788,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 2, 'activation'...","['dense', 'dense']","[{'units': 4, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.089139,0.977631,0.077528,0.979144
1789,"['conv1d', 'conv1d', 'maxpooling1d']","[{'filters': 2, 'kernel_size': 3, 'activation'...","['dense', 'dense']","[{'units': 16, 'activation': 'relu'}, {'units'...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.079429,0.976983,0.058103,0.980153
1790,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 3, 'activation'...","['dense', 'dense']","[{'units': 8, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.076449,0.976119,0.060578,0.979756
1791,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 8, 'kernel_size': 3, 'activation'...","['dense', 'dense']","[{'units': 16, 'activation': 'relu'}, {'units'...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.102054,0.977739,0.098972,0.978244


In [5]:
# Number of logged runs whose convolution part is exactly conv1d followed by maxpooling1d
# (the column holds the list as its string representation, hence the string comparison).
(hp_log['convolution_part'] == "['conv1d', 'maxpooling1d']").sum()

21

In [6]:
# Order the runs from worst (first row) to best (last row): accuracy ascending,
# ties broken by loss descending, then train accuracy ascending, then train loss descending.
hp_log_sorted = hp_log.copy().sort_values(
  by=['accuracy', 'loss', 'train_accuracy', 'train_loss'],
  ascending=[True, False, True, False],
)

hp_log_sorted

Unnamed: 0,convolution_part,convolution_hyperparameter,dense_part,dense_hyperparameter,input_shape,layer_metadata,loss,accuracy,train_loss,train_accuracy
1288,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 16, 'kernel_size': 2, 'activation...","['dense', 'dense']","[{'units': 8, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.395260,0.777112,0.386128,0.780297
1270,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 8, 'kernel_size': 2, 'activation'...","['dense', 'dense']","[{'units': 4, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.394043,0.777112,0.390169,0.779973
1389,"['maxpooling1d', 'conv1d', 'conv1d']","[{'pool_size': 2}, {'filters': 4, 'kernel_size...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.387605,0.777934,0.383692,0.779771
1464,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 12, 'kernel_size': 3, 'activation...","['dense', 'dense']","[{'units': 8, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.385361,0.778258,0.380086,0.779987
1108,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 16, 'kernel_size': 2, 'activation...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.390873,0.778906,0.389107,0.778942
...,...,...,...,...,...,...,...,...,...,...
1664,"['maxpooling1d', 'conv1d', 'conv1d']","[{'pool_size': 2}, {'filters': 4, 'kernel_size...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.095816,0.978820,0.089106,0.978676
1112,"['maxpooling1d', 'conv1d', 'conv1d']","[{'pool_size': 2}, {'filters': 4, 'kernel_size...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.095787,0.978820,0.089088,0.978712
1689,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 2, 'activation'...",['dense'],"[{'units': 8, 'activation': 'relu'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.075142,0.978928,0.063804,0.979396
1137,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 2, 'activation'...",['dense'],"[{'units': 8, 'activation': 'relu'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.075541,0.978928,0.063969,0.979360


In [7]:
# Top 5 Models
# The best runs sit at the end of hp_log_sorted, so rank k is the k-th row counted from the bottom.

for rank in range(1, 6):
  print("Best Model #", rank)
  print("-"*10)
  pprint.pprint(hp_log_sorted.iloc[-rank].to_dict())
  print("-"*10)

Best Model # 1
----------
{'accuracy': 0.9790357649326324,
 'convolution_hyperparameter': "[{'pool_size': 2}, {'filters': 4, "
                               "'kernel_size': 2, 'activation': 'relu'}, "
                               "{'filters': 2, 'kernel_size': 3, 'activation': "
                               "'relu'}]",
 'convolution_part': "['maxpooling1d', 'conv1d', 'conv1d']",
 'dense_hyperparameter': "[{'units': 16, 'activation': 'relu'}]",
 'dense_part': "['dense']",
 'input_shape': '(85, 1)',
 'layer_metadata': "[{'layer': 'conv1d', 'hyperparameter': [{'param': "
                   "'filters', 'type': <class 'int'>, 'default': 16}, "
                   "{'param': 'kernel_size', 'type': <class 'int'>, 'default': "
                   "3}, {'param': 'activation', 'type': <class 'str'>, "
                   "'default': 'linear'}]}, {'layer': 'maxpooling1d', "
                   "'hyperparameter': [{'param': 'pool_size', 'type': <class "
                   "'int'>, 'default': 2}]}

In [8]:
# Bottom 5 Models
# The worst runs sit at the start of hp_log_sorted, so rank k is the k-th row from the top.

for rank in range(1, 6):
  print("Worst Model #", rank)
  print("-"*10)
  pprint.pprint(hp_log_sorted.iloc[rank - 1].to_dict())
  print("-"*10)

Worst Model # 1
----------
{'accuracy': 0.7771123945713043,
 'convolution_hyperparameter': "[{'filters': 16, 'kernel_size': 2, "
                               "'activation': 'relu'}, {'filters': 4, "
                               "'kernel_size': 2, 'activation': 'relu'}, "
                               "{'filters': 16, 'kernel_size': 2, "
                               "'activation': 'relu'}]",
 'convolution_part': "['conv1d', 'conv1d', 'conv1d']",
 'dense_hyperparameter': "[{'units': 8, 'activation': 'relu'}, {'units': 2, "
                         "'activation': 'relu'}]",
 'dense_part': "['dense', 'dense']",
 'input_shape': '(85, 1)',
 'layer_metadata': "[{'layer': 'conv1d', 'hyperparameter': [{'param': "
                   "'filters', 'type': <class 'int'>, 'default': 16}, "
                   "{'param': 'kernel_size', 'type': <class 'int'>, 'default': "
                   "3}, {'param': 'activation', 'type': <class 'str'>, "
                   "'default': 'linear'}]}, {'layer':

In [9]:
# Display the sorted log keyed by convolution architecture (returns a new frame; hp_log_sorted is unchanged).
hp_log_sorted.set_index(keys='convolution_part')

Unnamed: 0_level_0,convolution_hyperparameter,dense_part,dense_hyperparameter,input_shape,layer_metadata,loss,accuracy,train_loss,train_accuracy
convolution_part,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"['conv1d', 'conv1d', 'conv1d']","[{'filters': 16, 'kernel_size': 2, 'activation...","['dense', 'dense']","[{'units': 8, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.395260,0.777112,0.386128,0.780297
"['conv1d', 'conv1d', 'conv1d']","[{'filters': 8, 'kernel_size': 2, 'activation'...","['dense', 'dense']","[{'units': 4, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.394043,0.777112,0.390169,0.779973
"['maxpooling1d', 'conv1d', 'conv1d']","[{'pool_size': 2}, {'filters': 4, 'kernel_size...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.387605,0.777934,0.383692,0.779771
"['conv1d', 'conv1d', 'conv1d']","[{'filters': 12, 'kernel_size': 3, 'activation...","['dense', 'dense']","[{'units': 8, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.385361,0.778258,0.380086,0.779987
"['conv1d', 'conv1d', 'conv1d']","[{'filters': 16, 'kernel_size': 2, 'activation...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.390873,0.778906,0.389107,0.778942
...,...,...,...,...,...,...,...,...,...
"['maxpooling1d', 'conv1d', 'conv1d']","[{'pool_size': 2}, {'filters': 4, 'kernel_size...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.095816,0.978820,0.089106,0.978676
"['maxpooling1d', 'conv1d', 'conv1d']","[{'pool_size': 2}, {'filters': 4, 'kernel_size...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.095787,0.978820,0.089088,0.978712
"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 2, 'activation'...",['dense'],"[{'units': 8, 'activation': 'relu'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.075142,0.978928,0.063804,0.979396
"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 2, 'activation'...",['dense'],"[{'units': 8, 'activation': 'relu'}]","(85, 1)","[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.075541,0.978928,0.063969,0.979360
