In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import random
import math
import itertools
import pprint

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin

# --- Notebook configuration -------------------------------------------------
# Root folder of the shared project (the '/content/drive' prefix suggests a
# Colab-mounted Google Drive -- TODO confirm the mount step happens elsewhere).
drive_path = '/content/drive/MyDrive/Kuliah/Tugas Akhir/Final Project Shared Folder'
# Relative folders for dataset versions and saved models
# (presumably relative to drive_path; not used in the cells visible here).
data_path = "Dataset/Data Versioning/"
model_path = "Model/ML Model/"
# Folder holding the hyperparameter-tuning logs; joined onto drive_path when
# the log is read. Note: unlike the two paths above it has no trailing slash.
hyperparameter_log_path = "Notebook/Arif's Workspace/Hyperparameter Tuning/Log"
# File names: the dataset version and the tuning-log CSV to load.
data_version = "Trained_V2-2.csv"
hyperparameter_log = "V2.csv"
# NHANES data page URL and dataset component names
# (not referenced by the cells visible here -- verify whether still needed).
base_url = "https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?CycleBeginYear=2017"
dataset_names = ['Demographics', 'Dietary', 'Examination', 'Laboratory', 'Questionnaire']

# Model Builder

In [None]:
class CNNModel(BaseEstimator, ClassifierMixin):
  """Scikit-learn style wrapper around a Keras ``Sequential`` CNN.

  The network is assembled in three stages (convolution layers, intermediate
  layers, dense layers) followed by a final
  ``Dense(output_shape, activation='sigmoid')`` layer. The model is built and
  compiled immediately inside ``__init__`` and stored in ``self.model``.

  Parameters
  ----------
  input_shape
      Shape handed to ``keras.layers.Input``.
  output_shape
      Number of units of the final sigmoid layer.
  layer_metadata : list of dict
      Entries of the form
      ``{'layer': name, 'hyperparameter': [{'param':..., 'type':..., 'default':...}]}``
      used to type-check hyperparameters and to fill in missing ones.
  convolution_part, intermediate_part, dense_part : list of str
      Layer names per stage. Supported names are 'conv1d', 'maxpooling1d',
      'conv2d', 'maxpooling2d' (convolution), 'flatten' (intermediate) and
      'dense' (dense).
  convolution_hyperparameter, intermediate_hyperparameter, dense_hyperparameter : list of dict
      One hyperparameter dict per layer of the matching stage. Layers and
      dicts are paired with ``zip``, so surplus entries of the longer list
      are silently ignored.
  optimizer, loss, metrics
      Forwarded unchanged to ``model.compile``.

  Notes
  -----
  * Every constructor argument is stored under its own name so that
    ``get_params()``, ``clone()`` and ``repr()`` inherited from
    ``BaseEstimator`` work.
  * The list defaults are kept for interface compatibility; they and the
    dicts they contain are never mutated by this class.
  * If a stage contains an unsupported layer name, ``build_model`` prints a
    message and returns ``None``; ``self.model`` is then ``None`` and the
    other methods cannot be used.
  """

  def __init__(
      self,
      input_shape,
      output_shape,
      layer_metadata,
      convolution_part=['conv1d'],
      intermediate_part=['flatten'],
      dense_part=['dense'],
      convolution_hyperparameter=[{'filters':16, 'kernel_size':3, 'activation':'relu'}],
      intermediate_hyperparameter=[{}],
      dense_hyperparameter=[{'units':32, 'activation':'relu'}],
      optimizer='adam',
      loss='binary_crossentropy',
      metrics=['accuracy']
  ):
    self.input_shape = input_shape
    self.output_shape = output_shape
    # build_model reads self.layer_metadata, so it must be set before the build call below.
    self.layer_metadata = layer_metadata
    self.convolution_part = convolution_part
    self.intermediate_part = intermediate_part
    self.dense_part = dense_part
    self.convolution_hyperparameter = convolution_hyperparameter
    self.intermediate_hyperparameter = intermediate_hyperparameter
    self.dense_hyperparameter = dense_hyperparameter
    self.optimizer = optimizer
    self.loss = loss
    self.metrics = metrics
    self.model = self.build_model(input_shape, output_shape, convolution_part, intermediate_part, dense_part, convolution_hyperparameter, intermediate_hyperparameter, dense_hyperparameter, optimizer, loss, metrics)

  def build_model(
      self,
      input_shape,
      output_shape,
      convolution_part,
      intermediate_part,
      dense_part,
      convolution_hyperparameter,
      intermediate_hyperparameter,
      dense_hyperparameter,
      optimizer,
      loss,
      metrics
  ):
    """Assemble and compile the Keras model described by the arguments.

    Returns the compiled ``keras.models.Sequential`` model, or ``None``
    (after printing a message) when a stage contains an unsupported layer
    name. Hyperparameters are validated against ``self.layer_metadata``.
    """

    def fix_hyperparameter_type(layer, hyperparameter, layer_metadata, warnings=True):
      """Return a validated copy of ``hyperparameter`` for ``layer``.

      For every parameter declared in the metadata of ``layer``: a missing
      value is filled with the declared default, and a value whose exact
      type differs from the declared type is replaced by the default
      (optionally printing a warning). Keys not declared in the metadata
      are passed through untouched.
      """
      # Work on a shallow copy: the input dict may belong to the caller or to
      # the shared default arguments of __init__ and must not be modified.
      fixed = dict(hyperparameter)
      for layer_info in layer_metadata:
        if layer != layer_info['layer']:
          continue
        for params in layer_info['hyperparameter']:
          val = fixed.get(params['param'], params['default'])
          # Exact type match on purpose (e.g. bool is not accepted for int).
          if type(val) is not params['type']:
            if(warnings): print("Hyperparameter ", params['param'], " Have Invalid Data Type! Using Default..")
            val = params['default']
          fixed[params['param']] = val
      return fixed

    model = keras.models.Sequential()
    model.add(keras.layers.Input(input_shape))

    # Convolution Part
    for layer, hyperparameter in zip(convolution_part, convolution_hyperparameter):
      hyperparameter = fix_hyperparameter_type(layer, hyperparameter, self.layer_metadata)

      if(layer == 'conv1d'):
        model.add(keras.layers.Conv1D(filters=hyperparameter['filters'], kernel_size=hyperparameter['kernel_size'], activation=hyperparameter['activation']))
      elif(layer == 'maxpooling1d'):
        model.add(keras.layers.MaxPooling1D(pool_size=hyperparameter['pool_size']))
      elif(layer == 'conv2d'):
        model.add(keras.layers.Conv2D(filters=hyperparameter['filters'], kernel_size=hyperparameter['kernel_size'], activation=hyperparameter['activation']))
      elif(layer == 'maxpooling2d'):
        model.add(keras.layers.MaxPooling2D(pool_size=hyperparameter['pool_size']))
      else:
        print("'Convolution Part' Layer Invalid!")
        return None

    # Intermediate Part
    for layer, hyperparameter in zip(intermediate_part, intermediate_hyperparameter):
      hyperparameter = fix_hyperparameter_type(layer, hyperparameter, self.layer_metadata)

      if(layer == 'flatten'):
        model.add(keras.layers.Flatten())
      else:
        print("'Intermediate Part' Layer Invalid!")
        return None

    # Dense Part
    for layer, hyperparameter in zip(dense_part, dense_hyperparameter):
      hyperparameter = fix_hyperparameter_type(layer, hyperparameter, self.layer_metadata)

      if(layer == 'dense'):
        model.add(keras.layers.Dense(units=hyperparameter['units'], activation=hyperparameter['activation']))
      else:
        print("'Dense Part' Layer Invalid!")
        return None

    # Output layer: always a sigmoid Dense layer with `output_shape` units.
    model.add(keras.layers.Dense(output_shape, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer=optimizer,
                loss=loss,
                metrics=metrics)

    return model

  def fit(self, X_train, y_train, X_test, y_test, epochs, verbose=1):
    """Train the Keras model, using ``(X_test, y_test)`` as validation data.

    Returns ``self`` (scikit-learn convention) so calls can be chained.
    """
    self.model.fit(X_train, y_train, epochs=epochs, validation_data=(X_test, y_test), verbose=verbose)
    return self

  def score(self, X, y):
    """Return the inverse of the loss on ``(X, y)``; higher is better.

    The two-value unpacking requires ``model.evaluate`` to return exactly a
    loss and one metric value.
    """
    loss, accuracy = self.model.evaluate(X, y, verbose=0)
    # The tiny epsilon guards against division by zero for a zero loss.
    loss_inverse = 1/(loss+1e-20)

    return loss_inverse

  def evaluate(self, X, y):
    """Evaluate on ``(X, y)``, print loss and accuracy, and return both.

    Both values are printed multiplied by 100 with a trailing percent sign.
    The two-value unpacking requires ``model.evaluate`` to return exactly a
    loss and one metric value.
    """
    loss, accuracy = self.model.evaluate(X, y, verbose=0)

    print("Loss: %.3f%%" % (loss*100) )
    print("Accuracy: %.3f%%" % (accuracy*100) )

    return loss, accuracy

  def summary(self):
    """Print the Keras summary of the underlying model."""
    self.model.summary()

  def predict(self, X):
    """Return the underlying model's raw outputs for ``X`` (no thresholding)."""
    return self.model.predict(X, verbose=0)

In [None]:
# Per-layer hyperparameter schema: for each supported layer name, the list of
# tunable parameters with their expected Python type and fallback default.
layer_metadata = [
  dict(
    layer='conv1d',
    hyperparameter=[
      dict(param='filters', type=int, default=16),
      dict(param='kernel_size', type=int, default=3),
      dict(param='activation', type=str, default='linear'),
    ],
  ),
  dict(
    layer='maxpooling1d',
    hyperparameter=[
      dict(param='pool_size', type=int, default=2),
    ],
  ),
  # 'flatten' takes no hyperparameters.
  dict(layer='flatten', hyperparameter=[]),
  dict(
    layer='dense',
    hyperparameter=[
      dict(param='units', type=int, default=8),
      dict(param='activation', type=str, default='linear'),
    ],
  ),
]

# Check Hyperparameter Log

In [None]:
# Load the hyperparameter-tuning log produced by the tuning runs.
hp_log = pd.read_csv(os.path.join(drive_path, hyperparameter_log_path, hyperparameter_log))

# Strip the "<class '...'>" wrapper, keeping only the type name.
# Series.replace returns a new Series, so the result has to be assigned back;
# without the assignment the cleanup has no effect.
# NOTE(review): in the displayed output the "<class ...>" text shows up in the
# 'layer_metadata' column -- confirm whether that column should be cleaned too.
for column in ('convolution_hyperparameter', 'dense_hyperparameter'):
  hp_log[column] = hp_log[column].replace(r"<class '(.*?)'>", r"\1", regex=True)

hp_log

Unnamed: 0,convolution_part,convolution_hyperparameter,dense_part,dense_hyperparameter,input_shape,output_shape,layer_metadata,loss,metrics,accuracy,train_loss,train_accuracy
0,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",[],[],"(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.066495,['accuracy'],0.979834,0.079093,0.975297
1,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 2, 'activation': 'linear'}]","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.065173,['accuracy'],0.979114,0.075125,0.975760
2,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 2, 'activation': 'relu'}]","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.064310,['accuracy'],0.980194,0.072760,0.976996
3,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 16, 'activation': 'linear'}]","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.065463,['accuracy'],0.978394,0.074957,0.975297
4,['conv1d'],"[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 16, 'activation': 'relu'}]","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.065772,['accuracy'],0.981275,0.060922,0.978539
...,...,...,...,...,...,...,...,...,...,...,...,...
698,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...","['dense', 'dense']","[{'units': 2, 'activation': 'linear'}, {'units...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.061000,['accuracy'],0.981995,0.072596,0.977150
699,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...","['dense', 'dense']","[{'units': 2, 'activation': 'linear'}, {'units...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.062791,['accuracy'],0.979834,0.070558,0.975915
700,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...","['dense', 'dense']","[{'units': 2, 'activation': 'linear'}, {'units...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.061729,['accuracy'],0.980555,0.069003,0.977459
701,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.064235,['accuracy'],0.981635,0.071765,0.977459


In [None]:
# Rank the runs from worst (first row) to best (last row): accuracies ascending,
# losses descending. sort_values returns a new frame, so hp_log is left untouched.
hp_log_sorted = hp_log.sort_values(
    by=['accuracy', 'loss', 'train_accuracy', 'train_loss'],
    ascending=[True, False, True, False],
)

hp_log_sorted

Unnamed: 0,convolution_part,convolution_hyperparameter,dense_part,dense_hyperparameter,input_shape,output_shape,layer_metadata,loss,metrics,accuracy,train_loss,train_accuracy
490,['conv1d'],"[{'filters': 64, 'kernel_size': 3, 'activation...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.680632,['accuracy'],0.579402,0.680358,0.580361
100,['conv1d'],"[{'filters': 2, 'kernel_size': 3, 'activation'...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.680484,['accuracy'],0.579402,0.680176,0.580361
419,['conv1d'],"[{'filters': 64, 'kernel_size': 1, 'activation...","['dense', 'dense']","[{'units': 64, 'activation': 'linear'}, {'unit...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.680484,['accuracy'],0.579402,0.680175,0.580361
702,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...","['dense', 'dense']","[{'units': 2, 'activation': 'relu'}, {'units':...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.680466,['accuracy'],0.579402,0.680176,0.580361
639,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...","['dense', 'dense']","[{'units': 64, 'activation': 'relu'}, {'units'...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.092228,['accuracy'],0.963990,0.076870,0.968658
...,...,...,...,...,...,...,...,...,...,...,...,...
268,['conv1d'],"[{'filters': 16, 'kernel_size': 3, 'activation...","['dense', 'dense']","[{'units': 2, 'activation': 'linear'}, {'units...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.063788,['accuracy'],0.982355,0.066408,0.978848
593,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...","['dense', 'dense']","[{'units': 64, 'activation': 'linear'}, {'unit...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.063735,['accuracy'],0.982355,0.062456,0.977304
691,"['conv1d', 'conv1d', 'conv1d']","[{'filters': 2, 'kernel_size': 1, 'activation'...",['dense'],"[{'units': 16, 'activation': 'linear'}]","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.061289,['accuracy'],0.982355,0.068228,0.977150
140,['conv1d'],"[{'filters': 2, 'kernel_size': 3, 'activation'...","['dense', 'dense']","[{'units': 2, 'activation': 'linear'}, {'units...","(79, 1)",1,"[{'layer': 'conv1d', 'hyperparameter': [{'para...",0.061549,['accuracy'],0.983075,0.070197,0.977922


In [None]:
# Top 5 models: the best runs sit at the end of hp_log_sorted,
# so rank 1 is the last row, rank 2 the second to last, and so on.

for rank in range(1, 6):
  best_row = hp_log_sorted.iloc[-rank]
  print("Best Model #", rank)
  print("-"*10)
  pprint.pprint(best_row.to_dict())
  print("-"*10)

Best Model # 1
----------
{'accuracy': 0.9830752611160278,
 'convolution_hyperparameter': "[{'filters': 16, 'kernel_size': 3, "
                               "'activation': 'relu'}]",
 'convolution_part': "['conv1d']",
 'dense_hyperparameter': "[{'units': 2, 'activation': 'relu'}, {'units': 64, "
                         "'activation': 'relu'}]",
 'dense_part': "['dense', 'dense']",
 'input_shape': '(79, 1)',
 'layer_metadata': "[{'layer': 'conv1d', 'hyperparameter': [{'param': "
                   "'filters', 'type': <class 'int'>, 'default': 16}, "
                   "{'param': 'kernel_size', 'type': <class 'int'>, 'default': "
                   "3}, {'param': 'activation', 'type': <class 'str'>, "
                   "'default': 'linear'}]}, {'layer': 'maxpooling1d', "
                   "'hyperparameter': [{'param': 'pool_size', 'type': <class "
                   "'int'>, 'default': 2}]}, {'layer': 'flatten', "
                   "'hyperparameter': []}, {'layer': 'dense', "
     

In [None]:
# Bottom 5 models: the worst runs sit at the start of hp_log_sorted.

for rank, position in enumerate(range(5), start=1):
  worst_row = hp_log_sorted.iloc[position]
  print("Worst Model #", rank)
  print("-"*10)
  pprint.pprint(worst_row.to_dict())
  print("-"*10)

Worst Model # 1
----------
{'accuracy': 0.5794022083282471,
 'convolution_hyperparameter': "[{'filters': 64, 'kernel_size': 3, "
                               "'activation': 'relu'}]",
 'convolution_part': "['conv1d']",
 'dense_hyperparameter': "[{'units': 2, 'activation': 'relu'}, {'units': 64, "
                         "'activation': 'linear'}]",
 'dense_part': "['dense', 'dense']",
 'input_shape': '(79, 1)',
 'layer_metadata': "[{'layer': 'conv1d', 'hyperparameter': [{'param': "
                   "'filters', 'type': <class 'int'>, 'default': 16}, "
                   "{'param': 'kernel_size', 'type': <class 'int'>, 'default': "
                   "3}, {'param': 'activation', 'type': <class 'str'>, "
                   "'default': 'linear'}]}, {'layer': 'maxpooling1d', "
                   "'hyperparameter': [{'param': 'pool_size', 'type': <class "
                   "'int'>, 'default': 2}]}, {'layer': 'flatten', "
                   "'hyperparameter': []}, {'layer': 'dense', "
  