# Config

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import random
import math
import itertools
import pprint

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin

# Colab-only: mount Google Drive so the project folder is reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Root of the shared project folder on the mounted Drive; the log paths below are joined onto it.
drive_path = '/content/drive/MyDrive/Kuliah/Tugas Akhir/Final Project Shared Folder'
# NOTE(review): data_path, model_path, data_version, base_url and dataset_names are not
# referenced by any cell visible in this notebook — presumably shared boilerplate; confirm before removing.
data_path = "Dataset/Data Versioning/"
model_path = "Model/ML Model/"
data_version = "Trained_Oversampled.csv"
base_url = "https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?CycleBeginYear=2017"
dataset_names = ['Demographics', 'Dietary', 'Examination', 'Laboratory', 'Questionnaire']
# Folder (relative to drive_path) that holds the hyperparameter tuning logs.
hyperparameter_log_path = "Notebook/Arif's Workspace/Hyperparameter Tuning/Log"
# Sub-folder of the log folder that full_refresh() walks and merges.
hyperparameter_log_full_refresh = "V5"
# Log files (relative to the log folder) that incremental() appends to the existing merged file.
hyperparameter_log_increment = [
    "V1/hyperparameter_tuning_log_2.csv"
]
# Log files (relative to the log folder) that full_refresh_custom() merges from scratch.
hyperparameter_log_full_refresh_custom = [
    "V1/hyperparameter_tuning_log_1.csv",
]

# Merged CSV: written by every run type and read first by incremental().
# NOTE(review): "V5.csv" is hard-coded and looks like it should track
# hyperparameter_log_full_refresh — confirm and keep the two in sync when bumping the version.
output_file = os.path.join(drive_path, hyperparameter_log_path+"/Merged/V5.csv")

# Selects which ETL function the "Run ETL" cell calls; leave exactly one line uncommented.
run_type = 'full_refresh'
# run_type = 'full_refresh_custom'
# run_type = 'increment'


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# ETL

In [2]:
def full_refresh(source_dir=None, destination=None):
  """Rebuild the merged log from every file found under one folder.

  Walks ``source_dir`` recursively, reads each file with ``pd.read_csv``,
  concatenates all of them, drops fully duplicated rows and writes the
  result to ``destination`` without the index.

  Args:
    source_dir: Folder to walk. Defaults to the
      ``hyperparameter_log_full_refresh`` sub-folder of the log folder
      configured in the config cell.
    destination: Path of the merged CSV to write. Defaults to ``output_file``.

  Returns:
    The merged, de-duplicated DataFrame (original per-file indices kept).

  Raises:
    FileNotFoundError: If no file exists under ``source_dir``.
  """
  if source_dir is None:
    source_dir = os.path.join(drive_path, hyperparameter_log_path+"/"+hyperparameter_log_full_refresh)
  if destination is None:
    destination = output_file

  # Collect every frame first and concatenate once. Keying the "first file"
  # on a per-directory counter would restart the merge in every sub-folder
  # that os.walk visits and silently discard what was merged before it.
  logs = []
  for root, _dirs, files in os.walk(source_dir):
    for file in files:
      log = pd.read_csv(os.path.join(root, file))
      print("Merging File", file, "With Size", str(log.shape[0]), "Row")
      logs.append(log)

  # os.walk yields nothing for a missing or empty folder; fail with a clear
  # message instead of an unbound-variable error further down.
  if not logs:
    raise FileNotFoundError("No log files found under " + str(source_dir))

  log_merged = pd.concat(logs).drop_duplicates()
  print("-"*10)
  print("Successfully Merged", str(log_merged.shape[0]), "Row Data")
  print("Saving to", destination, "...")
  print("-"*10)

  log_merged.to_csv(destination, index=False)

  print("Success!")
  print("-"*10)

  return log_merged

In [3]:
def full_refresh_custom(log_files=None, base_dir=None, destination=None):
  """Rebuild the merged log from an explicit list of log files.

  Reads each listed file with ``pd.read_csv``, concatenates them, drops fully
  duplicated rows and writes the result to ``destination`` without the index.

  Args:
    log_files: Paths of the logs to merge, relative to ``base_dir``.
      Defaults to ``hyperparameter_log_full_refresh_custom``.
    base_dir: Folder the entries of ``log_files`` are joined onto. Defaults
      to ``drive_path`` joined with ``hyperparameter_log_path``.
    destination: Path of the merged CSV to write. Defaults to ``output_file``.

  Returns:
    The merged, de-duplicated DataFrame (original per-file indices kept).

  Raises:
    ValueError: If ``log_files`` is empty.
  """
  if log_files is None:
    log_files = hyperparameter_log_full_refresh_custom
  if base_dir is None:
    base_dir = os.path.join(drive_path, hyperparameter_log_path)
  if destination is None:
    destination = output_file

  logs = []
  for file in log_files:
    log = pd.read_csv(os.path.join(base_dir, file))
    print("Merging File", file, "With Size", str(log.shape[0]), "Row")
    logs.append(log)

  # An empty list would otherwise surface as an unbound-variable error.
  if not logs:
    raise ValueError("No log files listed for full_refresh_custom")

  # One concat over the collected frames instead of re-copying the
  # accumulated frame on every iteration.
  log_merged = pd.concat(logs).drop_duplicates()
  print("-"*10)
  print("Successfully Merged", str(log_merged.shape[0]), "Row Data")
  print("Saving to", destination, "...")
  print("-"*10)

  log_merged.to_csv(destination, index=False)

  print("Success!")
  print("-"*10)

  return log_merged

In [4]:
def incremental(log_files=None, base_dir=None, destination=None):
  """Append new log files to the existing merged log.

  Reads the current merged CSV at ``destination``, concatenates the listed
  log files onto it, drops fully duplicated rows and overwrites
  ``destination`` with the result (index not written).

  Args:
    log_files: Paths of the logs to append, relative to ``base_dir``.
      Defaults to ``hyperparameter_log_increment``.
    base_dir: Folder the entries of ``log_files`` are joined onto. Defaults
      to ``drive_path`` joined with ``hyperparameter_log_path``.
    destination: Path of the merged CSV that is read and then overwritten.
      Defaults to ``output_file``.

  Returns:
    The merged, de-duplicated DataFrame (original per-file indices kept).
  """
  if log_files is None:
    log_files = hyperparameter_log_increment
  if base_dir is None:
    base_dir = os.path.join(drive_path, hyperparameter_log_path)
  if destination is None:
    destination = output_file

  existing = pd.read_csv(destination)
  print("Existing File Has", str(existing.shape[0]), "Row -", destination)
  print("-"*10)

  # The existing merged rows go first so drop_duplicates keeps them and
  # discards the repeated rows coming from the new files.
  logs = [existing]
  for file in log_files:
    log = pd.read_csv(os.path.join(base_dir, file))
    print("Merging File", file, "With Size", str(log.shape[0]), "Row")
    logs.append(log)

  log_merged = pd.concat(logs).drop_duplicates()
  print("-"*10)
  print("Successfully Merged", str(log_merged.shape[0]), "Row Data")
  print("Saving to", destination, "...")
  print("-"*10)

  log_merged.to_csv(destination, index=False)

  print("Success!")
  print("-"*10)

  return log_merged


# Run ETL

In [5]:
# Dispatch to the ETL routine selected by run_type in the config cell.
if run_type == 'full_refresh':
  full_refresh()
elif run_type == 'full_refresh_custom':
  full_refresh_custom()
elif run_type == 'increment':
  incremental()
else:
  # A misspelled run_type would otherwise do nothing and leave a stale merged file.
  raise ValueError("Unknown run_type: " + repr(run_type))

Merging File hyperparameter_tuning_log (2).csv With Size 81 Row
Merging File hyperparameter_tuning_log (3).csv With Size 11 Row
Merging File hyperparameter_tuning_log (4).csv With Size 188 Row
Merging File hyperparameter_tuning_log (5).csv With Size 74 Row
Merging File hyperparameter_tuning_log (6).csv With Size 74 Row
Merging File hyperparameter_tuning_log (7).csv With Size 75 Row
Merging File hyperparameter_tuning_log (8).csv With Size 73 Row
Merging File hyperparameter_tuning_log (9).csv With Size 71 Row
Merging File hyperparameter_tuning_log (11).csv With Size 72 Row
Merging File hyperparameter_tuning_log (12).csv With Size 72 Row
----------
Successfully Merged 791 Row Data
Saving to /content/drive/MyDrive/Kuliah/Tugas Akhir/Final Project Shared Folder/Notebook/Arif's Workspace/Hyperparameter Tuning/Log/Merged/V5.csv ...
----------
Success!
----------
