In [1]:
# Mount Google Drive inside the Colab runtime so that files under
# /content/drive can be read with ordinary filesystem paths.
# NOTE(review): google.colab only exists on Colab; this cell will fail on a
# local Jupyter kernel.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import random

from sklearn.metrics import confusion_matrix

import warnings
# NOTE(review): this installs an "ignore everything" filter, so deprecation and
# convergence warnings from pandas / scikit-learn are hidden from here on
# (until the filters are changed again). Consider restricting it to specific
# warning categories once the notebook is stable.
warnings.filterwarnings('ignore')

# --- Notebook configuration -------------------------------------------------
# Project root on the mounted Google Drive (absolute path inside Colab).
drive_path = '/content/drive/MyDrive/Kuliah/Tugas Akhir/Final Project Shared Folder'
# Sub-folder (relative to drive_path) holding the versioned dataset CSVs.
data_path = "Dataset/Data Versioning/"
# Sub-folder for models; not used in the cells visible here --
# presumably relative to drive_path as well, TODO confirm.
model_path = "Model/ML Model/"
# File name of the dataset version loaded in the ingest step below.
data_version = "Trained_V2-2.csv"
# NHANES data page on the CDC site for the cycle beginning in 2017.
# Not referenced in the cells visible here.
base_url = "https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?CycleBeginYear=2017"
# Dataset group names; presumably the NHANES component categories that the
# column prefixes (Demog*, Dieta*, Exami*, Labor*, Quest*) derive from --
# TODO confirm against the code that builds the CSV.
dataset_names = ['Demographics', 'Dietary', 'Examination', 'Laboratory', 'Questionnaire']

# Ingest Data

# Load the selected dataset version; the first CSV column is consumed as the
# index. Joining the three path parts separately also works when data_path
# has no trailing slash.
df_raw = pd.read_csv(os.path.join(drive_path, data_path, data_version), index_col=0)

# Use SEQN as the index when it is still an ordinary column. When the file was
# written with SEQN already as its index, index_col=0 has handled it and there
# is nothing to do.
if 'SEQN' in df_raw.columns:
  df_raw = df_raw.set_index('SEQN', drop=True)

# Remove a stray "Unnamed: 0" column if present (presumably a leftover row
# index from an earlier to_csv round-trip -- verify). errors="ignore" keeps
# this best-effort without a bare `except` hiding unrelated failures, and the
# column is now dropped even when the SEQN step above was skipped, so it can
# never leak into the feature matrix.
df_raw = df_raw.drop(columns="Unnamed: 0", errors="ignore")


# Data Preparation For Model

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def prep_data(df, target_col='Quest16_MCQ160B', drop_cols=None, test_size=0.3, random_state=42):
  """Build standardised train/validation feature arrays and binary labels.

  Called with only `df`, this behaves exactly like the previous hard-coded
  version; the extra keyword arguments merely expose its constants.

  Parameters
  ----------
  df : pandas.DataFrame
    Table containing the feature columns and `target_col`.
  target_col : str
    Column used as the label. Values 2 and 9 are mapped to 0 and the result
    is cast to int; any other value is passed through unchanged.
  drop_cols : iterable of str, optional
    Columns excluded from the feature matrix. Defaults to the seven
    Quest16_MCQ* columns listed below. `target_col` is always excluded, even
    when it is missing from this list.
  test_size : float
    Fraction of rows placed in the validation split.
  random_state : int
    Seed forwarded to `train_test_split` so the split is reproducible.

  Returns
  -------
  X_train, X_val : numpy.ndarray
    Scaled features with shape (n_samples, n_features, 1).
  y_train, y_val :
    Integer labels for the matching rows, as returned by `train_test_split`.

  Raises
  ------
  KeyError
    If `target_col` or any column in `drop_cols` is absent from `df`.
  """
  if drop_cols is None:
    # Default exclusion list: the label itself plus the other Quest16_MCQ*
    # columns (presumably removed to avoid label leakage -- confirm).
    drop_cols = [
      'Quest16_MCQ010', 'Quest16_MCQ160B', 'Quest16_MCQ220', 'Quest16_MCQ300C',
      'Quest16_MCQ300A', 'Quest16_MCQ366A', 'Quest16_MCQ366B',
    ]
  excluded = list(drop_cols)
  if target_col not in excluded:
    # The label must never remain among the features.
    excluded.append(target_col)

  # DataFrame.drop already returns a new frame, so no explicit copy is needed.
  X = df.drop(columns=excluded)

  # Collapse codes 2 and 9 into the negative class (presumably the survey's
  # "No" and "Don't know" answers -- verify against the codebook).
  # NOTE(review): missing values in the target make astype(int) raise.
  y = df[target_col].replace({2: 0, 9: 0}).astype(int)

  X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=test_size, random_state=random_state)

  # Fit the scaler on the training rows only, then reuse its statistics for
  # the validation rows so no validation information leaks into training.
  scaler = StandardScaler()
  X_train = scaler.fit_transform(X_train)
  X_val = scaler.transform(X_val)

  # Append a trailing axis of length 1: (n_samples, n_features, 1).
  X_train = np.asarray(X_train).reshape(X_train.shape[0], X_train.shape[1], 1)
  X_val = np.asarray(X_val).reshape(X_val.shape[0], X_val.shape[1], 1)

  print("Train: ", X_train.shape, " ", y_train.shape)
  print("Val: ", X_val.shape, " ", y_val.shape)
  print("Column Used: ", X.columns.tolist())

  return X_train, X_val, y_train, y_val

# Prepare the loaded table for modelling; the call prints the shapes of both
# splits and the list of feature columns that were kept.
X_train, X_val, y_train, y_val = prep_data(df_raw)

Train:  (6477, 79, 1)   (6477,)
Val:  (2777, 79, 1)   (2777,)
Column Used:  ['Dieta1_DR1TKCAL', 'Exami2_BMXBMI', 'Dieta1_DR1TCARB', 'Dieta1_DR1TFIBE', 'Dieta1_DR1TSFAT', 'Dieta1_DR1TTFAT', 'Dieta1_DR1TPFAT', 'Dieta1_DR1TCALC', 'Quest6_DED125', 'Dieta1_DR1TMFAT', 'Quest21_SLQ330', 'Quest21_SLD013', 'Quest21_SLQ300', 'Quest19_PAQ635', 'Quest19_PAD615', 'Quest19_PAQ640', 'Quest21_SLQ320', 'Quest21_SLD012', 'Dieta1_DRDINT', 'Quest19_PAQ610', 'Quest19_PAQ655', 'Dieta1_DR1DAY', 'Dieta1_DR1TPROT', 'Quest6_DED120', 'Quest19_PAD645', 'Dieta1_DR1TCHOL', 'Quest19_PAD660', 'Dieta1_DR1TSUGR', 'Demog1_RIDRETH3', 'Demog1_DMDMARTL', 'Quest14_INQ012', 'Demog1_DMDHHSZA', 'Quest15_KIQ026', 'Quest9_DLQ050', 'Demog1_DMDHHSIZ', 'Exami1_BPXPULS', 'Quest18_OCQ210', 'Demog1_INDIN2', 'Demog1_RIDAGEYR', 'Quest15_KIQ022', 'Quest20_PFQ061C', 'Quest4_CBD111', 'Quest7_DIQ010', 'Quest14_IND235', 'Demog1_DMDEDUC', 'Quest10_ECQ020', 'Labor1_LBDTCSI', 'Quest17_DPQ020', 'Quest3_CDQ008', 'Quest14_INQ020', 'Demog1_DMDFMSIZ