In [46]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from carla.data.catalog import CsvCatalog

import warnings
import tensorflow as tf
import numpy as np
import pandas as pd
import json

# One seed value shared by every random number generator seeded below.
seed = 44

# Seed NumPy's global generator and TensorFlow (through its v1 compat API).
np.random.seed(seed)
tf.compat.v1.random.set_random_seed(seed)

In [51]:
# Prepare the adult dataset: read the feature constraints from JSON and unpack
# the four entries used by the cells below.
with open('../data/adult_constraints.json', 'r') as f:
    constraints = json.load(f)

continuous, categorical, immutable, columns_order = (
    constraints['continuous'],
    constraints['categorical'],
    constraints['immutable'],
    constraints['features_order'],
)

In [52]:
# Build the CARLA catalog from the preprocessed adult CSV. 'income' is the
# prediction target; the feature groups are the lists read from the
# constraints file.
dataset = CsvCatalog(
    file_path="../data/adult_prep.csv",
    target='income',
    continuous=continuous,
    categorical=categorical,
    immutables=immutable,
)

In [53]:
# Training split: features selected in the order given by `columns_order`,
# and the 'income' label one-hot encoded (one column per class).
df = dataset.df_train

X_train = df[columns_order]
Y_train = pd.get_dummies(df['income'])
Y_train

Unnamed: 0,<=50K,>50K
13244,1,0
15553,1,0
14055,1,0
31865,1,0
18019,1,0
...,...,...
30718,1,0
24670,1,0
17683,1,0
12022,1,0


In [54]:
# Test split: features selected in the order given by `columns_order`.
df = dataset.df_test
X_test = df[columns_order]

# One-hot encode the labels, then align the result with the training label
# columns. Encoding each split on its own yields a different set of label
# columns whenever a class is absent from one split, and the label matrix
# would then no longer match the 2-unit softmax output during validation.
# A class missing from this split becomes an all-zero column.
Y_test = pd.get_dummies(df['income']).reindex(columns=Y_train.columns, fill_value=0)
Y_test

Unnamed: 0,<=50K,>50K
18866,1,0
15307,1,0
17081,1,0
23167,1,0
10763,1,0
...,...,...
28286,0,1
32205,1,0
15645,1,0
3103,1,0


In [55]:
from tensorflow import keras


# Feed-forward classifier: three ReLU hidden layers (64 -> 32 -> 16 units),
# each followed by dropout with rate 0.2, ending in a 2-unit softmax layer.
# The input width is the number of feature columns in X_train.
model = tf.keras.Sequential([
    tf.keras.layers.Input((X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(2, activation='softmax'),
])


# The final layer already applies softmax, so the loss receives probabilities
# rather than logits (from_logits=False).
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

# Train for at most 100 epochs with early stopping (patience of 10 epochs).
# restore_best_weights=True rolls the model back to the best monitored epoch
# when training is stopped early; without it the weights left in `model`
# (and saved afterwards) come from 10 epochs past the best one.
#
# NOTE(review): the test split is also used as the validation set, so the
# stopping point is chosen on the held-out data — consider carving a separate
# validation split out of the training data.
#
# The returned History object is kept so the cell does not echo its repr and
# the per-epoch metrics can be inspected later.
history = model.fit(
    X_train.to_numpy(),
    Y_train.to_numpy(),
    epochs=100,
    batch_size=128,
    validation_data=(X_test.to_numpy(), Y_test.to_numpy()),
    shuffle=True,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    ]
)

Train on 24420 samples, validate on 8141 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100


<tensorflow.python.keras.callbacks.History at 0x2418145e908>

In [56]:
# Write the trained network to disk in HDF5 format, replacing any existing file.
model.save(
    '../models/adult_NN.h5',
    save_format='h5',
    overwrite=True,
)