## Keras Model for a Problem Set Similar to the Logistic Regression Example

Adapted from https://machinelearningmastery.com/tutorial-first-neural-network-python-keras/

### Initialization

In [1]:
# Silence every Python warning for the rest of the session. The filter is
# installed before the keras import below, so warnings raised while that
# import runs are hidden as well.
import warnings
warnings.filterwarnings('ignore')

from keras.models import Sequential
from keras.layers import Dense
import numpy
import pandas as pd

# Fix the seed of NumPy's global random number generator for reproducibility.
# NOTE(review): this seeds NumPy only; whether the Keras backend draws its
# initial weights from this generator depends on the installed backend — confirm.
numpy.random.seed(7)

ModuleNotFoundError: No module named 'keras'

### Data Pre-processing

In [None]:
# Location of the raw CSV file to download
csv_file_uri = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"

# Labels applied to the CSV columns, in file order. "target" (last) is the
# column the model is trained to predict.
column_names = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "target",
]

# Because names= is supplied and header= is left at its default, pandas reads
# the first line of the file as data rather than as a header row.
# index_col=False keeps pandas from promoting the first column to the index.
data_original = pd.read_csv(
    csv_file_uri,
    names=column_names,
    index_col=False,
)

### Encode features and split into train and test sets

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Work on a deep copy so data_original stays untouched, and drop the "fnlwgt"
# column, which is not used as a feature.
data_pre_dummies = data_original.copy(deep=True).drop(columns=["fnlwgt"])

# One-hot encode every non-numeric column. The two-valued "target" column
# becomes two complementary indicator columns: drop the "<=50K" one and keep
# the ">50K" one as the binary label, renamed back to "target".
# The column names contain a space after the underscore because get_dummies
# builds them as "<column>_<value>" and the values start with a space.
data = (
    pd.get_dummies(data_pre_dummies)
    .drop(columns=["target_ <=50K"])
    .rename(columns={"target_ >50K": "target"})
)

# Every remaining column except the label is a model input.
feature_columns = [column for column in data.columns if column != "target"]

# Cast explicitly to float32. Depending on the pandas version, get_dummies
# returns uint8 or bool indicator columns; mixing bool with the integer
# columns makes `.values` an object-dtype array, which Keras cannot convert
# to a tensor. A uniform float32 array works with every version.
X = data[feature_columns].astype("float32").values
y = data["target"].astype("float32").values

# Hold out 33% of the rows for testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

### Select and Configure Algorithm & Parameters

In [None]:
# Feed-forward binary classifier: two ReLU hidden layers (12 and 107 units)
# followed by a single sigmoid output unit.
model = Sequential()
# Size the input layer from the training matrix rather than hard-coding the
# number of one-hot-encoded columns, so the network still builds if the
# encoding step yields a different number of features.
model.add(Dense(12, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(107, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile model: binary cross-entropy loss for the 0/1 label, Adam optimizer,
# and accuracy reported as the metric during training and evaluation.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

### Train/Fit Model

In [None]:
# Train on the training split: 25 passes over the data, updating the weights
# after every batch of 10 rows.
model.fit(
    x=X_train,
    y=y_train,
    epochs=25,
    batch_size=10,
)

### Validate the model

In [None]:
# Score the trained model on the held-out test split.
scores = model.evaluate(x=X_test, y=y_test)

# Index 1 is reported here: the entry after the loss in both the metric names
# and the returned scores. The score is shown as a percentage.
metric_name = model.metrics_names[1]
metric_percent = scores[1] * 100
print("\n%s: %.2f%%" % (metric_name, metric_percent))