### Useful imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras import models, activations, layers, metrics, optimizers, regularizers, callbacks

# Raise pandas' display limits for rows and columns to 500 each so that
# large frames are printed without being cut short. set_option accepts
# several (option, value) pairs in a single call.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 500,
)

### Load CSV

In [2]:
# Read the income data (first line is the header; whitespace following each
# delimiter is skipped) and discard the 'Unnamed: 0' column.
# NOTE(review): 'Unnamed: 0' is presumably a row index that was written out
# when the CSV was saved -- confirm against the file.
dataset = (
    pd.read_csv("income.csv", header=0, skipinitialspace=True)
    .drop(columns=['Unnamed: 0'])
)

### Useful Python functions

In [3]:
def replace_on_condition(dataset, column_name, condition_value, replace_value):
    """Overwrite every cell of a text column that matches a pattern.

    Args:
        dataset: DataFrame to update. It is modified in place.
        column_name: Name of the string column to search and overwrite.
        condition_value: Pattern handed to ``Series.str.contains``. It is
            interpreted as a regular expression and may match anywhere in
            the cell, so regex metacharacters such as ``?`` or ``(`` must
            be escaped by the caller when they are meant literally.
        replace_value: Value written into every matching cell.

    Returns:
        The same ``dataset`` object, returned so the call can be used in a
        reassignment.
    """
    # na=False: missing cells count as "no match". Without it the mask can
    # contain NaN, which .loc refuses to use as a boolean indexer.
    condition = dataset.loc[:, column_name].str.contains(condition_value, na=False)
    dataset.loc[condition, column_name] = replace_value

    return dataset

# Patterns grouping "native country" values by region. They are presumably
# fed to replace_native_country, which hands each entry to str.contains as a
# regular expression matched anywhere in the cell (so 'South', 'Hong' and
# 'Outlying' act as substring matches).
# NOTE(review): several assignments look geographically questionable
# ('Philippines' and 'Cambodia' under south_america; Caribbean and Central
# American countries split between the first two lists) -- confirm the
# intended grouping before relying on these features.
# NOTE(review): 'Outlying-US(Guam-USVI-etc)' contains unescaped parentheses;
# as a regex they form a group, so this entry does not match the literal
# text with parentheses. 'Outlying' in north_america matches that text
# instead.
north_america = ['United-States', 'Cuba', 'Jamaica', 'Mexico', 'Honduras', 'Canada', 'Puerto-Rico', 'Outlying']
south_america = ['South', 'Philippines', 'Columbia', 'Cambodia', 'Ecuador', 'Haiti', 'Dominican-Republic', 'El-Salvador', 'Guatemala', 'Peru', 'Outlying-US(Guam-USVI-etc)', 'Trinadad&Tobago', 'Nicaragua']
asia = ['India', 'Iran', 'Thailand', 'Laos', 'Taiwan', 'China', 'Japan', 'Vietnam', 'Hong']
europe = ['England', 'Germany', 'Italy', 'Poland', 'Portugal', 'France', 'Yugoslavia', 'Scotland', 'Greece', 'Ireland', 'Hungary', 'Holand-Netherlands']
# Raw string: the backslash escapes the regex metacharacter '?'. In a normal
# string literal '\?' is an invalid escape sequence, which newer Python
# versions warn about.
other = [r'\?']

def replace_native_country(dataset, values, replacement, column_name="native country"):
    """Replace every cell matching any of ``values`` with ``replacement``.

    The patterns are applied one after another through
    ``replace_on_condition``, so a cell rewritten by an earlier pattern is
    seen in its rewritten form by the later ones.

    Args:
        dataset: DataFrame to update.
        values: Iterable of patterns, each forwarded to
            ``replace_on_condition`` as its ``condition_value``.
        replacement: Value written into every matching cell.
        column_name: Column to rewrite. Defaults to ``"native country"``,
            the column this helper originally hard-coded.

    Returns:
        The DataFrame returned by the last ``replace_on_condition`` call, or
        ``dataset`` unchanged when ``values`` is empty.
    """
    for pattern in values:
        dataset = replace_on_condition(dataset, column_name, pattern, replacement)

    return dataset

def standardize_number(dataframe, column_name):
    """Return the z-score of a numeric column: (value - mean) / std.

    Args:
        dataframe: DataFrame holding the column. It is not modified.
        column_name: Name of the numeric column to standardize.

    Returns:
        A new Series of standardized values. When the standard deviation is
        zero (constant column) or undefined (fewer than two values), the
        mean-centred column is returned instead, so a constant column maps
        to zeros rather than NaN/inf.
    """
    column = dataframe[column_name]
    centered = column - column.mean()
    spread = column.std()

    # Only guard the ordinary single-column case, where std() is a scalar.
    # Dividing by a zero/NaN spread would fill the column with NaN or inf.
    if np.isscalar(spread) and (pd.isna(spread) or spread == 0):
        return centered

    return centered / spread

def expand_category(dataframe, column_name):
    """One-hot encode a column and return the widened frame.

    Args:
        dataframe: Source DataFrame. It is left untouched.
        column_name: Column to encode.

    Returns:
        A new DataFrame with ``column_name`` removed and one indicator
        column per distinct value appended on the right, each named
        ``<column_name>_<value>``.
    """
    indicator_columns = pd.get_dummies(dataframe[column_name], prefix=column_name)
    remaining_columns = dataframe.drop(columns=[column_name])

    return pd.concat([remaining_columns, indicator_columns], axis=1)

def unison_shuffled_copies(a, b):
    """Shuffle two equally long sequences with the same random permutation.

    Args:
        a: First sequence. It must support ``len()`` and indexing with an
            integer index array (e.g. a NumPy array).
        b: Second sequence, the same length as ``a``.

    Returns:
        A tuple ``(a_shuffled, b_shuffled)`` in which element ``i`` of both
        results comes from the same original position.

    Raises:
        ValueError: If ``a`` and ``b`` differ in length. Shuffling them
            together would otherwise drop rows silently or fail with an
            out-of-bounds index.
    """
    if len(a) != len(b):
        raise ValueError(
            "a and b must have the same length, got %d and %d" % (len(a), len(b))
        )

    # A permutation of positions (not a boolean mask), drawn from NumPy's
    # global random state.
    order = np.random.permutation(len(a))
    return a[order], b[order]

def predict_and_plot(n, inputs, outputs):
    """Plot the absolute prediction error for the first ``n`` samples.

    Uses the module-level ``model`` (it is not passed in), so a model must
    be defined before this is called. Prints the number of error values
    above 0.5 as a percentage of ``n``, then shows the errors as red crosses
    over blue guide lines at 0, 0.5 and 1.

    Args:
        n: Number of leading samples to compare.
        inputs: Features given to ``model.predict``. Every row is predicted;
            only the first ``n`` predictions are kept.
        outputs: Expected values, indexed at the same first ``n`` positions.
    """
    first_n = np.arange(n)

    all_predictions = model.predict(inputs)
    kept_predictions = all_predictions[first_n]

    # NOTE(review): if outputs is 1-D while predictions are (n, 1), this
    # subtraction would broadcast to (n, n) -- confirm the target shape.
    errors = np.abs(outputs[first_n] - kept_predictions)
    exceeds_threshold = errors > 0.5
    wrong_count = np.sum(exceeds_threshold * 1.0)

    plt.figure(figsize=(20, 3))
    # Horizontal guide lines at 0, 0.5 (the decision threshold) and 1.
    for level in (0.0, 0.5, 1.0):
        plt.plot(np.full(n, level), 'b')
    plt.plot(errors, 'xr')

    print(str(wrong_count / n * 100) + "%")
    plt.show()

### Base NN model

In [4]:
def build(output_size):
    """Create and compile a one-layer sigmoid network.

    Args:
        output_size: Number of units in the single Dense layer, i.e. the
            width of the model's output.

    Returns:
        A compiled Sequential model using binary cross-entropy loss and the
        'sgd' optimizer.
    """
    output_layer = layers.Dense(units=output_size, activation=activations.sigmoid)
    network = models.Sequential([output_layer])

    network.compile(optimizer='sgd', loss='binary_crossentropy')

    return network

def fit(epochs, batch_size, inputs, outputs):
    """Train the module-level ``model`` and return its training history.

    Relies on a global ``model`` being defined before the call; it is not
    passed in.

    Args:
        epochs: Number of passes over the training data.
        batch_size: Number of samples per gradient update.
        inputs: Training features.
        outputs: Training targets.

    Returns:
        The object returned by ``model.fit`` (a Keras ``History``), so the
        per-epoch loss can be inspected or plotted. Callers that ignore the
        return value are unaffected.
    """
    history = model.fit(inputs, outputs,
                        batch_size=batch_size,
                        epochs=epochs)

    return history
