In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import random

In [None]:
df1 = pd.read_csv('census-income.data.csv')
df2 = pd.read_csv('census-income.test.csv')


In [None]:
def determine_type_of_feature(df):
    feature_types = []
    n_unique_values_threshold = 15

    for column in df.columns:
        unique_values = df[column].unique()
        example_value = unique_values[0]

        if (isinstance(example_value, str)) or (len(unique_values) <= n_unique_values_threshold):
            feature_types.append("Discrete")
        else:
            feature_types.append("Continuous")
    
    return feature_types

In [None]:
global FEATURE_TYPES
FEATURE_TYPES = determine_type_of_feature(df1)

In [None]:
def missing_values(df):
    c=0
    for i in df.columns:
        x,count_missing = np.unique(df[i].eq('?'),return_counts=True)
        if len(x)==2:
            print(i,"-",FEATURE_TYPES[c],"-",count_missing[1])
        # elif len(x)==1 and x[0] == False:
        #     print(i,"-",FEATURE_TYPES[c],"- 0",)
        c+=1

In [None]:
def fill_values(df):
    #Since all columns are having the discrete value. We replace it with the missing values with the modes, i.e. the highest appearing value
    mode_workclass = df.workclass.mode()[0]
    mode_occupation = df.occupation.mode()[0]
    mode_native_countr = df.native_countr.mode()[0]

    #Filling the train and test data with the modes of missing values as they are discrete
    df.workclass = df.workclass.replace('?', mode_workclass)
    df.occupation = df.occupation.replace('?', mode_occupation)
    df.native_countr = df.native_countr.replace('?', mode_native_countr)
    missing_values(df)#No missing values
    return df

In [None]:
df2['label'] = df2['label'].str.replace('.', '')
missing_values(df1)
missing_values(df2)
fill_values(df1)
fill_values(df2)

In [None]:
def train_test_split(df,test_size):
    #checks whether the test_size is a proportion of the total number of samples
    if isinstance(test_size,float): 
        test_size = round(test_size*len(df))

    #store random samples in the test and training data
    indices = df.index.tolist()
    test_indices = random.sample(population=indices, k = test_size)

    #random data points from sample sent to test and training data
    test_df = df.loc[test_indices]
    train_df = df.drop(test_indices)

    return train_df , test_df

In [None]:
df_combined = pd.concat([df1,df2],ignore_index=True)

In [None]:
workclass_dict = {
    'Federal-gov': 1,
    'Local-gov': 2,
    'Never-worked': 3,
    'Private': 4,
    'Self-emp-inc': 5,
    'Self-emp-not-inc': 6,
    'State-gov': 7,
    'Without-pay': 8
}

education_dict = {
    '10th': 1,
    '11th': 2,
    '12th': 3,
    '1st-4th': 4,
    '5th-6th': 5,
    '7th-8th': 6,
    '9th': 7,
    'Assoc-acdm': 8,
    'Assoc-voc': 9,
    'Bachelors': 10,
    'Doctorate': 11,
    'HS-grad': 12,
    'Masters': 13,
    'Preschool': 14,
    'Prof-school': 15,
    'Some-college': 16
}

marital_status_dict = {
    'Divorced': 1,
    'Married-AF-spouse': 2,
    'Married-civ-spouse': 3,
    'Married-spouse-absent': 4,
    'Never-married': 5,
    'Separated': 6,
    'Widowed': 7
}

occupation_dict = {
    'Adm-clerical': 1,
    'Armed-Forces': 2,
    'Craft-repair': 3,
    'Exec-managerial': 4,
    'Farming-fishing': 5,
    'Handlers-cleaners': 6,
    'Machine-op-inspct': 7,
    'Other-service': 8,
    'Priv-house-serv': 9,
    'Prof-specialty': 10,
    'Protective-serv': 11,
    'Sales': 12,
    'Tech-support': 13,
    'Transport-moving': 14
}

relationship_dict = {
    'Husband': 1,
    'Not-in-family': 2,
    'Other-relative': 3,
    'Own-child': 4,
    'Unmarried': 5,
    'Wife': 6
}

race_dict = {
    'Amer-Indian-Eskimo': 1,
    'Asian-Pac-Islander': 2,
    'Black': 3,
    'Other': 4,
    'White': 5
}

sex_dict = {
    'Female': 1,
    'Male': 2
}

native_countr_dict = {
    'Cambodia': 1,
    'Canada': 2,
    'China': 3,
    'Columbia': 4,
    'Cuba': 5,
    'Dominican-Republic': 6,
    'Ecuador': 7,
    'El-Salvador': 8,
    'England': 9,
    'France': 10,
    'Germany': 11,
    'Greece': 12,
    'Guatemala': 13,
    'Haiti': 14,
    'Holand-Netherlands': 15,
    'Honduras': 16,
    'Hong': 17,
    'Hungary': 18,
    'India': 19,
    'Iran': 20,
    'Ireland': 21,
    'Italy': 22,
    'Jamaica': 23,
    'Japan': 24,
    'Laos': 25,
    'Mexico': 26,
    'Nicaragua': 27,
    'Outlying-US(Guam-USVI-etc)': 28,
    'Peru': 29,
    'Philippines': 30,
    'Poland': 31,
    'Portugal': 32,
    'Puerto-Rico': 33,
    'Scotland': 34,
    'South': 35,
    'Taiwan': 36,
    'Thailand': 37,
    'Trinadad&Tobago': 38,
    'United-States': 39,
    'Vietnam': 40,
    'Yugoslavia': 41
}

label_dict = {
    '<=50K': 0,
    '>50K': 1
}

In [None]:
df_combined.workclass = df_combined.workclass.map(workclass_dict)

df_combined.education = df_combined.education.map(education_dict)

df_combined.marital_status = df_combined.marital_status.map(marital_status_dict)

df_combined.occupation = df_combined.occupation.map(occupation_dict)

df_combined.relationship = df_combined.relationship.map(relationship_dict)

df_combined.race = df_combined.race.map(race_dict)

df_combined.sex = df_combined.sex.map(sex_dict)

df_combined.native_countr = df_combined.native_countr.map(native_countr_dict)

df_combined.label = df_combined.label.map(label_dict)

In [None]:
df_combined.head()

In [None]:
df_train , df_test = train_test_split(df_combined , 0.33)



In [None]:
df_test.head()

In [None]:
y_test = df_test.label.values

df_test = df_test.drop('label',axis = 1)

x_test = df_test.values

y_train = df_train.label.values

df_train = df_train.drop('label',axis =1)

x_train = df_train.values

x_train = x_train.T
x_test = x_test.T

In [None]:
def normalize_data(data):
    max_row = np.amax(data,axis=1)
    min_row = np.amin(data,axis=1)
    diff = max_row-min_row
    diff = diff.reshape(diff.shape[0],1)
    max_row = max_row.reshape(max_row.shape[0],1)
    min_row = min_row.reshape(min_row.shape[0],1)
    diff = diff.reshape(diff.shape[0],1)
    data = np.divide((data-min_row),diff)

    return(data)

In [None]:
x_test = normalize_data(x_test)
x_train = normalize_data(x_train)
y_test =  y_test.reshape(1,y_test.shape[0])
y_train =  y_train.reshape(1,y_train.shape[0])


In [None]:
m_train = x_train.shape[1]
m_test = x_test.shape[1]

print ("Number of training examples: m_train = " + str(m_train))
print ("Number of testing examples: m_test = " + str(m_test))

print ("train_set_x shape: " + str(x_train.shape))
print ("train_set_y shape: " + str(y_train.shape))
print ("test_set_x shape: " + str(x_test.shape))
print ("test_set_y shape: " + str(y_test.shape))

In [None]:
def initialize_parameters_deep(layer_dims):

    parameters = {}
    L = len(layer_dims) # number of layers in the network

    for l in range(1, L):

        parameters['W'+ str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
        parameters['b'+ str(l)] = np.zeros((layer_dims[l],1))

        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters

In [None]:
def sigmoid(Z):

    A = 1/(1+np.exp(-Z))
    cache = Z

    return A, cache

In [None]:
def relu(Z):

    A = np.maximum(0,Z)
    assert(A.shape == Z.shape)

    cache = Z
    return A, cache


In [None]:
def tanh_(Z):

    A = np.tanh(Z)
    cache = Z

    return A,cache 

In [None]:
def tanh_backward(dA,cache):

    Z = cache
    dZ = dA*(1-np.square(np.tanh(Z)))

    return dZ

In [None]:
def relu_backward(dA, cache):

    Z = cache
    dZ = np.array(dA, copy=True) # just converting dz to a correct object

    # When z <= 0,setting dz to 0 as well.
    dZ[Z <= 0] = 0
    assert (dZ.shape == Z.shape)
    return dZ

In [None]:
def sigmoid_backward(dA, cache):

    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)

    assert (dZ.shape == Z.shape)
    return dZ


In [None]:
def linear_forward(A, W, b):

    Z = np.dot(W,A) + b
    cache = (A, W, b)

    return Z, cache

In [None]:
def linear_activation_forward(A_prev, W, b, activation):

    if activation == "sigmoid":

        Z , linear_cache = linear_forward(A_prev,W,b)
        A , activation_cache = sigmoid(Z)

    elif activation == "relu":

        Z , linear_cache = linear_forward(A_prev,W,b)
        A , activation_cache = relu(Z)

    elif activation == "tanh":
        Z, linear_cache = linear_forward(A_prev,W,b)
        A , activation_cache = tanh_(Z)

    cache = (linear_cache, activation_cache)

    return A, cache

In [None]:
def L_model_forward(X, parameters):

    caches = [] #will contain the list of caches
    A = X #for the input layer
    L = len(parameters) // 2 #Since we have W and b(bias) we divide by 2

    for l in range(1, L):
        A_prev = A

        A , cache = linear_activation_forward(A_prev,parameters['W'+str(l)],parameters['b'+str(l)],activation = "relu")
        caches.append(cache)

    AL , cache = linear_activation_forward(A,parameters['W'+str(L)],parameters['b'+str(L)],activation="sigmoid")
    caches.append(cache)

    return AL, caches


In [None]:
def compute_cost(AL, Y):

    m = Y.shape[1]
    cost = (-1/m)*np.sum(Y*np.log(AL)+(1-Y)*np.log(1-AL))
    cost = np.squeeze(cost)

    return cost

In [None]:
def linear_backward(dZ, cache):

    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = (1/m)*np.dot(dZ,A_prev.T)
    db = (1/m)*np.sum(dZ, axis =1,keepdims = True)
    dA_prev = np.dot(W.T,dZ)

    return dA_prev, dW, db

In [None]:
def linear_activation_backward(dA, cache, activation):

    linear_cache, activation_cache = cache

    if activation == "relu":

        dZ = relu_backward(dA,activation_cache)
        dA_prev,dW,db = linear_backward(dZ,linear_cache)
        # YOUR CODE ENDS HERE

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA,activation_cache)
        dA_prev,dW,db = linear_backward(dZ,linear_cache)

    elif activation == "tanh":
        dZ = tanh_backward(dA,activation_cache)
        dA_prev,dW,db = linear_backward(dZ,linear_cache)

    return dA_prev, dW, db

In [None]:
def L_model_backward(AL, Y, caches):
    
    grads = {}
    L = len(caches) # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL

    dAL = -( np.divide(Y,AL) - np.divide(1-Y,1-AL) )

    current_cache = caches[L-1]
    dA_prev_temp,dW_temp,db_temp = linear_activation_backward(dAL, current_cache , 'sigmoid') 
    grads["dA"+str(L-1)]= dA_prev_temp
    grads["dW"+str(L)]= dW_temp
    grads["db"+str(L)]= db_temp

    for l in reversed(range(L-1)):

        current_cache = caches[l]
        dA_prev_temp,dW_temp,db_temp = linear_activation_backward(grads["dA"+str(l+1)], current_cache , 'relu')
        grads["dA"+str(l)]= dA_prev_temp
        grads["dW"+str(l+1)]= dW_temp
        grads["db"+str(l+1)]= db_temp

    return grads

In [None]:
def update_parameters(params, grads, learning_rate):

    parameters = params.copy()
    L = len(parameters) // 2 # number of layers in the neural network

    for l in range(L):

        parameters["W" + str(l+1)] = parameters["W"+str(l+1)] - learning_rate*grads["dW"+str(l+1)]
        parameters["b" + str(l+1)] = parameters["b"+str(l+1)] - learning_rate*grads["db"+str(l+1)]

    return parameters



In [None]:
def L_layer_model(X, Y, layers_dims, learning_rate = 1, num_iterations = 3000, print_cost=False):

    costs = [] 

    parameters = initialize_parameters_deep(layers_dims)

    for i in range(0, num_iterations):

        AL , caches = L_model_forward(X, parameters)

        cost = compute_cost(AL, Y)

        grads = L_model_backward(AL, Y, caches)

        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the cost every 100 iterations
        if print_cost and i % 1000 == 0 or i == num_iterations - 1:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if i % 1000 == 0 or i == num_iterations:
            costs.append(cost)

    return parameters, costs

In [None]:
def predict(X, y, parameters):

    m = X.shape[1]
    n = len(parameters) // 2 
    p = np.zeros((1,m))


    probas, caches = L_model_forward(X, parameters)

    for i in range(0, probas.shape[1]):
        if probas[0,i] > 0.5:
            p[0,i] = 1
        else:
            p[0,i] = 0

    # print("Accuracy: "  + str(np.sum((p == y)/m)))

    return p

In [None]:
layer_single_hidden_dims = [14,7,1]
layer_double_hidden_dims = [14,11,7,1]
layer_triple_hidden_dims = [14,10,6,4,1]


For Single Hidden Layer

In [None]:
layers_dims = layer_single_hidden_dims
parameters_1, costs_1 = L_layer_model(x_train, y_train, layers_dims,learning_rate = 3, num_iterations = 10000, print_cost = False)
print(costs_1)
pred_train1 = predict(x_train, y_train, parameters_1)
pred_test1 = predict(x_test, y_test,parameters_1)
acc_train1 = np.sum((pred_train1 == y_train)/y_train.shape[1])
acc_test1 = np.sum((pred_test1 == y_test)/y_test.shape[1])
print("Training accuracy for 1 hidden layer",np.sum((pred_train1 == y_train)/y_train.shape[1]))
print("Test accuracy for 1 hidden layer",np.sum(np.sum((pred_test1 == y_test)/y_test.shape[1])))

For Two Hidden Layers

In [None]:
layers_dims = layer_double_hidden_dims
parameters_2, costs_2 = L_layer_model(x_train, y_train, layers_dims,learning_rate = 3, num_iterations = 10000, print_cost = False)
print(costs_2)
pred_train2 = predict(x_train, y_train, parameters_2)
pred_test2 = predict(x_test, y_test,parameters_2)
acc_train2 = np.sum((pred_train2 == y_train)/y_train.shape[1])
acc_test2 = np.sum((pred_test2 == y_test)/y_test.shape[1])
print("Training accuracy for 2 hidden layer",np.sum((pred_train2 == y_train)/y_train.shape[1]))
print("Test accuracy for 2 hidden layer",np.sum(np.sum((pred_test2 == y_test)/y_test.shape[1])))


For Three Hidden Layers

In [None]:
layers_dims = layer_triple_hidden_dims
parameters_3, costs_3 = L_layer_model(x_train, y_train, layers_dims,learning_rate =3, num_iterations = 10000, print_cost = False)
print(costs_3)
pred_train3 = predict(x_train, y_train, parameters_3)
pred_test3 = predict(x_test, y_test,parameters_3)
acc_train3 = np.sum((pred_train3 == y_train)/y_train.shape[1])
acc_test3 = np.sum((pred_test3 == y_test)/y_test.shape[1])
print("Training accuracy for 3 hidden layer",np.sum((pred_train3 == y_train)/y_train.shape[1]))
print("Test accuracy for 3 hidden layer",np.sum(np.sum((pred_test3 == y_test)/y_test.shape[1])))


In [None]:
# np.sum((pred_test1 == y_test)/y_test.shape[1])

In [39]:
layer_new = [14,1]
layers_dims = layer_new
parameters_4, costs_4 = L_layer_model(x_train, y_train, layers_dims,learning_rate =1.5, num_iterations = 10000, print_cost = False)
print(costs_4)
pred_train4 = predict(x_train, y_train, parameters_4)
pred_test4 = predict(x_test, y_test,parameters_4)
acc_train4 = np.sum((pred_train4 == y_train)/y_train.shape[1])
acc_test4 = np.sum((pred_test4 == y_test)/y_test.shape[1])
print("Training accuracy for 0 hidden layer",acc_train4)
print("Test accuracy for 0 hidden layer",acc_test4)

Cost after iteration 9999: 0.3853412494985926
[0.6920383234986953, 0.4035053465788617, 0.39744958628845545, 0.39418526006856147, 0.3918465211091589, 0.390062844463366, 0.3886686780666483, 0.38756080203736204, 0.38666823407917167, 0.3859404386929294]
Training accuracy for 0 hidden layer 0.8226378193374896
Test accuracy for 0 hidden layer 0.8251023700210942
