In [5]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


### Reading the dataset
https://www.kaggle.com/uciml/adult-census-income

In [97]:
# Load the Adult Census Income data; expects adult.csv in the working directory.
census= pd.read_csv('adult.csv')

### Data Cleaning

In [121]:
# Rows containing the '?' placeholder in any column are treated as incomplete and dropped.
census_1 = census.mask(census.eq('?')).dropna()
# Keep only the United-States rows. The filter is built from census_1's own column so
# its index matches the frame being filtered; a mask taken from the unfiltered `census`
# has a different index and makes pandas reindex the key (with a warning).
# The explicit copy makes census_1 an independent frame, so assigning columns into it
# later does not write into a slice of another frame.
census_1 = census_1[census_1['native.country'] == 'United-States'].copy()
census_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27504 entries, 1 to 32560
Data columns (total 15 columns):
age               27504 non-null int64
workclass         27504 non-null object
fnlwgt            27504 non-null int64
education         27504 non-null object
education.num     27504 non-null int64
marital.status    27504 non-null object
occupation        27504 non-null object
relationship      27504 non-null object
race              27504 non-null object
sex               27504 non-null object
capital.gain      27504 non-null int64
capital.loss      27504 non-null int64
hours.per.week    27504 non-null int64
native.country    27504 non-null object
income            27504 non-null object
dtypes: int64(6), object(9)
memory usage: 3.4+ MB


  


### Preprocessing numeric columns using Min max scaler

In [127]:
# Min-max scale every numeric column, overwriting the original values in census_1.
numeric_columns = ['age', 'fnlwgt', 'education.num', 'capital.gain', 'capital.loss', 'hours.per.week']
scaler = preprocessing.MinMaxScaler()
census_1[numeric_columns] = scaler.fit_transform(census_1[numeric_columns])

In [151]:
# Class balance of the target: number of rows per income bracket.
census_1['income'].value_counts()

<=50K    20509
>50K      6995
Name: income, dtype: int64

### Separating the features and the target variable

In [130]:
# The target is the income column; the features are every other column.
data_y = census_1['income']
data_x = census_1.drop('income', axis=1)

### One-hot encoding of categorical columns

In [131]:
# One-hot encode the categorical columns and convert the frame to a NumPy array.
# The explicit float cast keeps the array numeric: recent pandas releases emit
# boolean dummy columns, and mixing those with the float features would make
# `.values` an object-dtype array that the NumPy maths further down cannot handle.
data_x = pd.get_dummies(data_x).astype(float).values


### Label Encoding target variable

In [132]:
# Encode the income labels as integer class ids and show the resulting array.
le = LabelEncoder()
data_y = le.fit_transform(data_y)
data_y

array([0, 0, 0, ..., 1, 0, 0])

### Train-test split (80-20)

In [133]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.20, random_state=42)
# Shape of the held-out feature matrix.
X_test.shape

(5501, 64)

### Reshaping data into (n x m), n being the number of features and m being the number of training examples. Target label array is also reshaped into (1 x m) signifying 1 output and m examples.

In [134]:
# Put one example per column: features become (n_features, m), labels become (1, m).
# reshape(1, -1) lets NumPy infer m, so this cell keeps working when the cleaning
# steps or the split ratio change the number of rows.
# NOTE: run this cell once per split; re-running it transposes the features back.
X_train = X_train.T
y_train = y_train.reshape(1, -1)
X_test = X_test.T
y_test = y_test.reshape(1, -1)

In [274]:
# Training inputs and labels; the network functions below read these as globals.
X = X_train
Y = y_train

n_x = X.shape[0] # number of input features (rows of X)
n_h = 2*n_x # number of hidden units (twice the number of inputs)
learning_rate = 0.2 # step size used in the gradient-descent weight updates

### Sigmoid and sigmoid derivative

In [142]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
    return 1.0 / (1.0 + np.exp(-z))


def der_sigmoid(z):
    """Derivative of the logistic function at z: sigmoid(z) * (1 - sigmoid(z))."""
    activation = sigmoid(z)
    return activation * (1.0 - activation)

### Weighted Cross entropy Loss function

In [187]:
def compute_loss_1(Y, Y_hat,a,b):
    """Weighted binary cross-entropy, averaged over the examples.

    Y and Y_hat are arrays with one example per column (the example count is
    read from Y.shape[1]). `a` scales the Y * log(Y_hat) term and `b` scales
    the (1 - Y) * log(1 - Y_hat) term.
    """
    n_examples = Y.shape[1]
    positive_term = np.sum(a * np.multiply(np.log(Y_hat), Y))
    negative_term = np.sum(b * np.multiply(np.log(1 - Y_hat), (1 - Y)))
    return -(1. / n_examples) * (positive_term + negative_term)

### Neural Network function (Numpy implementation)

In [275]:
def neuralnet_modified(activation,derivative,epochs,a,b):
    """Train a one-hidden-layer network with a weighted cross-entropy gradient and score it on the test set.

    The training data (``X``, ``Y``), the test data (``X_test``, ``y_test``), the
    layer sizes (``n_x``, ``n_h``) and ``learning_rate`` are read from notebook
    globals. Weights are re-initialised from a fixed seed on every call, so the
    network is always trained from the same starting point.

    Parameters
    ----------
    activation : callable
        Element-wise activation applied to the hidden layer.
    derivative : callable
        Derivative of ``activation``, evaluated at the hidden pre-activations.
    epochs : int
        Number of full-batch gradient-descent updates.
    a : float
        Weight on the positive-class (``Y == 1``) part of the gradient.
    b : float
        Weight on the negative-class (``Y == 0``) part of the gradient.

    Returns
    -------
    tuple
        ``(fpr, fnr, acc)``: false-positive rate, false-negative rate and
        accuracy on the test set, using a 0.5 cut-off on the output.
    """
    np.random.seed(101)
    # Random normal weights scaled by sqrt(2 / fan_in); biases start at zero.
    W1 = np.random.randn(n_h, n_x)*np.sqrt(2/n_x)
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(1, n_h)*np.sqrt(2/n_h)
    b2 = np.zeros((1, 1))
    m = Y.shape[1]

    for _ in range(epochs):
        # Forward pass over the whole training set.
        Z1 = np.matmul(W1, X) + b1
        A1 = activation(Z1)
        Z2 = np.matmul(W2, A1) + b2
        A2 = sigmoid(Z2)

        # Output-layer gradient of the weighted cross-entropy with respect to Z2.
        # The loss value itself is not needed for the updates, so it is not computed.
        dZ2 = -a*np.multiply(Y,(1-A2))+ b*np.multiply(A2,(1-Y))
        dW2 = (1./m) * np.matmul(dZ2, A1.T)
        db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

        # Back-propagate through the hidden layer.
        dA1 = np.matmul(W2.T, dZ2)
        dZ1 = dA1* derivative(Z1)
        dW1 = (1./m) * np.matmul(dZ1, X.T)
        db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)

        # Gradient-descent step.
        W2 = W2 - learning_rate * dW2
        b2 = b2 - learning_rate * db2
        W1 = W1 - learning_rate * dW1
        b1 = b1 - learning_rate * db1

    # Score the trained network on the held-out test set.
    Z1 = np.matmul(W1, X_test) + b1
    A1 = activation(Z1)
    Z2 = np.matmul(W2, A1) + b2
    A2 = sigmoid(Z2)

    predictions = (A2>.5)[0,:]
    labels = (y_test == 1)[0,:]
    # Pinning both classes keeps the matrix 2x2, so the four-way unpack still works
    # when only one class appears among the labels and predictions.
    tn, fp, fn, tp = confusion_matrix(labels, predictions, labels=[False, True]).ravel()
    fpr= fp/(fp+tn)
    fnr= fn/(fn+tp)
    acc= accuracy_score(labels, predictions)
    return(fpr,fnr,acc)

### Enter the target FPR threshold

In [266]:
# Target false-positive rate typed in by the user; input() returns a string, so it is converted with float() where it is used.
threshold = input()

0.4


In [267]:
# Raise the positive-class weight `a` in steps of 0.2, retraining from scratch each
# time, until the test-set FPR reaches the requested threshold.
a = 1
b = 1
epochs = 400
fpr_target = float(threshold)  # input() returned a string
while True:
    # Each weight is trained exactly once (no separate warm-up run for a=1).
    fpr, fnr, test_acc = neuralnet_modified(sigmoid, der_sigmoid, epochs, a, b)
    print('FPR: ',fpr,'FNR: ',fnr, 'Test Accuracy: ',test_acc)
    if fpr >= fpr_target:
        # Stop before incrementing so `a` still holds the weight that met the target.
        break
    a = a + 0.2

FPR:  0.07149829184968277 FNR:  0.5110477548111191 Test Accuracy:  0.8163970187238684
FPR:  0.09175207418252807 FNR:  0.45759087669280113 Test Accuracy:  0.8149427376840574
FPR:  0.11078574914592484 FNR:  0.42052744119743407 Test Accuracy:  0.8102163243046718
FPR:  0.12347486578818936 FNR:  0.383464005702067 Test Accuracy:  0.8102163243046718
FPR:  0.148365056124939 FNR:  0.3435495367070563 Test Accuracy:  0.8018542083257589
FPR:  0.16861883845778428 FNR:  0.3050605844618674 Test Accuracy:  0.7965824395564443
FPR:  0.18570034163006344 FNR:  0.2758374910905203 Test Accuracy:  0.7913106707871296
FPR:  0.20717423133235724 FNR:  0.24019957234497505 Test Accuracy:  0.7844028358480276
FPR:  0.232796486090776 FNR:  0.20028510334996436 Test Accuracy:  0.7754953644791857
FPR:  0.258174719375305 FNR:  0.17320028510334998 Test Accuracy:  0.7634975459007454
FPR:  0.28184480234260617 FNR:  0.1525302922309337 Test Accuracy:  0.7511361570623523
FPR:  0.3008784773060029 FNR:  0.12829650748396293 Test 

The final model, reached after 27 iterations with an FPR of 0.40, has an FNR of 0.055 and an accuracy of 68.69%.

### Checking the confusion matrix and evaluation metrics using the value of a obtained (a = 6.2)

In [278]:
def _forward_pass(W1, b1, W2, b2, inputs, act):
    """Return the output-layer activations for `inputs` (one example per column)."""
    A1 = act(np.matmul(W1, inputs) + b1)
    return sigmoid(np.matmul(W2, A1) + b2)


def _print_split_report(split_name, A2, targets):
    """Print the confusion matrix, classification report and accuracy for one data split.

    Returns the confusion matrix and the accuracy so the caller can derive further rates.
    """
    predictions = (A2>.5)[0,:]
    labels = (targets == 1)[0,:]
    # Pinning both classes keeps the matrix 2x2 even if one class never appears.
    matrix = confusion_matrix(labels, predictions, labels=[False, True])
    print(matrix)
    print(classification_report(labels,predictions))
    accuracy = accuracy_score(labels, predictions)
    print(split_name + ' Accuracy =' , accuracy)
    return matrix, accuracy


def neuralnet_modified_test(activation,derivative,epochs,a,b):
    """Train the weighted-loss network and print full train and test reports.

    Training reads ``X``, ``Y``, ``n_x``, ``n_h`` and ``learning_rate`` from
    notebook globals; the reports use ``X_train``/``y_train`` and
    ``X_test``/``y_test``. Weights are re-initialised from a fixed seed on every
    call. The cost is printed every 100 epochs and once more after training.

    Parameters
    ----------
    activation : callable
        Element-wise activation applied to the hidden layer.
    derivative : callable
        Derivative of ``activation``, evaluated at the hidden pre-activations.
    epochs : int
        Number of full-batch gradient-descent updates.
    a : float
        Weight on the positive-class (``Y == 1``) term of the loss.
    b : float
        Weight on the negative-class (``Y == 0``) term of the loss.

    Returns
    -------
    tuple
        ``(fpr, fnr, acc)``: false-positive rate, false-negative rate and
        accuracy on the test set, using a 0.5 cut-off on the output.
    """
    np.random.seed(101)
    # Random normal weights scaled by sqrt(2 / fan_in); biases start at zero.
    W1 = np.random.randn(n_h, n_x)*np.sqrt(2/n_x)
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(1, n_h)*np.sqrt(2/n_h)
    b2 = np.zeros((1, 1))
    m = Y.shape[1]

    # Defined up front so the final print still works when epochs == 0.
    cost = float('nan')

    for i in range(epochs):
        # Forward pass over the whole training set.
        Z1 = np.matmul(W1, X) + b1
        A1 = activation(Z1)
        Z2 = np.matmul(W2, A1) + b2
        A2 = sigmoid(Z2)

        cost = compute_loss_1(Y, A2,a,b)

        # Output-layer gradient of the weighted cross-entropy with respect to Z2.
        dZ2 = -a*np.multiply(Y,(1-A2))+ b*np.multiply(A2,(1-Y))
        dW2 = (1./m) * np.matmul(dZ2, A1.T)
        db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

        # Back-propagate through the hidden layer.
        dA1 = np.matmul(W2.T, dZ2)
        dZ1 = dA1* derivative(Z1)
        dW1 = (1./m) * np.matmul(dZ1, X.T)
        db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)

        # Gradient-descent step.
        W2 = W2 - learning_rate * dW2
        b2 = b2 - learning_rate * db2
        W1 = W1 - learning_rate * dW1
        b1 = b1 - learning_rate * db1

        if i % 100 == 0:
            print("Epoch", i, "cost: ", cost)

    # This is the cost measured at the start of the last epoch, before its update.
    print("Final cost:", cost)

    # Report on the training split, then on the held-out test split.
    _print_split_report('Train', _forward_pass(W1, b1, W2, b2, X_train, activation), y_train)
    test_matrix, acc = _print_split_report(
        'Test', _forward_pass(W1, b1, W2, b2, X_test, activation), y_test)

    tn, fp, fn, tp = test_matrix.ravel()
    fpr= fp/(fp+tn)
    fnr= fn/(fn+tp)
    return(fpr,fnr,acc)

In [280]:
# Retrain for 400 epochs with the positive-class weight found by the search (a=6.2, b=1) and print the train/test reports.
neuralnet_modified_test(sigmoid,der_sigmoid,400,6.2,1)

Epoch 0 cost:  1.996789705916368
Epoch 100 cost:  1.0048146717777364
Epoch 200 cost:  0.947619566830966
Epoch 300 cost:  0.9224960900714368
Final cost: 0.906582293324698
[[9872 6539]
 [ 346 5246]]
             precision    recall  f1-score   support

      False       0.97      0.60      0.74     16411
       True       0.45      0.94      0.60      5592

avg / total       0.83      0.69      0.71     22003

Train Accuracy = 0.68708812434668
[[2454 1644]
 [  78 1325]]
             precision    recall  f1-score   support

      False       0.97      0.60      0.74      4098
       True       0.45      0.94      0.61      1403

avg / total       0.84      0.69      0.71      5501

Test Accuracy = 0.6869660061806944


(0.4011713030746706, 0.05559515324305061, 0.6869660061806944)

### Conclusion- 
By setting a threshold of 0.4 (i.e., a 40% FPR), we were able to reduce the FNR from about 51% at the start to approximately 5.5%. The accuracy drops, or at best stays flat, each time we increment a; in this case it fell from 81.6% to 68.69%. Reaching this threshold required significant computational effort: 27 iterations of 400 epochs each, for a total of 10,800 epochs.

### References:- 
http://jonathanweisberg.org/post/A%20Neural%20Network%20from%20Scratch%20-%20Part%201/
https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits