In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from implementations import *
from helpers import *

In [2]:
# Define the sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Define the neural network class
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize the weights and biases for the input and hidden layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lr = 0.3  # Learning rate
        self.lr_decay = 0.95  # Learning rate decay factor
        self.min_lr = 0.001 

        self.weights_input_hidden = np.random.rand(self.input_size, self.hidden_size)
        self.bias_hidden = np.zeros((1, self.hidden_size))

        # Initialize the weights and biases for the hidden and output layers
        self.weights_hidden_output = np.random.rand(self.hidden_size, self.output_size)
        self.bias_output = np.zeros((1, self.output_size))

    def forward(self, x):
        # Forward propagation
        self.hidden_layer_input = np.dot(x, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = sigmoid(self.hidden_layer_input)

        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.output_layer_output = sigmoid(self.output_layer_input)

        return self.output_layer_output

    def backward(self, x, y):
        # Backpropagation with cross-entropy loss
        output = self.forward(x)
        error = y - output

        # Calculate gradients
        delta_output = error

        # Update weights and biases
        self.weights_hidden_output += self.hidden_layer_output.T.dot(delta_output) * self.lr
        delta_hidden = delta_output.dot(self.weights_hidden_output.T) * self.hidden_layer_output * (1 - self.hidden_layer_output)
        self.weights_input_hidden += x.T.reshape(-1, 1).dot(delta_hidden) * self.lr
        self.bias_output += np.sum(delta_output, axis=0, keepdims=True) * self.lr
        self.bias_hidden += np.sum(delta_hidden, axis=0, keepdims=True) * self.lr

    def train(self, X, y, epochs,batching=True, batch_size=16):
        show_loss_every=100 if batching else 1
        show_f1_every=1000 if batching else 1
        prev_loss = float('inf')  # Store previous loss to check for stagnation
        consecutive_bad_epochs = 0
        for epoch in range(epochs):
            
            indices = np.arange(len(X))
            np.random.shuffle(indices)

            if batching:
                batch_indices = indices[:batch_size]
            else:
                batch_indices=indices
            x_batch = X[batch_indices]
            y_batch = y[batch_indices]

            for j in range(len(x_batch)):
                x = x_batch[j]
                target = y_batch[j]
                self.backward(x, target)
            
            
            loss = self.calculate_cross_entropy_loss(X, y)
            if epoch % show_loss_every == 0:
                
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
                print(f"learning rate: {self.lr}")

            if epoch % show_f1_every == 0:
                f1 = self.compute_f1_score(X, y)
                print(f"Epoch {epoch}, F1 Score: {f1:.4f}")

            # Decay the learning rate
            if loss > prev_loss:
                consecutive_bad_epochs += 1
                if consecutive_bad_epochs >= 3:
                    self.lr *= self.lr_decay  # Decrease the learning rate
            else:
                consecutive_bad_epochs = 0

            prev_loss = loss

            # Ensure the learning rate doesn't go below a minimum value
            if self.lr < self.min_lr:
                self.lr = self.min_lr

    def predict(self, x):
        return np.round(self.forward(x))

    def calculate_cross_entropy_loss(self, X, y):
        predictions = self.forward(X)
        # Avoid division by zero and numerical instability
        epsilon = 1e-15
        predictions = np.clip(predictions, epsilon, 1 - epsilon)
        loss = - (y * np.log(predictions) + (1 - y) * np.log(1 - predictions))
        return np.mean(loss)

    def compute_f1_score(self, X, y):
        true_positives = 0
        false_positives = 0
        false_negatives = 0

        predictions = []

        for i in range(len(X)):
            x = X[i]
            target = y[i]
            prediction = self.predict(x)
            predictions.append(prediction)  # Store predictions

            if target == 1 and prediction == 1:
                true_positives += 1
            elif target == 0 and prediction == 1:
                false_positives += 1
            elif target == 1 and prediction == 0:
                false_negatives += 1
        if true_positives==0 and (false_positives==0 or false_negatives==0):
            return 0
        precision = true_positives / (true_positives + false_positives)
        recall = true_positives / (true_positives + false_negatives)
        
        # Calculate F1 score
        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0

        return f1




In [3]:
x_tr = pd.read_csv('data/x_train.csv')
y_tr = pd.read_csv('data/y_train.csv')

In [8]:
len(list(x_tr.columns)[1:])

321

In [10]:
{col:i for i,col in enumerate(list(x_tr.columns)[1:])}

['_STATE',
 'FMONTH',
 'IDATE',
 'IMONTH',
 'IDAY',
 'IYEAR',
 'DISPCODE',
 'SEQNO',
 '_PSU',
 'CTELENUM',
 'PVTRESD1',
 'COLGHOUS',
 'STATERES',
 'CELLFON3',
 'LADULT',
 'NUMADULT',
 'NUMMEN',
 'NUMWOMEN',
 'CTELNUM1',
 'CELLFON2',
 'CADULT',
 'PVTRESD2',
 'CCLGHOUS',
 'CSTATE',
 'LANDLINE',
 'HHADULT',
 'GENHLTH',
 'PHYSHLTH',
 'MENTHLTH',
 'POORHLTH',
 'HLTHPLN1',
 'PERSDOC2',
 'MEDCOST',
 'CHECKUP1',
 'BPHIGH4',
 'BPMEDS',
 'BLOODCHO',
 'CHOLCHK',
 'TOLDHI2',
 'CVDSTRK3',
 'ASTHMA3',
 'ASTHNOW',
 'CHCSCNCR',
 'CHCOCNCR',
 'CHCCOPD1',
 'HAVARTH3',
 'ADDEPEV2',
 'CHCKIDNY',
 'DIABETE3',
 'DIABAGE2',
 'SEX',
 'MARITAL',
 'EDUCA',
 'RENTHOM1',
 'NUMHHOL2',
 'NUMPHON2',
 'CPDEMO1',
 'VETERAN3',
 'EMPLOY1',
 'CHILDREN',
 'INCOME2',
 'INTERNET',
 'WEIGHT2',
 'HEIGHT3',
 'PREGNANT',
 'QLACTLM2',
 'USEEQUIP',
 'BLIND',
 'DECIDE',
 'DIFFWALK',
 'DIFFDRES',
 'DIFFALON',
 'SMOKE100',
 'SMOKDAY2',
 'STOPSMK2',
 'LASTSMK2',
 'USENOW3',
 'ALCDAY5',
 'AVEDRNK2',
 'DRNK3GE5',
 'MAXDRNKS',
 'FRUITJU

In [4]:
intresting_features=["GENHLTH","POORHLTH","HLTHPLN1","CHECKUP1","BPMEDS","TOLDHI2","CVDSTRK3","LADULT","PHYSHLTH","MENTHLTH",
                     "HLTHPLN1","CHECKUP1","TOLDHI2","CVDSTRK3","CHCOCNCR","CHCCOPD1","HAVARTH3","CHCKIDNY","DIABETE3","SEX"]

In [5]:
dico_ever_told={1:1,2:0,7:0.5,9:0}
dico_transfos={"GENHLTH":{7:4,9:4},"POORHLTH":{88:0,77:0,99:0},"HLTHPLN1":{1:1, 2:0,7:0.5,9:0},"CHECKUP1":{1:1,2:2,3:3,4:4,8:15,7:0.5,9:0},"BPMEDS":{1:1,2:0,7:0.5,9:0},"TOLDHI2":{1:1,2:0,7:0.5,9:0},
               "CVDCRHD4":{1:1,2:0,7:0.5,9:0},"LADULT":{2:0},"PHYSHLTH":{88:0,77:np.nan,99:np.nan},"MENTHLTH":{88:0,77:0,99:0},"HLTHPLN1":dico_ever_told," CHECKUP1":{3:5,4:10,7:2,8:100,9:3},
               "CVDSTRK3":dico_ever_told,"TOLDHI2":dico_ever_told,"CHCOCNCR":dico_ever_told,"CHCCOPD1":dico_ever_told,"CVDSTRK3":dico_ever_told,"HAVARTH3":dico_ever_told,
               "CHCKIDNY":dico_ever_told,"DIABETE3":dico_ever_told,"SEX":{2:0}}

In [6]:
x_tr2=x_tr.copy()
for col in intresting_features:
    x_tr2[col].replace(dico_transfos[col],inplace=True)

In [7]:
x_tr_cleaned=x_tr2[intresting_features].copy()
fill_mean=False
if fill_mean:
    for col in intresting_features:
        x_tr_cleaned[col]=x_tr_cleaned[col].fillna(x_tr_cleaned[col].mean())
else:
    x_tr_cleaned=x_tr_cleaned.fillna(0).copy()
#x_tr_cleaned=x_tr2.fillna(0).copy()
y_tr_cleaned=y_tr.copy()
# x_tr_cleaned=x_tr[intresting_features+["Id"]].dropna()
# not_na_ids=x_tr_cleaned["Id"].values
# y_tr_cleaned=y_tr.loc[y_tr["Id"].isin(not_na_ids)]
# x_tr_cleaned=x_tr_cleaned[intresting_features]
y_tr_cleaned=y_tr_cleaned["_MICHD"]
y_tr_cleaned.replace({-1:0},inplace=True)

In [8]:
np.where(np.isnan(x_tr_cleaned.values))

(array([], dtype=int64), array([], dtype=int64))

In [9]:
x_tr_cleaned.values.shape

(328135, 20)

In [10]:
x_tra, x_val, y_tra, y_val=split_data(x_tr_cleaned.values,y_tr_cleaned.values.ravel(),ratio=0.75,seed=69)

In [11]:
def make_predictions(x,w,threshold,apply_sigmoid):
    w2=w.ravel()
    y_pred=x.dot(w2.T)
    if threshold==None:
        threshold=0.5
    if apply_sigmoid:
        y_pred=sigmoid(y_pred)
    y_pred=np.array([0 if prediction<threshold else 1 for prediction in y_pred])
    return y_pred
def compute_scores(x,w,y,threshold=None,apply_sigmoid=False):
    y_pred=make_predictions(x,w,threshold,apply_sigmoid) 
    TP=np.sum(np.logical_and(y_pred==1,y==1))
    FP=np.sum(np.logical_and(y_pred==1,y==0))
    FN=np.sum(np.logical_and(y_pred==0,y==1))
    precision=TP/(TP+FP)
    recall=TP/(TP+FN)
    f1=2*precision*recall/(precision+recall)
    return precision,recall,f1
    
x_tra_scaled=(x_tra-np.mean(x_tra,axis=0)[None,:])/np.std(x_tra,axis=0)
x_val_scaled=(x_val-np.mean(x_tra,axis=0)[None,:])/np.std(x_tra,axis=0)

In [12]:
np.where(y_tra==1)[0].shape[0]/y_tra.shape[0]

0.08872373537693874

In [13]:
required_ones = int(0.3 * len(y_tra))
existing_ones_indices = np.where(y_tra == 1)[0]
zeros_indices = np.where(y_tra == 0)[0]
ones_indices = np.random.choice(existing_ones_indices, required_ones - len(existing_ones_indices), replace=True)
zeros_indices=np.random.choice(zeros_indices,len(y_tra)-required_ones,replace=False)
result_indices = np.concatenate([existing_ones_indices, ones_indices,zeros_indices])

In [14]:
x_tra_scaled1=x_tra_scaled[result_indices].copy()
y_tra1=y_tra[result_indices].copy()

## The NN model :

In [15]:
input_size = x_tra_scaled.shape[1]
hidden_size = 16
output_size = 1
model = NeuralNetwork(input_size, hidden_size, output_size)

# Train the model
model.train(x_tra_scaled1, y_tra1.reshape(-1,1), epochs=3000,batching=True,batch_size=64)

Epoch 0, Loss: 0.7175
learning rate: 0.3
Epoch 0, F1 Score: 0.5983
Epoch 100, Loss: 0.4978
learning rate: 0.23213428124999994
Epoch 200, Loss: 0.5192
learning rate: 0.1796210817715136
Epoch 300, Loss: 0.4593
learning rate: 0.11916429553746546
Epoch 400, Loss: 0.4839
learning rate: 0.11320608076059219
Epoch 500, Loss: 0.4601
learning rate: 0.08759670730163172
Epoch 600, Loss: 0.4836
learning rate: 0.07905602833972261
Epoch 700, Loss: 0.4578
learning rate: 0.06778066229776968
Epoch 800, Loss: 0.4652
learning rate: 0.04496707621464464
Epoch 900, Loss: 0.4537
learning rate: 0.03662596462105439
Epoch 1000, Loss: 0.4539
learning rate: 0.02983207709612768
Epoch 1000, F1 Score: 0.6153
Epoch 1100, Loss: 0.4665
learning rate: 0.028340473241321294
Epoch 1200, Loss: 0.4608
learning rate: 0.025577277100292468
Epoch 1300, Loss: 0.4723
learning rate: 0.02308349258301395
Epoch 1400, Loss: 0.4512
learning rate: 0.017861566531658827
Epoch 1500, Loss: 0.4517
learning rate: 0.013820939696085584
Epoch 1600

In [16]:
print(model.compute_f1_score(x_tra_scaled1,y_tra1.reshape(-1,1)))
print(model.compute_f1_score(x_tra_scaled,y_tra.reshape(-1,1)))
print(model.compute_f1_score(x_val_scaled,y_val.reshape(-1,1)))

0.6045310690283505
0.3970527904954134
0.39395580056110646


In [85]:
input_size = x_tra_scaled.shape[1]
hidden_size = 16
output_size = 1
model2 = NeuralNetwork(input_size, hidden_size, output_size)

# Train the model
model2.train(x_tra_scaled1, y_tra1.reshape(-1,1), epochs=2000,batching=True,batch_size=32)

Epoch 0, Loss: 0.8617
learning rate: 0.3
Epoch 0, F1 Score: 0.5893
Epoch 100, Loss: 1.1485
learning rate: 0.24435187499999994
Epoch 200, Loss: 0.8364
learning rate: 0.20950118882812493
Epoch 300, Loss: 0.4935
learning rate: 0.1389873690479259
Epoch 400, Loss: 0.5399
learning rate: 0.10754577672256257
Epoch 500, Loss: 0.4658
learning rate: 0.08321687193655013
Epoch 600, Loss: 0.4576
learning rate: 0.058113445337550265
Epoch 700, Loss: 0.4578
learning rate: 0.03855364696953094
Epoch 800, Loss: 0.5230
learning rate: 0.03140218641697651
Epoch 900, Loss: 0.4588
learning rate: 0.02308349258301395
Epoch 1000, Loss: 0.4698
learning rate: 0.021929317953863253
Epoch 1000, F1 Score: 0.6492
Epoch 1100, Loss: 0.4764
learning rate: 0.017861566531658827
Epoch 1200, Loss: 0.4566
learning rate: 0.014548357574826932
Epoch 1300, Loss: 0.4588
learning rate: 0.008275107131032489
Epoch 1400, Loss: 0.4529
learning rate: 0.006403120153763253
Epoch 1500, Loss: 0.4526
learning rate: 0.0032869910392218115
Epoch 

In [86]:
print(model2.compute_f1_score(x_tra_scaled1,y_tra1.reshape(-1,1)))
print(model2.compute_f1_score(x_tra_scaled,y_tra.reshape(-1,1)))
print(model2.compute_f1_score(x_val_scaled,y_val.reshape(-1,1)))

0.6013986013986014
0.3947952821963778
0.390928405553634


In [55]:
input_size = x_tra_scaled.shape[1]
hidden_size = 16
output_size = 1
model3 = NeuralNetwork(input_size, hidden_size, output_size)

# Train the model
model3.train(x_tra_scaled1, y_tra1.reshape(-1,1), epochs=3000,batching=True,batch_size=32)

Epoch 0, Loss: 0.7281
Epoch 0, F1 Score: 0.5904
Epoch 100, Loss: 1.0362
Epoch 200, Loss: 0.4790
Epoch 300, Loss: 0.6093
Epoch 400, Loss: 0.4745
Epoch 500, Loss: 0.4682
Epoch 600, Loss: 0.4897
Epoch 700, Loss: 0.4666
Epoch 800, Loss: 0.5118
Epoch 900, Loss: 0.4573
Epoch 1000, Loss: 0.4683
Epoch 1000, F1 Score: 0.6438
Epoch 1100, Loss: 0.4528
Epoch 1200, Loss: 0.4602
Epoch 1300, Loss: 0.4564
Epoch 1400, Loss: 0.4834
Epoch 1500, Loss: 0.4525
Epoch 1600, Loss: 0.4559
Epoch 1700, Loss: 0.4536
Epoch 1800, Loss: 0.4556
Epoch 1900, Loss: 0.4530
Epoch 2000, Loss: 0.4517
Epoch 2000, F1 Score: 0.6031
Epoch 2100, Loss: 0.4515
Epoch 2200, Loss: 0.4537
Epoch 2300, Loss: 0.4519
Epoch 2400, Loss: 0.4514
Epoch 2500, Loss: 0.4513
Epoch 2600, Loss: 0.4514
Epoch 2700, Loss: 0.4513
Epoch 2800, Loss: 0.4512
Epoch 2900, Loss: 0.4524


In [56]:
print(model3.compute_f1_score(x_tra_scaled1,y_tra1.reshape(-1,1)))
print(model3.compute_f1_score(x_tra_scaled,y_tra.reshape(-1,1)))
print(model3.compute_f1_score(x_val_scaled,y_val.reshape(-1,1)))

0.603412727151799
0.39604350826459855
0.39403515702152875


In [76]:
input_size = x_tra_scaled.shape[1]
hidden_size = 16
output_size = 1
model4 = NeuralNetwork(input_size, hidden_size, output_size)

# Train the model
model4.train(x_tra_scaled1, y_tra1.reshape(-1,1), epochs=1000,batching=True,batch_size=32)



Epoch 0, Loss: 0.7273
learning rate: 0.3
Epoch 0, F1 Score: 0.0000
Epoch 100, Loss: 0.5250
learning rate: 0.27075
Epoch 200, Loss: 0.5520
learning rate: 0.18907482291738273
Epoch 300, Loss: 0.7252
learning rate: 0.1389873690479259
Epoch 400, Loss: 0.5555
learning rate: 0.11916429553746546
Epoch 500, Loss: 0.4840
learning rate: 0.08759670730163172
Epoch 600, Loss: 0.4569
learning rate: 0.07905602833972261
Epoch 700, Loss: 0.5831
learning rate: 0.058113445337550265
Epoch 800, Loss: 0.4821
learning rate: 0.05244738441713911
Epoch 900, Loss: 0.4817
learning rate: 0.04496707621464464


In [77]:
the_model=model4
print(the_model.compute_f1_score(x_tra_scaled1,y_tra1.reshape(-1,1)))
print(the_model.compute_f1_score(x_tra_scaled,y_tra.reshape(-1,1)))
print(the_model.compute_f1_score(x_val_scaled,y_val.reshape(-1,1)))

0.5908772464209565
0.3977957995425244
0.39393781573876363


## Submission

In [17]:
x_train, x_test, y_train, train_ids, test_ids=load_csv_data("data")
x_test=pd.read_csv("data/x_test.csv")

In [18]:
x_test2=x_test.copy()
fill_mean=False
for col in intresting_features:
    x_test2[col].replace(dico_transfos[col],inplace=True)

        
    
if fill_mean:
    x_test_cleaned=x_test2[intresting_features].copy()
    for col in intresting_features:
        x_test_cleaned[col]=x_test_cleaned[col].fillna(x_tr_cleaned[col].mean())
else:
    x_test_cleaned=x_test2[intresting_features].fillna(0).copy()
x_test_scaled=(x_test_cleaned-np.mean(x_tra,axis=0)[None,:])/np.std(x_tra,axis=0)

In [19]:
the_model=model

In [20]:
predictions=the_model.predict(x_test_scaled)
predictions=predictions.reshape(x_test_scaled.shape[0])
predictions[np.where(predictions==0)[0]]=-1
predictions

array([-1., -1., -1., ..., -1., -1.,  1.])

In [21]:
np.where(predictions==1)[0].shape

(17333,)

In [22]:
create_csv_submission(test_ids,predictions,name="predictions_nn_model.csv")