In [1]:
import numpy as np
import pandas as pd
import math
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
def load_data(path=None):
    """Read the churn CSV and split it into a feature frame and a label series.

    The file is read with ``header=None``, so the header line becomes row 0 of
    the frame and the columns are labelled with integers. Because that header
    row is part of the data, every value is loaded as a string; numeric
    conversion is left to the caller. Column 0 is dropped and row 0 (the
    header line) is skipped when slicing.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the Kaggle input path used by
        this notebook.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` holds the first ten remaining
        columns (integer labels 1-10) and ``labels`` is the last column. Both
        keep the original row index, which therefore starts at 1.
    """
    if path is None:
        path = r"../input/datasets-for-regression/Bank_Customer_Churn_Prediction/Bank_Customer_Churn_Prediction.csv"

    raw = pd.read_csv(path, header=None, delimiter=",")
    # Column 0 is discarded (presumably a row/customer identifier -- verify against the dataset).
    raw = raw.drop(0, axis=1)

    # iloc[1:] skips the header line that was read in as an ordinary data row.
    features = raw.iloc[1:, 0:10]
    labels = raw.iloc[1:, -1]
    return (features, labels)

In [3]:
def one_hot_encode(features):
    """Replace columns 2 and 3 of ``features`` with their one-hot indicator columns.

    The indicator columns produced by ``pd.get_dummies`` for column 2, then for
    column 3, are appended to the right of the frame, and the two source columns
    are removed. The input frame is not modified; a new frame is returned.
    """
    categorical_labels = [2, 3]
    indicator_frames = [pd.get_dummies(features[label]) for label in categorical_labels]

    widened = pd.concat([features, *indicator_frames], axis=1)
    return widened.drop(columns=categorical_labels)

In [4]:
def split_data(features, labels, test_fraction=0.2, random_state=None):
    """Shuffle-split features/labels into train and test sets and cast them to float.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature frame; every column must be convertible to ``float``.
    labels : pandas.Series
        Labels aligned with ``features``; must be convertible to ``float``.
    test_fraction : float, optional
        Share of the samples placed in the test set. The test size is
        ``ceil(test_fraction * n_samples)``. Defaults to 0.2, the ratio this
        function previously hard-coded.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. Pass an integer for a reproducible
        split; the default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(train_x, test_x, train_y, test_y)``, each with a fresh 0-based index
        and float dtype.
    """
    total_samples = features.shape[0]
    test_split_size = int(np.ceil(test_fraction * total_samples))

    train_x, test_x, train_y, test_y = train_test_split(
        features,
        labels,
        test_size=test_split_size,
        random_state=random_state,
    )

    # Drop the shuffled original index so each split is numbered from 0, then
    # convert everything to float in one pass (values may arrive as strings or
    # boolean indicator columns).
    train_x = train_x.reset_index(drop=True).astype(float)
    test_x = test_x.reset_index(drop=True).astype(float)
    train_y = train_y.reset_index(drop=True).astype(float)
    test_y = test_y.reset_index(drop=True).astype(float)

    return (train_x, test_x, train_y, test_y)

In [5]:
def min_max_normalization(df):
    """Rescale values to the [0, 1] range using ``(x - min) / (max - min)``.

    For a DataFrame the minimum and maximum are taken per column. A column
    whose values are all equal has a zero range; instead of producing NaN from
    a 0/0 division, such a column is mapped to 0.0.

    Parameters
    ----------
    df : pandas.DataFrame (or any object with ``min``/``max`` and arithmetic)
        Numeric data to rescale.

    Returns
    -------
    Same kind of object as ``df`` with the rescaled values.
    """
    lower = df.min()
    span = df.max() - lower

    # Guard against division by zero for constant inputs.
    if isinstance(span, pd.Series):
        span = span.replace(0, 1)
    elif span == 0:
        span = 1

    return (df - lower) / span

In [6]:
def init_parameters(input_features, hidden_units, output_units):
    """Create random starting weights and biases for a 2-layer network.

    Each parameter is drawn with ``random.uniform(-0.5, 0.5)`` and rounded to
    one decimal place. Values are drawn in this order: layer-1 weights,
    layer-2 weights, layer-1 biases, layer-2 biases.

    Returns
    -------
    tuple
        ``(network_weights, network_biases)`` where ``network_weights`` is
        ``[layer1_weights, layer2_weights]`` and ``network_biases`` is
        ``[layer1_biases, layer2_biases]``; each entry is a flat list of floats.
    """
    group_sizes = (
        input_features * hidden_units,   # layer-1 weights
        hidden_units * output_units,     # layer-2 weights
        hidden_units,                    # layer-1 biases
        output_units,                    # layer-2 biases
    )

    # The generator is consumed left to right, so the draw order matches group_sizes.
    weights_l1, weights_l2, biases_l1, biases_l2 = (
        [round(random.uniform(-0.5, 0.5), 1) for _ in range(size)]
        for size in group_sizes
    )

    return ([weights_l1, weights_l2], [biases_l1, biases_l2])

In [7]:
def calculate_weighted_sum(weights, inputs):
    """Return the dot product ``np.dot(weights, inputs)``."""
    return np.dot(weights, inputs)

def add_biases(weighted_sum, biases):
    """Return the element-wise sum of ``weighted_sum`` and ``biases``.

    Parameters
    ----------
    weighted_sum : array-like
        Pre-activation values.
    biases : array-like
        Bias terms; must be broadcastable against ``weighted_sum``.
    """
    # The previous body referenced an undefined name (`biases_vector`) and
    # raised NameError on every call; use the actual parameter.
    return np.add(weighted_sum, biases)

In [8]:
def sigmoid(z_vector):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise."""
    decay = np.exp(-z_vector)
    return 1 / (1 + decay)

def relu(z):
    """Rectified linear unit: element-wise maximum of ``z`` and 0."""
    rectified = np.maximum(z, 0)
    return rectified

In [9]:
def cross_entropy_cost():
    """Placeholder with no implementation yet: takes no arguments and returns None."""
    pass

def cost_function(y, y_hat, total_samples):
    """Binary cross-entropy between targets ``y`` and predictions ``y_hat``.

    A constant 0.0001 is added inside each logarithm so that predictions of
    exactly 0 or 1 do not produce ``log(0)``. The summed loss is scaled by
    ``-1 / total_samples``.
    """
    positive_term = y * np.log(0.0001 + y_hat)
    negative_term = (1 - y) * np.log(0.0001 + 1 - y_hat)
    scale = -1 / total_samples
    return scale * np.sum(positive_term + negative_term)

In [10]:
def model_fit():
    """Placeholder with no implementation yet: takes no arguments and returns None."""
    pass

In [11]:
# Load the raw data and show it before any encoding is applied.
features, labels = load_data()
print("Fetures Before One Hot Encoding", features, sep="\n")
print("-" * 150, "\n")

print("Labels", labels, sep="\n")
print("-" * 150, "\n")

# Replace the categorical columns with one-hot indicator columns.
features = one_hot_encode(features)
print("Fetures After One Hot Encoding", features, sep="\n")
print("-" * 150, "\n")

# Split into train/test sets (values are cast to float inside split_data).
train_f, test_f, train_l, test_l = split_data(features, labels)

print("Training Features", train_f, sep="\n")
print("-" * 150, "\n")

print("Testing Features", test_f, sep="\n")
print("-" * 150, "\n")

print("Training Labels", train_l, sep="\n")
print("-" * 150, "\n")

print("Testing Labels", test_l, sep="\n")
print("-" * 150, "\n")

Fetures Before One Hot Encoding
        1        2       3   4   5          6  7  8  9          10
1      619   France  Female  42   2          0  1  1  1  101348.88
2      608    Spain  Female  41   1   83807.86  1  0  1  112542.58
3      502   France  Female  42   8   159660.8  3  1  0  113931.57
4      699   France  Female  39   1          0  2  0  0   93826.63
5      850    Spain  Female  43   2  125510.82  1  1  1    79084.1
...    ...      ...     ...  ..  ..        ... .. .. ..        ...
9996   771   France    Male  39   5          0  2  1  0   96270.64
9997   516   France    Male  35  10   57369.61  1  1  1  101699.77
9998   709   France  Female  36   7          0  1  0  1   42085.58
9999   772  Germany    Male  42   3   75075.31  2  1  0   92888.52
10000  792   France  Female  28   4  130142.79  1  1  0   38190.78

[10000 rows x 10 columns]
-----------------------------------------------------------------------------------------------------------------------------------------

In [12]:
# Scale both splits with statistics taken from the TRAINING split only.
# Normalizing the test split with its own min/max would map train and test
# through different transformations, so the same raw value would end up as
# different model inputs. Test values may therefore fall slightly outside [0, 1].
train_min = train_f.min()
# A constant training column has zero range; use 1 there to avoid dividing by zero.
train_span = (train_f.max() - train_min).replace(0, 1)

# The test split must be scaled first, while train_f still holds the raw values
# the statistics above were computed from.
test_f = (test_f - train_min) / train_span
train_f = min_max_normalization(train_f)

print("Training Features After Min-Max Normalization")
print(train_f)
print("-" * 150, "\n")

print("Testing Features After Min-Max Normalization")
print(test_f)
print("-" * 150, "\n")

Training Features After Min-Max Normalization
          1         4    5         6         7    8    9        10  France  \
0     0.600  0.121622  0.6  0.000000  0.333333  1.0  0.0  0.004955     1.0   
1     0.386  0.310811  0.6  0.571349  0.000000  1.0  1.0  0.960385     0.0   
2     0.628  0.243243  0.0  0.412527  0.000000  1.0  1.0  0.730971     1.0   
3     0.620  0.175676  0.9  0.498967  0.333333  1.0  1.0  0.699381     0.0   
4     0.802  0.405405  0.9  0.000000  0.000000  1.0  0.0  0.687550     1.0   
...     ...       ...  ...       ...       ...  ...  ...       ...     ...   
7995  0.714  0.351351  0.3  0.000000  0.333333  1.0  1.0  0.675392     0.0   
7996  0.498  0.094595  0.3  0.000000  0.333333  1.0  1.0  0.603950     0.0   
7997  0.736  0.229730  0.2  0.669295  0.000000  1.0  0.0  0.215086     1.0   
7998  0.454  0.243243  0.3  0.482636  0.333333  0.0  1.0  0.718928     1.0   
7999  0.512  0.256757  0.8  0.616635  0.333333  1.0  0.0  0.445481     0.0   

      Germany  Sp

In [13]:
# num_hidden_units = int(input("Enter Number of Units in Hidden Layer: "))
num_hidden_units = 3

# Take the input-layer width from the prepared training features rather than a
# hard-coded 13, so it stays correct if the encoded column set changes.
num_input_features = train_f.shape[1]

network_weights, network_biases = init_parameters(num_input_features, num_hidden_units, 1)
print(f"{network_weights} \n")
print(network_biases)

[[0.0, 0.0, -0.3, -0.4, 0.2, -0.4, -0.4, 0.1, 0.2, -0.2, -0.2, -0.5, -0.3, -0.3, 0.3, 0.1, -0.1, 0.5, 0.2, 0.5, -0.3, -0.2, -0.2, 0.3, 0.3, 0.3, 0.4, -0.3, -0.2, -0.0, 0.1, 0.4, -0.5, 0.4, -0.4, -0.4, 0.3, -0.1, -0.2], [-0.3, -0.4, 0.4]] 

[[-0.2, -0.3, 0.1], [-0.2]]
