In [1]:
import logging

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from loss import CrossEntropyLoss
from network import NeuralNet
from nn import Linear, Relu, Sigmoid
# Timestamped INFO-level logging for the whole notebook; `logger` is this
# notebook's own named logger.
LOG_FORMAT = "%(asctime)s %(levelname)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

In [2]:
# Dataset: Online Retail Customer Churn (Kaggle)
# https://www.kaggle.com/datasets/hassaneskikri/online-retail-customer-churn-dataset
# The path is relative to the directory the notebook is run from.
DATA_PATH = "data/online_retail_customer_churn.csv"
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Customer_ID,Age,Gender,Annual_Income,Total_Spend,Years_as_Customer,Num_of_Purchases,Average_Transaction_Amount,Num_of_Returns,Num_of_Support_Contacts,Satisfaction_Score,Last_Purchase_Days_Ago,Email_Opt_In,Promotion_Response,Target_Churn
0,1,62,Other,45.15,5892.58,5,22,453.8,2,0,3,129,True,Responded,True
1,2,65,Male,79.51,9025.47,13,77,22.9,2,2,3,227,False,Responded,False
2,3,18,Male,29.19,618.83,13,71,50.53,5,2,2,283,False,Responded,True
3,4,21,Other,79.63,9110.3,3,33,411.83,5,3,5,226,True,Ignored,True
4,5,21,Other,77.66,5390.88,15,43,101.19,3,0,5,242,False,Unsubscribed,False


In [3]:
# Feature columns fed to the model. Only two are enabled here; a wider
# candidate set is: "Age", "Annual_Income", "Total_Spend", "Years_as_Customer",
# "Num_of_Purchases", "Average_Transaction_Amount", "Num_of_Returns".
X_columns = ["Age", "Annual_Income"]
# Label column to predict.
y_column = "Target_Churn"

df_x = df.loc[:, X_columns]
df_y = df[y_column]
print("data size:", df_y.shape[0])
df_x.head()

data size: 1000


Unnamed: 0,Age,Annual_Income
0,62,45.15
1,65,79.51
2,18,29.19
3,21,79.63
4,21,77.66


In [4]:
# Z-score every feature column: subtract the column mean and divide by the
# column standard deviation (pandas' default sample std, ddof=1).
# A constant column has std == 0; its centered values are all 0, so dividing
# by 1 instead yields 0.0 rather than NaN from 0/0.
# NOTE(review): these statistics are computed over every row, i.e. before the
# train/test split performed in a later cell, so test rows influence the
# scaling -- confirm this is acceptable for the evaluation.
feature_mean = df_x.mean()
feature_std = df_x.std().replace(0, 1)
df_x_norm = (df_x - feature_mean) / feature_std

In [5]:
# Number of selected feature columns; passed below as the first argument of
# the network's Linear layer.
N_features: int = len(X_columns)
N_features

2

In [6]:
# Hold out a fixed fraction of the rows for evaluation. The seed is pinned so
# the same rows land in the test set on every run.
TEST_FRACTION = 0.2
SPLIT_SEED = 388765865

# Features as a plain array; labels cast to float32.
X, y = df_x_norm.values, df_y.values.astype("float32")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [7]:
# Single Linear layer (N_features -> 1) followed by a Sigmoid.
# To experiment with a hidden layer, widen the first Linear to 3 outputs and
# insert Relu() and Linear(3, 1, learning_rate) ahead of the Sigmoid.
learning_rate = 0.001
net = NeuralNet()
for layer in (Linear(N_features, 1, learning_rate), Sigmoid()):
    net.add_layer(layer)

In [8]:
# Give the network a cross-entropy loss object via its `loss` attribute.
loss_function = CrossEntropyLoss()
net.loss = loss_function

In [9]:
# Fit on the training split for a fixed number of epochs.
N_EPOCHS = 20
net.train(X_train, y_train, n_epoch=N_EPOCHS)

2024-02-29 11:53:03,491 INFO: Running epoch:    0
2024-02-29 11:53:03,515 INFO: Epoch: 0, Loss:        0.862

2024-02-29 11:53:03,544 INFO: Running epoch:    2
2024-02-29 11:53:03,575 INFO: Epoch: 2, Loss:        0.782

2024-02-29 11:53:03,599 INFO: Running epoch:    4
2024-02-29 11:53:03,621 INFO: Epoch: 4, Loss:        0.736

2024-02-29 11:53:03,643 INFO: Running epoch:    6
2024-02-29 11:53:03,663 INFO: Epoch: 6, Loss:        0.712

2024-02-29 11:53:03,685 INFO: Running epoch:    8
2024-02-29 11:53:03,707 INFO: Epoch: 8, Loss:        0.701

2024-02-29 11:53:03,728 INFO: Running epoch:   10
2024-02-29 11:53:03,747 INFO: Epoch: 10, Loss:        0.695

2024-02-29 11:53:03,769 INFO: Running epoch:   12
2024-02-29 11:53:03,792 INFO: Epoch: 12, Loss:        0.693

2024-02-29 11:53:03,811 INFO: Running epoch:   14
2024-02-29 11:53:03,834 INFO: Epoch: 14, Loss:        0.692

2024-02-29 11:53:03,858 INFO: Running epoch:   16
2024-02-29 11:53:03,881 INFO: Epoch: 16, Loss:        0.691

2024-0

In [10]:
# Evaluate on the held-out split. Per the captured log, net.test reports the
# number of test rows and the test error; the returned array holds one
# score per test row (shape (200, 1) in the recorded run).
predicted = net.test(X_test, y_test)
# NOTE(review): this displays the full array; consider showing only a slice.
predicted

2024-02-29 11:53:03,952 INFO: Number of test data: 200
2024-02-29 11:53:03,956 INFO: Error of test data:        0.699


array([[0.53939854],
       [0.52759204],
       [0.50133213],
       [0.50409599],
       [0.54305883],
       [0.54844238],
       [0.50495554],
       [0.50056651],
       [0.49962963],
       [0.52294999],
       [0.54748928],
       [0.50538822],
       [0.55833884],
       [0.50901319],
       [0.51666512],
       [0.52123392],
       [0.53467398],
       [0.54821513],
       [0.50528989],
       [0.54555255],
       [0.51211329],
       [0.52717701],
       [0.51882717],
       [0.53651708],
       [0.50744203],
       [0.5039528 ],
       [0.52146854],
       [0.52950969],
       [0.50889646],
       [0.51844149],
       [0.51357461],
       [0.53742372],
       [0.52821088],
       [0.54948626],
       [0.5468202 ],
       [0.50993395],
       [0.51437862],
       [0.51602843],
       [0.51673697],
       [0.52173483],
       [0.5556308 ],
       [0.52936848],
       [0.50274755],
       [0.52936534],
       [0.52128533],
       [0.53110248],
       [0.55233875],
       [0.552

In [11]:
# Confirm the prediction array is a column vector before flattening it.
np.shape(predicted)

(200, 1)

In [12]:
# Collapse the (n, 1) column vector to a flat (n,) vector so it lines up
# element-wise with y_test in the confusion-matrix cells.
n_predictions = predicted.shape[0]
predicted = predicted.reshape(n_predictions)
predicted.shape

(200,)

In [13]:
# True positives: score above the 0.5 threshold and the label is churn.
is_predicted_churn = predicted > 0.5
is_churn = y_test.astype(bool)
TP = np.sum(is_predicted_churn & is_churn)
TP

97

In [14]:
# True negatives: predicted "no churn" and the label is not churn.
# The negative side of the threshold is inclusive (<= 0.5) so that a score of
# exactly 0.5 is still classified. With a strict "<" here and the strict ">"
# used for the positive counts, such a sample would be counted in none of the
# four cells and TP + TN + FP + FN would fall short of the test-set size.
TN = np.sum((predicted <= 0.5) & np.logical_not(y_test))
TN

2

In [15]:
# False positives: score above the 0.5 threshold but the label is not churn.
is_predicted_churn = predicted > 0.5
is_not_churn = np.logical_not(y_test)
FP = np.sum(is_predicted_churn & is_not_churn)
FP

99

In [16]:
# False negatives: predicted "no churn" but the label is churn.
# The negative side of the threshold is inclusive (<= 0.5) so that a score of
# exactly 0.5 is still classified. With a strict "<" here and the strict ">"
# used for the positive counts, such a sample would be counted in none of the
# four cells and TP + TN + FP + FN would fall short of the test-set size.
FN = np.sum((predicted <= 0.5) & y_test.astype(bool))
FN



2

In [17]:
# Precision = TP / (TP + FP): share of predicted churners that really churned.
# Recall    = TP / (TP + FN): share of actual churners that were caught.
# The counts come from np.sum, so a zero denominator (no positive predictions,
# or no positive labels) would give nan plus a RuntimeWarning; report 0.0 in
# that case instead.
n_predicted_churn = TP + FP
n_actual_churn = TP + FN
precision = TP / n_predicted_churn if n_predicted_churn else 0.0
recall = TP / n_actual_churn if n_actual_churn else 0.0
print("precision: %.1f%%, recall: %.1f%%" % (100* precision, 100* recall))

precision: 49.5%, recall: 98.0%
