# Import

In [1]:
#Library
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA

# Selfmade
from KNN_Selfmade import KNN_Selfmade
from Logistic_Regression_Selfmade import Logistic_Regression_Selfmade
from Gaussian_Naive_Bayes_Selfmade import Gaussian_Naive_Bayes_Selfmade
from CART_Selfmade import CART_Selfmade
from SVM_Selfmade import SVC_Selfmade
from ANN_Selfmade import ANN_Selfmade
from KMeans_Cluster_Selfmade import KMeans_Selfmade
from DBSCAN_Selfmade import DBSCAN_Selfmade
from PCA_Selfmade import PCA_Selfmade
from Ensemble_Bagging_Selfmade import Ensemble_Bagging_Selfmade

# Read Dataset & Splitting

In [2]:
subsampled_df = pd.read_csv("../dataset/subsampled_df.csv")

In [3]:
X = subsampled_df.drop(columns='deposit')
y = subsampled_df['deposit']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modelling & Testing

## KNN

### KNN Selfmade

In [5]:
model_knn_selfmade = KNN_Selfmade(neighbors=7, metric='euclidean')
model_knn_selfmade.fit(X_train, y_train)
y_pred_knn_selfmade = model_knn_selfmade.predict(X_test)

print(classification_report(y_test, y_pred_knn_selfmade))

              precision    recall  f1-score   support

           0       0.92      0.98      0.95      5757
           1       0.39      0.12      0.18       537

    accuracy                           0.91      6294
   macro avg       0.66      0.55      0.57      6294
weighted avg       0.88      0.91      0.89      6294



### KNN Library

In [6]:
model_knn_library = KNeighborsClassifier(n_neighbors=7, metric='manhattan')
model_knn_library.fit(X_train, y_train)
y_pred_knn_library = model_knn_library.predict(X_test)

print(classification_report(y_test, y_pred_knn_library))

              precision    recall  f1-score   support

           0       0.92      0.98      0.95      5757
           1       0.45      0.14      0.22       537

    accuracy                           0.91      6294
   macro avg       0.69      0.56      0.58      6294
weighted avg       0.88      0.91      0.89      6294



### Comparing KNN

In [7]:
print(np.mean(y_pred_knn_selfmade == y_pred_knn_library))

0.986495074674293


## Logistic Regression

### Regression Selfmade

In [8]:
model_log_reg_selfmade = Logistic_Regression_Selfmade(learning_rate=0.5, n_iterations=1000, regularization='l1', reg_lambda=0.01, loss_function='cross_entropy')
model_log_reg_selfmade.fit(X_train, y_train)
y_pred_log_reg_selfmade = model_log_reg_selfmade.predict(X_test)

print(classification_report(y_pred_log_reg_selfmade, y_test))

Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 1.4212618613019445
Iteration 200, Loss: 1.5332447147530426
Iteration 300, Loss: 7.696789596601651
Iteration 400, Loss: 1.4253936000381593
Iteration 500, Loss: 10.575828108074568
Iteration 600, Loss: 1.38666008661083
Iteration 700, Loss: 1.3783174837188952
Iteration 800, Loss: 1.4239339739118122
Iteration 900, Loss: 1.3783174837188952
              precision    recall  f1-score   support

           0       1.00      0.91      0.96      6289
           1       0.00      0.40      0.01         5

    accuracy                           0.91      6294
   macro avg       0.50      0.66      0.48      6294
weighted avg       1.00      0.91      0.95      6294



### Regression Library

In [9]:
model_log_reg_library = LogisticRegression(max_iter=100, C=1/0.1, penalty='l2')
model_log_reg_library.fit(X_train, y_train)
y_pred_log_reg_library = model_log_reg_library.predict(X_test)

print(classification_report(y_pred_log_reg_library, y_test))

              precision    recall  f1-score   support

           0       0.99      0.93      0.96      6133
           1       0.15      0.52      0.24       161

    accuracy                           0.92      6294
   macro avg       0.57      0.72      0.60      6294
weighted avg       0.97      0.92      0.94      6294



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Comparing Logistic Regression

In [10]:
print(np.mean(y_pred_log_reg_selfmade == y_pred_log_reg_library))

0.9736256752462663


## Gaussian Naive Bayes

### Naive Bayes Selfmade

In [11]:
model_naive_bayes_selfmade = Gaussian_Naive_Bayes_Selfmade()
model_naive_bayes_selfmade.fit(X_train, y_train)
y_pred_naive_bayes_selfmade = model_naive_bayes_selfmade.predict(X_test)

print(classification_report(y_pred_naive_bayes_selfmade, y_test))

              precision    recall  f1-score   support

           0       0.89      0.96      0.92      5349
           1       0.60      0.34      0.43       945

    accuracy                           0.87      6294
   macro avg       0.74      0.65      0.68      6294
weighted avg       0.85      0.87      0.85      6294



### Naive Bayes Library

In [12]:
model_naive_bayes_library = GaussianNB()
model_naive_bayes_library.fit(X_train, y_train)
y_pred_naive_bayes_library = model_naive_bayes_library.predict(X_test)

print(classification_report(y_pred_naive_bayes_library, y_test))

              precision    recall  f1-score   support

           0       0.89      0.96      0.92      5345
           1       0.60      0.34      0.44       949

    accuracy                           0.87      6294
   macro avg       0.75      0.65      0.68      6294
weighted avg       0.85      0.87      0.85      6294



### Comparing Naive Bayes

In [13]:
print(np.mean(y_pred_naive_bayes_selfmade == y_pred_naive_bayes_library))

0.9971401334604385


## CART

### CART Selfmade

In [14]:
model_CART_selfmade = CART_Selfmade(max_depth=3)
model_CART_selfmade.fit(X_train, y_train)
y_pred_CART_selfmade = model_CART_selfmade.predict(X_test)

print(classification_report(y_pred_CART_selfmade, y_test))

              precision    recall  f1-score   support

           0       0.99      0.93      0.96      6150
           1       0.20      0.76      0.32       144

    accuracy                           0.93      6294
   macro avg       0.60      0.84      0.64      6294
weighted avg       0.98      0.93      0.95      6294



### CART Library

In [15]:
model_CART_library = DecisionTreeClassifier(max_depth=3)
model_CART_library.fit(X_train, y_train)
y_pred_CART_library = model_CART_library.predict(X_test)

print(classification_report(y_pred_CART_library, y_test))

              precision    recall  f1-score   support

           0       0.99      0.93      0.96      6150
           1       0.20      0.76      0.32       144

    accuracy                           0.93      6294
   macro avg       0.60      0.84      0.64      6294
weighted avg       0.98      0.93      0.95      6294



### Comparing CART

In [16]:
print(np.mean(y_pred_CART_selfmade == y_pred_CART_library))

1.0


## SVC

### SVM Selfmade

In [17]:
model_SVC_selfmade = SVC_Selfmade(learning_rate=0.01, lambda_param=0.01, n_iters=100, kernel='linear')
model_SVC_selfmade.fit(X_train.values, y_train.values)
y_pred_SVC_selfmade = model_SVC_selfmade.predict(X_test)

print(classification_report(y_pred_SVC_selfmade, y_test))

              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00    6294.0
         0.0       0.00      0.00      0.00       0.0
         1.0       0.00      0.00      0.00       0.0

    accuracy                           0.00    6294.0
   macro avg       0.00      0.00      0.00    6294.0
weighted avg       0.00      0.00      0.00    6294.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### SVC Library

In [18]:
model_SVC_library = SVC(max_iter=100, kernel='linear')
model_SVC_library.fit(X_train.values, y_train.values)
y_pred_SVC_library = model_SVC_library.predict(X_test)

print(classification_report(y_pred_SVC_library, y_test))

              precision    recall  f1-score   support

           0       0.24      0.88      0.38      1575
           1       0.64      0.07      0.13      4719

    accuracy                           0.27      6294
   macro avg       0.44      0.47      0.25      6294
weighted avg       0.54      0.27      0.19      6294





### Comparing SVC

In [19]:
print(np.mean(y_pred_SVC_selfmade == y_pred_SVC_library))

0.0


## ANN

### ANN Selfmade

In [20]:
# model_ANN_selfmade = ANN_Selfmade(sizes=[51, 50, 1], activation='sigmoid')
# model_ANN_selfmade.train(X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy(), batch_size=20, optimizer='sgd', l_rate=0.0001, beta=.9)
# y_pred_ANN_selfmade = model_ANN_selfmade.predict()

# print(classification_report(y_pred_ANN_selfmade, y_test))

### ANN Library

In [21]:
model_ANN_library = MLPClassifier(hidden_layer_sizes=(50,), activation='logistic', solver='sgd',
                    learning_rate_init=0.0001, max_iter=10, batch_size=20, momentum=0.9)
model_ANN_library.fit(X_train, y_train)
y_pred_ANN_library = model_ANN_library.predict(X_test)

print(classification_report(y_pred_ANN_library, y_test))

              precision    recall  f1-score   support

           0       1.00      0.91      0.96      6294
           1       0.00      0.00      0.00         0

    accuracy                           0.91      6294
   macro avg       0.50      0.46      0.48      6294
weighted avg       1.00      0.91      0.96      6294



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## K-Means Clustering

### K-Means Clustering Selfmade

In [22]:
model_KMeans_selfmade = KMeans_Selfmade(n_clusters=2, max_iter=100)
model_KMeans_selfmade.fit(X_train)
y_pred_KMeans_selfmade = model_KMeans_selfmade.predict(X_test)

print(y_pred_KMeans_selfmade)

[1 0 1 ... 0 1 1]


### K-Means Clustering Library

In [23]:
model_KMeans_library = KMeans(n_clusters=2, max_iter=100)
model_KMeans_library.fit(X_train)
y_pred_KMeans_library = model_KMeans_library.predict(X_test)

print(y_pred_KMeans_library)

[0 1 0 ... 1 0 0]


### Comparing KMeans

In [24]:
print(classification_report(y_pred_KMeans_library, y_pred_KMeans_selfmade))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00    4967.0
           1       0.00      0.00      0.00    1327.0

    accuracy                           0.00    6294.0
   macro avg       0.00      0.00      0.00    6294.0
weighted avg       0.00      0.00      0.00    6294.0



## DBSCAN

### DBSCAN Selfmade 

In [25]:
model_DBSCAN_selfmade = DBSCAN_Selfmade(eps=200, min_samples=3, metric='euclidean', p=5)
model_DBSCAN_selfmade.fit(X_train)
y_pred_DBSCAN_selfmade = model_DBSCAN_selfmade.predict()

print(y_pred_DBSCAN_selfmade)

[ 0  1  0 ... 13 75  1]


### DBSCAN Library

In [26]:
model_DBSCAN_library = DBSCAN(eps=200, min_samples=3).fit(X_train)
y_pred_DBSCAN_library = model_DBSCAN_library.labels_

print(y_pred_DBSCAN_library)

[0 0 0 ... 0 0 0]


In [27]:
print(classification_report(y_pred_DBSCAN_library, y_pred_DBSCAN_selfmade))

              precision    recall  f1-score   support

          -1       0.67      1.00      0.80         8
           0       1.00      0.16      0.28     25164
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## PCA

### PCA Selfmade

In [28]:
kmeans = KMeans(n_clusters=3)

In [29]:
model_PCA_selfmade = PCA_Selfmade(n_components=2)
label_pca = model_PCA_selfmade.fit_transform(X_train)
print("Transformed Data:\n", label_pca) 
print("Explained Variance:", model_PCA_selfmade.explained_variance)

y_pred_PCA_selfmade = kmeans.fit_predict(label_pca)
print(y_pred_PCA_selfmade)
 

Transformed Data:
 [[ 1.71816803e-01 -2.92513393e+00]
 [-3.51052896e-01 -8.38781739e-01]
 [ 1.87254594e+00  1.88551110e+00]
 ...
 [-4.98992492e-03 -1.70346875e+00]
 [ 3.34779199e-01  1.06595711e-03]
 [-9.09152723e-01 -1.02063245e+00]]
Explained Variance: [0.07276258 0.0642876 ]
[0 0 1 ... 0 1 0]


### PCA Library

In [30]:
model_PCA_library = PCA(n_components=2)
label_pca_library = model_PCA_library.fit_transform(X_train)

print("Transformed Data (Library):\n", label_pca_library)
print("Explained Variance (Library):", model_PCA_library.explained_variance_ratio_)

y_pred_PCA_library = kmeans.fit_predict(label_pca_library)
print(y_pred_PCA_library)


Transformed Data (Library):
 [[-508.04679324 -121.36759196]
 [-866.01820312 -111.79415467]
 [-592.63864497  -17.9766803 ]
 ...
 [-228.06263749 -120.40551488]
 [ -97.15186143  330.71865728]
 [-760.9520981   -92.16969219]]
Explained Variance (Library): [0.96623848 0.02197796]
[2 2 2 ... 2 2 2]


In [31]:
print(classification_report(y_pred_PCA_library, y_pred_PCA_selfmade))

              precision    recall  f1-score   support

           0       0.24      0.41      0.31      6124
           1       0.10      0.33      0.15      2968
           2       0.60      0.17      0.27     16080

    accuracy                           0.25     25172
   macro avg       0.31      0.31      0.24     25172
weighted avg       0.45      0.25      0.26     25172



## Bagging

### Bagging Selfmade

In [8]:
model_bagging_selfmade = Ensemble_Bagging_Selfmade(base_estimator=DecisionTreeClassifier, n_estimators=5, max_samples=0.6)

model_bagging_selfmade.fit(X_train, y_train)
y_pred_bagging_selfmade = model_bagging_selfmade.predict(X_test)

print(classification_report(y_pred_bagging_selfmade, y_test))

              precision    recall  f1-score   support

           0       0.91      0.74      0.82        27
           1       0.61      0.85      0.71        13

    accuracy                           0.78        40
   macro avg       0.76      0.79      0.76        40
weighted avg       0.81      0.78      0.78        40

