# Import

In [1]:
#Library
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA

# Selfmade
from KNN_Selfmade import KNN_Selfmade
from Logistic_Regression_Selfmade import Logistic_Regression_Selfmade
from Gaussian_Naive_Bayes_Selfmade import Gaussian_Naive_Bayes_Selfmade
from CART_Selfmade import CART_Selfmade
from SVM_Selfmade import SVC_Selfmade
from ANN_Selfmade import ANN_Selfmade
from KMeans_Cluster_Selfmade import KMeans_Selfmade
from DBSCAN_Selfmade import DBSCAN_Selfmade
from PCA_Selfmade import PCA_Selfmade
from Ensemble_Bagging_Selfmade import Ensemble_Bagging_Selfmade

# Read Dataset & Splitting

In [2]:
subsampled_df = pd.read_csv("../dataset/subsampled_df.csv")

In [3]:
X = subsampled_df.drop(columns='deposit')
y = subsampled_df['deposit']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Modelling & Testing

## KNN

### KNN Selfmade

#### KNN Hold-Out

In [17]:
model_knn_selfmade = KNN_Selfmade(neighbors=7, metric='euclidean')
model_knn_selfmade.fit(X_train, y_train)
y_pred_knn_selfmade = model_knn_selfmade.predict(X_test)

print(classification_report(y_test, y_pred_knn_selfmade))

              precision    recall  f1-score   support

           0       0.68      0.68      0.68       100
           1       0.68      0.68      0.68       100

    accuracy                           0.68       200
   macro avg       0.68      0.68      0.68       200
weighted avg       0.68      0.68      0.68       200



#### KNN K-Fold

In [18]:
all_true_labels_knn_selfmade = []
all_pred_labels_knn_selfmade = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_knn_selfmade = KNN_Selfmade(neighbors=7, metric='euclidean')
    model_knn_selfmade.fit(X_train, y_train)
    y_pred_knn_selfmade = model_knn_selfmade.predict(X_test)
    
    all_true_labels_knn_selfmade.extend(y_test)
    all_pred_labels_knn_selfmade.extend(y_pred_knn_selfmade)

print(classification_report(all_true_labels_knn_selfmade, all_pred_labels_knn_selfmade))


              precision    recall  f1-score   support

           0       0.68      0.68      0.68       500
           1       0.68      0.68      0.68       500

    accuracy                           0.68      1000
   macro avg       0.68      0.68      0.68      1000
weighted avg       0.68      0.68      0.68      1000



### KNN Library

In [19]:
model_knn_library = KNeighborsClassifier(n_neighbors=7, metric='euclidean')
model_knn_library.fit(X_train, y_train)
y_pred_knn_library = model_knn_library.predict(X_test)

print(classification_report(y_test, y_pred_knn_library))

              precision    recall  f1-score   support

           0       0.68      0.65      0.66        96
           1       0.69      0.72      0.70       104

    accuracy                           0.69       200
   macro avg       0.68      0.68      0.68       200
weighted avg       0.68      0.69      0.68       200



### Comparing KNN

In [20]:
print(np.mean(y_pred_knn_selfmade == y_pred_knn_library))

1.0


Metrik yang digunakan: 
- precision: untuk mengetes kelas positif;
- recall: untuk mengetes semua sampel yang benar-benar positif;
- F1 score: gabungan precision dan recall. <br>
Namun jika hanya boleh memilih 1 metrik saja, saya akan menggunakan F1 score karena F1 score sudah mencakup precision dan recall dan prediksi deposit ini tidak akan berakibat fatal sampai menyebabkan korban jiwa. Selain itu, F1 score juga dapat menjaga keseimbangan antara false positive dan false negative.

## Logistic Regression

### Regression Selfmade

#### Log Reg Hold-Out

In [23]:
model_log_reg_selfmade = Logistic_Regression_Selfmade(learning_rate=0.5, n_iterations=1000, regularization='l2', lambda_param=0.01, loss_function='logit')
model_log_reg_selfmade.fit(X_train, y_train)
y_pred_log_reg_selfmade = model_log_reg_selfmade.predict(X_test)

print(classification_report(y_pred_log_reg_selfmade, y_test))

Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 6.669666868388253
Iteration 200, Loss: 7.350000305793731
Iteration 300, Loss: 7.368750305793454
Iteration 400, Loss: 5.700000305828988
Iteration 500, Loss: 7.443750305899775
Iteration 600, Loss: 4.725000311649565
Iteration 700, Loss: 7.012500305799562
Iteration 800, Loss: 6.975000305800395
Iteration 900, Loss: 4.762500305857858
              precision    recall  f1-score   support

           0       0.97      0.53      0.68       177
           1       0.19      0.87      0.31        23

    accuracy                           0.56       200
   macro avg       0.58      0.70      0.50       200
weighted avg       0.88      0.57      0.64       200



#### Log Reg K-Fold

In [27]:
all_true_labels_log_reg_selfmade = []
all_pred_labels_log_reg_selfmade = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_log_reg_selfmade = Logistic_Regression_Selfmade(learning_rate=0.5, n_iterations=1000, regularization='l2', lambda_param=0.01, loss_function='cross_entropy')
    model_log_reg_selfmade.fit(X_train, y_train)
    y_pred_log_reg_selfmade = model_log_reg_selfmade.predict(X_test)
    
    all_true_labels_log_reg_selfmade.extend(y_test)
    all_pred_labels_log_reg_selfmade.extend(y_pred_log_reg_selfmade)

print(classification_report(all_true_labels_log_reg_selfmade, all_pred_labels_log_reg_selfmade))


Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 5.4684733407746595
Iteration 200, Loss: 5.574270112202605
Iteration 300, Loss: 5.212711401612636
Iteration 400, Loss: 5.212500305834262
Iteration 500, Loss: 5.081250305837038
Iteration 600, Loss: 5.025000305838981
Iteration 700, Loss: 4.8562503058428685
Iteration 800, Loss: 4.987500305843978
Iteration 900, Loss: 6.525000305808168
Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 7.443750305792066
Iteration 200, Loss: 7.143750305796785
Iteration 300, Loss: 4.950000305842035
Iteration 400, Loss: 4.426419409335672
Iteration 500, Loss: 6.543750305895335
Iteration 600, Loss: 6.543750305895057
Iteration 700, Loss: 6.337500305894503
Iteration 800, Loss: 6.262500305893947
Iteration 900, Loss: 6.150000305982212
Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 7.6312503059014425
Iteration 200, Loss: 5.793750305882843
Iteration 300, Loss: 6.900000305800949
Iteration 400, Loss: 6.862500305801506
Iteration 500, Loss: 6.60

### Regression Library

In [None]:
model_log_reg_library = LogisticRegression(max_iter=100, C=1/0.1, penalty='l2')
model_log_reg_library.fit(X_train, y_train)
y_pred_log_reg_library = model_log_reg_library.predict(X_test)

print(classification_report(y_pred_log_reg_library, y_test))

              precision    recall  f1-score   support

           0       0.86      0.86      0.86        22
           1       0.83      0.83      0.83        18

    accuracy                           0.85        40
   macro avg       0.85      0.85      0.85        40
weighted avg       0.85      0.85      0.85        40



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Comparing Logistic Regression

In [None]:
print(np.mean(y_pred_log_reg_selfmade == y_pred_log_reg_library))

0.725


Metrik yang digunakan: 
- precision: untuk mengetes kelas positif;
- recall: untuk mengetes semua sampel yang benar-benar positif;
- F1 score: gabungan precision dan recall. <br>
Namun jika hanya boleh memilih 1 metrik saja, saya akan menggunakan F1 score karena F1 score sudah mencakup precision dan recall dan prediksi deposit ini tidak akan berakibat fatal sampai menyebabkan korban jiwa. Selain itu, F1 score juga dapat menjaga keseimbangan antara false positive dan false negative.

## Gaussian Naive Bayes

### Naive Bayes Selfmade

#### Naive Bayes Hold-Out

In [None]:
model_naive_bayes_selfmade = Gaussian_Naive_Bayes_Selfmade()
model_naive_bayes_selfmade.fit(X_train, y_train)
y_pred_naive_bayes_selfmade = model_naive_bayes_selfmade.predict(X_test)

print(classification_report(y_pred_naive_bayes_selfmade, y_test))

              precision    recall  f1-score   support

           0       0.95      0.68      0.79        31
           1       0.44      0.89      0.59         9

    accuracy                           0.72        40
   macro avg       0.70      0.78      0.69        40
weighted avg       0.84      0.72      0.75        40



  class_conditional = np.sum(np.log(self._pdf(idx, x)))


#### Naive Bayes K-Fold

In [28]:
all_true_labels_naive_bayes_selfmade = []
all_pred_labels_naive_bayes_selfmade = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_naive_bayes_selfmade = Gaussian_Naive_Bayes_Selfmade()
    model_naive_bayes_selfmade.fit(X_train, y_train)
    y_pred_naive_bayes_selfmade = model_naive_bayes_selfmade.predict(X_test)
    
    all_true_labels_naive_bayes_selfmade.extend(y_test)
    all_pred_labels_naive_bayes_selfmade.extend(y_pred_naive_bayes_selfmade)

print(classification_report(all_true_labels_naive_bayes_selfmade, all_pred_labels_naive_bayes_selfmade))


              precision    recall  f1-score   support

           0       0.72      0.83      0.77       500
           1       0.80      0.67      0.73       500

    accuracy                           0.75      1000
   macro avg       0.76      0.75      0.75      1000
weighted avg       0.76      0.75      0.75      1000



  class_conditional = np.sum(np.log(self._pdf(idx, x)))


### Naive Bayes Library

In [None]:
model_naive_bayes_library = GaussianNB()
model_naive_bayes_library.fit(X_train, y_train)
y_pred_naive_bayes_library = model_naive_bayes_library.predict(X_test)

print(classification_report(y_pred_naive_bayes_library, y_test))

              precision    recall  f1-score   support

           0       0.91      0.77      0.83        26
           1       0.67      0.86      0.75        14

    accuracy                           0.80        40
   macro avg       0.79      0.81      0.79        40
weighted avg       0.82      0.80      0.80        40



### Comparing Naive Bayes

In [None]:
print(np.mean(y_pred_naive_bayes_selfmade == y_pred_naive_bayes_library))

0.875


Metrik yang digunakan: 
- precision: untuk mengetes kelas positif;
- recall: untuk mengetes semua sampel yang benar-benar positif;
- F1 score: gabungan precision dan recall. <br>
Namun jika hanya boleh memilih 1 metrik saja, saya akan menggunakan F1 score karena F1 score sudah mencakup precision dan recall dan prediksi deposit ini tidak akan berakibat fatal sampai menyebabkan korban jiwa. Selain itu, F1 score juga dapat menjaga keseimbangan antara false positive dan false negative.

## CART

### CART Selfmade

#### CART Hold Out

In [None]:
model_CART_selfmade = CART_Selfmade(max_depth=3)
model_CART_selfmade.fit(X_train, y_train)
y_pred_CART_selfmade = model_CART_selfmade.predict(X_test)

print(classification_report(y_pred_CART_selfmade, y_test))

              precision    recall  f1-score   support

           0       0.50      0.85      0.63        13
           1       0.89      0.59      0.71        27

    accuracy                           0.68        40
   macro avg       0.69      0.72      0.67        40
weighted avg       0.76      0.68      0.68        40



#### CART K-Fold

In [29]:
all_true_labels_CART_selfmade = []
all_pred_labels_CART_selfmade = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_CART_selfmade = CART_Selfmade(max_depth=3)
    model_CART_selfmade.fit(X_train, y_train)
    y_pred_CART_selfmade = model_CART_selfmade.predict(X_test)
    
    all_true_labels_CART_selfmade.extend(y_test)
    all_pred_labels_CART_selfmade.extend(y_pred_CART_selfmade)

print(classification_report(all_true_labels_CART_selfmade, all_pred_labels_CART_selfmade))


              precision    recall  f1-score   support

           0       0.77      0.70      0.73       500
           1       0.73      0.79      0.75       500

    accuracy                           0.74      1000
   macro avg       0.75      0.74      0.74      1000
weighted avg       0.75      0.74      0.74      1000



### CART Library

In [None]:
model_CART_library = DecisionTreeClassifier(max_depth=3)
model_CART_library.fit(X_train, y_train)
y_pred_CART_library = model_CART_library.predict(X_test)

print(classification_report(y_pred_CART_library, y_test))

              precision    recall  f1-score   support

           0       0.50      0.79      0.61        14
           1       0.83      0.58      0.68        26

    accuracy                           0.65        40
   macro avg       0.67      0.68      0.65        40
weighted avg       0.72      0.65      0.66        40



### Comparing CART

In [None]:
print(np.mean(y_pred_CART_selfmade == y_pred_CART_library))

0.975


Metrik yang digunakan: 
- precision: untuk mengetes kelas positif;
- recall: untuk mengetes semua sampel yang benar-benar positif;
- F1 score: gabungan precision dan recall. <br>
Namun jika hanya boleh memilih 1 metrik saja, saya akan menggunakan F1 score karena F1 score sudah mencakup precision dan recall dan prediksi deposit ini tidak akan berakibat fatal sampai menyebabkan korban jiwa. Selain itu, F1 score juga dapat menjaga keseimbangan antara false positive dan false negative.

## SVC

### SVC Selfmade

#### SVC Hold Out

In [6]:
model_SVC_selfmade = SVC_Selfmade(learning_rate=0.1, lambda_param=0.01, n_iterations=100, kernel='polynomial')
model_SVC_selfmade.fit(X_train, y_train)
y_pred_SVC_selfmade = model_SVC_selfmade.predict(X_test)

print(classification_report(y_pred_SVC_selfmade, y_test))

              precision    recall  f1-score   support

           0       0.84      0.53      0.65       159
           1       0.25      0.61      0.35        41

    accuracy                           0.55       200
   macro avg       0.54      0.57      0.50       200
weighted avg       0.72      0.55      0.59       200



#### SVC K-Fold

In [7]:
all_true_labels_SVC_selfmade = []
all_pred_labels_SVC_selfmade = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_SVC_selfmade = SVC_Selfmade(learning_rate=0.1, lambda_param=0.01, n_iterations=100, kernel='polynomial')
    model_SVC_selfmade.fit(X_train, y_train)
    y_pred_SVC_selfmade = model_SVC_selfmade.predict(X_test)
    
    all_true_labels_SVC_selfmade.extend(y_test)
    all_pred_labels_SVC_selfmade.extend(y_pred_SVC_selfmade)

print(classification_report(all_true_labels_SVC_selfmade, all_pred_labels_SVC_selfmade))


              precision    recall  f1-score   support

           0       0.51      0.84      0.64       500
           1       0.55      0.19      0.29       500

    accuracy                           0.52      1000
   macro avg       0.53      0.52      0.46      1000
weighted avg       0.53      0.52      0.46      1000



### SVC Library

In [None]:
model_SVC_library = SVC(max_iter=100, kernel='poly')
model_SVC_library.fit(X_train.values, y_train.values)
y_pred_SVC_library = model_SVC_library.predict(X_test)

print(classification_report(y_pred_SVC_library, y_test))

              precision    recall  f1-score   support

           0       0.03      0.43      0.06         7
           1       0.96      0.50      0.66       193

    accuracy                           0.49       200
   macro avg       0.49      0.46      0.36       200
weighted avg       0.93      0.49      0.63       200





### Comparing SVC

In [None]:
print(np.mean(y_pred_SVC_selfmade == y_pred_SVC_library))

0.24


Metrik yang digunakan: 
- precision: untuk mengetes kelas positif;
- recall: untuk mengetes semua sampel yang benar-benar positif;
- F1 score: gabungan precision dan recall. <br>
Namun jika hanya boleh memilih 1 metrik saja, saya akan menggunakan F1 score karena F1 score sudah mencakup precision dan recall dan prediksi deposit ini tidak akan berakibat fatal sampai menyebabkan korban jiwa. Selain itu, F1 score juga dapat menjaga keseimbangan antara false positive dan false negative.

## ANN

### ANN Selfmade

#### ANN Hold Out

In [5]:
model_ANN_selfmade = ANN_Selfmade(sizes=[51, 50, 1], activation='sigmoid')
model_ANN_selfmade.train(X_train, y_train, X_test, y_test, batch_size=20, optimizer='sgd', l_rate=0.0001, beta=.9)
y_pred_ANN_selfmade = model_ANN_selfmade.predict(X_test)

print(classification_report(y_pred_ANN_selfmade, y_test))

Epoch 1: 0.02s, train acc=0.50, train loss=18.74, test acc=0.50, test loss=0.75
Epoch 2: 0.03s, train acc=0.50, train loss=18.69, test acc=0.50, test loss=0.75
Epoch 3: 0.05s, train acc=0.50, train loss=18.69, test acc=0.50, test loss=0.75
Epoch 4: 0.06s, train acc=0.50, train loss=18.65, test acc=0.50, test loss=0.75
Epoch 5: 0.08s, train acc=0.50, train loss=18.69, test acc=0.50, test loss=0.75
Epoch 6: 0.09s, train acc=0.50, train loss=18.69, test acc=0.50, test loss=0.74
Epoch 7: 0.10s, train acc=0.50, train loss=18.61, test acc=0.50, test loss=0.74
Epoch 8: 0.12s, train acc=0.50, train loss=18.61, test acc=0.50, test loss=0.74
Epoch 9: 0.13s, train acc=0.50, train loss=18.61, test acc=0.50, test loss=0.74
Epoch 10: 0.15s, train acc=0.50, train loss=18.65, test acc=0.50, test loss=0.74
              precision    recall  f1-score   support

           0       0.22      0.31      0.26        72
           1       0.50      0.39      0.44       128

    accuracy                       

#### ANN K-Fold

In [8]:
all_true_labels_ANN_selfmade = []
all_pred_labels_ANN_selfmade = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_ANN_selfmade = ANN_Selfmade(sizes=[51, 50, 1], activation='sigmoid')
    model_ANN_selfmade.train(X_train, y_train, X_test, y_test, batch_size=20, optimizer='sgd', l_rate=0.0001, beta=.9)
    y_pred_ANN_selfmade = model_ANN_selfmade.predict(X_test)
    
    all_true_labels_ANN_selfmade.extend(y_test)
    all_pred_labels_ANN_selfmade.extend(y_pred_ANN_selfmade)

print(classification_report(all_true_labels_ANN_selfmade, all_pred_labels_ANN_selfmade))


Epoch 1: 0.01s, train acc=0.50, train loss=16.97, test acc=0.50, test loss=0.72
Epoch 2: 0.03s, train acc=0.50, train loss=16.75, test acc=0.50, test loss=0.72
Epoch 3: 0.04s, train acc=0.50, train loss=16.75, test acc=0.50, test loss=0.71
Epoch 4: 0.05s, train acc=0.50, train loss=16.88, test acc=0.50, test loss=0.71
Epoch 5: 0.06s, train acc=0.50, train loss=16.88, test acc=0.50, test loss=0.71
Epoch 6: 0.08s, train acc=0.50, train loss=16.84, test acc=0.50, test loss=0.70
Epoch 7: 0.09s, train acc=0.50, train loss=16.75, test acc=0.50, test loss=0.70
Epoch 8: 0.10s, train acc=0.50, train loss=16.79, test acc=0.50, test loss=0.70
Epoch 9: 0.12s, train acc=0.50, train loss=16.84, test acc=0.50, test loss=0.70
Epoch 10: 0.13s, train acc=0.50, train loss=16.88, test acc=0.50, test loss=0.70
Epoch 1: 0.01s, train acc=0.50, train loss=17.96, test acc=0.50, test loss=0.70
Epoch 2: 0.03s, train acc=0.50, train loss=17.96, test acc=0.50, test loss=0.70
Epoch 3: 0.04s, train acc=0.50, train l

### ANN Library

In [None]:
model_ANN_library = MLPClassifier(hidden_layer_sizes=(50,), activation='logistic', solver='sgd',
                    learning_rate_init=0.0001, max_iter=10, batch_size=20, momentum=0.9)
model_ANN_library.fit(X_train, y_train)
y_pred_ANN_library = model_ANN_library.predict(X_test)

print(classification_report(y_pred_ANN_library, y_test))



              precision    recall  f1-score   support

           0       0.64      0.64      0.64        22
           1       0.56      0.56      0.56        18

    accuracy                           0.60        40
   macro avg       0.60      0.60      0.60        40
weighted avg       0.60      0.60      0.60        40



Metrik yang digunakan: 
- precision: untuk mengetes kelas positif;
- recall: untuk mengetes semua sampel yang benar-benar positif;
- F1 score: gabungan precision dan recall. <br>
Namun jika hanya boleh memilih 1 metrik saja, saya akan menggunakan F1 score karena F1 score sudah mencakup precision dan recall dan prediksi deposit ini tidak akan berakibat fatal sampai menyebabkan korban jiwa. Selain itu, F1 score juga dapat menjaga keseimbangan antara false positive dan false negative.

## K-Means Clustering

### K-Means Clustering Selfmade

In [None]:
model_KMeans_selfmade = KMeans_Selfmade(n_clusters=2, max_iter=100)
model_KMeans_selfmade.fit(X_train)
y_pred_KMeans_selfmade = model_KMeans_selfmade.predict(X_test)

print(y_pred_KMeans_selfmade)

[1 0 1 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 0 1]


### K-Means Clustering Library

In [None]:
model_KMeans_library = KMeans(n_clusters=2, max_iter=100)
model_KMeans_library.fit(X_train)
y_pred_KMeans_library = model_KMeans_library.predict(X_test)

print(y_pred_KMeans_library)

[0 1 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0]


### Comparing KMeans

In [None]:
print(classification_report(y_pred_KMeans_library, y_pred_KMeans_selfmade))

              precision    recall  f1-score   support

           0       0.29      0.06      0.10        35
           1       0.00      0.00      0.00         5

    accuracy                           0.05        40
   macro avg       0.14      0.03      0.05        40
weighted avg       0.25      0.05      0.08        40



## DBSCAN

### DBSCAN Selfmade 

In [None]:
model_DBSCAN_selfmade = DBSCAN_Selfmade(eps=200, min_samples=3, metric='euclidean', p=5)
model_DBSCAN_selfmade.fit(X_train)
y_pred_DBSCAN_selfmade = model_DBSCAN_selfmade.predict()

print(y_pred_DBSCAN_selfmade)

[ 0  1  2  3  4  5  6  4  4 -1  7  6  1  1  4  7  8  7 -1  1  1  1  0  6
  9  1  1  1  6 -1  1 10  9 11 -1 -1  1 21  1  7  1  2 11 10 -1  5  3  0
  7  4  8 12  1  1 13 20 -1  7  1  3  0  5 14  2 14  4  2  7 13  7  9  8
 -1  2  1  7 12  1  7 -1 15  6 21  1  4 -1  2 -1  5 16 17 18 -1  4  6 19
  8  9 17 17  7  8  0  1 20  4 13 15 -1 -1  7  1  1  9  1  1 -1 21 -1  2
  0  7  1 -1  1 13  5  8 15 16  0 11  4  7  8 22  3 23  2  1  5  5  1 11
  7 14  7  1  4 15 -1  2  0 -1 17  7  4  7 -1  0]


### DBSCAN Library

In [None]:
model_DBSCAN_library = DBSCAN(eps=200, min_samples=3).fit(X_train)
y_pred_DBSCAN_library = model_DBSCAN_library.labels_

print(y_pred_DBSCAN_library)

[ 0  0  0  0  0  0  0  0  0 -1  0  0  0  0  0  0  0  0 -1  0  0  0  0  0
  0  0  0  0  0  2  0  0  0  0 -1 -1  0  3  0  0  0  0  0  0 -1  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0  0  0
  1  0  0  0  0  0  0 -1  0  0  3  0  0 -1  0 -1  0  0  2  0  0  0  0  0
  0  0  2  2  0  0  0  0  0  0  0  0 -1 -1  0  0  0  0  0  0 -1  3 -1  0
  0  0  0 -1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  1  0  0  0  0 -1  0  0 -1  2  0  0  0 -1  0]


In [None]:
print(classification_report(y_pred_DBSCAN_library, y_pred_DBSCAN_selfmade))

              precision    recall  f1-score   support

          -1       0.80      1.00      0.89        16
           0       1.00      0.07      0.13       132
           1       0.00      0.00      0.00         4
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         0
          16       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## PCA

### PCA Selfmade

In [None]:
kmeans = KMeans(n_clusters=3)

In [None]:
model_PCA_selfmade = PCA_Selfmade(n_components=2)
label_pca = model_PCA_selfmade.fit_transform(X_train)
print("Transformed Data:\n", label_pca) 
print("Explained Variance:", model_PCA_selfmade.explained_variance)

y_pred_PCA_selfmade = kmeans.fit_predict(label_pca)
print(y_pred_PCA_selfmade)
 

Transformed Data:
 [[-2.97320017e+00 -2.81347529e-01]
 [ 8.67671742e-01  1.56627551e+00]
 [-4.46660518e-01  1.81989737e+00]
 [ 1.78344417e+00  1.53863085e+00]
 [ 2.17997923e+00  2.40002453e+00]
 [-2.92151341e+00  1.46653275e+00]
 [-3.02988824e-01  7.57628035e-01]
 [ 1.50100516e+00  2.24523955e+00]
 [ 3.21840006e+00  1.79756988e+00]
 [-1.54942956e+00 -2.25314145e+00]
 [ 2.27899982e+00  2.57726159e+00]
 [ 1.25155021e+00  3.94493471e-01]
 [-8.71547634e-01  1.76393695e-01]
 [-7.27236255e-01  1.02551361e+00]
 [-1.19623477e+00  2.23679127e-01]
 [ 5.38284119e-01  2.96737926e+00]
 [ 1.83089244e-02  2.41307974e+00]
 [ 1.76732763e+00 -1.27513093e+00]
 [ 3.28725137e+00  1.19769700e-01]
 [-1.37544608e+00  1.36272537e+00]
 [ 1.38122975e+00  2.21575164e+00]
 [ 6.65684362e-01  1.85613640e+00]
 [-2.18009973e+00 -1.34418100e+00]
 [-5.66611334e-01  2.18986896e+00]
 [-2.94133630e+00 -7.73986164e-02]
 [ 2.12978152e+00  1.64595069e+00]
 [ 4.16526699e+00 -8.39723087e-01]
 [ 2.58950376e+00 -1.82392320e+00]
 

### PCA Library

In [None]:
model_PCA_library = PCA(n_components=2)
label_pca_library = model_PCA_library.fit_transform(X_train)

print("Transformed Data (Library):\n", label_pca_library)
print("Explained Variance (Library):", model_PCA_library.explained_variance_ratio_)

y_pred_PCA_library = kmeans.fit_predict(label_pca_library)
print(y_pred_PCA_library)


Transformed Data (Library):
 [[-6.37156472e+02  1.87967502e+02]
 [-9.36766763e+02 -1.15286540e+02]
 [-8.73073946e+02  1.27047600e+02]
 [ 2.06191396e+02 -1.69752601e+02]
 [-2.69006748e+02 -9.06967105e+01]
 [ 7.89780116e+01 -4.99300410e+00]
 [ 4.09624055e+02 -1.10677396e+02]
 [-3.23937356e+02 -2.12559521e+02]
 [-3.54204899e+02 -5.52805882e+01]
 [-2.06775951e+01  3.06145386e+02]
 [-6.71626451e+02 -1.97925606e+02]
 [ 4.10924074e+02  2.87538759e+01]
 [-9.16394919e+02 -5.52711465e+01]
 [-9.02645819e+02  6.11521805e+01]
 [-2.35993749e+02 -1.30266049e+02]
 [-6.02605061e+02 -6.97209940e+01]
 [ 8.47845755e+02 -6.44099181e+01]
 [-6.50705347e+02 -2.94498646e+01]
 [ 1.84307558e+03 -1.89899653e+02]
 [-7.78542123e+02 -1.48217640e+02]
 [-8.97011712e+02  5.50590574e-01]
 [-8.75939310e+02 -2.07531332e+02]
 [-5.92737235e+02  6.24029577e+01]
 [ 5.69312943e+02 -8.26820243e+01]
 [-4.85187518e+02  3.16806434e+02]
 [-8.66490237e+02 -1.87714236e+02]
 [-8.76828196e+02  5.13197543e+01]
 [-9.18814679e+02 -1.16408

In [None]:
print(classification_report(y_pred_PCA_library, y_pred_PCA_selfmade))

              precision    recall  f1-score   support

           0       0.27      0.42      0.33        40
           1       0.65      0.35      0.45        98
           2       0.18      0.36      0.24        22

    accuracy                           0.37       160
   macro avg       0.37      0.38      0.34       160
weighted avg       0.49      0.37      0.39       160



## Bagging

### Bagging Selfmade

In [None]:
model_bagging_selfmade = Ensemble_Bagging_Selfmade(base_estimator=DecisionTreeClassifier, n_estimators=5, max_samples=0.6)

model_bagging_selfmade.fit(X_train, y_train)
y_pred_bagging_selfmade = model_bagging_selfmade.predict(X_test)

print(classification_report(y_pred_bagging_selfmade, y_test))

              precision    recall  f1-score   support

           0       0.86      0.83      0.84        23
           1       0.78      0.82      0.80        17

    accuracy                           0.82        40
   macro avg       0.82      0.82      0.82        40
weighted avg       0.83      0.82      0.83        40

