In [1]:
from algo import preprocess, distance, ml_supervised  # project-local package; NOTE(review): `distance` is not referenced in the visible cells - confirm it is still needed
from sklearn.datasets import load_breast_cancer, load_iris # dataset loaders
from sklearn.model_selection import train_test_split # split data into train and test sets
from sklearn.metrics import classification_report # classification report for model evaluation

# Breast Cancer dataset: binary classification (labels 0 or 1).
# Iris dataset: multiclass classification, used here specifically for Softmax Regression,
# since Softmax Regression handles multiclass problems.

# Load Datasets :

In [2]:
# Hold-out settings shared by both datasets. Defining them once keeps the two
# splits from silently drifting apart, and the fixed seed makes every
# Restart & Run All produce the same train/test partition.
TEST_SIZE = 0.15
RANDOM_STATE = 2022

# Breast Cancer: features and labels for the binary-classification models.
breast_cancer = load_breast_cancer()
X_bc, y_bc = breast_cancer.data, breast_cancer.target

X_train_bc, X_test_bc, y_train_bc, y_test_bc = train_test_split(
    X_bc, y_bc, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Iris: features and labels for the multiclass (softmax) model.
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris, y_iris, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Preprocess :

In [3]:
# Breast Cancer: the scaler sees only the training split in calc(); the same
# scaler instance is then applied to the training and test features, in that order.
# NOTE(review): calc() presumably stores the scaling statistics - confirm in algo.preprocess.
sc_bc = preprocess.Standardize()
sc_bc.calc(X_train_bc)
sc_train_bc, sc_test_bc = [
    sc_bc.scale(features) for features in (X_train_bc, X_test_bc)
]

# Iris: same procedure with an independent scaler instance.
sc_iris = preprocess.Standardize()
sc_iris.calc(X_train_iris)
sc_train_iris, sc_test_iris = [
    sc_iris.scale(features) for features in (X_train_iris, X_test_iris)
]

# Supervised Machine Learning :

## Binary Classification :

### Naive Bayes :

In [4]:
# Fit the project's NaiveBayes classifier (constructed with its default
# arguments) on the scaled breast-cancer training split.
nb = ml_supervised.NaiveBayes()
nb.fit(sc_train_bc, y_train_bc)

# Predictions for the scaled test split; compared against y_test_bc in the next cell.
nb_pred_bc = nb.predict(sc_test_bc)

In [5]:
# Title line followed by the per-class precision/recall/F1 table for the
# Naive Bayes predictions on the breast-cancer test split.
print(
    "Naive Bayes Evaluation : ",
    classification_report(y_test_bc, nb_pred_bc),
    sep="\n",
)

Naive Bayes Evaluation : 
              precision    recall  f1-score   support

           0       0.89      0.94      0.92        35
           1       0.96      0.92      0.94        51

    accuracy                           0.93        86
   macro avg       0.93      0.93      0.93        86
weighted avg       0.93      0.93      0.93        86



### Logistic Regression : 

In [6]:
# Fit the project's LogisticRegression (constructed with its default arguments;
# this is algo.ml_supervised, not scikit-learn's class of the same name)
# on the scaled breast-cancer training split.
log_reg = ml_supervised.LogisticRegression()
log_reg.fit(sc_train_bc, y_train_bc)

# Predictions for the scaled test split; compared against y_test_bc in the next cell.
log_reg_pred = log_reg.predict(sc_test_bc)

In [7]:
# Title line followed by the per-class precision/recall/F1 table for the
# Logistic Regression predictions on the breast-cancer test split.
print(
    "Logistic Regression Evaluation : ",
    classification_report(y_test_bc, log_reg_pred),
    sep="\n",
)

Logistic Regression Evaluation : 
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        35
           1       1.00      0.96      0.98        51

    accuracy                           0.98        86
   macro avg       0.97      0.98      0.98        86
weighted avg       0.98      0.98      0.98        86



### K-Nearest Neighbor :

In [8]:
# Fit the project's KNearestNeighbor classifier on the scaled breast-cancer
# training split. It is constructed with its default arguments, so the number
# of neighbours and the distance measure are whatever the class defaults to
# (see algo.ml_supervised).
knn = ml_supervised.KNearestNeighbor()
knn.fit(sc_train_bc, y_train_bc)

# Predictions for the scaled test split; compared against y_test_bc in the next cell.
knn_pred = knn.predict(sc_test_bc)

In [9]:
# Title line followed by the per-class precision/recall/F1 table for the
# KNN predictions on the breast-cancer test split.
print(
    "KNN Evaluation :",
    classification_report(y_test_bc, knn_pred),
    sep="\n",
)

KNN Evaluation :
              precision    recall  f1-score   support

           0       1.00      0.94      0.97        35
           1       0.96      1.00      0.98        51

    accuracy                           0.98        86
   macro avg       0.98      0.97      0.98        86
weighted avg       0.98      0.98      0.98        86



## Multiclass Classification :

### Softmax Regression :

In [10]:
# Fit the project's SoftmaxRegression (constructed with its default arguments)
# on the scaled Iris training split.
sr = ml_supervised.SoftmaxRegression()
sr.fit(sc_train_iris, y_train_iris)

# Predictions for the scaled Iris test split; compared against y_test_iris in the next cell.
sr_pred = sr.predict(sc_test_iris)

In [11]:
# Per-class precision/recall/F1 table for the Softmax Regression predictions
# on the Iris test split.
# NOTE(review): unlike the other evaluation cells, this one prints no title line.
print(classification_report(y_true=y_test_iris, y_pred=sr_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      0.57      0.73         7
           2       0.75      1.00      0.86         9

    accuracy                           0.87        23
   macro avg       0.92      0.86      0.86        23
weighted avg       0.90      0.87      0.86        23

