<a href="https://colab.research.google.com/github/asrulharunismail/BA-Machine-Learning/blob/main/N_Coverage_GAB_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import warnings
import numpy as np
from scipy import sparse as sp
import random

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.validation import check_array, check_X_y, check_is_fitted
from sklearn.utils.sparsefuncs import csc_median_axis_0
from sklearn.utils.multiclass import check_classification_targets

class NearestCoverage(BaseEstimator, ClassifierMixin):
    """Nearest-centroid style classifier that discounts each class's spread.

    Every class is summarised by a centroid and a "radius" (the mean distance
    from the class's training samples to that centroid).  At prediction time
    a sample is assigned to the class minimising
    ``distance_to_centroid - radius_regulator * radius``.

    Parameters
    ----------
    metric : str, default='euclidean'
        Metric forwarded to ``pairwise_distances``.  With ``'manhattan'`` the
        centroid is the feature-wise median; for every other metric it is the
        feature-wise mean (a warning is issued for metrics other than
        ``'euclidean'`` and ``'manhattan'``).
    shrink_threshold : float or None, default=None
        Threshold for shrinking centroids towards the overall data centroid.
        Not supported for sparse input.
    radius_regulator : float, default=0.5
        Weight applied to the class radius when it is subtracted from the
        centroid distances in ``predict``.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Class labels as encoded by ``LabelEncoder``.
    centroids_ : ndarray of shape (n_classes, n_features)
        Centroid of each class.
    radius_ : ndarray of shape (n_classes, 1)
        Mean distance from the training samples of each class to the class
        centroid, measured with ``metric``.
    """

    def __init__(self, metric='euclidean', shrink_threshold=None,
                 radius_regulator=0.5):
        self.metric = metric
        self.shrink_threshold = shrink_threshold
        self.radius_regulator = radius_regulator

    def fit(self, X, y):
        """Compute per-class centroids and radii from the training data.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Target class labels.

        Returns
        -------
        self : NearestCoverage
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``shrink_threshold`` is set and ``X`` is sparse, or if ``y``
            contains fewer than two classes.
        """
        # If X is sparse and the metric is "manhattan", store it in csc
        # format, which makes the median easier to calculate.
        if self.metric == 'manhattan':
            X, y = check_X_y(X, y, ['csc'])
        else:
            X, y = check_X_y(X, y, ['csr', 'csc'])
        is_X_sparse = sp.issparse(X)
        if is_X_sparse and self.shrink_threshold:
            raise ValueError("threshold shrinking not supported"
                             " for sparse input")
        check_classification_targets(y)

        n_samples, n_features = X.shape
        le = LabelEncoder()
        y_ind = le.fit_transform(y)
        self.classes_ = classes = le.classes_
        n_classes = classes.size
        if n_classes < 2:
            raise ValueError('y has less than 2 classes')

        # The message does not depend on the class, so emit it once instead
        # of once per loop iteration.
        if self.metric not in ('euclidean', 'manhattan'):
            warnings.warn("Averaging for metrics other than "
                          "euclidean and manhattan not supported. "
                          "The average is set to be the mean."
                          )

        self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)
        self.radius_ = np.empty((n_classes, 1), dtype=np.float64)
        # Number of training samples in each class.
        nk = np.zeros(n_classes)

        for cur_class in range(n_classes):
            center_mask = y_ind == cur_class
            nk[cur_class] = np.sum(center_mask)
            if is_X_sparse:
                # Sparse matrices are indexed with row positions rather than
                # a boolean mask.
                center_mask = np.where(center_mask)[0]
            members = X[center_mask]

            # XXX: Update other averaging methods according to the metrics.
            if self.metric == "manhattan":
                # NumPy does not calculate the median of sparse matrices.
                if not is_X_sparse:
                    self.centroids_[cur_class] = np.median(members, axis=0)
                else:
                    self.centroids_[cur_class] = csc_median_axis_0(members)
            else:
                self.centroids_[cur_class] = members.mean(axis=0)

            # The radius must be computed for every metric; previously it was
            # skipped for "manhattan", leaving radius_ uninitialised.
            self.radius_[cur_class] = pairwise_distances(
                members,
                self.centroids_[cur_class].reshape(1, -1),
                metric=self.metric,
            ).mean()

        if self.shrink_threshold:
            # NOTE(review): radius_ stays measured against the unshrunk
            # centroids computed above - confirm this is intended.
            dataset_centroid_ = np.mean(X, axis=0)

            # m parameter for determining deviation
            m = np.sqrt((1. / nk) + (1. / n_samples))
            # Calculate deviation using the standard deviation of centroids.
            variance = (X - self.centroids_[y_ind]) ** 2
            variance = variance.sum(axis=0)
            s = np.sqrt(variance / (n_samples - n_classes))
            s += np.median(s)  # To deter outliers from affecting the results.
            mm = m.reshape(len(m), 1)  # Reshape to allow broadcasting.
            ms = mm * s
            deviation = ((self.centroids_ - dataset_centroid_) / ms)
            # Soft thresholding: if the deviation crosses 0 during shrinking,
            # it becomes zero.
            signs = np.sign(deviation)
            deviation = (np.abs(deviation) - self.shrink_threshold)
            deviation[deviation < 0] = 0
            deviation *= signs
            # Now adjust the centroids using the deviation
            msd = ms * deviation
            self.centroids_ = dataset_centroid_[np.newaxis, :] + msd
        return self

    def predict(self, X):
        """Predict the class label of each sample in ``X``.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        ndarray of shape (n_samples,)
            For each sample, the entry of ``classes_`` whose centroid
            distance minus ``radius_regulator * radius`` is smallest.
        """
        check_is_fitted(self, 'centroids_')

        X = check_array(X, accept_sparse='csr')

        # Fall back to the historical constant for instances created before
        # ``radius_regulator`` became a constructor parameter.
        radius_regulator = getattr(self, 'radius_regulator', 0.5)

        distances = pairwise_distances(X, self.centroids_, metric=self.metric)
        # Lay the radii out as one row so they broadcast across all samples;
        # fitted state is read only, never reassigned.
        radii = np.asarray(self.radius_, dtype=np.float64).reshape(1, -1)
        adjusted = distances - radii * radius_regulator

        return self.classes_[adjusted.argmin(axis=1)]

    
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
 
# Fetch the breast cancer dataset through scikit-learn's loader.
dataset = load_breast_cancer()

# Wrap the feature matrix and the target labels in DataFrames.
X = pd.DataFrame(dataset.data)
y = pd.DataFrame(dataset.target)

ts = 0.3       # fraction of the samples held out as the test set
R = range(10)  # random seeds, one per experiment cell

# Shuffled train/test partition seeded with R[0].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[0],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=0")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=0
Training Set Score : 90.20100502512562 %
Test Set Score : 92.98245614035088 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.96      0.84      0.90        63
           1       0.91      0.98      0.95       108

    accuracy                           0.93       171
   macro avg       0.94      0.91      0.92       171
weighted avg       0.93      0.93      0.93       171



In [None]:
# Shuffled train/test partition seeded with R[1].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[1],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=1")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=1
Training Set Score : 91.20603015075378 %
Test Set Score : 90.64327485380117 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.96      0.78      0.86        63
           1       0.88      0.98      0.93       108

    accuracy                           0.91       171
   macro avg       0.92      0.88      0.89       171
weighted avg       0.91      0.91      0.90       171



In [None]:
# Shuffled train/test partition seeded with R[2].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[2],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=2")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=2
Training Set Score : 90.95477386934674 %
Test Set Score : 90.64327485380117 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.93      0.82      0.87        67
           1       0.89      0.96      0.93       104

    accuracy                           0.91       171
   macro avg       0.91      0.89      0.90       171
weighted avg       0.91      0.91      0.91       171



In [None]:
# Shuffled train/test partition seeded with R[3].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[3],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=3")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=3
Training Set Score : 91.20603015075378 %
Test Set Score : 90.64327485380117 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.93      0.81      0.86        62
           1       0.90      0.96      0.93       109

    accuracy                           0.91       171
   macro avg       0.91      0.88      0.90       171
weighted avg       0.91      0.91      0.90       171



In [None]:
# Shuffled train/test partition seeded with R[4].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[4],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=4")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=4
Training Set Score : 91.70854271356784 %
Test Set Score : 88.88888888888889 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.87      0.76      0.81        54
           1       0.90      0.95      0.92       117

    accuracy                           0.89       171
   macro avg       0.88      0.85      0.87       171
weighted avg       0.89      0.89      0.89       171



In [None]:
# Shuffled train/test partition seeded with R[5].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[5],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=5")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=5
Training Set Score : 88.94472361809045 %
Test Set Score : 95.32163742690058 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       1.00      0.87      0.93        61
           1       0.93      1.00      0.96       110

    accuracy                           0.95       171
   macro avg       0.97      0.93      0.95       171
weighted avg       0.96      0.95      0.95       171



In [None]:
# Shuffled train/test partition seeded with R[6].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[6],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=6")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=6
Training Set Score : 90.7035175879397 %
Test Set Score : 91.81286549707602 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.95      0.85      0.90        74
           1       0.90      0.97      0.93        97

    accuracy                           0.92       171
   macro avg       0.92      0.91      0.92       171
weighted avg       0.92      0.92      0.92       171



In [None]:
# Shuffled train/test partition seeded with R[7].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[7],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=7")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=7
Training Set Score : 90.7035175879397 %
Test Set Score : 92.39766081871345 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.94      0.82      0.87        55
           1       0.92      0.97      0.95       116

    accuracy                           0.92       171
   macro avg       0.93      0.90      0.91       171
weighted avg       0.92      0.92      0.92       171



In [None]:
# Shuffled train/test partition seeded with R[8].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[8],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=8")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=8
Training Set Score : 90.20100502512562 %
Test Set Score : 92.39766081871345 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.98      0.82      0.89        66
           1       0.90      0.99      0.94       105

    accuracy                           0.92       171
   macro avg       0.94      0.90      0.92       171
weighted avg       0.93      0.92      0.92       171



In [None]:
# Shuffled train/test partition seeded with R[9].
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=ts,
    shuffle=True,
    random_state=R[9],
)

# Build a fresh Nearest Coverage classifier and fit it on the flattened
# training labels.
model = NearestCoverage()
model.fit(X_train, y_train.values.ravel())

# Report accuracy (as a percentage) on the training and test partitions.
print("Random Seed=9")
train_pct = model.score(X_train, y_train) * 100
print(f"Training Set Score : {train_pct} %")
test_pct = model.score(X_test, y_test) * 100
print(f"Test Set Score : {test_pct} %")

# Classification report for the predictions on the test partition.
report = classification_report(y_test, model.predict(X_test))
print(f"Model Classification Report : \n{report}")

Random Seed=9
Training Set Score : 91.20603015075378 %
Test Set Score : 90.64327485380117 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.96      0.77      0.86        62
           1       0.88      0.98      0.93       109

    accuracy                           0.91       171
   macro avg       0.92      0.88      0.89       171
weighted avg       0.91      0.91      0.90       171

