In [1]:
import typing as t
import numpy as np
import itertools

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import LeaveOneOut
from scipy.sparse.csgraph import minimum_spanning_tree
from sklearn.metrics import accuracy_score

from scipy.spatial import distance

In [23]:
from sklearn.datasets import load_iris
from sklearn.datasets import load_breast_cancer

data = load_iris()
# data = load_breast_cancer()
y = data.target
X = data.data

In [24]:
def precompute_fx(X: np.ndarray, y: np.ndarray) -> t.Dict[str, t.Any]:

    prepcomp_vals = {}
    
    classes, class_freqs = np.unique(y, return_counts=True)
    cls_index = [np.equal(y, i) for i in range(classes.shape[0])]

    #cls_n_ex = np.array([np.sum(aux) for aux in cls_index])
    cls_n_ex = list(class_freqs)
    ovo_comb = list(itertools.combinations(range(classes.shape[0]), 2))
    prepcomp_vals["ovo_comb"] = ovo_comb
    prepcomp_vals["cls_index"] = cls_index
    prepcomp_vals["cls_n_ex"] = cls_n_ex
    return prepcomp_vals

In [25]:
precomp_fx = precompute_fx(X, y)
cls_index = precomp_fx['cls_index'] #true-false
cls_n_ex = precomp_fx['cls_n_ex']   # number of elements in each class
ovo_comb = precomp_fx['ovo_comb']   # pairs

## Feature-based Measures

### 1- Maximum Fisher’s Discriminant Ratio (F1)

In [14]:
def numerator (X: np.ndarray, cls_index, cls_n_ex, i) -> float:
    return np.sum([cls_n_ex[j]*np.power((np.mean(X[cls_index[j], i])-np.mean(X[:, i], axis=0)),2) for j in range (len(cls_index))])
# according to aquation(3)

In [15]:
def denominator (X: np.ndarray, cls_index, cls_n_ex, i) -> float:
    return np.sum([np.sum(np.power(X[cls_index[j], i]-np.mean(X[cls_index[j], i], axis=0), 2)) for j in range(0, len(cls_n_ex))])
# according to aquation(3)

In [16]:
def compute_rfi (X: np.ndarray, cls_index, cls_n_ex) -> float:
    return [numerator (X, cls_index, cls_n_ex, i)/denominator(X, cls_index, cls_n_ex, i) for i in range(np.shape(X)[1])]

In [17]:
def ft_F1(X: np.ndarray, cls_index: np.ndarray, cls_n_ex: np.ndarray) -> float:
    return 1/(1 + np.max(compute_rfi (X, cls_index, cls_n_ex)))

In [26]:
ft_F1(X, cls_index, cls_n_ex)

0.05862828094263205

### 2- Volume of Overlapping Region (F2)¶

In [56]:
def ft_F1v(X: np.ndarray, ovo_comb: np.ndarray, cls_index: np.ndarray) ->float:
    df_list = []
    
    for idx1, idx2 in ovo_comb:
        y_class1 = cls_index[idx1]
        y_class2 = cls_index[idx2]
        dF = dVector(X, y_class1, y_class2)
        df_list.append(1/(1+dF))
        
    return np.mean(df_list)

In [60]:
def dVector(X: np.ndarray, y_class1: np.ndarray, y_class2: np.ndarray) -> float:
    X_class1 = X[y_class1]; u_class1 = np.mean(X_class1, axis= 0)
    X_class2 = X[y_class2]; u_class2 = np.mean(X_class2, axis= 0)
    
    W = ((np.shape(X_class1)[0]/ (np.shape(X_class1)[0] + np.shape(X_class2)[0]))* np.cov(X_class1.T)) + (np.shape(X_class2)[0]/(np.shape(X_class1)[0] + (np.shape(X_class2)[0])) * np.cov(X_class2.T))
    
    d = np.dot(np.linalg.inv(W), (u_class1 - u_class2))
    
    B = np.dot((u_class1 - u_class2),((u_class1 - u_class2).T))
    
    return np.dot(np.dot(d.T, B), d)/ np.dot(np.dot(d.T, W), d)

In [61]:
ft_F1v(X, ovo_comb, cls_index)

0.010007003139831785