# In [1]:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, normalized_mutual_info_score, adjusted_rand_score
from scipy.optimize import linear_sum_assignment
from sklearn.linear_model import Ridge
from sklearn.preprocessing import LabelEncoder
import pandas as pd

def labelconvert(Y):
    """Turn a one-hot label indicator matrix into a vector of 1-based labels.

    For every row of ``Y`` the label is the position (counting from 1) of the
    first entry equal to 1.

    Args:
        Y: Indicator array indexed by row; only ``Y.shape[0]`` and ``Y[i]``
            are used.

    Returns:
        Integer array of length ``Y.shape[0]``.

    Raises:
        IndexError: If a row contains no entry equal to 1.
    """
    first_hits = [
        np.nonzero(Y[row] == 1)[0][0] + 1 for row in range(Y.shape[0])
    ]
    return np.array(first_hits, dtype=int)

def clustering_measure(true_labels, pred_labels):
    """Compute clustering accuracy, NMI and ARI (MATLAB ``ClusteringMeasure``).

    Cluster ids produced by a clustering algorithm are arbitrary, so comparing
    them element-wise with the class labels does not measure anything.  The
    accuracy is therefore computed after an optimal one-to-one matching of
    clusters to classes (Hungarian algorithm on the contingency table).  NMI
    and ARI are invariant to label permutations and are computed directly.

    Args:
        true_labels: Ground-truth class label for every sample.
        pred_labels: Predicted cluster id for every sample (same length).

    Returns:
        ``np.array([acc, nmi, ari])``.
    """
    true_labels = np.asarray(true_labels).ravel()
    pred_labels = np.asarray(pred_labels).ravel()

    # Contingency table: rows are predicted clusters, columns are true classes.
    true_classes, true_idx = np.unique(true_labels, return_inverse=True)
    pred_classes, pred_idx = np.unique(pred_labels, return_inverse=True)
    contingency = np.zeros((len(pred_classes), len(true_classes)), dtype=int)
    np.add.at(contingency, (pred_idx, true_idx), 1)

    # Negate the counts so that the minimum-cost assignment maximises overlap.
    rows, cols = linear_sum_assignment(-contingency)
    # Samples in clusters left unmatched (more clusters than classes) count
    # as errors, exactly as they would after relabelling.
    acc = contingency[rows, cols].sum() / true_labels.size

    nmi = normalized_mutual_info_score(true_labels, pred_labels)
    ari = adjusted_rand_score(true_labels, pred_labels)
    return np.array([acc, nmi, ari])

def kmeans_validate(X_select, label, iter_k=20):
    """Run KMeans several times and summarise the clustering scores.

    One KMeans model is fitted per seed ``0 .. iter_k - 1`` with as many
    clusters as there are distinct values in ``label``; every run is scored
    with ``clustering_measure``.

    Args:
        X_select: Data passed to ``KMeans.fit_predict``.
        label: Ground-truth labels used for scoring.
        iter_k: Number of KMeans runs.

    Returns:
        Array of shape (2, 3): row 0 holds the mean of the three scores over
        the runs, row 1 their standard deviation.
    """
    n_clusters = len(np.unique(label))
    per_run = [
        clustering_measure(
            label,
            KMeans(n_clusters=n_clusters, n_init=10, random_state=seed)
            .fit_predict(X_select),
        )
        for seed in range(iter_k)
    ]
    scores = np.asarray(per_run, dtype=float).reshape(iter_k, 3)
    return np.vstack((np.mean(scores, axis=0), np.std(scores, axis=0)))

def relabel_sequential(labels):
    """Map labels onto consecutive integers 1..k following their sorted order.

    The smallest distinct value becomes 1, the next one 2, and so on.

    Args:
        labels: Iterable of hashable label values.

    Returns:
        Array with one relabelled entry per input label.
    """
    distinct = np.unique(labels)
    rank_of = dict(zip(distinct, range(1, len(distinct) + 1)))
    return np.array([rank_of[value] for value in labels])

def compute_purity(true_labels, pred_labels):
    """Return the purity of a clustering with respect to the true classes.

    Purity is the fraction of samples that belong to the majority true class
    of their predicted cluster.

    Class frequencies are counted with ``np.unique`` rather than
    ``np.bincount`` so that negative, non-contiguous or non-integer class
    labels are supported; plain Python sequences are accepted as well.

    Args:
        true_labels: Ground-truth class label for every sample.
        pred_labels: Predicted cluster id for every sample.

    Returns:
        Purity in ``[0, 1]``.

    Raises:
        ValueError: If the two label vectors differ in length.
        ZeroDivisionError: If the inputs are empty.
    """
    true_labels = np.asarray(true_labels).ravel()
    pred_labels = np.asarray(pred_labels).ravel()
    if true_labels.shape != pred_labels.shape:
        raise ValueError(
            "true_labels and pred_labels must have the same length, got "
            f"{true_labels.size} and {pred_labels.size}"
        )

    majority_total = 0
    for cluster in np.unique(pred_labels):
        members = true_labels[pred_labels == cluster]
        # A NaN cluster id never compares equal to itself and selects nothing.
        if members.size:
            majority_total += np.unique(members, return_counts=True)[1].max()
    return majority_total / true_labels.size

def best_map(true_labels, pred_labels):
    """Relabel predicted clusters so that they agree best with the true labels.

    A square contingency matrix between true and predicted labels is built and
    the Hungarian algorithm picks the one-to-one label mapping that maximises
    the number of agreeing samples.

    Labels must be integers.  The index offset is ``min(1, smallest label)``:
    for labels >= 1 this is the classic 1-based behaviour, while 0-based or
    negative labels (e.g. the output of ``KMeans`` or ``LabelEncoder``) are
    shifted correctly instead of wrapping around to the end of the matrix.

    Args:
        true_labels: Ground-truth integer labels.
        pred_labels: Predicted integer cluster ids (same length).

    Returns:
        Integer array with ``pred_labels`` translated into the true label
        space.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    true_labels = np.asarray(true_labels).ravel()
    pred_labels = np.asarray(pred_labels).ravel()
    if true_labels.size == 0 or pred_labels.size == 0:
        raise ValueError("best_map requires non-empty label vectors")
    if true_labels.shape != pred_labels.shape:
        raise ValueError(
            "true_labels and pred_labels must have the same length, got "
            f"{true_labels.size} and {pred_labels.size}"
        )

    base = int(min(1, true_labels.min(), pred_labels.min()))
    size = int(max(true_labels.max(), pred_labels.max())) - base + 1

    # overlap[t, p] = number of samples with true label t and predicted label p
    overlap = np.zeros((size, size), dtype=int)
    np.add.at(overlap, (true_labels - base, pred_labels - base), 1)

    # Negate the counts so that the minimum-cost assignment maximises overlap.
    row_ind, col_ind = linear_sum_assignment(-overlap)

    # The matrix is square, so every predicted label receives exactly one
    # target label.
    lookup = np.empty(size, dtype=row_ind.dtype)
    lookup[col_ind] = row_ind + base
    return lookup[pred_labels - base]

def transform_labels(y, nclass=None, label_type='01'):
    """Convert a label vector into an indicator matrix.

    Args:
        y: Labels; flattened to one dimension before use.
        nclass: When given, the columns correspond to the classes
            ``1 .. nclass``; otherwise to the sorted distinct values of ``y``.
        label_type: ``'01'`` fills non-member entries with 0, any other value
            fills them with -1.

    Returns:
        Float array of shape (len(y), number of classes) holding 1 where the
        sample belongs to the column's class.
    """
    y = np.array(y).flatten()

    if nclass is None:
        class_set = np.unique(y)
        n_cols = len(class_set)
    else:
        n_cols = nclass
        class_set = np.arange(1, n_cols + 1)

    background = 0.0 if label_type == '01' else -1.0
    Y = np.full((len(y), n_cols), background)

    # Compare every sample against every class in one broadcasted step.
    membership = y[:, np.newaxis] == class_set[np.newaxis, :]
    Y[membership] = 1
    return Y

def updateY(X, W, b, Y, c, NITR_y=20):
    """Greedily update the indicator matrix ``Y`` one row at a time.

    With ``M = X.T @ W + 1 b^T``, the tracked objective is ``trace(G.T @ M)``
    where ``G = Y @ (inv(Y.T @ Y + eps * I) ** 0.5)``.  Each row of ``Y`` is
    moved to the column with the largest entry of ``incre_y`` until a sweep
    changes nothing or the iteration limits are reached.

    Args:
        X: Data matrix, unpacked as ``(d, n)`` (columns are samples).
        W: Matrix multiplied as ``X.T @ W``.
        b: Offset, reshaped to a single row and added to every row of
            ``X.T @ W``.
        Y: Indicator matrix with ``n`` rows and ``c`` columns.  It is
            MODIFIED IN PLACE.  Presumably one-hot (exactly one 1 per row) --
            confirm with the caller.
        c: Number of columns of ``Y``.
        NITR_y: Maximum number of outer iterations.

    Returns:
        Tuple ``(Y, Obj_y, changed)``: the updated ``Y``; the objective before
        the first and after every outer iteration (entries that were never
        reached stay 0); and, per outer iteration and inner sweep, the number
        of rows that changed column.
    """
    d, n = X.shape  # d is not used below
    A = X.T @ W + np.ones((n, 1)) @ b.reshape(1, -1)
    M = A  # alias of A, not a copy
    Obj_y = np.zeros(NITR_y + 1)
    changed = np.zeros((NITR_y, 10), dtype=int)  # 10 = max inner sweeps

    eps_val = np.finfo(float).eps
    # `**` binds tighter than `@` and is element-wise on arrays, so this is
    # Y @ (inv(...) ** 0.5).  The element-wise root equals the matrix square
    # root only when Y.T @ Y is diagonal -- presumably guaranteed by a one-hot
    # Y; confirm.
    G = Y @ np.linalg.inv(Y.T @ Y + eps_val * np.eye(c)) ** 0.5
    Obj_y[0] = np.trace(G.T @ M)

    for iter1 in range(NITR_y):
        # Per-column statistics, recomputed once per outer iteration and then
        # maintained incrementally whenever a row moves.
        yy = np.sum(Y * Y, axis=0)
        ym = np.sum(Y * M, axis=0)

        for iter2 in range(10):
            converged = True

            for i in range(n):
                mi = M[i, :]
                id0 = np.argmax(Y[i, :])  # column currently holding row i
                incre_y = np.zeros(c)

                for k in range(c):
                    if k == id0:
                        # Current value of ym/sqrt(yy) for this column minus
                        # its value with row i removed (eps avoids dividing
                        # by zero when the column would become empty).
                        incre_y[k] = ym[k]/np.sqrt(yy[k]) - (ym[k] - mi[k])/np.sqrt(yy[k]-1 + eps_val)
                    else:
                        # Value of ym/sqrt(yy) with row i added to column k
                        # minus its current value.
                        incre_y[k] = (ym[k]+mi[k])/np.sqrt(yy[k]+1) - ym[k]/np.sqrt(yy[k])

                id_new = np.argmax(incre_y)
                if id_new != id0:
                    converged = False
                    changed[iter1, iter2] += 1
                    # Move row i to the new column and update the statistics.
                    Y[i, :] = 0
                    Y[i, id_new] = 1
                    yy[id0] -= 1
                    yy[id_new] += 1
                    ym[id0] -= mi[id0]
                    ym[id_new] += mi[id_new]

            if converged:
                break

        G = Y @ np.linalg.inv(Y.T @ Y + eps_val * np.eye(c)) ** 0.5
        Obj_y[iter1 + 1] = np.trace(G.T @ M)

        # NOTE(review): this compares Obj_y[iter1] with Obj_y[iter1 - 1], i.e.
        # the two values preceding the one just stored in Obj_y[iter1 + 1] --
        # confirm this lag is intended.
        if iter1 > 3 and abs(Obj_y[iter1] - Obj_y[iter1 - 1])/Obj_y[iter1] < 1e-10:
            break

    return Y, Obj_y, changed

def FSDK(X, y, s_num, gamma, p, max_iter=50):
    """Score features with one-vs-rest ridge regressions and keep the best.

    One ``Ridge(alpha=1 / gamma)`` model is fitted per distinct value of ``y``
    against a 0/1 membership target.  A feature's score is the Euclidean norm
    of its coefficients across all classes, raised to the power ``p``.  The
    ``s_num`` highest-scoring features are selected.

    Args:
        X: Data matrix of shape (n_samples, n_features).
        y: Class label for every sample.
        s_num: Number of features to keep.  Values larger than the number of
            features keep all of them; 0 keeps none.
        gamma: Inverse of the ridge regularisation strength.
        p: Exponent applied to the per-feature coefficient norm.
        max_iter: Accepted for interface compatibility; not used.

    Returns:
        Tuple ``(X_select, Obj_w, idxw, fea_id, t, converge)``: the selected
        columns of ``X``, the per-feature scores, all feature indices sorted
        by ascending score, the selected indices (ascending score), and the
        constants ``t = 0`` and ``converge = True``.

    Raises:
        ValueError: If ``s_num`` is negative.
    """
    if s_num < 0:
        raise ValueError(f"s_num must be non-negative, got {s_num}")

    _, n_features = X.shape
    classes = np.unique(y)
    W = np.zeros((n_features, len(classes)))

    for col, cls in enumerate(classes):
        model = Ridge(alpha=1 / gamma)
        model.fit(X, np.where(y == cls, 1, 0))
        W[:, col] = model.coef_

    Obj_w = np.linalg.norm(W, axis=1) ** p

    # Sort once and reuse the ordering.  Slicing from the front instead of
    # with ``[-s_num:]`` makes ``s_num == 0`` select nothing rather than
    # everything, because ``[-0:]`` is the whole array.
    idxw = np.argsort(Obj_w)
    fea_id = idxw[max(n_features - s_num, 0):].copy()
    X_select = X[:, fea_id]

    t = 0
    converge = True
    return X_select, Obj_w, idxw, fea_id, t, converge

def Kmeans_validate(X_select, y_true, n_runs=10):
    """Cluster the selected features repeatedly and report accuracy and NMI.

    KMeans cluster ids are arbitrary, so the accuracy of each run is computed
    after an optimal one-to-one matching of clusters to classes (Hungarian
    algorithm on the contingency table) instead of comparing raw ids with the
    class labels.  NMI is permutation-invariant and is computed directly.

    Args:
        X_select: Data passed to ``KMeans.fit_predict``.
        y_true: Ground-truth class label for every sample.
        n_runs: Number of independent KMeans runs.

    Returns:
        Array of shape (2, 2): ``[[mean_acc, mean_nmi], [std_acc, std_nmi]]``.
    """
    y_true = np.asarray(y_true).ravel()
    true_classes, true_idx = np.unique(y_true, return_inverse=True)
    n_clusters = len(true_classes)

    accs, nmis = [], []
    for _ in range(n_runs):
        kmeans = KMeans(n_clusters=n_clusters, n_init=10)
        y_pred = kmeans.fit_predict(X_select)

        # Contingency table: rows are predicted clusters, columns are classes.
        pred_classes, pred_idx = np.unique(y_pred, return_inverse=True)
        overlap = np.zeros((len(pred_classes), n_clusters), dtype=int)
        np.add.at(overlap, (pred_idx, true_idx), 1)

        # Negate the counts so the minimum-cost assignment maximises overlap.
        rows, cols = linear_sum_assignment(-overlap)
        accs.append(overlap[rows, cols].sum() / y_true.size)
        nmis.append(normalized_mutual_info_score(y_true, y_pred))

    result_end = np.array([
        [np.mean(accs), np.mean(nmis)],
        [np.std(accs), np.std(nmis)]
    ])
    return result_end



# In [None]:

# MAIN CODE
if __name__ == "__main__":
    from pathlib import Path

    from scipy.io import loadmat

    # Load the data.
    data_path = Path("C:\\Users\\PC\\Desktop\\PFE-MNSA\\DATA\\PCMAC.mat")
    if data_path.suffix.lower() == ".mat":
        # A .mat file is a MATLAB binary container; pandas.read_csv cannot
        # parse it, so it is read with scipy.io.loadmat instead.
        mat = loadmat(str(data_path))
        # NOTE(review): assumes the features are stored under "X" and the
        # labels under "Y" -- confirm against the actual file.
        missing = [key for key in ("X", "Y") if key not in mat]
        if missing:
            available = sorted(k for k in mat if not k.startswith("__"))
            raise KeyError(
                f"{data_path.name} has no variable(s) {missing}; "
                f"available variables: {available}"
            )
        X = mat["X"]
        if hasattr(X, "toarray"):
            # loadmat returns MATLAB sparse matrices as SciPy sparse objects.
            X = X.toarray()
        X = np.asarray(X, dtype=float)
        labels = np.asarray(mat["Y"]).ravel()
    else:
        # Delimited text: the last column holds the labels.
        df = pd.read_csv(data_path)
        X = df.iloc[:, :-1].values
        labels = df.iloc[:, -1].values

    le = LabelEncoder()
    y = le.fit_transform(labels)

    # Parameters
    gamma = 10
    p = 1
    s_num = 110

    # Run FSDK
    X_select, Obj_w, idxw, fea_id, t, converge = FSDK(X, y, s_num, gamma, p)

    # KMeans validation
    result_end = Kmeans_validate(X_select, y)

    print("=== Résultats Clustering ===")
    print("Moyenne [Accuracy, NMI]:", result_end[0])
    print("Écart-type [Accuracy, NMI]:", result_end[1])