In [None]:
class FamiliesClusters(ClusterMixin, BaseEstimator):
    '''
    Iterative families clustering.

    Samples are split recursively with hierarchical clustering (Ward linkage)
    on a distance derived from the Pearson correlation between samples. Every
    level of the resulting hierarchy is scored with ``Scoring_`` and the
    best-scoring level becomes the final labelling.

    Parameters
    ----------
    family_interest_ : np.ndarray
        List of families of interest. Stored on the estimator; no method of
        this class reads it.
    Scoring_ : Callable
        Scoring function, called as ``Scoring_(y, labels)``.
    maximize_ : bool
        If True the level with the highest score is kept, otherwise the level
        with the lowest score.

    Attributes
    ----------
    n_clusters_ : int
        The number of clusters in the selected level.
    labels_ : ndarray of shape (n_samples,)
        Cluster label of each sample, numbered from 0.
    score_ : float
        Value returned by ``Scoring_`` for ``labels_``.
    cell_clusters_ : ndarray of shape (n_samples, iterations)
        Raw cluster id of each sample at each level; 0 means the sample's
        cluster was too small (< 3 samples) to be split any further.
    cell_clusters_correlation_ : ndarray of shape (n_samples, iterations)
        Mean pairwise correlation inside the cluster a sample belonged to at
        each level; NaN where it was not computed.

    Notes
    -----
    NOTE(review): the constructor parameters keep their trailing underscore
    for backward compatibility, although scikit-learn reserves that suffix
    for attributes set by ``fit``.
    '''

    def __init__(self, family_interest_: np.ndarray, Scoring_: Callable, maximize_: bool):
        super().__init__()
        self.family_interest_ = family_interest_
        self.Scoring_ = Scoring_
        self.maximize_ = maximize_

    @staticmethod
    def _upper_triangle(matrix: np.ndarray) -> np.ndarray:
        '''Return the strict upper triangle of a square matrix, row by row.'''
        return matrix[np.triu_indices(matrix.shape[0], k=1)]

    @classmethod
    def _mean_upper_triangle(cls, matrix: np.ndarray) -> float:
        '''Return the mean of the strict upper triangle, NaN if it is empty.'''
        values = cls._upper_triangle(matrix)
        return float(values.mean()) if values.size else float('nan')

    @staticmethod
    def _labels_at_level(hierarchy: np.ndarray, level: int) -> np.ndarray:
        '''Flatten the hierarchy into one label per sample at ``level``.

        A sample whose cluster stopped splitting before ``level`` keeps the
        last cluster it belonged to.
        '''
        upto = hierarchy[:, :level + 1]
        # Non-zero ids form a prefix of every row (column 0 is all ones and a
        # zeroed sample is never split again), so counting them locates the
        # last level at which the sample was still assigned.
        last = np.count_nonzero(upto, axis=1) - 1
        ids = upto[np.arange(upto.shape[0]), last]
        # Raw ids restart at 1 on every level; combine them with the level so
        # clusters coming from different levels cannot be confused.
        keys = last * (int(upto.max()) + 1) + ids
        return np.unique(keys, return_inverse=True)[1].reshape(-1)

    def fit(self, X: np.ndarray, y: np.ndarray, N: int = 2, iterations: int = 20):
        '''Build the cluster hierarchy and keep its best-scoring level.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            Features of each data point.
        y : np.ndarray of shape (n_samples,)
            Family of each data point, passed to ``Scoring_``.
        N : int
            Number of sub-clusters each cluster is cut into at every level.
        iterations : int
            Number of levels of the hierarchy (the first level is the single
            cluster holding every sample).

        Returns
        -------
        self
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or is empty, if ``X`` and ``y`` disagree on
            the number of samples, or if ``N`` or ``iterations`` is below 1.
        '''
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if len(y) != n_samples:
            raise ValueError("X and y must have the same number of samples")
        if N < 1 or iterations < 1:
            raise ValueError("N and iterations must both be at least 1")

        # Pearson correlation between samples (the rows of X), mapped to a
        # distance in [0, 1].
        corr_raw = pd.DataFrame(X.T).corr(method='pearson').to_numpy()
        distance = (1.0 - corr_raw) / 2.0
        # An undefined correlation (single feature, constant sample) is
        # treated as the maximum distance; the original code only did this
        # for the single-feature case. Ward linkage rejects non-finite input.
        distance[~np.isfinite(distance)] = 1.0

        hierarchy = np.zeros((n_samples, iterations), dtype=int)
        mean_corr = np.full((n_samples, iterations), np.nan)
        hierarchy[:, 0] = 1
        mean_corr[:, 0] = self._mean_upper_triangle(corr_raw)

        for level in range(1, iterations):
            previous = hierarchy[:, level - 1]
            for cluster in np.unique(previous[previous != 0]):
                members = np.flatnonzero(previous == cluster)
                if len(members) < 3:
                    # Too small to split: the samples keep id 0 from here on.
                    continue
                block = np.ix_(members, members)
                mean_corr[members, level - 1] = self._mean_upper_triangle(corr_raw[block])
                # Squared, as in the original code ("for ward2 criterion").
                # NOTE(review): SciPy's documented ward update already squares
                # the distances internally, so this pre-squaring may be
                # redundant -- confirm against the reference implementation.
                condensed = self._upper_triangle(distance[block]) ** 2
                tree = ward(condensed)
                split = cut_tree(tree, n_clusters=min(N, len(members))).ravel()
                # cut_tree labels start at 0, which is reserved for
                # "unassigned", hence the +1; the running maximum keeps ids
                # unique within the level.
                hierarchy[members, level] = split + 1 + hierarchy[:, level].max()
            if not hierarchy[:, level].any():
                # Nothing was split: every deeper level would stay empty too.
                break

        # NOTE(review): the original ended with an empty loop over the levels
        # and scored only the last sub-cluster split. Scoring every level and
        # keeping the best one is the reviewer's completion of that code; it
        # is also the only place where ``maximize_`` takes effect.
        best_labels = None
        best_score = None
        for level in range(iterations):
            if level > 0 and not hierarchy[:, level].any():
                break
            labels = self._labels_at_level(hierarchy, level)
            score = self.Scoring_(y, labels)
            if best_score is None:
                improved = True
            elif self.maximize_:
                improved = score > best_score
            else:
                improved = score < best_score
            if improved:
                best_labels, best_score = labels, score

        self.cell_clusters_ = hierarchy
        self.cell_clusters_correlation_ = mean_corr
        self.n_clusters_ = len(np.unique(best_labels))
        self.labels_ = best_labels
        self.score_ = best_score
        return self

    def fit_predict(self, X: np.ndarray, y: np.ndarray, NmaxCluster: Optional[int] = None):
        '''Fit the estimator and return the label of each sample.

        Parameters
        ----------
        X : np.ndarray
            Features of each data point.
        y : np.ndarray
            Family of each data point.
        NmaxCluster : int, optional
            Forwarded to ``fit`` as ``N``; when omitted ``fit`` uses its own
            default instead of receiving ``None``.

        Returns
        -------
        ndarray of shape (n_samples,)
            ``labels_`` of the fitted estimator.
        '''
        if NmaxCluster is None:
            self.fit(X, y)
        else:
            self.fit(X, y, NmaxCluster)
        return self.labels_

    def score(self, X, y_true):
        '''Return the score stored by the last call to ``fit``.

        ``X`` and ``y_true`` are not used.
        NOTE(review): the original comment reported an error here because
        ``y_true`` and ``X`` may not have the same size as ``labels_``; the
        stored score is returned unchanged, as before.
        '''
        return self.score_

In [None]:
def outer(x: np.ndarray):
    '''Allocate a square matrix of zeros with one row and column per element of x.

    NOTE(review): the original cell stopped after the allocation, so any
    pairwise fill that the name suggests was never written; only the
    allocation is reproduced here.

    Parameters
    ----------
    x : np.ndarray
        Sequence whose length gives the size of the matrix.

    Returns
    -------
    np.ndarray of shape (len(x), len(x))
        Matrix filled with zeros.
    '''
    size = len(x)
    out = np.zeros((size, size))
    return out

In [None]:
import math
from typing import AnyStr, Callable, Optional

import numpy as np
import pandas as pd
import sklearn
from scipy.cluster.hierarchy import cut_tree, ward
from sklearn.base import BaseEstimator, ClusterMixin
from sklearn.metrics import make_scorer