In [154]:
import numpy as np
from scipy.spatial import distance_matrix
from sklearn.cluster import MeanShift
from collections import Counter
from scipy.spatial.distance import euclidean
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.neighbors.kde import KernelDensity
# Small hard-coded sample arrays (three 3-D points and four 3-D points).
X = np.array([
    [1.5, 2, 3],
    [2.5, 5, 8],
    [5.5, 10, 11],
])
Y = np.array([
    [2, 3, 4],
    [2.5, 3.5, 4.5],
    [5.5, 6, 7],
    [6, 7, 8],
])
# Pairwise Euclidean (p=2, the default) distances between the rows of X.
a = distance_matrix(X, X, p=2)

In [91]:
def bandwidth_selection(data,sigThresh,outlierThresh):
    """Pick a mean-shift bandwidth by scanning integer candidates.

    Candidate bandwidths run in steps of 1 from roughly half of the smallest
    non-zero pairwise distance in ``data`` up to the largest pairwise distance
    (both rounded to integers). ``MeanShift`` is fitted once per candidate.
    Clusters with at least ``sigThresh`` members count as significant; every
    other cluster counts as an outlier mode. A candidate is kept whenever its
    number of significant clusters is at least the running maximum seen so far.

    Parameters
    ----------
    data : array-like, 2-D
        One observation per row.
    sigThresh : int
        Minimum member count for a cluster to be significant.
    outlierThresh : int
        Largest acceptable number of outlier modes.

    Returns
    -------
    int
        The first kept candidate whose outlier-mode count is
        ``<= outlierThresh``; if none qualifies, the last kept candidate.

    Raises
    ------
    ValueError
        If ``data`` contains no two distinct points.
    """
    step = 1

    # Work on the argument, not on a notebook-level global.
    data = np.asarray(data)
    D = distance_matrix(data, data)
    positive = D[np.nonzero(D)]
    if positive.size == 0:
        raise ValueError(
            "bandwidth_selection needs at least two distinct points in data")

    minD = int(np.round(positive.min()))
    if minD == 0:
        minD = 2
    maxD = int(np.round(positive.max()))

    # np.round(0.5) is 0, so clamp: the bandwidth must be strictly positive.
    start = max(1, int(np.round(minD / 2)))
    # Guarantee at least one candidate even when every distance rounds to 0.
    stop = max(maxD, start)

    n_clus = 0
    hCandidate = []
    outlierVec = []
    for i_h in range(start, stop + 1, step):
        labels = MeanShift(bandwidth=i_h).fit(data).labels_

        cluster_sizes = Counter(labels)
        n_sig_new = sum(1 for size in cluster_sizes.values() if size >= sigThresh)
        numOutlierModes = len(cluster_sizes) - n_sig_new

        # Keep every bandwidth that matches or raises the running maximum.
        if n_sig_new >= n_clus:
            n_clus = n_sig_new
            hCandidate.append(i_h)
            outlierVec.append(numOutlierModes)

    for h, n_outliers in zip(hCandidate, outlierVec):
        if n_outliers <= outlierThresh:
            return h
    return hCandidate[-1]

In [272]:
def knn_search(data,modes):
    """Find, for every mode, the nearest row of ``data``.

    Parameters
    ----------
    data : array-like, shape (n_points, n_dims)
        Candidate points.
    modes : sequence of array-like, each of length n_dims
        Query points.

    Returns
    -------
    modClosestMemID : ndarray of int, shape (n_modes,)
        Row index into ``data`` of the point closest to each mode (first
        index on ties). Integer-typed so it can be used directly for indexing.
    dist : ndarray of float, shape (n_modes,)
        Euclidean distance from each mode to that closest point.
    """
    data = np.asarray(data, dtype=float)
    n_modes = len(modes)
    modClosestMemID = np.zeros(n_modes, dtype=int)
    dist = np.zeros(n_modes)
    for idx, mode in enumerate(modes):
        # One distance computation per mode; both outputs are read from it.
        d = np.linalg.norm(data - np.asarray(mode, dtype=float), axis=1)
        nearest = int(np.argmin(d))
        modClosestMemID[idx] = nearest
        dist[idx] = d[nearest]

    return modClosestMemID, dist

def pairConn(X1,X2,X1_dens,X2_dens,lv,data,h):
    """Decide whether two point sets are connected at density level ``lv``.

    The sets are reported as connected (1) only when both hold:

    * the closest pair of points between ``X1`` and ``X2`` is at most
      ``2*h`` apart, and
    * for every pair (p in ``X1``, q in ``X2``), a Gaussian kernel density
      estimate fitted on ``data`` with bandwidth ``h`` is at least ``lv`` at
      each of 20 evenly spaced points on the straight segment from p to q.

    Parameters
    ----------
    X1, X2 : array-like, shape (n1, n_dims) and (n2, n_dims)
        The two point sets.
    X1_dens, X2_dens
        Accepted for interface compatibility; not used by this function.
    lv : float
        Density level that every sampled point must reach.
    data : array-like, shape (n_points, n_dims)
        Points the density estimate is fitted on.
    h : float
        Kernel bandwidth, also used for the ``2*h`` distance cut-off.

    Returns
    -------
    int
        1 if connected, 0 otherwise.
    """
    X1 = np.asarray(X1, dtype=float)
    X2 = np.asarray(X2, dtype=float)
    data = np.asarray(data, dtype=float)

    # Smallest distance between the two sets, as a scalar so the comparison
    # is unambiguous even when several pairs tie for the minimum.
    _, D = knn_search(X1, X2)
    minDist = D.min()
    if minDist > 2*h:
        return 0

    # The density model depends only on (data, h): fit it once.
    kde = KernelDensity(kernel="gaussian", bandwidth=h).fit(data)

    n_seq = 20
    # Interpolation weights 0..1 as a column, broadcast against each segment.
    t = np.linspace(0.0, 1.0, n_seq)[:, None]

    for q1 in X1:
        for q2 in X2:
            nl = q1 + t * (q2 - q1)
            f_nl = np.exp(kde.score_samples(nl))
            # Any sampled point under the level breaks the connection; stop
            # at the first failure. A density that underflows to exactly 0
            # counts as below the level.
            if np.any(f_nl < lv):
                return 0
    return 1

def cluConn(data,f,labels,lv,h):
    """Build the pairwise cluster connectivity matrix at density level ``lv``.

    Parameters
    ----------
    data : array-like, shape (n_points, n_dims)
        All points.
    f : array-like, shape (n_points,)
        Density value of each point.
    labels : array-like, shape (n_points,)
        Cluster label of each point. Clusters are addressed as
        ``0 .. n_clu-1`` where ``n_clu`` is the number of distinct labels
        (assumes labels are consecutive integers starting at 0 -- TODO confirm
        for label sources other than a default ``MeanShift``).
    lv : float
        Density level passed on to ``pairConn``.
    h : float
        Bandwidth passed on to ``pairConn``.

    Returns
    -------
    ndarray, shape (n_clu, n_clu)
        Symmetric matrix holding the ``pairConn`` result for each pair of
        distinct clusters; the diagonal is 0.
    """
    data = np.asarray(data)
    f = np.asarray(f)
    labels = np.asarray(labels)

    n_clu = len(set(labels.tolist()))
    connMat = np.zeros((n_clu, n_clu))

    # Boolean masks keep data[mask] two-dimensional; indexing with the tuple
    # returned by np.where would add a leading axis.
    members = [labels == k for k in range(n_clu)]

    for i in range(n_clu - 1):
        for j in range(i + 1, n_clu):
            in_i = members[i]
            in_j = members[j]
            connected = pairConn(data[in_i], data[in_j],
                                 f[in_i], f[in_j],
                                 lv, data, h)
            connMat[i, j] = connected
            connMat[j, i] = connected

    return connMat

In [273]:
def _merge_level_connectivity(conn):
    """Scan levels from last to first and record newly connected cluster pairs.

    ``conn`` is a list of square 0/1 matrices, one per level. Returns
    ``(l_conn, conn_mask)``: ``l_conn[i]`` marks the pairs first linked at
    level ``i``; ``conn_mask`` accumulates every link recorded so far.
    """
    n_lv = len(conn)
    conn_mask = np.zeros(conn[0].shape)
    l_conn = [None] * n_lv
    # Running sum of conn[i:], built up as i decreases.
    level_total = np.zeros(conn[0].shape)

    for i in reversed(range(n_lv)):
        lvl_conn = np.zeros(conn[i].shape)
        level_total = level_total + conn[i]

        # A total of exactly 1 means the pair is linked at a single level
        # among i..n_lv-1.
        r_c, c_c = np.where(level_total == 1)

        for r, c in zip(r_c, c_c):
            if conn_mask[r, c] != 0:
                continue  # already linked earlier in the scan
            # Existing partners of each side, read before the new link is set.
            neighbors1 = np.where(conn_mask[r, :] == 1)[0]
            neighbors2 = np.where(conn_mask[c, :] == 1)[0]

            conn_mask[r, c] = 1
            conn_mask[c, r] = 1

            # Partners of r are linked to c, and partners of c to r.
            # NOTE(review): partners of r are not linked to partners of c
            # here -- confirm whether a full transitive closure is intended.
            conn_mask[neighbors1, c] = 1
            conn_mask[c, neighbors1] = 1
            conn_mask[neighbors2, r] = 1
            conn_mask[r, neighbors2] = 1

            lvl_conn[r, c] = 1
            lvl_conn[c, r] = 1

        l_conn[i] = lvl_conn

    return l_conn, conn_mask


def _members_by_level(f, labels, lv_seq, n_clu):
    """Collect, per level and cluster, the indices of points by density.

    Returns ``(upperLvlMembers, lvlMembers, upper_size)``, each indexed
    ``[level, cluster]``: points with density above the level, points with
    density in ``(level, next level]``, and the count of the former.
    """
    n_lv = len(lv_seq)
    upperLvlMembers = np.empty((n_lv, n_clu), dtype=object)
    lvlMembers = np.empty((n_lv, n_clu), dtype=object)
    upper_size = np.zeros((n_lv, n_clu))

    for i, lv in enumerate(lv_seq):
        # The last level has no next level; use an open upper bound so its
        # row holds (empty) index arrays instead of None.
        next_lv = lv_seq[i + 1] if i + 1 < n_lv else np.inf
        above = f > lv
        # Element-wise '&': Python's 'and' is ambiguous on arrays.
        in_band = above & (f <= next_lv)
        for j in range(n_clu):
            in_cluster = labels == j
            upperLvlMembers[i, j] = np.where(above & in_cluster)[0]
            lvlMembers[i, j] = np.where(in_band & in_cluster)[0]
            upper_size[i, j] = len(upperLvlMembers[i, j])

    return upperLvlMembers, lvlMembers, upper_size


def high_density_clustering(data,h,sizeThresh):
    """Group mean-shift clusters' members by density level.

    ``MeanShift`` with bandwidth ``h`` labels the points, a Gaussian kernel
    density estimate with the same bandwidth scores them, and 20 density
    levels are placed at ``k/20`` of the maximum density (``k = 1..20``).

    Parameters
    ----------
    data : array-like, shape (n_points, n_dims)
        One observation per row.
    h : float
        Bandwidth for both ``MeanShift`` and the density estimate.
    sizeThresh : int
        Minimum cluster size for the significant-cluster list (computed but
        not part of the return value).

    Returns
    -------
    ndarray of object, shape (20, n_clusters)
        Entry ``[i, j]`` is the array of indices of points in cluster ``j``
        whose density is above level ``i``.

    Notes
    -----
    The per-level connectivity, the per-band members, the sizes and the
    significant-cluster list are computed but not returned.
    """
    data = np.asarray(data)

    meanShift = MeanShift(bandwidth=h).fit(data)
    labels = meanShift.labels_
    modes = meanShift.cluster_centers_

    # knn_search returns (ids, distances); only the ids are kept.
    modClosestMemID, _ = knn_search(data, modes)

    n_clu = len(set(labels))

    kde = KernelDensity(kernel="gaussian", bandwidth=h).fit(data)
    f = np.exp(kde.score_samples(data))

    n_lv = 20
    lv_seq = [i/n_lv*f.max() for i in range(1, n_lv+1)]

    #check the cluster info at each level: member, size and connectivity
    conn = [cluConn(data, f, labels, lv, h) for lv in lv_seq]

    #check which clusters are connected at each level
    l_conn, conn_mask = _merge_level_connectivity(conn)

    #cluster members by density
    upperLvlMembers, lvlMembers, upper_size = _members_by_level(
        f, labels, lv_seq, n_clu)

    #select significant clusters just in case
    sig_clu = [k for k, v in Counter(labels).items() if v >= sizeThresh]

    return upperLvlMembers

In [270]:
# Scratch check: store the intersection of two arrays in one element of an
# object array, then display its length.
# NOTE(review): `empty` is not defined above this cell in file order (it is
# created in a later cell) -- confirm the intended run order.
a=np.array([1,2,3,4,5])
b=np.array([4,5,6,7,1])
empty[2,2]=np.intersect1d(a,b)
len(empty[2,2])

3

In [268]:
# Display the current contents of the object array `empty`.
empty

array([[None, None, None, None, None, None],
       [None, array([1, 2, 3]), None, None, None, None],
       [None, None, array([4, 5]), None, None, None],
       [None, None, None, None, None, None]], dtype=object)

In [250]:
# Small 2x2 arrays for trying out np.where.
b = np.array([[1, 2],
              [3, 2]])
a = np.zeros(b.shape)


In [241]:
# Put 1 wherever b equals 1; keep a's own value everywhere else.
a = np.where(b == 1, 1, a)

In [253]:
# Row indices (q) and column indices (w) of the entries of b equal to 2.
q, w = np.nonzero(b == 2)

In [236]:
# A list holding one 3x3 integer matrix.
co1 = np.array([
    [1, 2, 3],
    [2, 3, 4],
    [3, 4, 5],
])
conn = [co1]

In [237]:
# Display the list of matrices.
conn

[array([[1, 2, 3],
        [2, 3, 4],
        [3, 4, 5]])]

In [262]:
# 4x6 object array; object-dtype np.empty starts with every element None.
empty = np.empty(shape=(4, 6), dtype=object)

In [265]:
# Store a whole array as the single element at row 1, column 1 of the
# object array `empty` (which must already exist from another cell).
empty[1,1]=np.array([1,2,3])

In [266]:
# Display the object array after the element assignment.
empty

array([[None, None, None, None, None, None],
       [None, array([1, 2, 3]), None, None, None, None],
       [None, None, None, None, None, None],
       [None, None, None, None, None, None]], dtype=object)