# SNF model
### Idea: 
1. Measure the similarity among items (the inputs could be OHVs or PSGs).
2. Fuse the resulting similarity matrices using SNF.
3. Save the fused matrix to an SNF file.

### Purpose:
To be used within the HGNN model

### Note:
We are saving this to A/sparse_matrix_?


In [None]:
from sklearn.metrics.pairwise import euclidean_distances
from scipy.sparse import csr_matrix
from scipy.sparse import save_npz
from snf import compute
import numpy as np

class SNF_class:
    """Build one-hot patient features from a graph, fuse them with SNF, and save.

    Pipeline executed by the constructor:
      1. split ``Nodes`` into per-type lists using the first character of
         each node id,
      2. build a one-hot matrix (OHV) per clinical type,
      3. turn each matrix into a Euclidean-based similarity matrix,
      4. fuse all similarity matrices with SNF,
      5. save the fused matrix as a sparse ``.npz`` file.

    After construction ``self.A`` holds the tuple
    ``(fused_network, best, second)`` returned by :meth:`SNF`.
    """

    # First character of a node id -> attribute that stores nodes of that type.
    _NODE_ATTRS = {
        'C': 'Patients',
        'G': 'Gender',
        'E': 'Expire_Flag',
        'V': 'Visits',
        'M': 'Medications',
        'D': 'Diagnosis',
        'P': 'Procedures',
        'L': 'Labs',
        'B': 'MicroBio',
    }
    # Types looked up two hops away from a patient (neighbors of neighbors).
    _TWO_HOP_TYPES = ('M', 'P', 'L', 'D', 'B')
    # Types looked up among the direct neighbors of a patient.
    _ONE_HOP_TYPES = ('G', 'E')

    def __init__(self, HG, Nodes, saving_path, file_name):
        """Run the whole pipeline and write ``{saving_path}/{file_name}.npz``.

        Args:
            HG: graph object exposing ``neighbors(node)``.
            Nodes: sized, re-iterable collection of node ids; the first
                character of each id is used as its type code.
            saving_path: directory the ``.npz`` file is written to.
            file_name: output file name without extension.
        """
        self.HG = HG
        self.Nodes = Nodes
        # ======================================================
        # Group the nodes by type in a single pass, preserving their order.
        grouped = {prefix: [] for prefix in self._NODE_ATTRS}
        for v in self.Nodes:
            if v[0] in grouped:
                grouped[v[0]].append(v)
        for prefix, attr in self._NODE_ATTRS.items():
            setattr(self, attr, grouped[prefix])
        # ======================================================
        D = self.get_X('D')
        M = self.get_X('M')
        P = self.get_X('P')
        L = self.get_X('L')
        B = self.get_X('B')

        G = self.get_X_sub_case('G')
        E = self.get_X_sub_case('E')
        # ======================================================
        print('Measure the similarity')
        DB = [self.euc_sim(X) for X in (D, M, P, L, B, G, E)]
        # ======================================================
        # self.A stays the full (fused_network, best, second) tuple so that
        # existing callers indexing A[0] keep working.
        self.A = self.SNF(DB, 'euclidean')
        self.save_PSG(saving_path=saving_path, file_name=file_name)

    def _one_hot(self, clinical_type, allowed_types, two_hop):
        """Shared implementation of :meth:`get_X` and :meth:`get_X_sub_case`.

        Returns an integer array with one row per entry of ``self.Nodes`` and
        one column per node of ``clinical_type``. Rows of nodes whose id does
        not start with ``'C'`` are left all-zero.
        """
        print(f'Getting the OHV for {clinical_type}')
        if clinical_type not in allowed_types:
            raise ValueError(
                f'Unsupported clinical_type {clinical_type!r}; '
                f'expected one of {allowed_types}'
            )
        F = getattr(self, self._NODE_ATTRS[clinical_type])
        column_of = {node: k for k, node in enumerate(F)}

        X = np.zeros((len(self.Nodes), len(F)), dtype=int)
        for row, v in enumerate(self.Nodes):
            if v[0] != 'C':
                continue
            if two_hop:
                # Every neighbor of the patient is expanded, not only visits.
                reached = (
                    u
                    for hop in self.HG.neighbors(v)
                    for u in self.HG.neighbors(hop)
                )
            else:
                reached = self.HG.neighbors(v)
            for u in reached:
                if u[0] == clinical_type:
                    X[row, column_of[u]] = 1
        return X

    def get_X(self, clinical_type):
        """One-hot matrix of the ``clinical_type`` nodes two hops from each patient.

        Args:
            clinical_type: one of ``'M'``, ``'P'``, ``'L'``, ``'D'``, ``'B'``.

        Returns:
            ``np.ndarray`` of shape ``(len(Nodes), number of nodes of that type)``.

        Raises:
            ValueError: if ``clinical_type`` is not one of the supported codes.
        """
        return self._one_hot(clinical_type, self._TWO_HOP_TYPES, two_hop=True)

    def get_X_sub_case(self, clinical_type):
        """One-hot matrix of the ``clinical_type`` nodes adjacent to each patient.

        Args:
            clinical_type: ``'G'`` or ``'E'``.

        Returns:
            ``np.ndarray`` of shape ``(len(Nodes), number of nodes of that type)``.

        Raises:
            ValueError: if ``clinical_type`` is not one of the supported codes.
        """
        return self._one_hot(clinical_type, self._ONE_HOP_TYPES, two_hop=False)

    def euc_sim(self, X):
        """Return the pairwise similarity ``1 / (1 + euclidean_distance)`` of the rows of X."""
        euclidean_dist = euclidean_distances(X)
        return 1 / (1 + euclidean_dist)

    def SNF(self, DB, dist, K=20, mu=0.5):
        """Fuse the matrices in ``DB`` with SNF and estimate cluster counts.

        Args:
            DB: list of matrices handed to ``compute.make_affinity``.
            dist: metric name forwarded to ``compute.make_affinity``.
            K: neighbor count used for both the affinity and the fusion step.
            mu: scaling factor forwarded to ``compute.make_affinity``.

        Returns:
            Tuple ``(fused_network, best, second)``.

        NOTE(review): ``DB`` holds precomputed similarity matrices, yet they
        are passed to ``make_affinity`` together with a distance metric --
        confirm that this double transformation is intended.
        """
        print('Fusing the three models using SNF...')
        affinity_networks = compute.make_affinity(DB, metric=dist, K=K, mu=mu)
        fused_network = compute.snf(affinity_networks, K=K)
        best, second = compute.get_n_clusters(fused_network)
        print(best, second)
        return fused_network, best, second

    def save_PSG(self, saving_path, file_name):
        """Save the fused matrix as a CSR sparse matrix to ``{saving_path}/{file_name}.npz``."""
        # self.A is the (fused_network, best, second) tuple produced by SNF();
        # only the matrix can be converted. Passing the whole tuple would make
        # csr_matrix interpret it as (data, indices, indptr) and fail.
        fused = self.A[0] if isinstance(self.A, tuple) else self.A
        sparse_A = csr_matrix(fused)
        save_npz(f'{saving_path}/{file_name}.npz', sparse_A)


## Fusing OHV...

In [3]:
def expand_array(org_A, n):
    """Embed ``org_A`` in the top-left corner of an ``n`` x ``n`` array of zeros.

    The size of the copied corner is taken from the first dimension of
    ``org_A``; the result is a new float array and ``org_A`` is not modified.
    """
    side = org_A.shape[0]
    top_left = (slice(0, side), slice(0, side))
    padded = np.zeros(shape=(n, n))
    padded[top_left] = org_A
    return padded


Measure the similarity
Fusing the three models using SNF...
2 3


In [4]:
# Zero-pad the first element of A to a len(Nodes) x len(Nodes) array.
# NOTE(review): A and Nodes are defined outside this cell; A is presumably the
# (fused_network, best, second) tuple stored on an SNF_class instance -- confirm.
new_A = expand_array(A[0], len(Nodes))


Data has been successfully saved to /lustre/home/almusawiaf/PhD_Projects/PSG_survival_analysis/data_generation/Data/15_Diagnoses/sample/5000_15/SNFs/OHV/D_M_R.pkl
OHV: MISSION COMPLETED...
Shape of A: (3840, 3840)
Shape of new_A: (10458, 10458)
Shape of the slice of new_A: (3840, 3840)
Length of Patients: 3840
