# Symmetric Similarity Matrices
- Here, we compute the similarity matrices between nodes of the same type, i.e. the blocks that sit on the diagonal of the combined matrix.

In [None]:
import pickle
import numpy as np
import pandas as pd
import networkx as nx 
import scipy
from scipy import sparse
from scipy.sparse import dok_matrix, lil_matrix, coo_matrix, csr_matrix

In [None]:
# Load the heterogeneous graph and the pickled node collection from disk.
G = nx.read_gml('results/the_complete_hetero_graph2.gml')
with open('results/Nodes.pkl', 'rb') as file:
    Nodes = pickle.load(file)

# Split the graph's nodes into groups keyed by the first element of each
# node label ('C', 'V', 'M', 'D', 'P').
Patients = [node for node in G.nodes() if node[0] == 'C']
Visits = [node for node in G.nodes() if node[0] == 'V']
Medications = [node for node in G.nodes() if node[0] == 'M']
Diagnosis = [node for node in G.nodes() if node[0] == 'D']
Procedures = [node for node in G.nodes() if node[0] == 'P']

# Group sizes, in the fixed order C, V, Me, D, P used by the later cells.
C, V, Me, D, P = (
    len(group)
    for group in (Patients, Visits, Medications, Diagnosis, Procedures)
)
t = [C, V, Me, D, P]

# Only nodes that fell into one of the five groups are counted here.
total = sum(t)
print(f'Number of nodes in G = {total}')
print(f'Number of nodes in Nodes = {len(Nodes)}')

In [22]:
import os

saving_path = 'results/Nodes'

# Create the output directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there, without a separate
# "exists?" check that could race with another process.
os.makedirs(saving_path, exist_ok=True)

# Persist each node group to its own pickle file under saving_path.
with open(f'{saving_path}/patients.pkl', 'wb') as file:
    pickle.dump(Patients, file)

with open(f'{saving_path}/visits.pkl', 'wb') as file:
    pickle.dump(Visits, file)

with open(f'{saving_path}/medication.pkl', 'wb') as file:
    pickle.dump(Medications, file)

with open(f'{saving_path}/diagnosis.pkl', 'wb') as file:
    pickle.dump(Diagnosis, file)

with open(f'{saving_path}/procedures.pkl', 'wb') as file:
    pickle.dump(Procedures, file)

In [23]:
def get_adjacency_matrix(G, Nodes1, Nodes2):
    """Return the dense adjacency block linking ``Nodes1`` to ``Nodes2``.

    The adjacency matrix of ``G`` is requested with the node order
    ``Nodes1 + Nodes2``; the block whose rows belong to ``Nodes1`` and whose
    columns belong to ``Nodes2`` is extracted and returned.

    Args:
        G: Graph passed to ``nx.adjacency_matrix``.
        Nodes1: Iterable of nodes that index the rows of the result.
        Nodes2: Iterable of nodes that index the columns of the result.

    Returns:
        A 2-D ``numpy.ndarray`` of shape ``(len(Nodes1), len(Nodes2))``.
    """
    # Materialise both inputs so they can be concatenated and measured.
    Nodes1 = list(Nodes1)
    Nodes2 = list(Nodes2)

    # Sparse adjacency matrix ordered as Nodes1 followed by Nodes2.
    sparse_matrix = nx.adjacency_matrix(G, nodelist=Nodes1 + Nodes2)

    # Slice the wanted block while the data is still sparse, so only the
    # (len(Nodes1) x len(Nodes2)) block is densified instead of the full
    # square matrix.
    n_rows = len(Nodes1)
    block = sparse_matrix[:n_rows, n_rows:]

    return np.asarray(block.todense())

def get_adjacency_matrix1(G, Nodes1, Nodes2):
    """Return the sparse adjacency block linking ``Nodes1`` to ``Nodes2``.

    The adjacency matrix of ``G`` is requested with the node order
    ``Nodes1 + Nodes2``, so the first ``len(Nodes1)`` rows/columns belong to
    ``Nodes1`` and the remaining ones to ``Nodes2``. The block is therefore
    taken with plain slices; no node-to-index lookup table is needed.

    Args:
        G: Graph passed to ``nx.adjacency_matrix``.
        Nodes1: Iterable of nodes that index the rows of the result.
        Nodes2: Iterable of nodes that index the columns of the result.

    Returns:
        A SciPy sparse matrix/array of shape ``(len(Nodes1), len(Nodes2))``.
    """
    # Materialise both inputs so they can be concatenated and measured.
    Nodes1 = list(Nodes1)
    Nodes2 = list(Nodes2)

    # Sparse adjacency matrix ordered as Nodes1 followed by Nodes2.
    sparse_matrix = nx.adjacency_matrix(G, nodelist=Nodes1 + Nodes2)

    # Rows [0, n_rows) are Nodes1 and columns [n_rows, end) are Nodes2.
    # Slicing also copes with an empty node group, which list-based fancy
    # indexing does not handle reliably.
    n_rows = len(Nodes1)
    return sparse_matrix[:n_rows, n_rows:]


# Adjacency blocks between node groups: patients-visits, visits-medications,
# visits-diagnosis and visits-procedures.
W_cv, W_vm, W_vd, W_vp = (
    get_adjacency_matrix1(G, Patients, Visits),
    get_adjacency_matrix1(G, Visits, Medications),
    get_adjacency_matrix1(G, Visits, Diagnosis),
    get_adjacency_matrix1(G, Visits, Procedures),
)

# Drop the node lists; the cells visible below do not use them again.
del Patients, Visits, Medications, Diagnosis, Procedures


# del G_CV
# del G_VM
# del G_VD
# del G_VP

In [24]:
# Sanity check: show the container type of W_cv, then the shape of every
# adjacency block next to the total node count.
print(type(W_cv))
print([a.shape for a in [W_cv, W_vm, W_vd, W_vp]], total)

<class 'scipy.sparse._arrays.csr_array'>
[(10064, 13620), (13620, 364), (13620, 8), (13620, 88)] 24144


In [25]:
# def M(W1, W2):
#     return np.dot(W1, W2)

def M(W1, W2):
    """Multiply ``W1`` by ``W2`` as CSR matrices and return the product.

    Both operands are converted with ``csr_matrix`` before multiplying, and a
    progress line is printed before and after the multiplication.

    Args:
        W1: Left operand; must expose ``.shape`` and be accepted by
            ``csr_matrix``.
        W2: Right operand, same requirements as ``W1``.

    Returns:
        The matrix product as a SciPy sparse matrix.
    """
    print(f'multiplying {W1.shape} * {W2.shape}...')
    left = csr_matrix(W1)
    right = csr_matrix(W2)

    # Sparse matrix product of the two CSR operands.
    product = left @ right
    print("Done multiplication...")
    return product

In [26]:
# ------ asymmetric similarities
# Patient-medication (CVM) and patient-procedure (CVP) products, both routed
# through the visit nodes.
M_CVM, M_CVP = M(W_cv, W_vm), M(W_cv, W_vp)

# Medication-procedure product through the visit nodes (MVP).
M_MVP = M(W_vm.T, W_vp)

# ********************************************
# deleting edges connected to Diagnoses
# Diagnoses-medication, 
# Diagnoses-Procedures,
# Diagnoses-Patients

# M_CVD = M(W_cv, W_vd)
# M_MVD = M(W_vm.T, W_vd) # MVD
# M_DVP = M(W_vd.T, W_vp)

# ********************************************
# equivalent
# M_DVM = M(W_vd.T, W_vm) # equivalent to MVD
# M_PVM = M(W_vp.T, W_vp) # equivalent to MVP
# M_PVD = M(W_vp.T, W_vd) # equivalnet to DVP

multiplying (10064, 13620) * (13620, 364)...
Done multiplication...
multiplying (10064, 13620) * (13620, 88)...
Done multiplication...
multiplying (364, 13620) * (13620, 88)...
Done multiplication...


In [27]:
# Show the shapes of the patient-visit and visit-medication blocks.
print(W_cv.shape, W_vm.shape)

(10064, 13620) (13620, 364)


In [28]:
# Symmetric
# -------Visits--------
# Visit-visit products through diagnosis, medication and procedure nodes.
M_VDV, M_VMV, M_VPV = (M(W, W.T) for W in (W_vd, W_vm, W_vp))

# -------Patients--------
# Patient-patient products built from the two-hop CVM / CVP products.
M_CVMVC, M_CVPVC = (M(A, A.T) for A in (M_CVM, M_CVP))

# -------Medications--------
# Medication-medication products: directly through visits, and through MVP.
M_MVM = M(W_vm.T, W_vm)
M_MVPVM = M(M_MVP, M_MVP.T)


# -----  deleted Meta-paths  ------
# M_CVDVC = M(M_CVD, M_CVD.T)
# M_MVDVM = M(M_MVD, M_MVD.T)


multiplying (13620, 8) * (8, 13620)...
Done multiplication...
multiplying (13620, 364) * (364, 13620)...
Done multiplication...
multiplying (13620, 88) * (88, 13620)...
Done multiplication...
multiplying (10064, 364) * (364, 10064)...
Done multiplication...
multiplying (10064, 88) * (88, 10064)...
Done multiplication...
multiplying (364, 13620) * (13620, 364)...
Done multiplication...
multiplying (364, 88) * (88, 364)...
Done multiplication...


In [29]:
# ----------Diagnosis / Procedures--------------
# Diagnosis-diagnosis (DVD) and procedure-procedure (PVP) products through
# the visit nodes.
M_DVD, M_PVP = (M(W.T, W) for W in (W_vd, W_vp))

# ------ deleted meta-paths -------
# M_DVMVD = M(M_DVM, M_DVM.T)
# M_DVRVD = M(M_DVP, M_DVP.T)

# M_PVMVP = M(M_PVM, M_PVM.T)
# M_PVDVP = M(M_PVD, M_PVD.T)

multiplying (8, 13620) * (13620, 8)...
Done multiplication...
multiplying (88, 13620) * (13620, 88)...
Done multiplication...


In [30]:
def norm_max(W):
    """Divide ``W`` by its maximum entry and return the result.

    The shape of ``W`` and the maximum found are printed as a progress line.

    Args:
        W: Matrix-like object accepted by ``np.max`` that exposes ``.shape``
            and supports division by a scalar.

    Returns:
        ``W / max(W)``. When the maximum is exactly 0 the divisor falls back
        to 1, so the entries are returned unscaled instead of dividing by
        zero.
    """
    max_value = np.max(W)
    print(f'{W.shape}\t{max_value}')

    # Guard against a zero maximum (e.g. an all-zero matrix): dividing by it
    # would produce NaN/inf entries.
    divisor = max_value if max_value != 0 else 1
    return W / divisor

def asymmetric_assign_Coo(W, shift_row, shift_col, t):
    """Place ``W`` and its mirror image inside an empty ``t`` x ``t`` matrix.

    Every non-zero ``W[i, j]`` is written to position
    ``(shift_row + i, shift_col + j)`` and also to the transposed position
    ``(shift_col + j, shift_row + i)``.

    Args:
        W: 2-D matrix supporting ``np.nonzero`` and ``W[i, j]`` indexing.
        shift_row: Row offset of the block inside the result.
        shift_col: Column offset of the block inside the result.
        t: Side length of the square result.

    Returns:
        A ``lil_matrix`` of shape ``(t, t)`` and dtype ``float32``.
    """
    # LIL format is used because entries are assigned one at a time.
    placed = lil_matrix((t, t), dtype=np.float32)

    # Walk the coordinates of the non-zero entries only.
    for r, c in zip(*np.nonzero(W)):
        entry = W[r, c]
        target_row = shift_row + r
        target_col = shift_col + c
        placed[target_row, target_col] = entry
        # Mirror the entry across the diagonal of the result.
        placed[target_col, target_row] = entry

    return placed

# Show the group sizes next to the shape of the patient-visit block.
print(t, W_cv.shape)

# Adjacency blocks paired with the (row, column) offset at which each one is
# placed inside the combined sum(t) x sum(t) matrix.
asym = [W_cv, W_vm, W_vd, W_vp]
dim = [(0, C), (C, C+V), (C, C+V+Me), (C, C+V+Me+D)]

# Normalise every block by its maximum, then embed it at its offset.
asym = [norm_max(block) for block in asym]
As2 = [
    asymmetric_assign_Coo(block, row_shift, col_shift, sum(t))
    for block, (row_shift, col_shift) in zip(asym, dim)
]

# Drop the original block names.
del W_cv, W_vm, W_vd, W_vp


[10064, 13620, 364, 8, 88] (10064, 13620)
(10064, 13620)	1
(13620, 364)	1
(13620, 8)	1
(13620, 88)	1


In [31]:
# Two-hop products paired with the (row, column) offset at which each one is
# placed inside the combined sum(t) x sum(t) matrix.
asym3 = [M_CVM, M_CVP, M_MVP]
dim3 = [(0, C+V), (0, C+V+Me+D), (C+V, C+V+Me+D)]

# The list above still holds the matrices; drop the standalone names.
del M_CVM, M_CVP, M_MVP

# Normalise every product by its maximum, then embed it at its offset.
asym3 = [norm_max(block) for block in asym3]
As3 = [
    asymmetric_assign_Coo(block, row_shift, col_shift, sum(t))
    for block, (row_shift, col_shift) in zip(asym3, dim3)
]

# M_CVD, (0, C+V+Me), 
# M_MVD, (C+V, C+V+Me), 
# M_DVP, (C+V+Me, C+V+Me+D)

# del M_MVD
# del M_CVD
# del M_DVP



(10064, 364)	3
(10064, 88)	15
(364, 88)	11


In [32]:
def symmetric_assign(W, shift, t):
    '''Embed dense ``W`` on the diagonal of a zero ``t`` x ``t`` matrix.

    ``W[i][j]`` ends up at position ``(shift + i, shift + j)`` of the result.
    The copy is a single slice assignment rather than an element-by-element
    Python loop.

    Args:
        W: 2-D dense array exposing ``.shape``.
        shift: Offset applied to both the row and the column index.
        t: Side length of the square result.

    Returns:
        A ``numpy.ndarray`` of shape ``(t, t)`` (NumPy's default float dtype)
        that is zero outside the embedded block.

    Raises:
        IndexError: If the shifted block does not fit inside ``t`` x ``t``.
    '''
    rows = W.shape[0]
    cols = W.shape[1]

    # A slice would silently clip (or wrap, for negative offsets), so check
    # explicitly that the whole block lands inside the target matrix.
    if shift < 0 or shift + rows > t or shift + cols > t:
        raise IndexError(
            f'block of shape {(rows, cols)} shifted by {shift} '
            f'does not fit in a {t}x{t} matrix'
        )

    newW = np.zeros((t, t))
    newW[shift:shift + rows, shift:shift + cols] = W
    return newW

def symmetric_assign2(W, shift, t):
    '''Embed sparse ``W`` on the diagonal of a dense zero ``t`` x ``t`` matrix.

    ``W`` is densified with ``W.toarray()`` and copied so that its top-left
    corner lands at ``(shift, shift)``. The block size and the result shape
    are printed as a progress line.

    Args:
        W: 2-D matrix exposing ``.shape`` and ``.toarray()``.
        shift: Offset applied to both the row and the column index.
        t: Side length of the square result.

    Returns:
        A ``float32`` ``numpy.ndarray`` of shape ``(t, t)``.
    '''
    n_rows, n_cols = W.shape

    # Dense zero canvas that receives the block.
    canvas = np.zeros((t, t), dtype=np.float32)
    print(n_rows, n_cols, canvas.shape)

    # Target window: the same offset on both axes.
    row_span = slice(shift, shift + n_rows)
    col_span = slice(shift, shift + n_cols)
    canvas[row_span, col_span] = W.toarray()

    return canvas

def symmetric_assign_Coo(W, shift, t):
    """Embed the non-zero entries of ``W`` on the diagonal of a sparse matrix.

    Every non-zero ``W[i, j]`` is written to ``(shift + i, shift + j)`` of an
    initially empty ``t`` x ``t`` matrix. The non-zero coordinates and a
    completion message are printed.

    Args:
        W: 2-D matrix supporting ``np.nonzero`` and ``W[i, j]`` indexing.
        shift: Offset applied to both the row and the column index.
        t: Side length of the square result.

    Returns:
        A ``lil_matrix`` of shape ``(t, t)`` and dtype ``float32``.
    """
    # LIL format is used because entries are assigned one at a time.
    placed = lil_matrix((t, t), dtype=np.float32)

    # Coordinates of the non-zero entries of W.
    nz_rows, nz_cols = np.nonzero(W)
    print(nz_rows, nz_cols)

    for r, c in zip(nz_rows, nz_cols):
        placed[shift + r, shift + c] = W[r, c]

    print("symmmetric_assign_Coo is complete...")
    return placed


# Offset of each node group along both axes of the combined matrix
# (groups are laid out in the order C, V, M, D, P):
# C --> 0
# V --> C
# M --> C+V
# D --> C+V+Me
# P --> C+V+Me+D

# Show the group sizes, then the offsets listed above followed by the total.
print(t)
print(0, C, C+V, C+V+Me, C+V+Me+D, sum(t))

# sym = [M_VDV, M_VMV, M_VPV, M_CVMVC, M_CVDVC, M_CVPVC, M_MVM, M_MVDVM, M_MVPVM, M_DVD,  M_DVMVD, M_DVRVD, M_PVP,    M_PVMVP,  M_PVDVP]
# shf = [C    , C    , C    , 0      , 0      , 0      , C+V  , C+V    , C+V    , C+V+Me, C+V+Me,  C+V+Me , C+V+Me+D, C+V+Me+D, C+V+Me+D]

# sym = [M_VDV, M_VMV, M_VPV, M_CVMVC, M_CVDVC, M_CVPVC, M_MVM, M_MVDVM, M_MVPVM, M_DVD,  M_DVRVD, M_PVP    ]
# shf = [C    , C    , C    , 0      , 0      , 0      , C+V  , C+V    , C+V    , C+V+Me, C+V+Me , C+V+Me+D ]

# Same-type similarity products, in the order matched by shf below:
# three visit blocks, two patient blocks, two medication blocks, then the
# diagnosis and procedure blocks.
sym = [
    M_VDV, M_VMV, M_VPV,
    M_CVMVC, M_CVPVC,
    M_MVM, M_MVPVM,
    M_DVD,
    M_PVP,
]
# Diagonal offset for each entry of sym.
shf = [C] * 3 + [0] * 2 + [C+V] * 2 + [C+V+Me, C+V+Me+D]
# Normalise every product by its own maximum.
sym = list(map(norm_max, sym))



[10064, 13620, 364, 8, 88]
0 10064 23684 24048 24056 24144
(13620, 13620)	8
(13620, 13620)	143
(13620, 13620)	21
(10064, 10064)	256
(10064, 10064)	375
(364, 364)	26
(364, 364)	524
(8, 8)	6429
(88, 88)	4936


In [33]:
# Drop the standalone names of the patient, medication, diagnosis and
# procedure products.
del M_CVMVC, M_CVPVC, M_MVM, M_MVPVM, M_DVD, M_PVP

# del M_VDV
# del M_VMV
# del M_VPV
# del M_CVDVC
# del M_MVDVM
# del M_DVMVD
# del M_DVRVD
# del M_PVMVP
# del M_PVDVP

# As = [symmetric_assign_Coo(A, shf[i], sum(t)) for i, A in enumerate(sym)]
# Embed every normalised product at its diagonal offset in a dense
# sum(t) x sum(t) matrix.
As = [symmetric_assign2(block, offset, sum(t)) for block, offset in zip(sym, shf)]

13620 13620 (24144, 24144)
13620 13620 (24144, 24144)
13620 13620 (24144, 24144)
10064 10064 (24144, 24144)
10064 10064 (24144, 24144)
364 364 (24144, 24144)
364 364 (24144, 24144)
8 8 (24144, 24144)
88 88 (24144, 24144)


In [34]:
# Convert the embedded sparse matrices to dense arrays.
newAS2 = [layer.toarray() for layer in As2]
newAS3 = [layer.toarray() for layer in As3]


In [35]:
# Print the shape of every matrix: As first, then newAS2, then newAS3.
for a in [*As, *newAS2, *newAS3]:
    print(a.shape)


(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)
(24144, 24144)


In [36]:
# Append the dense asymmetric matrices to As in place (newAS2, then newAS3)
# and display the resulting count.
As.extend([*newAS2, *newAS3])
len(As)

16

In [37]:
# The fusing section
#
# Each matrix in As is rescaled by its own maximum, and the rescaled
# matrices are then averaged element-wise into A_final.
normalized_As = [matrix / matrix.max() for matrix in As]

A_final = sum(normalized_As) / len(normalized_As)



In [38]:
import os

# save_npz does not create missing directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs("results/A", exist_ok=True)

# Convert each numpy array to a CSR sparse matrix and save it
for i, arr in enumerate(As):
    sparse_matrix = sparse.csr_matrix(arr)
    sparse.save_npz(f"results/A/sparse_matrix_{i}.npz", sparse_matrix)
    print(f'{i}\'th matrix saved!')

# Save the fused matrix in the same sparse format.
sparse_matrix = sparse.csr_matrix(A_final)
sparse.save_npz("results/A/A_final.npz", sparse_matrix)


0'th matrix saved!
1'th matrix saved!
2'th matrix saved!
3'th matrix saved!
4'th matrix saved!
5'th matrix saved!
6'th matrix saved!
7'th matrix saved!
8'th matrix saved!
9'th matrix saved!
10'th matrix saved!
11'th matrix saved!
12'th matrix saved!
13'th matrix saved!
14'th matrix saved!
15'th matrix saved!
