In [None]:
#! pip install python-louvain
#! pip install python-igraph
#! pip install numpy pandas scikit-learn python-igraph

In [1]:
from dfply import *
from IPython.display import display
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine, event

# Raw ODBC connection string for the AHDA database on server KVHSQLPC56.
# Trusted_Connection=yes is set, so no username/password is embedded here.
conn_str = 'DRIVER={ODBC Driver 17 for SQL Server};SERVER=KVHSQLPC56;DATABASE=AHDA;Trusted_Connection=yes;'
# Hand the ODBC string to SQLAlchemy through the "odbc_connect" query
# parameter of an mssql+pyodbc URL.
conn_url = URL.create("mssql+pyodbc", query={"odbc_connect": conn_str})
# Engine shared by the rest of the notebook (passed to PatientCommunityAnalysis).
engine = create_engine(conn_url)

In [2]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from collections import defaultdict
from community import community_louvain
from sqlalchemy import create_engine
from igraph import Graph
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import igraph

class PatientCommunityAnalysis:
    def __init__(self, engine):
        self.engine = engine
        self.graph2 = None
    
    def interactions(self):
    #add your own query here. For example, for falls, you need to retrieve interactions for all unique patients 
    #using HF.interactions.
        query_dt = ''' 
            WITH FirstFall AS (
       SELECT 
              Distinct patient_id
              ,min([admit_datetime]) as first_fall
              ,min(diagnosis_age) as first_fall_age
       FROM 
              Core.acute_care_diagnoses acd
       WHERE 
              diagnosis_type_code NOT IN ('3', '4', '0')
              AND SUBSTRING(icd10_code, 1, 3) IN ('W06', 'W07', 'W08', 'W13', 'W14', 'W15', 'W16', 'W17', 
                                      'X80', 'Y01', 'Y30', 'W00', 'W01', 'W03', 'W04', 'W18', 
                                      'W10', 'W05')
              AND diagnosis_age >= 65
       GROUP BY 
              patient_id
)
SELECT 
       i.patient_id, i.service_class_id, i.start_datetime, i.end_datetime
FROM 
       FirstFall
INNER JOIN 
       HF.interactions i ON i.patient_id = FirstFall.patient_id
WHERE 
       i.start_datetime < first_fall
AND        
        i.service_class_id NOT IN (80,150,152,68)
ORDER BY i.patient_id, i.start_datetime
        '''
        df = pd.read_sql(query_dt, self.engine)
        interactions = df[df['service_class_id'].notna()]
        #interactions = interactions.dropna() 
        unique_patients = interactions['patient_id'].unique()
       
        train_set = interactions[interactions['patient_id'].isin(unique_patients)]
        return train_set
  
    #def sequence(self, interactions_df):
     #   df = interactions_df
      #  df = df.astype({"service_class_id": str}, errors='raise')
       # df4 = df.groupby('patient_id')['service_class_id'].agg(' '.join).reset_index()
        #return df4
  
    def patients_projection(self, interactions_df):
        df = interactions_df
        result_map = defaultdict(dict)
        svc_unique = df['service_class_id'].unique()

        # Loop over each service and find patients who share the same service
        for svc in svc_unique:
            index = np.where(df['service_class_id'] == svc)[0]
            patients = df.iloc[index]['patient_id'].unique()

            for p1 in patients:
                for p2 in patients:
                    if p1 < p2:  # Avoid double counting
                        result_map[p1][p2] = result_map[p1].get(p2, 0) + 1

        # Convert the results into a DataFrame with source, target, and weight (shared services)
        source, target, weight = [], [], []
        for key, neighbors in result_map.items():
            for n, w in neighbors.items():
                source.append(key)
                target.append(n)
                weight.append(w)

        df3 = pd.DataFrame({'source': source, 'target': target, 'weight': weight})
        return df3


    def create_graph(self, interactions_df):
        # Create patient projection based on shared services
        df = self.patients_projection(interactions_df)

        # Extract unique patients from both source and target columns
        unique_patients = np.unique(df[['source', 'target']])

        # Create a mapping from patient IDs to unique integers
        id_map = {pat_id: idx for idx, pat_id in enumerate(unique_patients)}

        # Use the id_map to replace patient IDs in source and target with integers
        df['source'] = df['source'].map(id_map)
        df['target'] = df['target'].map(id_map)

        # Extract edges (source, target) and weights for the graph
        edges = df[['source', 'target']].values.tolist()
        weights = df['weight'].tolist()

        # Create an igraph graph using the edges and weights
        graph = igraph.Graph(edges=edges, edge_attrs={"weight": weights})

        # Store the graph and reverse mapping (integer to patient ID)
        self.graph2 = graph
        self.index_to_patient_id = {idx: pat_id for pat_id, idx in id_map.items()}


    def community_detection_on_patients(self, number_of_iterations=-1):
        louvain = self.graph2.community_multilevel(weights=self.graph2.es['weight'], return_levels=False)

        pi = []
        piI = []

        for i in range(len(louvain)):
            component = louvain[i]

            if len(component) > 1:
                piI.append(component)

        pi.append(piI)
        iteration = 1
        if number_of_iterations == -1:
            while True:
                piI1 = []
                previous_components = pi[iteration - 1]
                for community in pi[iteration - 1]:
                    induced_subgraph = self.graph2.subgraph(list(community))
                    louvain1 = induced_subgraph.community_multilevel(weights=induced_subgraph.es['weight'], return_levels=False)
                    for i in range(len(louvain1)):
                        subgraph = louvain1[i]
                        lst = [community[maped_vertex] for maped_vertex in subgraph]
                        piI1.append(lst)
                pi.append(piI1)
                current_components = pi[iteration]
                if not current_components > previous_components:
                    break
                else:
                    iteration += 1

        if number_of_iterations != -1:
            while iteration < number_of_iterations:
                piI1 = []
                previous_components = pi[iteration - 1]
                for community in pi[iteration - 1]:
                    induced_subgraph = self.graph2.subgraph(list(community))
                    louvain1 = induced_subgraph.community_multilevel(weights=induced_subgraph.es['weight'], return_levels=False)
                    for i in range(len(louvain1)):
                        subgraph = louvain1[i]
                        lst = [community[maped_vertex] for maped_vertex in subgraph]
                        piI1.append(lst)
                pi.append(piI1)
                current_components = pi[iteration]
                if not current_components > previous_components:
                    break
                else:
                    iteration += 1

        print("Number of all iterations:", len(pi))
        modularity_list = []
        for i, iteration in enumerate(pi):
            print(f"Number of communities in {i + 1} iteration: {len(iteration)}")

            membership_vector = [-1] * self.graph2.vcount()

            for community_idx, community in enumerate(iteration):
                for vertex in community:
                    membership_vector[vertex] = community_idx

            modularity_value = self.graph2.modularity(membership_vector)
            modularity_list.append(modularity_value)
            print("Modularity Value:", modularity_value)

        self.modularity_list = modularity_list
            
        self.pi6 = pi
        return self.pi6, self.graph2
    
    def create_table_for_patients(self, iteration2=-1):
        iteration2 = iteration2 if iteration2 != -1 else len(self.pi6)
        weight = []
        out_degree = []
        weightd_degree = []
        for j in range(len(self.pi6)):
            iteration1 = self.pi6[j]

            weightI = []
            out_degreeI = list()
            weightd_degreeI = []
            for i in range(len(iteration1)):
                component = iteration1[i]

                subgraph = self.graph2.induced_subgraph(component)
                total_degree = self.graph2.strength(component, mode='all', loops=True, weights="weight")
                weightd_degreeI.append(total_degree)
                lst = list(range(len(component)))
                in_degree = subgraph.strength(lst, mode='all', loops=True, weights="weight")
                weightI.append(in_degree)
                lst2 = []
                for item1, item2 in zip(total_degree, in_degree):
                    item = item1 - item2
                    lst2.append(item)
                out_degreeI.append(lst2)

            weight.append(weightI)
            out_degree.append(out_degreeI)
            weightd_degree.append(weightd_degreeI)

        self.vertices = []
        idd = 1
        id = []

        for component in self.pi6[iteration2 - 1]:
            for vertex in component:
                patient_id = self.index_to_patient_id[vertex]  
                self.vertices.append(patient_id)
                id.append(idd)
            idd += 1

        weightt = []
        for component in weight[iteration2 - 1]:
            for degree in component:
                weightt.append(degree)

        out_degree2 = []
        for component in out_degree[iteration2 - 1]:
            for out_deg in component:
                out_degree2.append(out_deg)

        weighted_degree2 = []
        for component in weightd_degree[iteration2 - 1]:
            for weight_deg in component:
                weighted_degree2.append(weight_deg)

        df = pd.DataFrame()
        df.insert(loc=0, column='patients_id', value=self.vertices)
        df.insert(loc=1, column='community_id', value=id)
        df.insert(loc=2, column='in_degree', value=weightt)
        df.insert(loc=3, column='out_degree', value=out_degree2)
        df.insert(loc=4, column='weighted_degree', value=weighted_degree2)

        return df

    def count_nodes_in_communities(self, iteration=-1):
        iteration = iteration if iteration != -1 else len(self.pi6)
        nodes_counts = {}
        for i in range(iteration):
            communities = self.pi6[i]
            for j, community in enumerate(communities):
                community_id = f"Community {j+1} (Iteration {i+1})"
                nodes_counts[community_id] = len(community)
        return nodes_counts



# Main script

# Maximum number of refinement iterations; enter -1 to refine until no
# community splits any further. An iteration count is a whole number, so it
# is parsed as int (it used to be parsed as float).
custom_iteration1 = int(input("Enter the number of iterations:"))

patient_community_analysis = PatientCommunityAnalysis(engine)
interactions_df = patient_community_analysis.interactions()

#sequence = patient_community_analysis.sequence(interactions_df)

patient_community_analysis.create_graph(interactions_df)

communities, graph = patient_community_analysis.community_detection_on_patients(number_of_iterations=custom_iteration1)

# Summarise the run in a small text report.
output_file_path = 'Iteration_Information_Fall.txt'
with open(output_file_path, 'w') as file:
    file.write("done!\n")
    # Report the number of iterations actually produced, not the requested
    # limit (which may be -1 or larger than what the refinement reached).
    file.write(f"number of all iterations {len(communities)}\n")
    for iteration, community_list in enumerate(communities, start=1):
        file.write(f"number of communities in {iteration} iteration {len(community_list)}\n")

# 1-based iteration whose communities are exported; -1 selects the last one.
custom_iteration2 = int(input("Enter your desired iterations:"))
patient_community = patient_community_analysis.create_table_for_patients(iteration2=custom_iteration2)
patient_community.to_csv('Patient_Community_Projection_Fall.csv', index=False)
print(patient_community)

# Community sizes for every iteration up to the selected one.
nodes_counts = patient_community_analysis.count_nodes_in_communities(iteration=custom_iteration2)
for community_id, node_count in nodes_counts.items():
    print(f"{community_id}: {node_count}")


Enter the number of iterations:1
Number of all iterations: 1
Number of communities in 1 iteration: 3
Modularity Value: 0.054287579090583696
Enter your desired iterations:1
      patients_id  community_id  in_degree  out_degree  weighted_degree
0        10001903             1     9843.0     19083.0          28926.0
1         1000582             1     6693.0     10041.0          16734.0
2       100334194             1    11169.0     21759.0          32928.0
3         1003362             1     4484.0      2107.0           6591.0
4       1003C3404             1     9334.0     17620.0          26954.0
...           ...           ...        ...         ...              ...
13382     CC7AB43             3     8965.0     19593.0          28558.0
13383     CC93453             3     1344.0      1828.0           3172.0
13384     CCB4163             3     4235.0      6036.0          10271.0
13385    CCB90234             3     4235.0      6036.0          10271.0
13386    CCC9C804             3     