In [None]:
#! pip install python-louvain
#! pip install python-igraph
#! pip install numpy pandas scikit-learn python-igraph

In [1]:
from dfply import *
from IPython.display import display
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine, event

conn_str = 'DRIVER={ODBC Driver 17 for SQL Server};SERVER=KVHSQLPC56;DATABASE=AHDA;Trusted_Connection=yes;'
conn_url = URL.create("mssql+pyodbc", query={"odbc_connect": conn_str})
engine = create_engine(conn_url)

In [2]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from collections import defaultdict
from community import community_louvain
from sqlalchemy import create_engine
from igraph import Graph
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import igraph

class PatientCommunityAnalysis:
    def __init__(self, engine):
        self.engine = engine
        self.graph2 = None
    
    def interactions(self):
    #add your own query here. For example, for falls, you need to retrieve interactions for all unique patients 
    #using HF.interactions.
        query_dt = ''' 
            WITH FirstFall AS (
       SELECT 
              Distinct patient_id
              ,min([admit_datetime]) as first_fall
              ,min(diagnosis_age) as first_fall_age
       FROM 
              Core.acute_care_diagnoses acd
       WHERE 
              diagnosis_type_code NOT IN ('3', '4', '0')
              AND SUBSTRING(icd10_code, 1, 3) IN ('W06', 'W07', 'W08', 'W13', 'W14', 'W15', 'W16', 'W17', 
                                      'X80', 'Y01', 'Y30', 'W00', 'W01', 'W03', 'W04', 'W18', 
                                      'W10', 'W05')
              AND diagnosis_age >= 65
       GROUP BY 
              patient_id
)
SELECT 
       i.patient_id, i.service_class_id, i.start_datetime, i.end_datetime
FROM 
       FirstFall
INNER JOIN 
       HF.interactions i ON i.patient_id = FirstFall.patient_id
WHERE 
       i.start_datetime < first_fall
AND        
        i.service_class_id NOT IN (80,150,152,68)
ORDER BY i.patient_id, i.start_datetime
        '''
        df = pd.read_sql(query_dt, self.engine)
        interactions = df[df['service_class_id'].notna()]
        #interactions = interactions.dropna() 
        unique_patients = interactions['patient_id'].unique()
       
        train_set = interactions[interactions['patient_id'].isin(unique_patients)]
        return train_set
  
    #def sequence(self, interactions_df):
     #   df = interactions_df
      #  df = df.astype({"service_class_id": str}, errors='raise')
       # df4 = df.groupby('patient_id')['service_class_id'].agg(' '.join).reset_index()
        #return df4
  
    def patients_projection(self, interactions_df):
        df = interactions_df
        result_map = defaultdict(dict)
        svc_unique = df['service_class_id'].unique()

        # Loop over each service and find patients who share the same service
        for svc in svc_unique:
            index = np.where(df['service_class_id'] == svc)[0]
            patients = df.iloc[index]['patient_id'].unique()

            for p1 in patients:
                for p2 in patients:
                    if p1 < p2:  # Avoid double counting
                        result_map[p1][p2] = result_map[p1].get(p2, 0) + 1

        # Convert the results into a DataFrame with source, target, and weight (shared services)
        source, target, weight = [], [], []
        for key, neighbors in result_map.items():
            for n, w in neighbors.items():
                source.append(key)
                target.append(n)
                weight.append(w)

        df3 = pd.DataFrame({'source': source, 'target': target, 'weight': weight})
        return df3


    def create_graph(self, interactions_df):
        # Create patient projection based on shared services
        df = self.patients_projection(interactions_df)

        # Extract unique patients from both source and target columns
        unique_patients = np.unique(df[['source', 'target']])

        # Create a mapping from patient IDs to unique integers
        id_map = {pat_id: idx for idx, pat_id in enumerate(unique_patients)}

        # Use the id_map to replace patient IDs in source and target with integers
        df['source'] = df['source'].map(id_map)
        df['target'] = df['target'].map(id_map)

        # Extract edges (source, target) and weights for the graph
        edges = df[['source', 'target']].values.tolist()
        weights = df['weight'].tolist()

        # Create an igraph graph using the edges and weights
        graph = igraph.Graph(edges=edges, edge_attrs={"weight": weights})

        # Store the graph and reverse mapping (integer to patient ID)
        self.graph2 = graph
        self.index_to_patient_id = {idx: pat_id for pat_id, idx in id_map.items()}


    def community_detection_on_patients(self, number_of_iterations=-1):
        louvain = self.graph2.community_multilevel(weights=self.graph2.es['weight'], return_levels=False)

        pi = []
        piI = []

        for i in range(len(louvain)):
            component = louvain[i]

            if len(component) > 1:
                piI.append(component)

        pi.append(piI)
        iteration = 1
        if number_of_iterations == -1:
            while True:
                piI1 = []
                previous_components = pi[iteration - 1]
                for community in pi[iteration - 1]:
                    induced_subgraph = self.graph2.subgraph(list(community))
                    louvain1 = induced_subgraph.community_multilevel(weights=induced_subgraph.es['weight'], return_levels=False)
                    for i in range(len(louvain1)):
                        subgraph = louvain1[i]
                        lst = [community[maped_vertex] for maped_vertex in subgraph]
                        piI1.append(lst)
                pi.append(piI1)
                current_components = pi[iteration]
                if not current_components > previous_components:
                    break
                else:
                    iteration += 1

        if number_of_iterations != -1:
            while iteration < number_of_iterations:
                piI1 = []
                previous_components = pi[iteration - 1]
                for community in pi[iteration - 1]:
                    induced_subgraph = self.graph2.subgraph(list(community))
                    louvain1 = induced_subgraph.community_multilevel(weights=induced_subgraph.es['weight'], return_levels=False)
                    for i in range(len(louvain1)):
                        subgraph = louvain1[i]
                        lst = [community[maped_vertex] for maped_vertex in subgraph]
                        piI1.append(lst)
                pi.append(piI1)
                current_components = pi[iteration]
                if not current_components > previous_components:
                    break
                else:
                    iteration += 1

        print("Number of all iterations:", len(pi))
        modularity_list = []
        for i, iteration in enumerate(pi):
            print(f"Number of communities in {i + 1} iteration: {len(iteration)}")

            membership_vector = [-1] * self.graph2.vcount()

            for community_idx, community in enumerate(iteration):
                for vertex in community:
                    membership_vector[vertex] = community_idx

            modularity_value = self.graph2.modularity(membership_vector)
            modularity_list.append(modularity_value)
            print("Modularity Value:", modularity_value)

        self.modularity_list = modularity_list
            
        self.pi6 = pi
        return self.pi6, self.graph2
    
    def create_table_for_patients(self, iteration2=-1):
        if iteration2 is None:
                iteration_index = len(self.pi6) - 1
                print(f"Iteration not specified, using the last iteration: {len(self.pi6)}")
        else:
            iteration_index = iteration2 - 1  

        if iteration_index < 0 or iteration_index >= len(self.pi6):
            raise ValueError(f"Invalid iteration index: {iteration2}. Please enter a number between 1 and {len(self.pi6)}.")

        selected_iteration = self.pi6[iteration_index]

        self.vertices = []  
        community_ids = []  
        weightt = []        
        out_degree2 = []    
        weighted_degree2 = []  #


        for community_id, component in enumerate(selected_iteration, start=1):

            subgraph = self.graph2.induced_subgraph(component)

            patient_ids = [self.index_to_patient_id[vertex] for vertex in component]

            in_degree = subgraph.strength(list(range(len(component))), mode='all', loops=True, weights="weight")
            total_degree = self.graph2.strength(component, mode='all', loops=True, weights="weight")
            out_degree = [total - in_deg for total, in_deg in zip(total_degree, in_degree)]

            self.vertices.extend(patient_ids)
            community_ids.extend([community_id] * len(component))
            weightt.extend(in_degree)
            out_degree2.extend(out_degree)
            weighted_degree2.extend(total_degree)

        df = pd.DataFrame({
            'patients_id': self.vertices,
            'community_id': community_ids,
            'in_degree': weightt,
            'out_degree': out_degree2,
            'weighted_degree': weighted_degree2
        })

        return df
    
    def count_nodes_in_communities(self, iteration=-1):
        iteration = iteration if iteration != -1 else len(self.pi6)
        nodes_counts = {}
        for i in range(iteration):
            communities = self.pi6[i]
            for j, community in enumerate(communities):
                community_id = f"Community {j+1} (Iteration {i+1})"
                nodes_counts[community_id] = len(community)
        return nodes_counts


custom_iteration1 = float(input("Enter the number of iterations:"))

patient_community_analysis = PatientCommunityAnalysis(engine)
interactions_df = patient_community_analysis.interactions()

#sequence = patient_community_analysis.sequence(interactions_df)

patient_community_analysis.create_graph(interactions_df)

communities, graph = patient_community_analysis.community_detection_on_patients(number_of_iterations=custom_iteration1)
output_file_path = 'Iteration_Information_Fall_FifthRun.txt'
with open(output_file_path, 'w') as file:
    file.write("done!\n")
    file.write(f"number of all iterations {custom_iteration1}\n")
    for iteration, community_list in enumerate(communities, start=1):
        file.write(f"number of communities in {iteration} iteration {len(community_list)}\n")

custom_iteration2 = int(input("Enter your desired iterations:"))
patient_community = patient_community_analysis.create_table_for_patients(iteration2=custom_iteration2)
patient_community.to_csv('Elham_Patient_Community_Projection_Fall_FifthRun.csv', index=False)
print(patient_community)

nodes_counts = patient_community_analysis.count_nodes_in_communities(iteration=custom_iteration2)
for community_id, node_count in nodes_counts.items():
    print(f"{community_id}: {node_count}")


Enter the number of iterations:10
Number of all iterations: 10
Number of communities in 1 iteration: 3
Modularity Value: 0.05571521788139422
Number of communities in 2 iteration: 12
Modularity Value: 0.019215500055243494
Number of communities in 3 iteration: 51
Modularity Value: 0.008254796335223682
Number of communities in 4 iteration: 176
Modularity Value: 0.0037738720651128107
Number of communities in 5 iteration: 461
Modularity Value: 0.0031231915107110364
Number of communities in 6 iteration: 724
Modularity Value: 0.003060400920705846
Number of communities in 7 iteration: 807
Modularity Value: 0.003063364338106471
Number of communities in 8 iteration: 820
Modularity Value: 0.0030645858463925484
Number of communities in 9 iteration: 822
Modularity Value: 0.003064319613286543
Number of communities in 10 iteration: 824
Modularity Value: 0.0030645862582909266
Enter your desired iterations:1
      patients_id  community_id  in_degree  out_degree  weighted_degree
0        10001903      

In [None]:
import numpy as np
import pandas as pd

class CommunityAnalysisReport:
    def __init__(self, engine):
        self.engine = engine
        #self.cohort = None  
        
    def take_svcid_HF(self):
        query_s1 = """ 
            SELECT DISTINCT service_class_id 
            FROM HF.interactions 
            WHERE service_class_id != -1
            AND mhsu_flag = 1
            AND service_class_id NOT IN (17, 18)
            AND start_datetime >= '2016-04-01'
            GROUP BY service_class_id
            """
        return pd.read_sql(query_s1, self.engine)
        
    def interactionsAndPatientsOFeachSvc_HF(self, df):
        patient_string = '\',\''.join(df["patients_id"])
        patient_string = '\'' + patient_string + '\''

        query_s2 = """ 
            SELECT service_class_id, COUNT(DISTINCT patient_id) AS patients
            FROM HF.interactions 
            WHERE patient_id IN (""" + patient_string + """) 
            AND service_class_id != -1
            AND age_at_encounter BETWEEN 5 AND 21
            AND mhsu_flag = 1
            AND service_class_id NOT IN (17, 18)
            AND patient_id NOT IN ('177CA9AA4', '2991143')
            AND start_datetime >= '2016-04-01'
            GROUP BY service_class_id
            ORDER BY patients DESC 
            """

        return pd.read_sql(query_s2, self.engine)
    
    def safe_division1(self,x, y):
        if y == 0:
            return np.nan
        else:
            return round(((x / y)*100), 2)

    def interactions_and_patients_of_each_svc_HF(self, df, iteration):
        
        take_svcid_label = self.take_svcid_HF()
        i = 0
        for community in df.community_id.unique():
            index=np.where(df.community_id==community)
            community_df=df.iloc[index]
            interactionsAndPatientsOFeachSvc_df= self.interactionsAndPatientsOFeachSvc_HF(community_df)          
            interactionsAndPatientsOFeachSvcMergeWithAllSvc= pd.merge(take_svcid_label, interactionsAndPatientsOFeachSvc_df, on='service_class_id', how='left')
            new_col_name = str(community)+ '  #patients'
            new_col_name2 = str(community)+ '  #interactions' 
            col_dict = {'patients':new_col_name }
            interactionsAndPatientsOFeachSvcMergeWithAllSvc=interactionsAndPatientsOFeachSvcMergeWithAllSvc.rename(columns=col_dict)
            interactionsAndPatientsOFeachSvcMergeWithAllSvc=interactionsAndPatientsOFeachSvcMergeWithAllSvc.fillna(0)

            summury=interactionsAndPatientsOFeachSvcMergeWithAllSvc
            summury['community%'+ str(community)] =  summury.apply(lambda row: self.safe_division1(row.iloc[1], len(community_df)), axis=1)
            if(i!=0):
                df_copy=pd.merge( df_copy,  summury, on='service_class_id', how='left')

            if(i==0):  
                df_copy =  summury.copy(deep=True)
            i=i+1
        result_df_s = df_copy.fillna(0)
        
        query = """
                SELECT DISTINCT i.service_class_id, s.service_class_name
                FROM HF.interactions i
                JOIN Ref.service_classifications s ON i.service_class_id = s.service_class_id;
                """
        df_service = pd.read_sql(query, self.engine)
        
        for service_class_id in result_df_s['service_class_id'].unique():
            service_name = df_service.loc[df_service['service_class_id'] == service_class_id, 'service_class_name'].iloc[0]
            result_df_s.loc[result_df_s['service_class_id'] == service_class_id, 'service_class_name'] = service_name
        columns = result_df_s.columns.tolist()

        columns.insert(1, columns.pop())
        result_df_s = result_df_s[columns]
       
        return result_df_s
    

community_report = CommunityAnalysisReport(engine)

custom_iteration3 = int(input("Enter your desired iterations:"))

df = patient_community_analysis.create_table_for_patients(iteration2=custom_iteration3)

service = community_report.interactions_and_patients_of_each_svc_HF(df, iteration=custom_iteration3)
service.to_csv(f'Elham_Community_Service_Summary_Result_FifthRun.csv', index=False)  
print(service)