In [1]:
'''
    My implementation (pseudocode)
    For each impression, read through all of the time-series arrays recorded for that impression:
        For each time-series array:
            Start by adding the 'start' node to the graph
            For each intervention in the array:
                if the intervention is not already a key in the outer dictionary:
                    add the id of the intervention as a key in the outer dictionary, with an empty dict as its value
                if there is no next intervention, the id of the next node is 'end'
                if the id of the next intervention is NOT in the inner dictionary of the current node:
                    add the id of the next intervention as a key in the inner dictionary, with an initial value of 1
                if the id of the next intervention IS in the inner dictionary of the current node:
                    increment the value stored for the next intervention
'''
import pandas as pd

In [2]:
# Procedure groupings: several procedure codes collapse into one named group,
# which is used to further clean the graph.

temp_pros = (
    pd.read_excel('GROUPED list of Procedures.xlsx')
    .set_index(['Grouping'])
    .assign(Snomed=lambda frame: frame['Snomed'].astype(str))
)

# Keys are the 'Snomed' values (as text); values are the matching 'Grouping'
# index labels. When a code appears more than once, the last row wins.
grouped_pros = dict(zip(temp_pros['Snomed'], temp_pros.index))

grouped_pros

{'404996007': 'Access Airway',
 '232707004': 'Clear Airway',
 '23690002': 'Clear Airway',
 '232708009': 'Clear Airway',
 '230040009': 'Clear Airway',
 '232664002': 'Open/Position Airway',
 '51717002': 'ETCO2 Capnography',
 '425543005': 'ETCO2 Capnography',
 '19821003': 'ETCO2 Capnography',
 '284029005': 'ETCO2 Capnography',
 '104690002': 'Glucose Check',
 '302789003': 'Glucose Check',
 '406164000': 'Restraint Applied',
 '386423001': 'Restraint Applied',
 '241741007': 'Patient Cooling/Warming',
 '241740008': 'Patient Cooling/Warming',
 '421335007': 'Patient Cooling/Warming',
 '229585002': 'Patient Cooling/Warming',
 '398074008': 'Patient Cooling/Warming',
 '431774007': 'Patient Cooling/Warming',
 '431949004': 'Patient Cooling/Warming',
 '429283006': 'CPR',
 '441893003': 'CPR',
 '89666000': 'CPR',
 '425519007': 'CPR Discontinued',
 '18590009': 'Cardiac Pacing',
 '250980009': 'Cardioversion',
 '426220008': 'Defibrillation',
 '233169004': 'Defibrillation',
 '268400002': 'ECG',
 '428803005'

In [3]:
# Medications: lookup from the 'RxCui' value (as text) to the medication name.

temp_meds = (
    pd.read_excel('Complied list of ODEMSA Medications.xlsx')
    .set_index(['Medications'])
    .astype({'RxCui': str})
)

# Series.items() yields (index label, value) pairs, i.e. (medication, rxcui);
# when an RxCui appears more than once, the last row wins.
med_dict = {rxcui: medication for medication, rxcui in temp_meds['RxCui'].items()}

med_dict

{'828527': '10 ML calcium chloride 10 % Prefilled Syringe',
 '161': 'Acetaminophen (Tylenol)',
 '296': 'Adenosine (Adenocard)',
 '435': 'Albuterol (Proventil)',
 '214199': 'Albuterol 5mg / Ipratropium 0.5mg (DuoNeb)',
 '8410': 'Alteplase (Activase)',
 '703': 'Amiodarone (Cordarone)',
 'T195': 'Antibiotics',
 '1191': 'Aspirin (ASA)',
 '1223': 'Atropine',
 '153971': 'Atropine Sulfate',
 '205489': 'Bumetanide (Bumex)',
 '1901': 'Calcium Chloride',
 '197435': 'Calcium Gluconate 100 MG/ML Injectable Solution',
 '687707': 'CYANOKIT 5g',
 '575233': 'Dexamethasone (Decadron)',
 '1812079': 'Dexamethasone 10mg IV/IM',
 '48933': 'Dexamethasone 10MG PO',
 '4850': 'Dextrose',
 '237648': 'Dextrose 10 % Injectable Solution',
 '727518': 'Dextrose 25%',
 '1795250': 'Dextrose 5% in 0.45% NS',
 '237653': 'Dextrose 50 % Injectable Solution',
 '3322': 'Diazepam (Valium)',
 '3443': 'Diltiazem (Cardizem)',
 '3498': 'Diphenhydramine (Benadryl)',
 '3616': 'Dobutamine',
 '3628': 'Dopamine',
 '67108': 'Enoxapari

In [4]:
# Load every sheet of the workbook into its own DataFrame, keyed by sheet name.
# The workbook is opened in a `with` block so the underlying file handle is
# released as soon as all sheets have been read (previously it stayed open for
# the lifetime of the kernel).
with pd.ExcelFile('RQ4 WITH MOST COMMON - Graph Set 2 Data.xlsx') as xl:
    # One sheet per impression; the sheet names double as the impression labels.
    impressions = xl.sheet_names
    dataframes = {sheet: pd.read_excel(xl, sheet_name=sheet) for sheet in impressions}

dataframes

{'Seizures':       Unnamed: 0                                           Sequence
 0              0  start,230040009,425543005,428803005,6960,6960,...
 1              1            start,398041008,392230005,428803005,end
 2              2                      start,392230005,428803005,end
 3              3  start,6960,425543005,392230005,125464,42880300...
 4              4               start,392230005,428803005,125464,end
 ...          ...                                                ...
 1748        1748  start,46825001,392230005,392230005,392230005,2...
 1749        1749  start,7806,46825001,268400002,392230005,6960,2...
 1750        1750  start,46825001,392230005,392230005,6960,7806,3...
 1751        1751                  start,46825001,392230005,6960,end
 1752        1752      start,386423001,285064,46825001,6960,7806,end
 
 [1753 rows x 2 columns],
 'Chest Pain (Angina Cardiac)':       Unnamed: 0                                           Sequence
 0              0               

In [5]:
#initialize graphs
def count_transitions(sequences, procedure_labels, medication_labels):
    """Count how often each node is directly followed by each other node.

    Parameters
    ----------
    sequences : iterable of str
        Comma-separated node ids, one string per time series.
    procedure_labels : dict
        Maps a node id to its procedure grouping; checked first.
    medication_labels : dict
        Maps a node id to its medication name; checked when the id is not a
        known procedure.

    Returns
    -------
    dict
        ``{node: {next_node: count}}``. Every node that occurs in a sequence
        gets a key, including a node that is only ever last (its inner
        dictionary stays empty).
    """
    transitions = {}
    for sequence in sequences:
        # Translate ids to labels once per sequence; ids found in neither
        # lookup are kept as they are.
        labels = [
            procedure_labels.get(code, medication_labels.get(code, code))
            for code in sequence.split(",")
        ]
        for position, current in enumerate(labels):
            successors = transitions.setdefault(current, {})
            # The last node of a sequence has no successor to count.
            if position + 1 == len(labels):
                break
            following = labels[position + 1]
            successors[following] = successors.get(following, 0) + 1
    return transitions


graphs = {}

# One transition graph per sheet. The Series is iterated directly (values
# only) instead of through Series.iteritems(), which was removed in pandas 2.0.
for sheet_name, data in dataframes.items():
    graphs[sheet_name] = count_transitions(data['Sequence'], grouped_pros, med_dict)
                   
        
    

In [6]:
graphs

{'Seizures': {'start': {'Clear Airway': 11,
   'Stabilization/Splinting': 48,
   'IV/IO': 362,
   'Midazolam (Versed)': 140,
   'ECG': 893,
   'Normal saline': 20,
   'Ondansetron (Zofran)': 2,
   'Oxygen': 156,
   'Restraint Applied': 16,
   'ETCO2 Capnography': 35,
   'Intubation': 11,
   'Dextrose': 6,
   'Wound Care': 2,
   'BVM': 6,
   'Monitor/Care': 5,
   'Aspirin (ASA)': 2,
   'Naloxone (Narcan)': 7,
   'Glucose Check': 5,
   'Dextrose 10 % Injectable Solution': 4,
   'Bleeding Control': 6,
   'Open/Position Airway': 2,
   'CPR': 3,
   'CPAP/BiPAP': 1,
   'Glucagon (Glucagen)': 2,
   'Lorazepam (Ativan)': 1,
   'Generic Medication Placeholder': 1,
   'Haldol': 1,
   'Albuterol (Proventil)': 1,
   'Fentanyl': 2,
   'Cardiac Pacing': 1,
   'Albuterol 5mg / Ipratropium 0.5mg (DuoNeb)': 1},
  'Clear Airway': {'ETCO2 Capnography': 5,
   'ECG': 19,
   'end': 7,
   'Intubation': 4,
   'Glucose Check': 1,
   'BVM': 3,
   'Clear Airway': 1,
   'Oxygen': 4,
   'IV/IO': 8,
   'Midazolam (

In [7]:
#convert weights into percents
for impression, graph in graphs.items():
    # Each inner dictionary holds the outgoing weights of one node; dividing
    # every weight by their total turns it into a share of that total.

    for source, outgoing in graph.items():
        total = sum(outgoing.values())
        # An empty inner dictionary produces an empty update, so no division happens.
        outgoing.update({target: count / total for target, count in outgoing.items()})

graphs

{'Seizures': {'start': {'Clear Airway': 0.006274957216200799,
   'Stabilization/Splinting': 0.027381631488876214,
   'IV/IO': 0.2065031374786081,
   'Midazolam (Versed)': 0.07986309184255562,
   'ECG': 0.5094124358243012,
   'Normal saline': 0.011409013120365089,
   'Ondansetron (Zofran)': 0.0011409013120365088,
   'Oxygen': 0.0889903023388477,
   'Restraint Applied': 0.00912721049629207,
   'ETCO2 Capnography': 0.019965772960638905,
   'Intubation': 0.006274957216200799,
   'Dextrose': 0.0034227039361095267,
   'Wound Care': 0.0011409013120365088,
   'BVM': 0.0034227039361095267,
   'Monitor/Care': 0.002852253280091272,
   'Aspirin (ASA)': 0.0011409013120365088,
   'Naloxone (Narcan)': 0.003993154592127781,
   'Glucose Check': 0.002852253280091272,
   'Dextrose 10 % Injectable Solution': 0.0022818026240730175,
   'Bleeding Control': 0.0034227039361095267,
   'Open/Position Airway': 0.0011409013120365088,
   'CPR': 0.0017113519680547634,
   'CPAP/BiPAP': 0.0005704506560182544,
   'Gluc

In [8]:
# #get labels from dictionaries

# label_df = pd.DataFrame()

# for f in ['Complied list of Procedures.xlsx', 'Complied list of ODEMSA Medications.xlsx']:
#     data = pd.read_excel(f)
#     label_df = label_df.append(data)
    
# label_df.columns = ['ID', 0, 1,2,3]
# label_df['ID'] = label_df['ID'].astype(str)
# label_df = label_df.set_index('ID')


# label_df

In [9]:
# #turn dataframe into dictionary

# labels = label_df.T.to_dict('list')

# labels

In [10]:
# #convert to dataframe - use multiindexing
# dfs = {}

# for impression, graph in graphs.items():
#     nodes = []
#     node_labels = []

#     next_node_in_sequence = []
#     next_node_labels = []
#     frequencies = []

    
#     for node, dictionary in graph.items():
#         node_label = ""
#         if node_label in labels: node_label = labels[node][0]
#         for next_node, frequency in dictionary.items():
#             next_node_label = ""
#             if next_node in labels: next_node_label = labels[next_node][0]
            
#             nodes.append(node)
#             node_labels.append(node_label)
#             next_node_in_sequence.append(next_node)
#             next_node_labels.append(next_node_label)
#             frequencies.append(frequency)
            
#     graph_df = pd.DataFrame(nodes, columns = ['Nodes'])
#     graph_df['Node label'] = node_labels
#     graph_df['Edges'] = next_node_in_sequence
#     graph_df['Edges label'] = next_node_labels
#     graph_df['Frequency'] = frequencies
#     graph_df = graph_df.set_index(['Nodes', 'Node label', 'Edges'])
    
#     dfs[impression] = graph_df
    


In [11]:
# dfs

In [12]:
#write out to excel

# writer = pd.ExcelWriter( 'RQ4 Markov Chain Frequencies.xlsx', engine='xlsxwriter')

# for impression, df in dfs.items():    
#     df.to_excel(writer, sheet_name=impression)
    
# writer.save()

In [13]:
# #make visualization
# import networkx as nx
# from networkx.drawing import nx_pydot
# from graphviz import Source
# from pyvis.network import Network
# from pyvis.options import Layout

# vis = {}
# for impression, graph in graphs.items():
    
# #     used = set()    
# #     nt = Network(directed=True, notebook=True)
    
# #     i = 0    
# #     for node, edge_list in graph.items():
# #         if node not in used: 
# #             nt.add_node(node, label=node, level = i)
# #             used.add(node)
# #             i += 1
# #         for edge, freq in edge_list.items():
# #             nt.add_node(edge)
# #             nt.add_edge(node, edge, weight = freq)
    
# #     nt.show("example.html")
    
#     #initialize graph
#     DG = nx.MultiDiGraph()

#     #add all nodes
#     DG.add_nodes_from(graph.keys())

#     #make edge list - format
#     edges = []
#     for node, edge_list in graph.items():
#         for edge, freq in edge_list.items():
#             edges.append((node,edge, {'label': round(freq,2)}))

#     #add all edges
#     DG.add_edges_from(edges)
    
#     net = Network(directed=True, notebook=True)

#     net.from_nx(DG)

#     net.show("example.html")

    
#     #draw to visual representation
#     nx_pydot.write_dot(DG, impression + ' multig.dot')
    
    
#     src = Source.from_file(impression + ' multig.dot')
#     vis[impression] = src
    
#     #save and render
#     src.render(impression+ ' graph.gv', view=True)
    
    

In [14]:
#     #add all edges
#     set_separation(nt, 100)

#     nt.show("example.html")

In [15]:
#filter

#make visualization
import networkx as nx
from networkx.drawing import nx_pydot
from graphviz import Source

vis = {}

# Edges whose weight is below this fraction are left out of the graph.
# The weights in `graphs` are fractions of each node's outgoing total (they
# were divided by that total earlier in the notebook), so the cut-off must be
# expressed as a fraction too. The previous test `freq >= 2` could never be
# true for a fraction and therefore dropped every edge.
MIN_EDGE_PROBABILITY = 0.02

for impression, graph in graphs.items():

    #initialize graph
    DG = nx.MultiDiGraph()

    #add all nodes; the 'start' and 'end' terminals are drawn filled
    for node in graph.keys():
        if node in ('start', 'end'):
            DG.add_node(node, style = 'filled')
        else:
            DG.add_node(node)

    #make edge list - keep only edges at or above the cut-off, labelled with the rounded weight
    edges = [
        (node, edge, {'label': round(freq, 2)})
        for node, edge_list in graph.items()
        for edge, freq in edge_list.items()
        if freq >= MIN_EDGE_PROBABILITY
    ]
    DG.add_edges_from(edges)

    # After filtering, drop every non-terminal node that is not directly linked
    # (in either direction) to both 'start' and 'end'.
    # NOTE(review): this tests direct adjacency, not reachability, so a node in
    # the middle of a longer start -> ... -> end path is removed as well - confirm
    # that this is the intended pruning rule.
    remove = []
    for node in nx.nodes(DG):
        if node in ('start', 'end'):
            continue
        neighbours = set(nx.all_neighbors(DG, node))
        if 'start' not in neighbours or 'end' not in neighbours:
            remove.append(node)
    DG.remove_nodes_from(remove)

    
    #draw to visual representation
#     nx_pydot.write_dot(DG, impression + ' multig.dot')
    
#     src = Source.from_file(impression + ' multig.dot')
#     vis[impression] = src
    
#     #save and render
#     src.render('All nodes' + impression, view=True)
    
    