In [8]:
import pandas as pd
import numpy as np
import networkx as nx
from itertools import combinations 
import time
import os
import sys

# Import from the parent directory
sys.path.append(os.path.join(os.getcwd(), '..'))
from utils import utils

In [9]:
# Load the notes (the column at position 2 is parsed as datetimes) and put
# them in chronological order so the first/last rows bound the date grid.
notes = pd.read_csv(
    utils.notes_with_disposition_large_file, parse_dates=[2]
).sort_values('date')

# STEP is the width of one time window; DELTA is how far back the second pass
# looks when it counts earlier occurrences of an edge.
STEP = np.timedelta64(2, 'D')
DELTA = np.timedelta64(90, 'D')

# Window start points from the earliest note up to (excluding) the latest.
first_date, last_date = notes['date'].iloc[0], notes['date'].iloc[-1]
date_range = np.arange(first_date, last_date, STEP)

# Lookup tables filled by the first pass over the windows:
#   edge_to_date_dict               hashed edge -> window dates it was seen in
#   date_to_discharge_dict          window date -> discharge ids in that window
#   date_discharge_to_careteam_dict hashed (date, discharge id) -> edge list
edge_to_date_dict = {}
date_to_discharge_dict = {}
date_discharge_to_careteam_dict = {}

# Wall-clock reference for the timing report printed at the end of the cell.
start_time = time.perf_counter()

def hash_edge(edge):
    """Return an order-independent string key for an undirected edge.

    The two endpoints are sorted so that ``(a, b)`` and ``(b, a)`` map to the
    same key, then joined with a ``|`` delimiter. The delimiter is required:
    plain concatenation makes distinct edges collide (e.g. ``(1, 23)`` and
    ``(12, 3)`` would both become ``"123"``).

    Args:
        edge: A pair of mutually comparable endpoint identifiers.

    Returns:
        A string of the form ``"<smaller>|<larger>"``.
    """
    ordered = sorted(edge)
    return f"{ordered[0]}|{ordered[1]}"

def hash_date_discharge(date, discharge_id):
    """Return a string key identifying a (date, discharge id) pair.

    The two parts are joined with a ``|`` delimiter so the boundary between
    them is unambiguous; without it, pairs such as ``("2020-01-1", 12)`` and
    ``("2020-01-11", 2)`` would produce the same key.

    Args:
        date: The window date; its ``str()`` form is used.
        discharge_id: The discharge identifier; its ``str()`` form is used.

    Returns:
        A string of the form ``"<date>|<discharge_id>"``.
    """
    return f"{date}|{discharge_id}"

# First pass: walk the STEP-sized windows and record, per window,
#   * which discharges have notes in it,
#   * the care-team edges (all pairs of distinct `dr` values) per discharge,
#   * for every edge, the window dates in which it occurred.
# NOTE(review): the window is inclusive on both ends, so a note timestamped
# exactly on a window boundary is picked up by two consecutive windows --
# confirm whether that overlap is intended.
for date in date_range:
    plus_step = date + STEP
    notes_for_date = notes.query('date >= @date & date <= @plus_step')
    if notes_for_date.empty:
        # Nothing was written in this window; no dict entries are created.
        continue

    discharge_ids_for_date = notes_for_date.discharge_id.unique()
    date_to_discharge_dict.setdefault(date, []).extend(discharge_ids_for_date)

    for discharge_id in discharge_ids_for_date:
        discharge_notes = notes_for_date.query('discharge_id == @discharge_id')
        care_team = discharge_notes.dr.unique()
        # Every unordered pair of care-team members is one edge.
        care_team_edges = list(combinations(care_team, 2))
        careteam_key = hash_date_discharge(date, discharge_id)
        date_discharge_to_careteam_dict[careteam_key] = care_team_edges
        for edge in care_team_edges:
            edge_to_date_dict.setdefault(hash_edge(edge), []).append(date)

# Second pass: for each window that starts at least DELTA after the first
# note, build one graph per discharge. An edge is added only if the same pair
# was seen in an earlier window within the preceding DELTA; its weight is the
# number of such earlier occurrences. The edge count of each graph is printed.
careteam_date_range = np.arange(first_date + DELTA, last_date, STEP)
for date in careteam_date_range:
    # Windows without any notes were skipped by the first pass and therefore
    # have no entry here; treat them as "no discharges" instead of raising
    # KeyError.
    discharge_ids = date_to_discharge_dict.get(date, [])
    # Oldest window date (inclusive) that still counts towards the weight.
    history_start = date - DELTA
    for discharge_id in discharge_ids:
        careteam_edges = date_discharge_to_careteam_dict[hash_date_discharge(date, discharge_id)]
        graph = nx.Graph()
        for edge in careteam_edges:
            hashed_edge = hash_edge(edge)
            # Count strictly earlier sightings only: the current window's own
            # entry (d == date) is excluded by the `< date` bound.
            weight = sum(
                1 for d in edge_to_date_dict[hashed_edge]
                if history_start <= d < date
            )
            if weight > 0:
                graph.add_edge(*edge, weight=weight)
        print(graph.number_of_edges())
            
# Report the wall-clock time elapsed since start_time, in seconds and minutes,
# together with the number of notes that were loaded.
stop_time = time.perf_counter()
elapsed_seconds = stop_time - start_time
print(
    f"It took {elapsed_seconds} seconds or {elapsed_seconds / 60} minutes"
    + f" to process a total of {len(notes.index)} notes."
)


45
45
45
15
3
3
15
0
1
36
0
7
0
3
0
8
0
2
2
3
3
0
4
5
1
0
2
0
1
3
21
21
15
45
36
3
6
6
10
10
15
3
0
6
0
5
0
9
3
0
9
6
8
9
1
4
11
8
1
2
0
9
0
5
3
10
2
1
0
5
1
45
45
45
36
45
45
45
15
21
0
36
0
15
28
36
10
3
0
28
3
5
6
0
0
2
0
0
1
7
4
3
2
0
0
0
2
1
0
0
0
0
0
15
45
36
28
28
3
0
1
3
6
3
1
3
0
3
5
0
3
1
0
1
3
1
3
1
4
0
3
2
0
1
4
1
4
4
2
0
8
3
1
4
0
8
1
0
0
0
28
21
21
21
28
0
36
3
1
28
1
45
6
0
3
0
0
1
9
0
8
4
1
0
5
0
0
0
0
1
0
7
0
3
3
5
0
4
6
0
0
0
3
0
2
6
1
21
0
3
15
45
1
28
28
3
1
6
10
3
6
1
1
0
3
4
4
1
0
0
2
0
3
1
0
0
4
8
1
10
0
9
0
0
4
36
45
6
36
1
45
3
3
45
8
1
0
2
12
0
7
9
3
10
2
1
1
0
0
5
2
10
9
7
3
0
2
5
0
4
6
36
6
36
36
28
21
3
21
45
21
36
2
1
1
0
0
2
1
0
8
5
0
4
2
1
4
2
5
8
0
1
45
21
6
28
10
6
28
15
36
45
1
28
1
9
0
0
7
0
6
1
1
3
1
1
0
1
1
6
6
3
3
8
0
5
4
0
28
36
15
15
36
1
21
15
1
0
5
0
4
1
2
0
6
0
2
5
4
6
5
0
8
5
5
4
4
9
0
9
2
1
1
0
36
36
36
1
45
28
45
10
15
36
1
36
28
10
3
0
4
4
4
2
2
1
0
1
2
2
8
1
7
1
3
2
0
1
5
0
3
0
36
10
36
6
21
6
0
3
6
0
10
0
3
0
0
4
9
6
4
1
0
0
0
9
1
0
0
7