In [1]:
from datetime import datetime, date, timedelta
import pandas as pd
import networkx as nx
from itertools import combinations  
import numpy as np

class CareTeam:
    """Shared-work graph for the care team of one discharge.

    For every date in ``notes_in_window`` and every discharge seen on that
    date, each pair of doctors who both have a note for that discharge is
    considered. Pairs in which both doctors belong to ``care_team`` add 1 to
    the ``weight`` of the corresponding edge in ``G``. Doctors only enter the
    graph through such edges, so a team with no shared notes in the window
    ends up with an empty graph.

    Attributes:
        notes_in_window: DataFrame of notes; the ``date``, ``discharge_id``
            and ``dr`` columns are read.
        discharge_id: Identifier of the discharge this team belongs to
            (stored only; it is not used when building the graph).
        care_team: Collection of doctor identifiers forming the team.
        care_team_edges: Every 2-combination of ``care_team`` as a sorted
            two-element list.
        G: Undirected ``networkx.Graph``; the edge attribute ``weight``
            counts how often the two doctors co-occurred.
        unique_dates: Distinct values of the ``date`` column.
    """

    def __init__(self, notes_in_window, discharge_id, care_team):
        """Store the inputs and build the weighted graph immediately."""
        self.notes_in_window = notes_in_window
        self.discharge_id = discharge_id
        self.care_team = care_team
        self.care_team_edges = [sorted(edge) for edge in combinations(care_team, 2)]
        # Hashable mirror of care_team_edges so membership tests are O(1)
        # instead of a linear scan over a list of lists.
        self._care_team_edge_set = {tuple(edge) for edge in self.care_team_edges}
        self.G = nx.Graph()
        self.unique_dates = notes_in_window['date'].unique()
        self.__create_graph()

    def __create_graph(self):
        """Add one unit of weight per team pair for each (date, discharge)."""
        # sort=False keeps groups in first-appearance order, so edges (and
        # therefore nodes) enter the graph in the order the notes are listed.
        for _, notes_for_date in self.notes_in_window.groupby('date', sort=False):
            for _, notes_for_discharge in notes_for_date.groupby('discharge_id', sort=False):
                drs_for_discharge_id = notes_for_discharge['dr'].unique()
                for edge in combinations(drs_for_discharge_id, 2):
                    # Sorting makes the lookup independent of doctor order.
                    if tuple(sorted(edge)) in self._care_team_edge_set:
                        self.__add_edge_to_G(edge)

    def __add_edge_to_G(self, edge):
        """Create ``edge`` with weight 1, or increment its existing weight."""
        data = self.G.get_edge_data(*edge, default=None)
        weight = 1 if data is None else data['weight'] + 1
        self.G.add_edge(*edge, weight=weight)

class CareDate:
    """Care teams active on one care date, keyed by discharge.

    ``care_team_dict`` maps each distinct ``discharge_id`` found in
    ``notes_for_care_date`` to the array of distinct ``dr`` values that have
    a note for that discharge. Iterating the object yields one ``CareTeam``
    per entry, each built against ``notes_in_window``.
    """

    def __init__(self, notes_in_window, notes_for_care_date):
        self.notes_in_window = notes_in_window
        self.notes_for_care_date = notes_for_care_date
        self.care_team_dict = {}
        self.__populate_care_team_dict()

    def __populate_care_team_dict(self):
        """Fill ``care_team_dict`` in order of first appearance of each discharge."""
        care_notes = self.notes_for_care_date
        for current_id in care_notes.discharge_id.unique():
            rows_for_id = care_notes[care_notes.discharge_id == current_id]
            self.care_team_dict[current_id] = rows_for_id.dr.unique()

    def __iter__(self):
        """Yield a ``CareTeam`` for every discharge recorded on this care date."""
        for team_key in self.care_team_dict:
            members = self.care_team_dict[team_key]
            yield CareTeam(self.notes_in_window, team_key, members)
    
class StudyWindowManager:
    """Slides a fixed-length look-back window across the notes.

    For each start date in ``date_range`` the look-back window is
    ``[start, start + DELTA]`` (both ends inclusive) and the care period is
    ``(start + DELTA, start + DELTA + STEP]``. Iterating yields one
    ``CareDate`` per start date whose care period contains at least one note.

    Attributes:
        notes: Copy of the input DataFrame sorted by its ``date`` column.
        DELTA: Window length as ``numpy.timedelta64`` in days.
        STEP: Distance between consecutive window starts (and length of the
            care period) as ``numpy.timedelta64`` in days.
        date_range: Window start dates, from the earliest note date up to
            (but excluding) the latest note date minus ``DELTA``.
    """

    def __init__(self, notes, window_in_days, step_in_days):
        """
        Args:
            notes: DataFrame with a ``date`` column; it is not modified.
            window_in_days: Length of the look-back window, in whole days.
            step_in_days: Days between consecutive window starts.
        """
        # Sort a copy: the caller's DataFrame must not be reordered as a
        # side effect of constructing the manager.
        self.notes = notes.sort_values('date')
        self.DELTA = np.timedelta64(window_in_days, 'D')
        self.STEP = np.timedelta64(step_in_days, 'D')
        if self.notes.empty:
            # No notes means no windows; avoid indexing into an empty column.
            self.date_range = np.array([], dtype='datetime64[ns]')
            return
        first_date = self.notes['date'].iloc[0]
        last_date = self.notes['date'].iloc[-1]
        self.date_range = np.arange(first_date, last_date - self.DELTA, self.STEP)

    def __iter__(self):
        """Yield a ``CareDate`` for every window whose care period has notes."""
        dates = self.notes['date']
        for start_date in self.date_range:
            end_date = start_date + self.DELTA
            date_of_care = end_date + self.STEP
            notes_for_care_date = self.notes[(dates > end_date) & (dates <= date_of_care)]
            if notes_for_care_date.empty:
                # Nothing to evaluate for this step; skip slicing the window.
                continue
            notes_in_window = self.notes[(dates >= start_date) & (dates <= end_date)]
            yield CareDate(notes_in_window, notes_for_care_date)
            

In [2]:
# Load the notes table; the column at index 2 is parsed as datetimes
# (presumably the `date` column used for sorting and windowing — TODO confirm).
notes = pd.read_csv('../data/notes_test.csv', parse_dates=[2])

def get_output(care_team):
    """Summarise a care team's collaboration graph as a flat record.

    Args:
        care_team: Object exposing ``G`` (a weighted networkx graph),
            ``discharge_id`` and ``care_team`` (the collection of members).

    Returns:
        dict with the keys:
            discharge_id: ``care_team.discharge_id``.
            avg_clust: Mean clustering coefficient over the graph's nodes,
                or 0 when the graph has no nodes.
            cumulative_experience: Sum of edge weights minus the number of
                edges.
            avg_cumulative_experience: ``cumulative_experience`` divided by
                ``len(care_team.care_team)`` (members absent from the graph
                still count), or 0 when the team has no members.
            team_edge_size: Number of edges in the graph.
            team_size: Number of nodes in the graph.
    """
    graph = care_team.G

    # Per-node clustering coefficients, keyed by node.
    coefficients = nx.clustering(graph)
    avg_clust = sum(coefficients.values()) / len(coefficients) if coefficients else 0

    experience = graph.size(weight='weight')  # total edge weight
    team_edge_size = graph.number_of_edges()
    cumulative_experience = experience - team_edge_size

    # Same divide-by-zero protection as avg_clust: an empty team has no average.
    member_count = len(care_team.care_team)
    avg_cumulative_experience = cumulative_experience / member_count if member_count > 0 else 0

    return {
        'discharge_id': care_team.discharge_id,
        'avg_clust': avg_clust,
        'cumulative_experience': cumulative_experience,
        'avg_cumulative_experience': avg_cumulative_experience,
        "team_edge_size": team_edge_size,
        "team_size": graph.number_of_nodes()
    }

WINDOW = 90  # window_in_days passed to StudyWindowManager
STEP = 2     # step_in_days passed to StudyWindowManager

get_care_dates = StudyWindowManager(notes, WINDOW, STEP)

OUTPUT_COLUMNS = ['discharge_id',
        'avg_clust',
        'cumulative_experience',
        'avg_cumulative_experience',
        'team_edge_size',
        'team_size']

# Collect one record per care team and build the frame once at the end:
# appending row by row copies the whole frame on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.
records = []
for care_date in get_care_dates:
    print(f'Number of care teams: {len(care_date.care_team_dict)}')
    records.extend(get_output(care_team) for care_team in care_date)

# columns= keeps the expected schema even when no care team was produced.
output = pd.DataFrame(records, columns=OUTPUT_COLUMNS)

print(output)

Number of care teams: 1
{'discharge_id': 6, 'avg_clust': 1.0, 'cumulative_experience': 6.0, 'avg_cumulative_experience': 0.8571428571428571, 'team_edge_size': 21, 'team_size': 7}
Number of care teams: 1
{'discharge_id': 7, 'avg_clust': 0, 'cumulative_experience': 0.0, 'avg_cumulative_experience': 0.0, 'team_edge_size': 0, 'team_size': 0}
   discharge_id  avg_clust  cumulative_experience  avg_cumulative_experience  \
0           6.0        1.0                    6.0                   0.857143   
1           7.0        0.0                    0.0                   0.000000   

   team_edge_size  team_size  
0            21.0        7.0  
1             0.0        0.0  
