In [1]:
import random
from datetime import datetime, date, timedelta
from itertools import chain
import pandas as pd
import networkx as nx
from itertools import combinations  
import numpy as np
import time

class CareTeam:
    """Weighted graph of shared note activity among one care team's members.

    Nodes of ``G`` are care-team members and each edge weight counts the
    (date, discharge_id) groups in ``note_df`` in which both members of the
    pair appear. Members that never appear together with another member in
    ``note_df`` are not added to ``G``.

    Parameters
    ----------
    note_df : pandas.DataFrame
        Notes table; the columns ``date``, ``discharge_id`` and ``dr`` are
        read.
    discharge_id
        Identifier of the discharge this care team belongs to (stored only).
    care_team : iterable
        The ``dr`` values making up the team.

    Attributes
    ----------
    care_team_edges : list of list
        Every 2-combination of ``care_team``, each pair sorted.
    G : networkx.Graph
        The graph described above.
    unique_dates : array-like
        Distinct values of ``note_df.date``.
    """

    def __init__(self, note_df, discharge_id, care_team):
        self.note_df = note_df
        self.discharge_id = discharge_id
        self.care_team = care_team
        self.care_team_edges = [sorted(edge) for edge in combinations(care_team, 2)]
        # Set of members for O(1) membership tests while building the graph,
        # instead of scanning care_team_edges for every candidate pair.
        self._team_members = set(care_team)
        self.G = nx.Graph()
        self.unique_dates = note_df.date.unique()
        self.__create_graph()

    def __create_graph(self):
        """Add one unit of edge weight per (date, discharge) a pair shares."""
        # sort=False keeps groups in first-appearance order, the same order
        # that unique() produced for the original per-date/per-discharge loops.
        for _, notes_for_date in self.note_df.groupby('date', sort=False):
            drs_by_discharge = notes_for_date.groupby('discharge_id', sort=False)['dr']
            for _, drs in drs_by_discharge:
                # Keep only team members before pairing: a pair of distinct
                # doctors is a care-team edge exactly when both are members.
                members_present = [dr for dr in drs.unique()
                                   if dr in self._team_members]
                for edge in combinations(members_present, 2):
                    self.__add_edge_to_G(edge)

    def __add_edge_to_G(self, edge):
        """Create ``edge`` with weight 1, or increment its existing weight."""
        data = self.G.get_edge_data(*edge, default=None)
        weight = 1 if data is None else data['weight'] + 1
        self.G.add_edge(*edge, weight=weight)

class CareDate:
    """All care teams that wrote notes on one date of care.

    ``care_team_dict`` maps each ``discharge_id`` present in
    ``notes_for_care_date`` to the array of distinct ``dr`` values recorded
    for that discharge. Iterating the object yields one ``CareTeam`` per
    entry, each built from ``note_df``.
    """

    def __init__(self, note_df, notes_for_care_date, date_of_care):
        self.note_df = note_df
        self.notes_for_care_date = notes_for_care_date
        self.date_of_care = date_of_care
        self.care_team_dict = {}
        self.__create_care_team_dict()

    def __create_care_team_dict(self):
        """Fill ``care_team_dict`` from the notes of the care date."""
        day_notes = self.notes_for_care_date
        for team_key in day_notes.discharge_id.unique():
            # Rows of this discharge only, selected with a boolean mask.
            rows_for_team = day_notes[day_notes.discharge_id == team_key]
            self.care_team_dict[team_key] = rows_for_team.dr.unique()

    def __iter__(self):
        """Yield a ``CareTeam`` for every discharge seen on the care date."""
        yield from (
            CareTeam(self.note_df, team_key, members)
            for team_key, members in self.care_team_dict.items()
        )
    
class TeamworkStudy:
    """Slide a fixed-length window over a notes table, one care date at a time.

    For every window start in ``date_range`` the window covers
    ``[start, start + DELTA]`` (both ends inclusive) and the date of care is
    the day after the window ends. Iteration yields a ``CareDate`` for each
    date of care that has at least one note.

    Parameters
    ----------
    note_df : pandas.DataFrame
        Notes table with a ``date`` column. It is not modified; a copy
        sorted by ``date`` is stored as ``self.note_df``.
    window_in_days : int
        Window length in days (``DELTA``).
    step_in_days : int
        Distance in days between consecutive window starts.
    """

    def __init__(self, note_df, window_in_days, step_in_days):
        # Sort into a new frame so the caller's DataFrame keeps its order.
        note_df = note_df.sort_values('date')
        self.note_df = note_df
        self.DELTA = np.timedelta64(window_in_days, 'D')
        step = np.timedelta64(step_in_days, 'D')
        if note_df.empty:
            # No notes means no windows; avoids an IndexError on .iloc[0].
            self.date_range = np.array([], dtype='datetime64[D]')
            return
        first_date = note_df['date'].iloc[0]
        last_date = note_df['date'].iloc[-1]
        # Window starts; the stop bound is exclusive, so the latest window
        # start is strictly before last_date - DELTA.
        self.date_range = np.arange(first_date, last_date - self.DELTA, step)

    def __iter__(self):
        """Yield a ``CareDate`` for each date of care that has notes."""
        one_day = np.timedelta64(1, 'D')
        for start_date in self.date_range:
            end_date = start_date + self.DELTA
            date_of_care = end_date + one_day
            notes_for_care_date = self.note_df.query('date == @date_of_care')
            if notes_for_care_date.empty:
                # Skip before running the window query, which is unused here.
                continue
            notes_in_window = self.note_df.query('date >= @start_date & date <= @end_date')
            yield CareDate(notes_in_window, notes_for_care_date, date_of_care)
            

In [2]:
# Load the notes table; parse_dates=[2] parses the column at position 2 as
# dates (presumably the 'date' column used by the classes -- confirm against
# the CSV header).
note_df = pd.read_csv('../data/notes_test.csv', parse_dates=[2])

def get_output(care_team):
    """Summarise a care team's collaboration graph as a dict of metrics.

    Parameters
    ----------
    care_team : CareTeam
        Object exposing ``G`` (a weighted networkx graph), ``care_team``
        (the team members) and ``discharge_id``.

    Returns
    -------
    dict
        ``discharge_id``, ``avg_clust`` (mean clustering coefficient over the
        nodes of ``G``, 0 for an empty graph), ``cumulative_experience`` (sum
        of edge weights minus the number of edges),
        ``avg_cumulative_experience`` (cumulative experience divided by the
        number of team members, 0 for an empty team), ``team_edge_size``
        (number of edges) and ``team_size`` (number of nodes in ``G``).
    """
    graph = care_team.G
    coefficients = nx.clustering(graph)  # clustering coefficient per node
    avg_clust = sum(coefficients.values()) / len(coefficients) if coefficients else 0
    experience = graph.size(weight='weight')  # sum of all edge weights
    team_edge_size = graph.number_of_edges()
    # Every edge starts at weight 1, so subtracting the edge count leaves
    # only the weight added by repeated co-occurrences.
    cumulative_experience = experience - team_edge_size
    member_count = len(care_team.care_team)
    # Same divide-by-zero guard as avg_clust, for an empty team.
    avg_cumulative_experience = (
        cumulative_experience / member_count if member_count else 0
    )

    return {
        'discharge_id': care_team.discharge_id,
        'avg_clust': avg_clust,
        'cumulative_experience': cumulative_experience,
        'avg_cumulative_experience': avg_cumulative_experience,
        "team_edge_size": team_edge_size,
        "team_size": graph.number_of_nodes()
    }

# Window length and step between window starts, both in days
# (passed to TeamworkStudy as window_in_days and step_in_days).
WINDOW = 90
STEP = 1

# Iterable that yields one CareDate per date of care that has notes.
get_care_dates = TeamworkStudy(note_df, WINDOW, STEP)
    
# Print the metrics dict of every care team, grouped under its care date.
for care_date in get_care_dates:
    print(f'Care Date: {care_date.date_of_care}')
    for care_team in care_date:
        print(get_output(care_team))

Care Date: 2019-04-15T00:00:00.000000
{'discharge_id': 6, 'avg_clust': 1.0, 'cumulative_experience': 6.0, 'avg_cumulative_experience': 0.8571428571428571, 'team_edge_size': 21, 'team_size': 7}
Care Date: 2019-08-15T00:00:00.000000
{'discharge_id': 7, 'avg_clust': 0, 'cumulative_experience': 0.0, 'avg_cumulative_experience': 0.0, 'team_edge_size': 0, 'team_size': 0}
