## Define classes to organize network creation logic
One collaboration graph is built for each whole 90-day window, so this may need to be refactored to compute the coefficients per team per day.

In [1]:
import random
from datetime import datetime, date, timedelta
from itertools import chain
import pandas as pd
import networkx as nx
from itertools import combinations  
import numpy as np
import time

class CollaborationNetworkGraph:
    """Weighted, undirected collaboration graph built from a notes table.

    Nodes are values of the ``dr`` column. Two doctors are connected when
    both appear on notes with the same ``discharge_id`` on the same ``date``;
    the edge attribute ``weight`` counts how many such (date, discharge_id)
    groups the pair shares. A doctor who is alone in every group is never
    added to the graph, because nodes are only created through edges.

    Attributes:
        note_df: the DataFrame the graph was built from.
        G: the resulting ``networkx.Graph``.
        unique_dates: distinct values of the ``date`` column, in order of
            first appearance.
    """

    def __init__(self, note_df):
        """Build the graph immediately from ``note_df``.

        ``note_df`` must expose ``date``, ``discharge_id`` and ``dr``
        columns. Callers are expected to pass it sorted by date.
        """
        self.note_df = note_df
        self.G = nx.Graph()
        self.unique_dates = note_df.date.unique()
        self.__create_graph()

    def __create_graph(self):
        """Add one edge increment per doctor pair per (date, discharge_id)."""
        # groupby partitions the frame once instead of re-scanning it with a
        # query for every date and every discharge id. sort=False keeps the
        # groups in order of first appearance, matching iteration over
        # .unique().
        for _, notes_for_date in self.note_df.groupby('date', sort=False):
            by_discharge = notes_for_date.groupby('discharge_id', sort=False)
            for _, team_notes in by_discharge:
                for dr_pair in combinations(team_notes.dr.unique(), 2):
                    self.__add_edge_to_G(*dr_pair)
                # NOTE: this is where per-team calculations may need to go;
                # this method could become public and yield results for
                # each team.

    def __add_edge_to_G(self, name1, name2):
        """Create the edge with weight 1, or add 1 to an existing weight."""
        if self.G.has_edge(name1, name2):
            self.G[name1][name2]['weight'] += 1
        else:
            self.G.add_edge(name1, name2, weight=1)
        
class GraphGenerator:
    """Iterable that yields one collaboration graph per sliding date window.

    Windows start at the earliest note date and advance by ``step_in_days``.
    A window is produced only if it fits entirely inside the data, i.e. its
    start is no later than ``last_date - window_in_days``.

    Attributes:
        note_df: a copy of the input notes, sorted by ``date``.
        DELTA: the window length as a ``numpy.timedelta64`` in days.
        date_range: numpy array holding the start of every window.
    """

    def __init__(self, note_df, window_in_days, step_in_days):
        """Prepare the window start dates.

        Args:
            note_df: DataFrame with a ``date`` column; assumed to be a
                timezone-naive datetime64 column (e.g. from
                ``parse_dates``).
            window_in_days: window length in whole days.
            step_in_days: distance between consecutive window starts, in
                whole days; must be positive.

        Raises:
            ValueError: if ``step_in_days`` is not positive.
        """
        if step_in_days <= 0:
            raise ValueError('step_in_days must be a positive number of days')

        # Sort a copy: sorting in place would silently reorder the caller's
        # DataFrame.
        self.note_df = note_df.sort_values('date')
        self.DELTA = np.timedelta64(window_in_days, 'D')
        step = np.timedelta64(step_in_days, 'D')

        # Rows without a date cannot fall inside any window, so they are
        # ignored when locating the first and last date.
        dates = self.note_df['date'].dropna().to_numpy()
        if dates.size == 0:
            self.date_range = np.array([], dtype='datetime64[ns]')
            return

        first_date, last_date = dates[0], dates[-1]
        # Room left for sliding the window after placing it at first_date.
        slack = (last_date - first_date) - self.DELTA
        if slack >= np.timedelta64(0, 'D'):
            # +1 keeps the window that ends exactly on the last date; an
            # exclusive stop would drop it.
            n_windows = int(slack // step) + 1
        else:
            n_windows = 0
        self.date_range = first_date + np.arange(n_windows) * step

    def __iter__(self):
        """Yield a ``CollaborationNetworkGraph`` for each window.

        Both window ends are inclusive, so a window covers the dates from
        ``start_date`` through ``start_date + DELTA``.
        """
        dates = self.note_df['date']
        for start_date in self.date_range:
            end_date = start_date + self.DELTA
            in_window = (dates >= start_date) & (dates <= end_date)
            yield CollaborationNetworkGraph(self.note_df[in_window])

In [3]:
# Load the notes table; parse_dates=[2] parses the column at index 2 as
# datetimes (presumably the 'date' column used by the classes above -- confirm
# against the CSV header).
note_df = pd.read_csv('./data/notes.csv', parse_dates=[2])

# Window length and step between window starts, both in days.
WINDOW = 90
STEP = 7

# Iterable producing one CollaborationNetworkGraph per sliding window.
get_graphs = GraphGenerator(note_df, WINDOW, STEP)
    
# Print the per-node clustering coefficients of every window's graph.
# NOTE(review): nx.clustering is called without a weight argument, so the edge
# weights accumulated while building the graph appear to be unused here.
for graph in get_graphs:
    clust_coefficients = nx.clustering(graph.G)
    print(clust_coefficients)
    

{'Myrtle George': 0.9444444444444444, 'Victoria Washington': 0.9444444444444444, 'Mercedes Arias': 0.9444444444444444, 'Albert Romero': 0.9444444444444444, 'Margie Meyer': 1.0, 'Evan Frazier': 0.9444444444444444, 'Jorge Garcia': 0.9444444444444444, 'Brad Palmer': 1.0, 'Neil Mitchell': 0.9444444444444444, 'Allan Murphy': 1.0}
{'Neil Mitchell': 0.8571428571428571, 'Jorge Garcia': 0.8333333333333334, 'Evan Frazier': 0.9285714285714286, 'Brad Palmer': 0.8571428571428571, 'Mercedes Arias': 0.9285714285714286, 'Albert Romero': 0.8333333333333334, 'Myrtle George': 0.9285714285714286, 'Victoria Washington': 0.8333333333333334, 'Allan Murphy': 1.0, 'Margie Meyer': 0.9}
{'Evan Frazier': 0.9285714285714286, 'Mercedes Arias': 0.9285714285714286, 'Jorge Garcia': 0.8333333333333334, 'Brad Palmer': 0.8571428571428571, 'Myrtle George': 0.9285714285714286, 'Victoria Washington': 0.8333333333333334, 'Allan Murphy': 1.0, 'Neil Mitchell': 0.8571428571428571, 'Margie Meyer': 0.9, 'Albert Romero': 0.8333333