In [1]:
import os
import sys
import time

import numpy as np
import pandas as pd

# Import from the parent directory
sys.path.append(os.path.join(os.getcwd(), ".."))
from teamwork import teamwork as tw
from utils.utils import *

In [2]:
# Start the wall-clock timer; a later cell subtracts `start_time` from its own
# reading to report how long loading plus corpus construction took.
start_time = time.perf_counter()

# Visit-level table, with both date columns parsed to datetimes on load.
discharge_table = pd.read_csv(
    discharges_test_file,
    parse_dates=["arrive_date", "discharge_date"],
)

# Note-level table. Its visit key is renamed from "discharge_id" to "id" so it
# matches the discharge table, then the discharge columns (including the
# admission datetime) are attached with a right join on "id", which keeps
# every row of the discharge table.
notes_table = (
    pd.read_csv(notes_test_file, parse_dates=["date"])
    .rename(columns={"discharge_id": "id"})
    .merge(discharge_table, on="id", how="right")
)

In [9]:
import pprint

# Build a printable version of the notes in which datetime columns are rendered
# as text. The conversion is done on a copy (DataFrame.astype returns a new
# frame) so that `notes_table` itself keeps its datetime dtypes: the cells that
# follow pass it to TeamworkCorpus and compare its "date" column against
# datetime values, which would not work on strings.
datetime_columns = notes_table.select_dtypes(include=["datetime64"]).columns.tolist()
notes_printable = notes_table.astype({column: str for column in datetime_columns})

# One dictionary per row, keyed by the row's index label.
notes_dict = notes_printable.to_dict(orient="index")
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(notes_dict)

{   0: {   'age': 75,
           'arrive_date': '2019-01-01 00:00:00',
           'date': '2019-01-01 19:15:00',
           'discharge_date': '2019-01-01',
           'disposition': 1,
           'dr': 'Brad Palmer',
           'id': 0,
           'patient': 'patient1'},
    1: {   'age': 68,
           'arrive_date': '2019-01-24 00:00:00',
           'date': '2019-01-24 10:19:00',
           'discharge_date': '2019-01-24',
           'disposition': 0,
           'dr': 'Albert Romero',
           'id': 1,
           'patient': 'patient2'},
    2: {   'age': 68,
           'arrive_date': '2019-01-24 00:00:00',
           'date': '2019-01-24 17:09:00',
           'discharge_date': '2019-01-24',
           'disposition': 0,
           'dr': 'Margie Meyer',
           'id': 1,
           'patient': 'patient2'},
    3: {   'age': 68,
           'arrive_date': '2019-01-24 00:00:00',
           'date': '2019-01-24 16:48:00',
           'discharge_date': '2019-01-24',
           'disposition':

In [3]:
# Keyword arguments for TeamworkCorpus: each key is a constant exported by the
# teamwork module, each value is the name of a column in `notes_table`.
teamwork_columns = {
    tw.VISIT_ID: "id",
    tw.ADMISSION_DATE: "arrive_date",
    tw.NOTE_AUTHOR: "dr",
    tw.NOTE_DATE: "date",
}
corpus = tw.TeamworkCorpus(notes_table, **teamwork_columns)

# Elapsed time is measured from `start_time`, which was set in an earlier cell,
# so it covers everything executed since then, not only the constructor call.
stop_time = time.perf_counter()
timing_report = f"It took {stop_time - start_time} seconds or {(stop_time - start_time) / 60} minutes to create teamwork corpus"
print(timing_report)

# Number of entries held in the corpus' team-experience dictionary.
team_experience_count = len(corpus.team_experience_dict)
print(team_experience_count)

It took 0.1147093000181485 seconds or 0.0019118216669691416 minutes to create teamwork corpus
1


In [4]:
# Visit id inspected in detail; not used in this cell, but the next cell uses it
# to look up the admission date.
vid = 6

# For every entry of the corpus' team-experience dictionary, hand its "graph",
# its key and its "team" to get_output_for_row and store the result under the
# same key.
experience_analysis_dict = {
    visit_key: get_output_for_row(entry["graph"], visit_key, entry["team"])
    for visit_key, entry in corpus.team_experience_dict.items()
}

# orient="index": the dictionary keys become the row index of the frame.
experience_analysis_df = pd.DataFrame.from_dict(
    experience_analysis_dict,
    orient="index",
)
experience_analysis_df.head()

Unnamed: 0,visit_id,avg_clust,sum_clust,team_size,potential_edges,team_edge_size,cumulative_experience,avg_cumulative_experience
6,6,0.627976,3.139882,7,21,10,3.0,0.142857


In [5]:
# Admission timestamp of visit `vid`. `.values[0]` takes the first match as a
# numpy datetime64, which is the representation the prints below display.
admit_date = discharge_table.loc[discharge_table["id"] == vid, "arrive_date"].values[0]

# End of the window: two days after admission (exclusive, see the `<` below).
cutoff_date = admit_date + np.timedelta64(2, "D")
print(f"admit  date: {admit_date}")
print(f"cutoff date: {cutoff_date}")

# Notes written inside [admit_date, cutoff_date).
# NOTE(review): rows are selected by note date only, not by `id == vid` --
# confirm that notes from other visits in the same window are meant to count.
in_window = (notes_table["date"] >= admit_date) & (notes_table["date"] < cutoff_date)

# Keep the first note per author ("dr"). drop_duplicates is chained and returns
# a new frame; calling it with inplace=True on a `.loc` selection of
# `notes_table` can raise SettingWithCopyWarning.
team_notes = notes_table.loc[in_window].drop_duplicates(subset=["dr"])
team_notes.head(20)

admit  date: 2019-04-15T19:15:00.000000000
cutoff date: 2019-04-17T19:15:00.000000000


Unnamed: 0,id,dr,date,patient,arrive_date,discharge_date,disposition,age
12,6,Albert Romero,2019-04-16 01:29:00,patient4,2019-04-15 19:15:00,2019-04-15,0,66
13,6,Margie Meyer,2019-04-15 20:19:00,patient4,2019-04-15 19:15:00,2019-04-15,0,66
14,6,Evan Frazier,2019-04-16 04:43:00,patient4,2019-04-15 19:15:00,2019-04-15,0,66
16,6,Victoria Washington,2019-04-16 07:00:00,patient4,2019-04-15 19:15:00,2019-04-15,0,66
18,6,Neil Mitchell,2019-04-15 21:25:00,patient4,2019-04-15 19:15:00,2019-04-15,0,66
19,6,Henry Philofsky,2019-04-16 01:25:00,patient4,2019-04-15 19:15:00,2019-04-15,0,66
20,6,Grant DeLong,2019-04-16 03:25:00,patient4,2019-04-15 19:15:00,2019-04-15,0,66
