## Eligibility for mobilization - Analysis

In [1]:
#! pip install pandas numpy duckdb seaborn matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import pyCLIF

Loaded configuration from config.json
{'site_name': 'UCMC', 'tables_path': '/Users/kavenchhikara/Desktop/CLIF/CLIF-UCMC/rclif/c19', 'file_type': 'parquet'}


In [14]:
final_df = pd.read_parquet('../output/intermediate/final_df.parquet')

In [8]:
final_df.dtypes

hospitalization_id       object
recorded_date            object
recorded_hour             int32
time_from_vent            int64
min_resp_rate_obs       float64
min_lpm_set             float64
min_fio2_set            float64
min_peep_set            float64
max_resp_rate_obs       float64
max_lpm_set             float64
max_fio2_set            float64
max_peep_set            float64
hourly_trach            float64
hourly_on_vent          float64
min_norepinephrine      float64
max_norepinephrine      float64
ne_calc_min             float64
ne_calc_max             float64
nicardipine_flag        float64
nitroprusside_flag      float64
clevidipine_flag        float64
red_meds_flag           float64
max_dbp                 float64
max_heart_rate          float64
max_map                 float64
max_respiratory_rate    float64
max_sbp                 float64
max_spo2                float64
min_dbp                 float64
min_heart_rate          float64
min_map                 float64
min_resp

### Patel et al. Criteria:

Cardio
* Mean arterial blood pressure: 65-110 mm Hg
* Systolic blood pressure: ≤ 200 mm Hg
* Heart rate: 40-130 beats per minute

Respiratory
* Respiratory rate: 5-40 breaths per minute
* Pulse oximetry: ≥ 88%

In [9]:
# Apply Patel et al. Criteria

# 1. Mean arterial blood pressure: 65-110 mm Hg
final_df['patel_map_flag'] = (
    (final_df['min_map'] >= 65) & (final_df['max_map'] <= 110)
).astype(int)

# 2. Systolic blood pressure: ≤ 200 mm Hg
final_df['patel_sbp_flag'] = (
    final_df['max_sbp'] <= 200
).astype(int)

# 3. Heart rate (Pulse): 40-130 beats per minute
final_df['patel_pulse_flag'] = (
    (final_df['min_heart_rate'] >= 40) & (final_df['max_heart_rate'] <= 130)
).astype(int)

# 4. Respiratory rate: 5-40 breaths per minute
final_df['patel_resp_rate_flag'] = (
    (final_df['min_respiratory_rate'] >= 5) & (final_df['max_respiratory_rate'] <= 40)
).astype(int)

# 5. Pulse oximetry (SpO2): ≥ 88%
final_df['patel_spo2_flag'] = (
    final_df['min_spo2'] >= 88
).astype(int)

# Resp flag: Combines respiratory rate and SpO2 criteria
final_df['patel_resp_flag'] = (
    final_df['patel_resp_rate_flag'] &
    final_df['patel_spo2_flag']
).astype(int)

# Cardio flag: Combines MAP, SBP, and Pulse criteria
final_df['patel_cardio_flag'] = (
    final_df['patel_map_flag'] &
    final_df['patel_sbp_flag'] &
    final_df['patel_pulse_flag']
).astype(int)

# Step 2: Create the overall Patel flag
final_df['patel_flag'] = (
    final_df['patel_map_flag'] &
    final_df['patel_sbp_flag'] &
    final_df['patel_pulse_flag'] &
    final_df['patel_resp_rate_flag'] &
    final_df['patel_spo2_flag']
)

### TEAM criteria

Cardio
* Heart rate: ≤ 150 bpm
* Most recent lactate: ≤ 4.0 mmol/L
* Noradrenaline infusion rate: 0.1-0.2 mcg/kg/min (inclusive)- I used calculated NE for this. 

Respiratory
* Sufficient respiratory stability:
    *  FiO2: ≤ 0.6
    *  PEEP: ≤ 16 cm H2O (use peep_observed)
* Current respiratory rate: ≤ 45 (use resp_rate_obs)

In [11]:
# 1. Heart rate: ≤ 150 bpm
final_df['team_pulse_flag'] = (
    final_df['max_heart_rate'] <= 150
).astype(int)

# 2. Most recent lactate: ≤ 4.0 mmol/L
final_df['team_lactate_flag'] = (
    final_df['lactate'] <= 4.0
).astype(int)

# 3. Noradrenaline infusion rate: 0.1-0.2 mcg/kg/min
final_df['team_ne_flag'] = (
    # (final_df['ne_calc_min'] >= 0.1) & (final_df['ne_calc_max'] <= 0.2)
    final_df['ne_calc_max'] <= 0.2
).astype(int)

# 4. Sufficient respiratory stability:
#    a. FiO2: ≤ 0.6
final_df['team_fio2_flag'] = (
    final_df['min_fio2_set'] <= 0.6
).astype(int)

#    b. PEEP: ≤ 16 cm H2O
final_df['team_peep_flag'] = (
    final_df['max_peep_set'] <= 16
).astype(int)

# 5. Current respiratory rate: ≤ 45
final_df['team_resp_rate_flag'] = (
    final_df['max_resp_rate_obs'] <= 45
).astype(int)

# Cardio flag: Combines heart rate, lactate, and norepinephrine criteria
final_df['team_cardio_flag'] = (
    final_df['team_pulse_flag'] &
    final_df['team_lactate_flag'] &
    final_df['team_ne_flag']
).astype(int)

# Resp flag: Combines FiO2, PEEP, and respiratory rate criteria
final_df['team_resp_flag'] = (
    final_df['team_fio2_flag'] &
    final_df['team_peep_flag'] &
    final_df['team_resp_rate_flag']
).astype(int)


# Step 2: Create the overall TEAM flag
final_df['team_flag'] = (
    final_df['team_pulse_flag'] &
    final_df['team_lactate_flag'] &
    final_df['team_ne_flag'] &
    final_df['team_fio2_flag'] &
    final_df['team_peep_flag'] &
    final_df['team_resp_rate_flag']
)

### Consensus criteria

* Green Criteria
    * Respiratory
        * Saturation  90% and
        * Respiratory rate ≤ 30 breaths/min
        * Current FiO2 ≤ 0.6 and
        * PEEP≤ 10cm H20
    * Cardiovascular:
        * Blood pressure greater than lower limit of target range (MAP 65+) while on no or low level of support (low support- define as <0.1 μg/kg/min of Norepi equivalents)
        * Heart rate <120 beats/min
        * lactate < 4mmol/L
        * HR > 40
* Yellow Criteria
    * Respiratory
        * Sat >= 90%
        * Current FiO2 >0.6
        * Respiratory rate >30breaths/min
        * PEEP >10cm H20
    * Cardiovascular
        * Blood pressure greater than lower limit of target range (MAP 65+) while receiving moderate level of support (medium-define as 0.1–0.3 μg/kg/min of Norepi equivalents)
        * Heart rate 120-150 beats/min
        * Shock of any cause with lactate >4mmol/L
        * HR > 40
* Red Criteria
    * Respiratory
        * Sat <90%
    * Cardiovascular
        * Below target MAP despite support (MAP <65) or
        * greater than lower limit MAP (MAP 65+) but on high level support (high defined as >0.3 μg/kg/min of Norepi equivalents)
        * IV therapy for hypertensive emergency (SBP >200mmHg or MAP >110 and on nicardipine, nitroprusside, or clevidipine gtt)
        * HR >150 bpm
        * Bradycardia <40


### Consensus criteria - redefined 

* all_red: All red subcomponents must be met.
* all_green: All green subcomponents must be met, and no red subcomponents are met.
* all_yellow: All yellow subcomponents must be met, no red subcomponents are met, and all green subcomponents are not met.
* any_yellow: Any yellow subcomponent is met, no green subcomponents are fully met, and no red subcomponents are met.
* any_yellow_or_green: Any yellow or green subcomponents are met, but no red subcomponents are met.

In [13]:
# Red Cardiovascular Criteria
final_df['red_resp_spo2_flag'] = (final_df['min_spo2'] < 90).astype(int)
final_df['red_map_flag'] = (final_df['min_map'] < 65).astype(int)
# High support (Norepinephrine equivalents > 0.3 μg/kg/min)
final_df['red_high_support_flag'] = (final_df['ne_calc_max'] > 0.3).astype(int)
# Hypertensive emergency criteria (SBP > 200 mmHg or MAP > 110 mmHg and on certain medications)
final_df['red_hypertensive_flag'] = (
    ((final_df['max_sbp'] > 200) | (final_df['max_map'] > 110)) &
    (final_df['nicardipine_flag'] | final_df['nitroprusside_flag'] | final_df['clevidipine_flag'])
).astype(int)
# High heart rate criteria (HR > 150 bpm)
final_df['red_pulse_high_flag'] = (final_df['max_heart_rate'] > 150).astype(int)
# Low heart rate criteria (HR < 40 bpm)
final_df['red_pulse_low_flag'] = (final_df['min_heart_rate'] < 40).astype(int)

# Yellow Respiratory Criteria
final_df['yellow_resp_spo2_flag'] = (final_df['min_spo2'] >= 90).astype(int)
final_df['yellow_fio2_flag'] = (final_df['min_fio2_set'] > 0.6).astype(int)
final_df['yellow_resp_rate_flag'] = (final_df['max_resp_rate_obs'] > 30).astype(int)
final_df['yellow_peep_flag'] = (final_df['min_peep_set'] > 10).astype(int)

# Yellow Cardiovascular Criteria
final_df['yellow_map_flag'] = (final_df['min_map'] >= 65).astype(int) & (final_df['ne_calc_max'].between(0.1, 0.3)).astype(int)
final_df['yellow_pulse_flag'] = (final_df['min_heart_rate'].between(120, 150)).astype(int)
final_df['yellow_lactate_flag'] = (final_df['lactate'] > 4).astype(int)

# Step 3: Implement Green Criteria
final_df['green_resp_spo2_flag'] = (final_df['min_spo2'] >= 90).astype(int)
final_df['green_resp_rate_flag'] = (final_df['max_resp_rate_obs'] <= 30).astype(int)
final_df['green_fio2_flag'] = (final_df['min_fio2_set'] <= 0.6).astype(int)
final_df['green_peep_flag'] = (final_df['min_peep_set'] <= 10).astype(int)

# Green Cardiovascular Criteria
final_df['green_map_flag'] = (final_df['min_map'] >= 65).astype(int) & (final_df['ne_calc_max'] < 0.1).astype(int)
final_df['green_pulse_flag'] = (final_df['min_heart_rate'] < 120).astype(int)
final_df['green_lactate_flag'] = (final_df['lactate'] < 4).astype(int)
final_df['green_hr_flag'] = (final_df['min_heart_rate'] > 40).astype(int)

TypeError: unsupported operand type(s) for |: 'float' and 'float'

In [12]:
# Step 1: Define 'all_red'
final_df['any_red'] = (
    final_df['red_resp_spo2_flag'] |
    final_df['red_map_flag'] |
    final_df['red_high_support_flag'] |
    final_df['red_hypertensive_flag'] |
    final_df['red_pulse_high_flag'] |
    final_df['red_pulse_low_flag']
).astype(int)

final_df['any_yellow'] = (
    (
        final_df['yellow_resp_spo2_flag'] |
        final_df['yellow_fio2_flag'] |
        final_df['yellow_resp_rate_flag'] |
        final_df['yellow_peep_flag'] |
        final_df['yellow_map_flag'] |
        final_df['yellow_pulse_flag'] |
        final_df['yellow_lactate_flag']
    )
).astype(int)

final_df['any_green'] = (
    final_df['green_resp_spo2_flag'] |
    final_df['green_resp_rate_flag'] |
    final_df['green_fio2_flag'] |
    final_df['green_peep_flag'] |
    final_df['green_map_flag'] |
    final_df['green_pulse_flag'] |
    final_df['green_lactate_flag'] |
    final_df['green_hr_flag'] 
).astype(int)


final_df['all_green'] = (
    final_df['green_resp_spo2_flag'] &
    final_df['green_resp_rate_flag'] &
    final_df['green_fio2_flag'] &
    final_df['green_peep_flag'] &
    final_df['green_map_flag'] &
    final_df['green_pulse_flag'] &
    final_df['green_lactate_flag'] &
    final_df['green_hr_flag'] 
).astype(int)

final_df['all_green_no_red'] = (
    final_df['green_resp_spo2_flag'] &
    final_df['green_resp_rate_flag'] &
    final_df['green_fio2_flag'] &
    final_df['green_peep_flag'] &
    final_df['green_map_flag'] &
    final_df['green_pulse_flag'] &
    final_df['green_lactate_flag'] &
    final_df['green_hr_flag'] &
    (final_df['any_red'] == 0)  # Ensure no red subcomponents are met
).astype(int)

final_df['all_green_no_red_yellow'] = (
    final_df['green_resp_spo2_flag'] &
    final_df['green_resp_rate_flag'] &
    final_df['green_fio2_flag'] &
    final_df['green_peep_flag'] &
    final_df['green_map_flag'] &
    final_df['green_pulse_flag'] &
    final_df['green_lactate_flag'] &
    final_df['green_hr_flag'] &
    (final_df['any_red'] == 0)  & # Ensure no red subcomponents are met
    (final_df['any_yellow'] == 0)  # Ensure no yellow subcomponents are met
).astype(int)


# Step 3: Define 'all_yellow_only' (all yellow subcomponents must be met, no red subcomponents, and no green subcomponents)
final_df['all_yellow_no_red_green'] = (
    final_df['yellow_resp_spo2_flag'] &
    final_df['yellow_fio2_flag'] &
    final_df['yellow_resp_rate_flag'] &
    final_df['yellow_peep_flag'] &
    final_df['yellow_map_flag'] &
    final_df['yellow_pulse_flag'] &
    final_df['yellow_lactate_flag'] &
    (final_df['any_red'] == 0) &  # Ensure no red subcomponents are met
    (final_df['any_green'] == 0)  # Ensure no green subcomponents are fully met
).astype(int)

# Step 4: Define 'any_yellow_only' (any yellow subcomponent is met, no green or red subcomponents are met)
final_df['any_yellow_no_red_green'] = (
    (
        final_df['yellow_resp_spo2_flag'] |
        final_df['yellow_fio2_flag'] |
        final_df['yellow_resp_rate_flag'] |
        final_df['yellow_peep_flag'] |
        final_df['yellow_map_flag'] |
        final_df['yellow_pulse_flag'] |
        final_df['yellow_lactate_flag']
    ) &
    (final_df['any_red'] == 0) &  # Ensure no red subcomponents are met
    (final_df['any_green'] == 0)  # Ensure no green subcomponents are fully met
).astype(int)

# Step 5: Define 'any_yellow_or_green' (any yellow or green subcomponent is met, but no red subcomponents are met)
final_df['any_yellow_or_green_no_red'] = (
    (
        final_df['yellow_resp_spo2_flag'] |
        final_df['yellow_fio2_flag'] |
        final_df['yellow_resp_rate_flag'] |
        final_df['yellow_peep_flag'] |
        final_df['yellow_map_flag'] |
        final_df['yellow_pulse_flag'] |
        final_df['yellow_lactate_flag'] |
        final_df['green_resp_spo2_flag'] |
        final_df['green_resp_rate_flag'] |
        final_df['green_fio2_flag'] |
        final_df['green_peep_flag'] |
        final_df['green_map_flag'] |
        final_df['green_pulse_flag'] |
        final_df['green_lactate_flag'] |
        final_df['green_hr_flag']
    ) &
    (final_df['any_red'] == 0)  # Ensure no red subcomponents are met
).astype(int)

final_df['yellow_resp_flag'] = (
    (
     final_df['yellow_resp_spo2_flag'] |
     final_df['yellow_fio2_flag'] |
     final_df['yellow_resp_rate_flag'] |
     final_df['yellow_peep_flag'] |
     final_df['green_resp_spo2_flag'] |
     final_df['green_resp_rate_flag'] |
     final_df['green_fio2_flag'] |
     final_df['green_peep_flag'] 
    )  &
    (final_df['any_red'] == 0)  # Ensure no red subcomponents are met
).astype(int)

final_df['yellow_cardio_flag'] = (
    (
    final_df['yellow_map_flag'] |
    final_df['yellow_pulse_flag'] |
    final_df['yellow_lactate_flag'] |
    final_df['green_map_flag'] |
    final_df['green_pulse_flag'] |
    final_df['green_lactate_flag'] |
    final_df['green_hr_flag']
    )&
    (final_df['any_red'] == 0)  # Ensure no red subcomponents are met
).astype(int)

KeyError: 'red_resp_spo2_flag'

In [None]:
# Optional: To summarize the results, you can print value counts for each flag
print(final_df[['any_red', 'any_yellow', 'any_green' ,  'all_green',
                'all_green_no_red', 'all_green_no_red_yellow', 'all_yellow_no_red_green', 
                'any_yellow_no_red_green','any_yellow_or_green_no_red' ]].sum())

In [None]:
## consensus criteria check

# Create a DataFrame to store the counts of criteria met
criteria_counts = final_df[['hospitalization_id', 'any_red', 'any_yellow', 'any_green' ,  'all_green',
                'all_green_no_red', 'all_green_no_red_yellow', 'all_yellow_no_red_green', 
                'any_yellow_no_red_green','any_yellow_or_green_no_red']].groupby('hospitalization_id').max()

# Plot the histograms
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
criteria_counts[['any_red', 'any_yellow', 'any_green' ,  'all_green',
                'all_green_no_red', 'all_green_no_red_yellow', 'all_yellow_no_red_green', 
                'any_yellow_no_red_green','any_yellow_or_green_no_red']].sum().plot(kind='bar', color=['red', 'yellow', 'green'])
plt.title('Number of Encounters Meeting Each Criterion')
plt.xlabel('Criteria')
plt.ylabel('Number of Encounters')

# Step 2: Check if any encounter met more than one criterion

# Add columns to check for overlaps
criteria_counts['multiple_flags'] = (criteria_counts[['any_red', 'any_yellow', 'any_green' ,  'all_green',
                'all_green_no_red', 'all_green_no_red_yellow', 'all_yellow_no_red_green', 
                'any_yellow_no_red_green','any_yellow_or_green_no_red']].sum(axis=1) > 1).astype(int)

# Histogram of encounters meeting more than one criterion
plt.subplot(1, 2, 2)
criteria_counts['multiple_flags'].value_counts().plot(kind='bar', color=['blue', 'orange'])
plt.title('Number of Encounters Meeting Multiple Criteria')
plt.xlabel('Met Multiple Criteria')
plt.ylabel('Number of Encounters')
plt.xticks([0, 1], ['No', 'Yes'], rotation=0)

plt.tight_layout()
plt.show()

## Final figures and tables

1. Figure 1: Percentage of encounter satisfying Patel, TEAM, and any yellow or GREEN criteria
2. Figure 2: Percentage of business hours each encounter was eligible for different criteria
3. Figure 3: Percentage of business hours not eligible for each criteria broken down by subcomponent failure


## Summary and Visualizations

In [None]:
# List of columns to keep for visualizations
columns_to_keep = [
    'hospitalization_id',
    'recorded_dttm',
    'recorded_date',
    'recorded_hour',
    'time',
    'time_biz',
    'patel_map_flag',
    'patel_sbp_flag',
    'patel_pulse_flag',
    'patel_resp_rate_flag',
    'patel_spo2_flag',
    'patel_flag',
    'team_pulse_flag',
    'team_lactate_flag',
    'team_ne_flag',
    'team_fio2_flag',
    'team_peep_flag',
    'team_resp_rate_flag',
    'team_flag',
    'red_resp_spo2_flag',
    'red_map_flag',
    'red_high_support_flag',
    'red_hypertensive_flag',
    'red_pulse_high_flag',
    'red_pulse_low_flag',
    'yellow_resp_spo2_flag',
    'yellow_fio2_flag',
    'yellow_resp_rate_flag',
    'yellow_peep_flag',
    'yellow_map_flag',
    'yellow_pulse_flag',
    'yellow_lactate_flag',
    'green_resp_spo2_flag',
    'green_resp_rate_flag',
    'green_fio2_flag',
    'green_peep_flag',
    'green_map_flag',
    # 'green_ne_flag',
    'green_pulse_flag',
    'green_lactate_flag',
    'green_hr_flag',
    'any_red',
    'any_yellow',
    'any_green',
    'all_green',
    'all_green_no_red',
    'all_green_no_red_yellow',
    'all_yellow_no_red_green',
    'any_yellow_no_red_green',
    'any_yellow_or_green_no_red',
    # ,'first_location_category', 
    # 'last_location_category',
    'hourly_on_vent',
    'patel_resp_flag',
    'patel_cardio_flag',
    'team_resp_flag',
    'team_cardio_flag',
    'yellow_resp_flag',
    'yellow_cardio_flag'
    
]

# Create a new DataFrame with only the specified columns
visualization_df = final_df[columns_to_keep]

In [None]:
def plot_criteria_histograms(df):
    criteria_flags = ['patel_flag', 'team_flag', 'all_green_no_red', 'any_yellow_or_green_no_red']

    plt.figure(figsize=(15, 10))

    for i, flag in enumerate(criteria_flags, 1):
        plt.subplot(3, 2, i)
        
        # Use sort_index() to ensure 0 is first and 1 is second
        df[flag].value_counts().sort_index().plot(kind='bar')
        
        plt.title(f'Distribution of {flag} Satisfaction')
        plt.xticks(rotation=0)
        plt.xlabel('Criteria Satisfaction')
        plt.ylabel('Number of hours')
        plt.xticks([0, 1], ['Not Satisfied', 'Satisfied'])  # Label the bars for clarity
    
    plt.tight_layout()
    plt.show()

# Call the function
plot_criteria_histograms(visualization_df)

### Average Total Hours Meeting Each Criterion (Patel, Green, Yellow, TEAM)

Calculate the total and average number of hours that an encounter meets each of the specified criteria.

Steps: 
1. Group the data by `hospitalization_id`.
2. For each encounter, sum the number of business hours (or total hours) that the patient meets the Patel, Green, Yellow, and TEAM criteria.
3. Compute the average number of hours across all `encounters`

In [None]:
def compute_avg_hours_by_criteria(df, criteria_columns):
    # For each encounter, sum the number of hours each criterion is met
    hours_criteria = df.groupby('hospitalization_id').agg({criterion: 'sum' for criterion in criteria_columns})
    
    # Calculate the mean number of hours for each criterion
    avg_hours_by_criteria = hours_criteria.mean().reset_index()
    avg_hours_by_criteria.columns = ['Criteria', 'Average Hours Met']
    
    return avg_hours_by_criteria

# Specify the columns for the different criteria
criteria_columns = ['patel_flag', 'team_flag', 'all_green_no_red', 'any_yellow_or_green_no_red']

# Calculate the average hours met for each criterion
avg_hours_by_criteria = compute_avg_hours_by_criteria(visualization_df, criteria_columns)

# Create the bar plot
plt.figure(figsize=(10, 6))
ax = sns.barplot(x='Criteria', y='Average Hours Met', data=avg_hours_by_criteria, color='skyblue')

# Add the average hours as labels on top of the bars
for i, row in avg_hours_by_criteria.iterrows():
    ax.text(i, row['Average Hours Met'] + 0.1, f'{row["Average Hours Met"]:.2f}', ha='center', color='black', fontsize=12)

# Customize the plot
plt.title('Average Number of Business Hours Each Encounter Was Eligible')
plt.xlabel('Criteria')
plt.ylabel('Average Number of Business Hours')
plt.xticks(rotation=45)

# Show the plot
plt.tight_layout()
plt.show()

### Total number of encounters meeting each criterion.

In [None]:
def count_encounters_meeting_criteria(df):
    total_encounters = df['hospitalization_id'].nunique()  # Calculate total unique encounters
    # For each encounter, check if they ever met the criteria and sum up
    criteria_counts = df.groupby('hospitalization_id').agg(
        patel_met=('patel_flag', lambda x: x.max()),
        team_met=('team_flag', lambda x: x.max()),
        all_green_no_red_met=('all_green_no_red', lambda x: x.max()),
        any_yellow_or_green_no_red_met=('any_yellow_or_green_no_red', lambda x: x.max()),
        any_red_met=('any_red', lambda x: x.max())
    ).sum().reset_index()

    criteria_counts.columns = ['Criteria', 'Number of Encounters']
    criteria_counts['Percentage'] = (criteria_counts['Number of Encounters'] / total_encounters) * 100
    
    return criteria_counts

# Generate the criteria comparison table
criteria_comparison_table = count_encounters_meeting_criteria(visualization_df)

# Plot the bar chart
plt.figure(figsize=(10, 6))
barplot = sns.barplot(x='Criteria', y='Number of Encounters', data=criteria_comparison_table, palette='viridis')
# Rotate the x-axis labels
plt.xticks(rotation=90)
# Add percentages on top of the bars
for index, row in criteria_comparison_table.iterrows():
    barplot.text(index, row['Number of Encounters'] + 0.5, f"{row['Percentage']:.1f}%", 
                 color='black', ha="center")

# Add labels and title
plt.xlabel('Criteria')
plt.ylabel('Number of Encounters')
plt.title('Total Number of Encounters Meeting Each Criterion')

plt.show()

### Time of day when different criteria are most likely to be met

In [None]:
def criteria_time_of_day(df):
    # Group by hour and calculate the mean of each flag, which gives us the proportion of time the criteria is met
    time_of_day_df = df.groupby('recorded_hour').agg(
        patel_met=('patel_flag', 'mean'),
        team_met=('team_flag', 'mean'),
        all_green_no_red=('all_green_no_red', 'mean'),
        any_yellow_or_green_no_red=('any_yellow_or_green_no_red', 'mean'),
        any_red=('any_red', 'mean')
    ).reset_index()
    
    # Convert proportions to percentages
    time_of_day_df[['patel_met', 'team_met', 'all_green_no_red', 'any_yellow_or_green_no_red', 'any_red']] *= 100
    
    # Plotting
    plt.figure(figsize=(12, 8))
    sns.lineplot(data=time_of_day_df, x='recorded_hour', y='patel_met', label='Patel Criteria', marker='o')
    sns.lineplot(data=time_of_day_df, x='recorded_hour', y='team_met', label='TEAM Criteria', marker='o')
    sns.lineplot(data=time_of_day_df, x='recorded_hour', y='all_green_no_red', label='Green Criteria', marker='o')
    sns.lineplot(data=time_of_day_df, x='recorded_hour', y='any_yellow_or_green_no_red', label='Yellow Criteria', marker='o')
    sns.lineplot(data=time_of_day_df, x='recorded_hour', y='any_red', label='Red Criteria', marker='o')

    # Adding labels and title
    plt.xlabel('Hour of Day')
    plt.ylabel('Percentage of Time Criteria Met (%)')
    plt.title('Comparison of Criteria Satisfaction by Time of Day')
    plt.xticks(range(8, 17))  # Ensure all hours are shown
    plt.legend(title='Criteria')
    plt.grid(True)
    plt.show()

# Call the function with visualization_df
criteria_time_of_day(visualization_df)

### Subcomponent Failure

In [None]:
team_subcomponents = [
    'team_pulse_flag', 'team_lactate_flag', 'team_ne_flag',
    'team_fio2_flag', 'team_peep_flag', 'team_resp_rate_flag'
]
team_subcomponents = [
    'team_pulse_flag', 'team_lactate_flag', 'team_ne_flag',
    'team_fio2_flag', 'team_peep_flag', 'team_resp_rate_flag'
]

failed_df = visualization_df[visualization_df['team_flag'] == 0]
failure_rates = failed_df[team_subcomponents].mean() * 100

In [None]:
def analyze_subcomponent_failures(df, criterion_flag, subcomponents):
    # Filter the DataFrame to encounters where the criterion failed
    failed_df = df[df[criterion_flag] == 0]
    
    # Calculate the failure rate for each subcomponent
    failure_rates = (failed_df[subcomponents] == 0).mean() * 100
    
    # Create a summary DataFrame
    summary_df = pd.DataFrame({
        'Subcomponent': subcomponents,
        'Failure Rate (%)': failure_rates
    }).sort_values(by='Failure Rate (%)', ascending=False)
    
    return summary_df

# Define subcomponents for each criterion
patel_subcomponents = [
    'patel_map_flag', 'patel_sbp_flag', 'patel_pulse_flag',
    'patel_resp_rate_flag', 'patel_spo2_flag'
]

team_subcomponents = [
    'team_pulse_flag', 'team_lactate_flag', 'team_ne_flag',
    'team_fio2_flag', 'team_peep_flag', 'team_resp_rate_flag'
]

green_subcomponents = [
    'green_resp_spo2_flag', 'green_resp_rate_flag', 'green_fio2_flag',
    'green_peep_flag', 'green_map_flag',  'green_pulse_flag'
]

# Analyze failures for Patel, TEAM, and Green criteria
patel_failure_summary = analyze_subcomponent_failures(visualization_df, 'patel_flag', patel_subcomponents)
team_failure_summary = analyze_subcomponent_failures(visualization_df, 'team_flag', team_subcomponents)
green_failure_summary = analyze_subcomponent_failures(visualization_df, 'all_green_no_red', green_subcomponents)

# Plot the results for each criterion
def plot_failure_summary(failure_summary, criterion_name):
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Failure Rate (%)', y='Subcomponent', data=failure_summary, palette='viridis')
    plt.title(f'Subcomponent Failures for {criterion_name} Criterion')
    plt.xlabel('Failure Rate (%)')
    plt.ylabel('Subcomponent')
    plt.show()

# Plot for Patel Criterion
plot_failure_summary(patel_failure_summary, 'Patel')

# Plot for TEAM Criterion
plot_failure_summary(team_failure_summary, 'TEAM')

# Plot for Green Criterion
plot_failure_summary(green_failure_summary, 'Green')

###  Average number of business hours each encounter is eligible

In [None]:
# Count the number of business hours each encounter was eligible for each criterion by counting where the flag is 1
hours_df = visualization_df.groupby('hospitalization_id').agg(
    patel_hours=('patel_flag', 'sum'),
    team_hours=('team_flag', 'sum'),
    green_hours=('all_green_no_red', 'sum'),
    yellow_hours=('any_yellow_or_green_no_red', 'sum'),
    red_hours=('any_red', 'sum')
).reset_index()

# Calculate the average number of hours across all encounters
avg_hours_df = hours_df[['patel_hours', 'team_hours', 'green_hours', 'yellow_hours', 'red_hours']].mean()

# Sort the results in the order Patel, TEAM, Green, Yellow, Red
avg_hours_df = avg_hours_df[['patel_hours', 'team_hours', 'green_hours', 'yellow_hours', 'red_hours']]

In [None]:
# Bar Chart
plt.figure(figsize=(10, 6))
avg_hours_df.plot(kind='bar', color='skyblue')
plt.title('Average Number of Business Hours Each Encounter Was Eligible')
plt.xlabel('Criteria')
plt.ylabel('Average Number of Business Hours')
plt.xticks(rotation=45)
plt.show()

In [None]:
avg_hours_df

### Average Hours Criteria Met on Days 1, 2, and 3

Determine how many hours the criteria are met on specific calendar days (Day 1, Day 2, Day 3 after intubation).

1. First, assign a calendar_day column that represents the calendar day relative to intubation.
2. Use the recorded_dttm to calculate the difference from the intubation time, and categorize rows into Day 1, Day 2, Day 3.
3. For each encounter, group the data by calendar_day and hospitalization_id and sum the hours that meet each criterion.
4. Compute the average number of hours for each criterion per day.


In [None]:
visualization_df = pd.merge(visualization_df, vent_start_end[['hospitalization_id', 'vent_start_time']], on='hospitalization_id', how='left')
# Ensure both 'vent_start_time' and 'recorded_dttm' are in the same datetime64[ns] format
visualization_df['vent_start_time'] = pd.to_datetime(visualization_df['vent_start_time'].values.astype('datetime64[ns]'))
visualization_df['recorded_dttm'] = pd.to_datetime(visualization_df['recorded_dttm'].values.astype('datetime64[ns]'))

# Check the data types to confirm
print(visualization_df[['vent_start_time', 'recorded_dttm']].dtypes)

In [None]:
def assign_calendar_day(df, intubation_col, recorded_col):
    # Calculate the difference in days between intubation and recorded time
    df['calendar_day'] = (df[recorded_col] - df[intubation_col]).dt.days + 1
    return df

# Assign calendar day for each encounter
visualization_df = assign_calendar_day(visualization_df, 'vent_start_time', 'recorded_dttm')

# Verify the results
print(visualization_df[['hospitalization_id', 'vent_start_time', 'recorded_dttm', 'calendar_day']].head())

In [None]:
def compute_avg_hours_by_day(df, criteria_columns):
    # Group by hospitalization_id and calendar_day
    hours_per_day = df.groupby(['hospitalization_id', 'calendar_day']).agg({criterion: 'sum' for criterion in criteria_columns}).reset_index()
    
    # Filter for Day 1, Day 2, Day 3
    hours_per_day = hours_per_day[hours_per_day['calendar_day'].isin([1, 2, 3])]
    
    # Calculate the average number of hours for each day
    avg_hours_by_day = hours_per_day.groupby('calendar_day').mean().reset_index()
    return avg_hours_by_day

# Calculate the average number of hours each criterion is met on Day 1, 2, and 3
avg_hours_by_day = compute_avg_hours_by_day(visualization_df, criteria_columns)


In [None]:
def plot_avg_hours_by_day_bar(avg_hours_by_day, criteria_columns):
    # Melt the DataFrame for easier plotting with seaborn
    melted_df = avg_hours_by_day.melt(id_vars='calendar_day', value_vars=criteria_columns, var_name='Criteria', value_name='Average Hours Met')

    plt.figure(figsize=(10, 6))
    
    # Create a bar plot
    sns.barplot(x='calendar_day', y='Average Hours Met', hue='Criteria', data=melted_df, palette='viridis')
    
    # Add custom x-axis labels for Day 1, Day 2, Day 3
    plt.xticks(ticks=[0, 1, 2], labels=["Day 1", "Day 2", "Day 3"])
    
    # Add labels and title
    plt.title('Average Number of Hours Criteria Met by Day')
    plt.xlabel('Calendar Day')
    plt.ylabel('Average Hours Met')
    
    # Move the legend to the bottom
    plt.legend(title='Criteria', loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=3)
    
    # Show plot
    plt.tight_layout()
    plt.show()

# Plot the average hours by day using a bar plot
plot_avg_hours_by_day_bar(avg_hours_by_day, criteria_columns)

In [None]:


# Create a DataFrame for parallel categories plot
parallel_df = visualization_df[['patel_flag', 'team_flag',  'all_green_no_red', 'any_yellow_or_green_no_red']]

# Create parallel categories plot
fig = px.parallel_categories(parallel_df, dimensions=['patel_flag', 'team_flag', 'all_green_no_red', 'any_yellow_or_green_no_red'],
                             color="patel_flag",
                             labels={'patel_flag': 'Patel Met', 'team_flag': 'TEAM Met',  'all_green_no_red': 'Green Flag', 'any_yellow_or_green_no_red': 'Yellow Flag'},
                             color_continuous_scale=px.colors.sequential.Inferno)

fig.update_layout(title="Parallel Categories Plot: Comparison of Criteria Satisfaction and Location")
fig.show()