# Imports

In [None]:
import os
from importlib import reload

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from scipy.stats import gaussian_kde

import modules.data_visualization_utils as dvu

# my_computer_fpath = "C:\\Users\\dfber\\OneDrive - Mass General Brigham\\Epidural project\\Data\\"
# Folder that holds the project data files. File names are appended to this
# string directly, so it must end with a path separator.
# Set the EPIDURAL_DATA_DIR environment variable to read from a different
# folder without editing the notebook; otherwise the original local path is used.
my_computer_fpath = (
    os.environ.get("EPIDURAL_DATA_DIR")
    or "C:\\Users\\User\\OneDrive - Mass General Brigham\\Epidural project\\Data\\"
)
if not my_computer_fpath.endswith(("\\", "/")):
    # An override given without a trailing separator would otherwise be glued
    # straight onto the file name.
    my_computer_fpath += os.sep

# Load Data

In [None]:
# Read the processed and imputed dataset from the project data folder
data_csv_path = my_computer_fpath + 'processed_and_imputed_merlin_data.csv'
df = pd.read_csv(data_csv_path)

In [None]:
# neuraxial_catheter_df holds every row of df (no filtering). It is a copy, not
# an alias, so edits made to it later in the notebook cannot silently change df.
neuraxial_catheter_df = df.copy()

# epidural_df keeps only the rows whose procedure type is 'epidural' or 'dpe'.
# Copied so it is an independent frame rather than a slice of df.
epidural_df = df[df['true_procedure_type_incl_dpe'].isin(['epidural', 'dpe'])].copy()

# Correlation Matrix

In [None]:
# Named groups of columns that belong together; handed to the correlation
# heatmap helper through its additional_groups argument.
related_groups = {
    'failure': [
        'failed_catheter',
        'has_subsequent_neuraxial_catheter',
        'has_subsequent_spinal',
        'has_subsequent_airway',
    ],
    'timing': [
        'placement_to_delivery_hours',
        'rom_thru_delivery_hours',
        'rom_to_placement_hours',
    ],
    'maternal_age_gp': ['maternal_age_years', 'gravidity_2047', 'parity_2048'],
    'multiple_gestation_and_labor_induction': ['multiple_gestation', 'labor_induction'],
    'baby_size': ['gestational_age_weeks', 'baby_weight_2196'],
    'maternal_size': [
        'bmi_end_pregnancy_2044',
        'bmi_greater_than_40',
        'maternal_weight_end_pregnancy_2045',
        'bmi_before_pregnancy_2161',
    ],
    'team_composition': ['has_resident', 'has_anesthesiologist'],
    'team_catheter_counts': [
        'current_anesthesiologist_catheter_count',
        'current_resident_catheter_count',
        'total_team_catheter_count',
    ],
    'bmi_and_experience': [
        'high_bmi_and_highly_experienced_resident',
        'high_bmi_and_lowly_experienced_resident',
        'high_bmi_and_no_resident',
        'high_bmi_and_highly_experienced_anesthesiologist',
    ],
    'scoliosis_and_experience': [
        'scoliosis_and_highly_experienced_resident',
        'scoliosis_and_lowly_experienced_resident',
        'scoliosis_and_no_resident',
        'scoliosis_and_highly_experienced_anesthesiologist',
    ],
    'back_group': [
        'high_bmi_and_scoliosis',
        'has_scoliosis',
        'has_dorsalgia',
        'has_back_problems',
    ],
    'maternal_risk': [
        'prior_ob_cmi_scores_max',
        'CS_hx',
        'high_risk_current_pregnancy',
        'high_risk_hx',
        'iufd',
    ],
    'psychosocial_and_ses': [
        'composite_psychosocial_problems',
        'only_private_insurance',
        'maternal_language_english',
        'marital_status_married_or_partner',
        'country_of_origin_USA',
        'employment_status_fulltime',
        'composite_SES_advantage',
    ],
    'lor': [
        'lor_depth',
        'predicted_lor_depth',
        'unexpected_delta_lor',
        'unexpected_delta_lor_squared',
    ],
    'pain_and_attempts': [
        'prior_pain_scores_max',
        'paresthesias_present',
        'number_of_neuraxial_attempts',
        'number_of_spinal_attempts',
    ],
    'prior_catheters': [
        'prior_failed_catheters_this_enc',
        'prior_failed_catheters_prev_enc',
        'prior_all_catheters_all_enc',
    ],
}

In [None]:
# Correlation heatmap over the full frame minus the two identifier columns,
# with related_groups supplied as extra groups and group boxes/lines enabled.
dvu.plot_correlation_heatmap_with_related_groups(
    neuraxial_catheter_df,
    drop_columns=['anes_procedure_encounter_id_2273', 'unique_pt_id'],
    additional_groups=related_groups,
    draw_group_boxes=True,
    draw_group_lines=True,
)

# Drop any columns here?

In [None]:
# No columns are dropped at this step any more (an earlier version of the notebook did drop some).

# Correlation Matrix 2

In [None]:
# Second rendering of the correlation heatmap: same frame, same dropped
# identifier columns and same group settings as the first one.
dvu.plot_correlation_heatmap_with_related_groups(
    neuraxial_catheter_df,
    drop_columns=['anes_procedure_encounter_id_2273', 'unique_pt_id'],
    additional_groups=related_groups,
    draw_group_boxes=True,
    draw_group_lines=True,
)

# Data Visualization

## Procedure Types

In [None]:
# Histogram of the procedure-type column across the whole frame
procedure_types = neuraxial_catheter_df['true_procedure_type_incl_dpe']
plt.hist(procedure_types)

In [None]:
# Stacked bars: failed_catheter split by procedure type, with readable legend labels
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='true_procedure_type_incl_dpe',
    value_col='failed_catheter',
    sort_by=None,
    legend_labels=['Successful', 'Failed'],
)

## Anesthesiologist Experience

In [None]:
# Re-import the plotting helper module so that edits made to its source file
# are picked up without restarting the kernel.
reload(dvu)

In [None]:
# Stacked bars: failed_catheter by anesthesiologist experience category,
# with the categories placed in an explicit order.
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='anesthesiologist_experience_category',
    value_col='failed_catheter',
    custom_order=['no_anesthesiologist', 'low', 'moderate', 'high'],
)

In [None]:
# Stacked bars: failed_catheter by resident experience category,
# with the categories placed in an explicit order.
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='resident_experience_category',
    value_col='failed_catheter',
    custom_order=['no_resident', 'low', 'high'],
)

In [None]:
# Stacked bars: failed_catheter indexed jointly by anesthesiologist and
# resident experience category.
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='anesthesiologist_experience_category',
    index_col_2='resident_experience_category',
    value_col='failed_catheter',
)

In [None]:
# End-of-pregnancy BMI compared across resident experience categories
dvu.plot_violin_crosstab_anova(
    neuraxial_catheter_df,
    index_col='resident_experience_category',
    value_col='bmi_end_pregnancy_2044',
    custom_order=['no_resident', 'low', 'high'],
)

## Delivery Site

In [None]:
# Stacked bars: failed_catheter by delivery site
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='delivery_site',
    value_col='failed_catheter',
)

In [None]:
# Stacked bars: has_subsequent_neuraxial_catheter by delivery site.
# Legend labels are listed in ascending value order (0 -> 'No', 1 -> 'Yes'),
# the same convention as ['Successful', 'Failed'] used for failed_catheter in
# this notebook; the previous ['Yes', 'No'] order labelled the groups backwards.
# NOTE(review): assumes the flag is coded 0/1 and that the helper applies labels
# in ascending order — confirm against dvu.plot_stacked_bar_histogram.
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='delivery_site',
    value_col='has_subsequent_neuraxial_catheter',
    sort_by=None,
    legend_labels=['No', 'Yes'],
)

In [None]:
# Stacked bars: has_subsequent_spinal by delivery site.
# Legend labels are listed in ascending value order (0 -> 'No', 1 -> 'Yes'),
# the same convention as ['Successful', 'Failed'] used for failed_catheter in
# this notebook; the previous ['Yes', 'No'] order labelled the groups backwards.
# NOTE(review): assumes the flag is coded 0/1 and that the helper applies labels
# in ascending order — confirm against dvu.plot_stacked_bar_histogram.
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='delivery_site',
    value_col='has_subsequent_spinal',
    sort_by=None,
    legend_labels=['No', 'Yes'],
)

In [None]:
# Stacked bars: has_subsequent_airway by delivery site.
# Legend labels are listed in ascending value order (0 -> 'No', 1 -> 'Yes'),
# the same convention as ['Successful', 'Failed'] used for failed_catheter in
# this notebook; the previous ['Yes', 'No'] order labelled the groups backwards.
# NOTE(review): assumes the flag is coded 0/1 and that the helper applies labels
# in ascending order — confirm against dvu.plot_stacked_bar_histogram.
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='delivery_site',
    value_col='has_subsequent_airway',
    sort_by=None,
    legend_labels=['No', 'Yes'],
)

In [None]:
# End-of-pregnancy BMI compared across delivery sites
dvu.plot_violin_crosstab_anova(
    neuraxial_catheter_df,
    index_col='delivery_site',
    value_col='bmi_end_pregnancy_2044',
)

## DPE

In [None]:
# prompt: create a pie chart of the fraction of DPE in epidural_df

# Tally the rows of epidural_df per procedure type
# (value_counts leaves out missing values by default)
dpe_counts = epidural_df['true_procedure_type_incl_dpe'].value_counts()

# Draw the shares as a pie chart on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(dpe_counts, labels=dpe_counts.index, autopct='%1.1f%%', startangle=90)
ax.set_title('Fraction of DPE in Epidural Procedures')
ax.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()

In [None]:
# Stacked bars: procedure type mix at each delivery site, sorted on 'intrathecal'
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='delivery_site',
    value_col='true_procedure_type_incl_dpe',
    sort_by='intrathecal',
)

In [None]:
# Stacked bars: failed_catheter by procedure type (helper's default sort and legend)
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='true_procedure_type_incl_dpe',
    value_col='failed_catheter',
)

In [None]:
# Re-import the plotting helper module so that edits made to its source file
# are picked up without restarting the kernel.
reload(dvu)

In [None]:
# Stacked bars: failed_catheter indexed jointly by delivery site and procedure type
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='delivery_site',
    index_col_2='true_procedure_type_incl_dpe',
    value_col='failed_catheter',
    sort_by='no_sort',
    title='Failed Catheter Rate by Delivery Site and Procedure Type',
)

In [None]:
# Counts of each failed_catheter value for every (delivery site, procedure type) pair
c = pd.crosstab(
    [df['delivery_site'], df['true_procedure_type_incl_dpe']],
    df['failed_catheter'],
)
# Show the table as the cell output; previously it was computed but never displayed.
c

## Scoliosis and back problems

In [None]:
# Stacked bars: failed_catheter by has_scoliosis
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='has_scoliosis',
    value_col='failed_catheter',
)

In [None]:
# Stacked bars: failed_catheter by has_back_problems
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='has_back_problems',
    value_col='failed_catheter',
)

In [None]:
# Stacked bars: failed_catheter by has_dorsalgia
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='has_dorsalgia',
    value_col='failed_catheter',
)

## Fetal Presentation

In [None]:
# Stacked bars: failed_catheter by fetal presentation
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='fetal_presentation',
    value_col='failed_catheter',
)

In [None]:
# Stacked bars: failed_catheter by fetal position
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='fetal_position',
    value_col='failed_catheter',
)

## Race and SES

In [None]:
# Race, psychosocial and socioeconomic columns to set against catheter failure
columns_to_analyze = [
    'maternal_race', 'composite_psychosocial_problems',
    'only_private_insurance', 'maternal_language_english',
    'marital_status_married_or_partner', 'country_of_origin_USA',
    'employment_status_fulltime', 'composite_SES_advantage',
]

# One stacked-bar figure per column, in the order listed above
for col in columns_to_analyze:
    dvu.plot_stacked_bar_histogram(
        neuraxial_catheter_df,
        index_col=col,
        value_col='failed_catheter',
    )

## Pain

In [None]:
# Stacked bars: failed_catheter by maximum prior pain score, with sort_by='no_sort'
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='prior_pain_scores_max',
    value_col='failed_catheter',
    sort_by='no_sort',
    title='Failed Catheter Rate by Prior Pain Score',
)

## Gravidity and Parity

In [None]:
# Stacked bars: failed_catheter by gravidity, with sort_by='no_sort'
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='gravidity_2047',
    value_col='failed_catheter',
    sort_by='no_sort',
)

In [None]:
# Stacked bars: failed_catheter by parity, with sort_by='no_sort'
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='parity_2048',
    value_col='failed_catheter',
    sort_by='no_sort',
)

## Maternal Age

In [None]:
# Re-import the plotting helper module so that edits made to its source file
# are picked up without restarting the kernel.
reload(dvu)

In [None]:
# Binned error-bar plot: failed_catheter against maternal age, bin_size=1
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='maternal_age_years',
    y_axis='failed_catheter',
    bin_size=1,
)

## BMI / height / weight

In [None]:
# Binned error-bar plot: failed_catheter against end-of-pregnancy BMI, bin_size=1
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='bmi_end_pregnancy_2044',
    y_axis='failed_catheter',
    bin_size=1,
)

In [None]:
# Same BMI plot with wider bins (bin_size=5)
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='bmi_end_pregnancy_2044',
    y_axis='failed_catheter',
    bin_size=5,
)

In [None]:
# Binned error-bar plot: failed_catheter against end-of-pregnancy weight, bin_size=10
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='maternal_weight_end_pregnancy_2045',
    y_axis='failed_catheter',
    bin_size=10,
)

## Needle Type

In [None]:
# Stacked bars: failed_catheter by epidural needle type
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='epidural_needle_type',
    value_col='failed_catheter',
)

## Paresthesias

In [None]:
# Stacked bars: failed_catheter by paresthesias_present
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='paresthesias_present',
    value_col='failed_catheter',
)

## Number of Attempts

In [None]:
# Stacked bars: failed_catheter by number of neuraxial attempts
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='number_of_neuraxial_attempts',
    value_col='failed_catheter',
)

## Loss of Resistance Depth

In [None]:
# Histogram of LOR depth with bin_space=0.5
dvu.plot_histogram(
    neuraxial_catheter_df,
    col='lor_depth',
    bin_space=0.5,
)

In [None]:
# Bin lor_depth into 0.5-wide bins (each value is floored to its lower bin edge).
# The binned column lives on a derived frame, so neuraxial_catheter_df itself is
# never modified and no temporary column is left behind if plotting raises.
lor_binned_df = neuraxial_catheter_df.assign(
    lor_depth_bin=(neuraxial_catheter_df['lor_depth'] // 0.5) * 0.5
)

# Plot the stacked bar histogram over the binned depths
dvu.plot_stacked_bar_histogram(
    lor_binned_df,
    index_col='lor_depth_bin',
    value_col='failed_catheter',
    sort_by='no_sort',
    title='Failed Catheter Rate by LOR Depth',
)

In [None]:
# prompt: Plot number of neuraxial attempts vs LOR depth on the x-axis. Add jiggle to both x and y axes

# Keep only the rows that have both values to plot
df_plot = neuraxial_catheter_df.dropna(subset=['number_of_neuraxial_attempts', 'lor_depth'])

# Small Gaussian jitter on both axes so overlapping points become visible.
# The generator is seeded so the figure is identical on every run.
jitter_rng = np.random.default_rng(0)
jiggle_x = jitter_rng.normal(scale=0.1, size=len(df_plot))
jiggle_y = jitter_rng.normal(scale=0.1, size=len(df_plot))

plt.figure(figsize=(10, 6))
plt.scatter(
    df_plot['lor_depth'] + jiggle_x,
    df_plot['number_of_neuraxial_attempts'] + jiggle_y,
    alpha=0.5,
)
plt.xlabel('LOR Depth')
plt.ylabel('Number of Neuraxial Attempts')
plt.title('Number of Neuraxial Attempts vs. LOR Depth with Jiggle')
plt.show()


In [None]:
# Rows that have both an LOR depth and an attempt count
df_plot = neuraxial_catheter_df.dropna(subset=['lor_depth', 'number_of_neuraxial_attempts'])

# Attempt counts to show, one panel each (a fixed list that fills the 2 x 2 grid)
attempts = [1, 2, 3, 4]

fig = plt.figure(figsize=(12, 8))

for panel_idx, attempt in enumerate(attempts, start=1):
    # LOR depths of the procedures recorded with exactly this many attempts
    depths = df_plot.loc[df_plot['number_of_neuraxial_attempts'] == attempt, 'lor_depth']

    # One histogram panel per attempt count
    ax = fig.add_subplot(2, 2, panel_idx)
    ax.hist(depths, bins=20, color='skyblue', edgecolor='black')
    ax.set_title(f'Histogram of LOR Depth for {attempt} Neuraxial Attempt(s)')
    ax.set_xlabel('LOR Depth')
    ax.set_ylabel('Frequency')

fig.tight_layout()
plt.show()


In [None]:
# LOR depth compared across the number of neuraxial attempts
dvu.plot_violin_crosstab_anova(
    neuraxial_catheter_df,
    index_col='number_of_neuraxial_attempts',
    value_col='lor_depth',
)

In [None]:
# Binned error-bar plot: failed_catheter against number of neuraxial attempts, bin_size=1
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='number_of_neuraxial_attempts',
    y_axis='failed_catheter',
    bin_size=1,
)

In [None]:
# Binned error-bar plot: failed_catheter against LOR depth, bin_size=1
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='lor_depth',
    y_axis='failed_catheter',
    bin_size=1,
)

In [None]:
# Re-import the plotting helper module so that edits made to its source file
# are picked up without restarting the kernel.
reload(dvu)

In [None]:
# Scatter of LOR depth (y) against end-of-pregnancy BMI (x)
dvu.plot_scatter(
    neuraxial_catheter_df,
    x_axis='bmi_end_pregnancy_2044',
    y_axis='lor_depth',
)

In [None]:
# Paired BMI / LOR-depth observations, dropping rows that lack either value
df_plot = neuraxial_catheter_df.dropna(subset=['lor_depth', 'bmi_end_pregnancy_2044'])
x = df_plot['bmi_end_pregnancy_2044'].values
y = df_plot['lor_depth'].values

# Fit a two-dimensional Gaussian kernel density estimate to the (BMI, LOR depth) points
xy = np.vstack([x, y])
kde = gaussian_kde(xy)

# Evaluate the density on a 100 x 100 grid that extends 1 unit past the data range
xmin, xmax = x.min() - 1, x.max() + 1
ymin, ymax = y.min() - 1, y.max() + 1
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
Z = np.reshape(kde(positions).T, X.shape)

# Filled contour plot of the estimated density
plt.figure(figsize=(10, 6))
plt.contourf(X, Y, Z, levels=15, cmap='viridis')
plt.colorbar(label='Density')
plt.xlabel('BMI')
plt.ylabel('LOR Depth')
plt.title('Contour Plot of LOR Depth vs. BMI (KDE)')
plt.show()


In [None]:

# Keep only rows where BMI, LOR depth and the outcome flag are all present
df_plot = neuraxial_catheter_df.dropna(
    subset=['lor_depth', 'bmi_end_pregnancy_2044', 'failed_catheter']
)

# One sub-frame per outcome value, each paired with its plotting colour
df_0 = df_plot[df_plot['failed_catheter'] == 0]
df_1 = df_plot[df_plot['failed_catheter'] == 1]
outcome_groups = [(0, df_0, 'blue'), (1, df_1, 'orange')]

plt.figure(figsize=(10, 6))

# Observed points, drawn for outcome 0 first and then outcome 1
for outcome, group, colour in outcome_groups:
    plt.scatter(
        group['bmi_end_pregnancy_2044'],
        group['lor_depth'],
        s=10,
        alpha=0.7,
        color=colour,
        label=f'Failed Catheter = {outcome}',
    )

# Predicted LOR depth for every row of the unfiltered frame
plt.scatter(
    neuraxial_catheter_df['bmi_end_pregnancy_2044'],
    neuraxial_catheter_df['predicted_lor_depth'],
    s=10,
    alpha=1,
    color='black',
    label='Predicted',
)

# Straight-line (degree-1) least-squares fit per outcome group, drawn across
# that group's own BMI range and reported as slope / intercept
for outcome, group, colour in outcome_groups:
    bmi = group['bmi_end_pregnancy_2044']
    coeffs = np.polyfit(bmi, group['lor_depth'], deg=1)  # slope, intercept
    slope, intercept = coeffs
    print(f"For failed_catheter={outcome}, slope = {slope:.2f}, intercept = {intercept:.2f}")

    bmi_grid = np.linspace(bmi.min(), bmi.max(), 100)
    plt.plot(bmi_grid, np.polyval(coeffs, bmi_grid), color=colour, linewidth=2)

# Labels and legend
plt.xlabel('BMI')
plt.ylabel('LOR Depth')
plt.title('LOR Depth vs. BMI')
plt.legend()

plt.show()

## Gestational Age and Weight

In [None]:
# Re-import the plotting helper module so that edits made to its source file
# are picked up without restarting the kernel.
reload(dvu)

In [None]:
# Histogram of gestational age with bin_space=1
dvu.plot_histogram(
    neuraxial_catheter_df,
    col='gestational_age_weeks',
    bin_space=1,
)

In [None]:
# Binned error-bar plot: failed_catheter against gestational age, bin_size=1
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='gestational_age_weeks',
    y_axis='failed_catheter',
    bin_size=1,
)

In [None]:
# Histogram of baby weight with bin_space=0.1, xtick_space=0.5 and a custom x-axis label
dvu.plot_histogram(
    neuraxial_catheter_df,
    col='baby_weight_2196',
    bin_space=0.1,
    xtick_space=0.5,
    xlabel='Baby Weight (kg)',
)

In [None]:
# Binned error-bar plot: failed_catheter against baby weight, bin_size=0.2
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='baby_weight_2196',
    y_axis='failed_catheter',
    bin_size=0.2,
)

## Prior failed catheters

In [None]:
# Stacked bars: failed_catheter by prior_failed_catheters_this_enc
dvu.plot_stacked_bar_histogram(
    neuraxial_catheter_df,
    index_col='prior_failed_catheters_this_enc',
    value_col='failed_catheter',
)

In [None]:
# Binned error-bar plot: failed_catheter against prior_failed_catheters_this_enc, bin_size=1
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='prior_failed_catheters_this_enc',
    y_axis='failed_catheter',
    bin_size=1,
)

## Placement to Delivery Time

In [None]:
# Binned error-bar plot: failed_catheter against placement-to-delivery hours, bin_size=2
dvu.plot_binned_errorbar(
    neuraxial_catheter_df,
    x_axis='placement_to_delivery_hours',
    y_axis='failed_catheter',
    bin_size=2,
)