In [None]:
from response_curves import generate_response_curve, parameter_bounds, sample_curves
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import ast
# Load the AUC results table; the columns read in this cell are 'curve_type' and 'parameters'.
df = pd.read_csv('auc_results.csv')

# Dense evaluation grid on which every reference response curve is generated.
x = np.linspace(0, 200, 500)

# (curve_type, parameters) pairs, one per row of df. 'parameters' is still the raw
# CSV string at this point and is parsed inside the loop below.
sampled_curves = list(zip(df['curve_type'], df['parameters']))

# np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; pick whichever
# this NumPy version provides so the cell runs warning-free on both.
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

true_auc_values = []
starts = []
ends = []

for curve_type, param_list in sampled_curves:
    # literal_eval only accepts Python literals, so the stored string cannot run code.
    params = ast.literal_eval(param_list)

    # Generate the reference response curve together with the bounds of its support.
    response, support_start, support_end = generate_response_curve(
        x, curve_type=curve_type, params=params, threshold=0.01
    )
    starts.append(support_start)
    ends.append(support_end)

    # Integrate only over the grid points inside [support_start, support_end];
    # the mask is built once and reused for both the curve and the grid.
    in_support = (x >= support_start) & (x <= support_end)
    true_auc = _trapezoid(response[in_support], x[in_support])
    true_auc_values.append(true_auc)

# Attach the reference AUC and the support bounds to the results table.
df['true_auc'] = true_auc_values
df['start_support'] = starts
df['end_support'] = ends





In [None]:
# One line per grid-point count (4 through 10): the 'mc_relative_error' column of the
# matching rows, drawn against their DataFrame index.
error_column = 'mc_relative_error'
for n_points in range(4, 11):
    matching_rows = df.loc[df['grid_points_in_graph'] == n_points]
    plt.plot(matching_rows[error_column], label=f'Grid Points: {n_points}')
plt.xlabel('Curve Index')
plt.ylabel(error_column)
plt.legend()
plt.show()

In [None]:
# One line per grid-point count (4 through 10): the 'standard_relative_error' column of
# the matching rows, drawn against their DataFrame index.
error_column = 'standard_relative_error'
for n_points in range(4, 11):
    matching_rows = df.loc[df['grid_points_in_graph'] == n_points]
    plt.plot(matching_rows[error_column], label=f'Grid Points: {n_points}')
plt.xlabel('Curve Index')
plt.ylabel(error_column)
plt.legend()
plt.show()

In [None]:
# Visualize the curves and sample
import ast
import re
import seaborn as sns
import matplotlib.pyplot as plt


def _parse_array_string(text):
    """Parse a bracketed list stored as text into a Python list.

    Elements may be separated by whitespace (including newlines) and/or commas,
    e.g. ``"[ 0.1  0.2\\n 0.3]"`` or ``"[1, 2, 3]"``. Separators are normalised to
    single commas, and commas left directly after ``[`` or before ``]`` are removed
    before the text is handed to ``ast.literal_eval``.

    Raises whatever ``ast.literal_eval`` raises if the normalised text is still not
    a valid Python literal.
    """
    normalized = re.sub(r'[\s,]+', ',', text.strip())
    normalized = normalized.replace("[,", "[").replace(",]", "]")
    return ast.literal_eval(normalized)


# Every panel collects this many consecutive rows; the palette has one colour per row
# within a panel.
CURVES_PER_PANEL = 10
GRID_ROWS, GRID_COLS = 7, 7

palette = sns.color_palette("husl", CURVES_PER_PANEL)

sub_df = df[df['participants'] == 10]

# Create a 7x7 grid of subplots
fig, axes = plt.subplots(GRID_ROWS, GRID_COLS, figsize=(20, 15), sharex=True, sharey=False)
axes = axes.flatten()  # Flatten the 2D array of axes for easier indexing

# Rows are drawn in the order they appear in sub_df (no sorting happens here), so the
# panel layout depends on the row order of the results table.
# NOTE(review): more than GRID_ROWS * GRID_COLS * CURVES_PER_PANEL rows would index
# past the last axis and raise IndexError.
for i, (_, row) in enumerate(sub_df.iterrows()):
    ax = axes[i // CURVES_PER_PANEL]  # Get the corresponding subplot
    color = palette[i % len(palette)]

    params = ast.literal_eval(row['parameters'])
    curve_type = row['curve_type']
    response, start, end = generate_response_curve(x, curve_type=curve_type, params=params)

    # Plot the response curve
    ax.plot(x, response, label=f"Curve: {curve_type}", color=color, alpha=0.5)

    # The three array columns are stored as text; parse them all with the same helper so
    # padding after '[' or before ']' is handled for means and stds as well as the grid.
    means = _parse_array_string(row['means'])
    stds = _parse_array_string(row['stds'])
    current_grid = _parse_array_string(row['current_grid'])

    # Plot the means with their standard deviations as error bars
    ax.errorbar(current_grid, means, yerr=stds, fmt='x', label="Means with Std", color=color, capsize=3)

    # Set title and grid for each subplot
    ax.set_title(f"{curve_type.replace('_', ' ').capitalize()}", fontsize=8)
    ax.grid(True, alpha=0.3)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.set_ylabel("Number Grid Points: " + str(row['grid_points_in_graph']), fontsize=10)

# Adjust layout and leave head-room at the top of the figure
plt.tight_layout()
plt.subplots_adjust(top=0.95)
#fig.suptitle("Response Curves by Curve Type", fontsize=16)

# Remove titles from all but the first row of plots
for ax in axes[GRID_COLS:]:
    ax.set_title("")

# Remove y-labels from all plots except the first column
for i, ax in enumerate(axes):
    if i % GRID_COLS != 0:
        ax.set_ylabel("")

plt.savefig("response_curves_by_curve_type.pdf", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Shell escape: runs `open .` on the current working directory.
# NOTE(review): `open` looks like the macOS launcher, so this is probably not portable;
# it appears to be a convenience step that the analysis does not depend on — confirm.
! open .

In [None]:
# Compare each method's AUC estimate (x-axis) with the reference AUC (y-axis).
# Both series share one axes, so they are labelled and a legend is drawn; without
# it the two point clouds cannot be told apart.
plt.scatter(df['mc_auc_mean'], df['true_auc'], label='mc_auc_mean')
plt.scatter(df['standard_auc_mean'], df['true_auc'], alpha=0.5, label='standard_auc_mean')
plt.xlabel('Estimated AUC')
plt.ylabel('true_auc')
plt.legend()
plt.show()


In [None]:
# Per curve type: mean and standard deviation of the AUC estimates of both methods.
curve_type_aggregations = {
    'mc_mean': pd.NamedAgg(column='mc_auc_mean', aggfunc='mean'),
    'mc_std': pd.NamedAgg(column='mc_auc_mean', aggfunc='std'),
    'standard_mean': pd.NamedAgg(column='standard_auc_mean', aggfunc='mean'),
    'standard_std': pd.NamedAgg(column='standard_auc_mean', aggfunc='std'),
}
grouped_by_curve_type = df.groupby('curve_type')
results = grouped_by_curve_type.agg(**curve_type_aggregations).reset_index()

# Persist the summary table without the positional index.
results.to_csv('auc_results_summary.csv', index=False)

In [None]:
# Group by participants and calculate BIAS, RMSE, and COVERAGE (%) for MC and Standard methods.
# BIAS is the mean signed error against true_auc, RMSE the root of the mean squared error,
# and COVERAGE the mean of the coverage column expressed as a percentage.
#
# Only the columns the metrics need are selected before .apply, so the grouping column is
# never handed to the lambda. This avoids the pandas 2.2 DeprecationWarning about apply
# operating on grouping columns and gives the same table on older and newer pandas.
metric_columns = ['mc_auc_mean', 'standard_auc_mean', 'true_auc', 'mc_coverage', 'standard_coverage']

summary = df.groupby('participants')[metric_columns].apply(
    lambda g: pd.Series({
        'MC_BIAS': (g['mc_auc_mean'] - g['true_auc']).mean(),
        'MC_RMSE': np.sqrt(np.mean((g['mc_auc_mean'] - g['true_auc'])**2)),
        'MC_COVERAGE (%)': 100 * g['mc_coverage'].mean(),
        'Standard_BIAS': (g['standard_auc_mean'] - g['true_auc']).mean(),
        'Standard_RMSE': np.sqrt(np.mean((g['standard_auc_mean'] - g['true_auc'])**2)),
        'Standard_COVERAGE (%)': 100 * g['standard_coverage'].mean(),
    })
).reset_index()

summary.to_csv('auc_bias_rmse_coverage_by_participants.csv', index=False)
summary

In [None]:
# Group by grid_points_in_graph and calculate BIAS, RMSE, and COVERAGE (%) for MC and Standard methods.
# BIAS is the mean signed error against true_auc, RMSE the root of the mean squared error,
# and COVERAGE the mean of the coverage column expressed as a percentage.
#
# Only the columns the metrics need are selected before .apply, so the grouping column is
# never handed to the lambda. This avoids the pandas 2.2 DeprecationWarning about apply
# operating on grouping columns and gives the same table on older and newer pandas.
metric_columns = ['mc_auc_mean', 'standard_auc_mean', 'true_auc', 'mc_coverage', 'standard_coverage']

summary_grid = df.groupby('grid_points_in_graph')[metric_columns].apply(
    lambda g: pd.Series({
        'MC_BIAS': (g['mc_auc_mean'] - g['true_auc']).mean(),
        'MC_RMSE': np.sqrt(np.mean((g['mc_auc_mean'] - g['true_auc'])**2)),
        'MC_COVERAGE (%)': 100 * g['mc_coverage'].mean(),
        'Standard_BIAS': (g['standard_auc_mean'] - g['true_auc']).mean(),
        'Standard_RMSE': np.sqrt(np.mean((g['standard_auc_mean'] - g['true_auc'])**2)),
        'Standard_COVERAGE (%)': 100 * g['standard_coverage'].mean(),
    })
).reset_index()

summary_grid.to_csv('auc_bias_rmse_coverage_by_gridpoints.csv', index=False)
summary_grid

In [None]:
# Summarise the AUC estimates per grid-point count and save them.
#
# The table is built from `df` in this cell: the existing `results` frame is grouped by
# 'curve_type' and has no 'grid_points_in_graph' column, so coercing that column on it
# raises KeyError on a fresh Restart-and-Run-All.
# NOTE(review): assumes the intended table uses the same mean/std aggregation as the
# curve-type summary, grouped by 'grid_points_in_graph' instead — confirm.
results_by_gridpoints = df.groupby('grid_points_in_graph').agg(
    mc_mean=('mc_auc_mean', 'mean'),
    mc_std=('mc_auc_mean', 'std'),
    standard_mean=('standard_auc_mean', 'mean'),
    standard_std=('standard_auc_mean', 'std')
).reset_index()

# Ensure all columns in the summary are numeric; values that cannot be parsed become NaN.
numeric_columns = ['grid_points_in_graph', 'mc_mean', 'mc_std', 'standard_mean', 'standard_std']
for col in numeric_columns:
    results_by_gridpoints[col] = pd.to_numeric(results_by_gridpoints[col], errors='coerce')

# Save the DataFrame to a CSV file
results_by_gridpoints.to_csv('auc_results_summary_gridpoints.csv', index=False)

In [None]:
# Root-mean-square error of each method's AUC estimate against true_auc, per curve type.
#
# The values are sqrt(mean(squared error)), i.e. RMSE, so the columns are labelled as
# such. They were previously labelled "Mean Absolute Difference", which is a different
# statistic from the one computed here.
results_list = []

for curve_type, group in df.groupby('curve_type'):
    mc_rmse = np.sqrt(np.mean((group['mc_auc_mean'] - group['true_auc'])**2))
    standard_rmse = np.sqrt(np.mean((group['standard_auc_mean'] - group['true_auc'])**2))

    results_list.append({
        "Curve Type": curve_type,
        "RMSE (MC)": mc_rmse,
        "RMSE (Standard)": standard_rmse
    })

# Build the table once from the collected records, save it, then display it as the
# cell's final expression.
results_df = pd.DataFrame(results_list)
results_df.to_csv("mean_results.csv", index=False)
results_df