In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the per-model metric tables. When either CSV is missing, print a hint and
# fall back to empty frames that carry the expected column names so the
# remaining cells still have something to operate on.
try:
    df_n_raw = pd.read_csv('syringe_volume_estimations_and_metrics_N.csv')
    df_x_raw = pd.read_csv('syringe_volume_estimations_and_metrics_X.csv')
except FileNotFoundError:
    print("Make sure 'syringe_volume_estimations_and_metrics_N.csv' and 'syringe_volume_estimations_and_metrics_X.csv' are in the same directory as the notebook.")
    # Placeholder schema only; no rows are generated here.
    columns = [
        'video_path', 'actual_volume (ml)',
        'min', 'max', 'mean', 'std', 'median', 'SEM', 'CV',
        'diameter (cm)', 'zoom',
    ]
    df_n_raw = pd.DataFrame(columns=columns)
    df_x_raw = pd.DataFrame(columns=columns)

# Label every row with the model that produced it. assign() returns a new
# frame, so the *_raw tables stay untouched.
df_n = df_n_raw.assign(model_type='Nano (N)')
df_x = df_x_raw.assign(model_type='X')

# Stack both models into one long table with a fresh 0..n-1 index.
df_combined = pd.concat([df_n, df_x], ignore_index=True)

# Derived error metrics, all relative to the labelled actual volume.
actual_volume = df_combined['actual_volume (ml)']
signed_error = df_combined['mean'] - actual_volume
df_combined['error (ml)'] = signed_error
df_combined['abs_error (ml)'] = signed_error.abs()
# Percent error is undefined when the actual volume is 0, so those rows get NaN.
df_combined['percent_error (%)'] = np.where(
    actual_volume == 0,
    np.nan,
    (signed_error / actual_volume) * 100,
)
df_combined['abs_percent_error (%)'] = df_combined['percent_error (%)'].abs()


# One fixed colour per model so every figure uses the same coding.
model_palette = {
    "Nano (N)": "skyblue",
    "X": "salmon",
}

# --- Notebook Starts Here ---

In [None]:
# Preview the first rows of the combined table, then print its column/dtype summary.
print("Combined Dataframe Head:")
display(df_combined.head())
print("\nCombined Dataframe Info:")
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# display() only appended a stray "None" to the cell output.
df_combined.info()

In [None]:
# Scatter of mean estimate against actual volume (colour = model, marker = zoom),
# with a dashed y = x line spanning the observed actual-volume range.
volume_lo = df_combined['actual_volume (ml)'].min()
volume_hi = df_combined['actual_volume (ml)'].max()
identity_span = [volume_lo, volume_hi]

plt.figure(figsize=(10, 7))
sns.scatterplot(
    data=df_combined,
    x='actual_volume (ml)',
    y='mean',
    hue='model_type',
    style='zoom',
    s=100,
    palette=model_palette,
)
plt.plot(identity_span, identity_span, 'k--', lw=2, label='Perfect Prediction (y=x)')

plt.title('Mean Estimated Volume vs. Actual Volume by Model and Zoom')
plt.xlabel('Actual Volume (ml)')
plt.ylabel('Mean Estimated Volume (ml)')
plt.legend(title='Model & Zoom')
plt.grid(True)

# Write the figure to disk before showing it.
plt.savefig('mean_estimated_vs_actual_volume.png', dpi=300)
plt.show()

In [None]:
# Box plot of absolute error per model, split by zoom level (two-colour coolwarm palette).
zoom_palette = sns.color_palette("coolwarm", 2)

plt.figure(figsize=(10, 6))
sns.boxplot(
    data=df_combined,
    x='model_type',
    y='abs_error (ml)',
    hue='zoom',
    palette=zoom_palette,
)
plt.title('Distribution of Absolute Error (ml) by Model and Zoom')
plt.xlabel('Model Type')
plt.ylabel('Absolute Error (ml)')
plt.grid(axis='y')
plt.savefig('absolute_error_distribution.png', dpi=300)
plt.show()

# Numeric companion to the plot: mean absolute error per (model, zoom) group.
mae_by_model_zoom = df_combined.groupby(['model_type', 'zoom'])['abs_error (ml)'].mean()
print("\nMean Absolute Error (ml):")
print(mae_by_model_zoom)

In [None]:
# CV is compared only for non-empty syringes: it can be misleading when the mean
# is close to 0, so rows with an actual volume of 0 ml are filtered out once here
# and the filtered frame is reused for the plot, the axis limit and the summary.
df_non_empty = df_combined[df_combined['actual_volume (ml)'] > 0]

plt.figure(figsize=(10, 6))
sns.boxplot(data=df_non_empty, x='model_type', y='CV', hue='zoom', palette=sns.color_palette("coolwarm", 2))
plt.title('Distribution of Coefficient of Variation (CV %) for Non-Empty Syringes')
plt.ylabel('Coefficient of Variation (CV %)')
plt.xlabel('Model Type')

# Cap the y-axis slightly above the 95th percentile so extreme outliers do not
# squash the boxes. With no rows (e.g. the missing-file fallback) or an all-NaN
# CV column there is no finite quantile, and matplotlib refuses NaN axis limits,
# so fall back to a fixed default range instead.
cv_axis_top = np.nan if df_non_empty.empty else df_non_empty['CV'].quantile(0.95) * 1.1
if np.isfinite(cv_axis_top):
    plt.ylim(0, cv_axis_top)
else:
    plt.ylim(0, 1)
plt.grid(axis='y')
plt.savefig('cv_distribution.png', dpi=300)
plt.show()

print("\nMean CV (%) for Non-Empty Syringes:")
print(df_non_empty.groupby(['model_type', 'zoom'])['CV'].mean())

In [None]:
# Rows whose labelled actual volume is 0 ml (empty syringe).
df_0ml = df_combined[df_combined['actual_volume (ml)'] == 0]
zoom_levels = ['1x', '3x']

# --- Figure 1: mean estimated volume per diameter, one panel per zoom level ---
# (The earlier stand-alone plt.figure()/barplot pair was an abandoned first
# attempt that left an extra figure in the output; only the subplot version is kept.)
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
fig.suptitle('Mean Estimated Volume for 0ml (Empty Syringe) by Diameter, Model, and Zoom', fontsize=16)

for ax, zoom_level in zip(axes, zoom_levels):
    # errorbar='sd' draws standard-deviation bars; it replaces the deprecated ci='sd'.
    sns.barplot(data=df_0ml[df_0ml['zoom'] == zoom_level], x='diameter (cm)', y='mean', hue='model_type',
                dodge=True, palette=model_palette, capsize=.1, ax=ax, estimator=np.mean, errorbar='sd')
    ax.set_title(f'Zoom: {zoom_level}')
    ax.set_xlabel('Syringe Diameter (cm)')
    ax.grid(axis='y')
    # Reference line at 0 ml, the labelled actual volume for these rows.
    ax.axhline(0, color='k', linestyle='--', lw=1)
axes[0].set_ylabel('Mean Estimated Volume (ml)')
axes[1].set_ylabel('')  # y-axis is shared, so only the left panel is labelled

plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave room for the suptitle
plt.show()

print("\nMean Estimated Volume (ml) for 0ml Syringes:")
if not df_0ml.empty:
    print(df_0ml.groupby(['model_type', 'zoom', 'diameter (cm)'])['mean'].mean().unstack(['zoom', 'diameter (cm)']))
else:
    print("No data for 0ml syringes.")


# --- Figure 2: CV per diameter, one panel per zoom level ---
# No plt.figure() call here: plt.subplots() already creates the figure, and an
# extra plt.figure() would only emit an empty canvas.
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
fig.suptitle('Coefficient of Variation (CV %) for 0ml (Empty Syringe) by Diameter, Model, and Zoom', fontsize=16)

for ax, zoom_level in zip(axes, zoom_levels):
    # No estimator/errorbar arguments are passed, so seaborn's defaults apply.
    sns.barplot(data=df_0ml[df_0ml['zoom'] == zoom_level], x='diameter (cm)', y='CV', hue='model_type',
                dodge=True, palette=model_palette, capsize=.1, ax=ax)
    ax.set_title(f'Zoom: {zoom_level}')
    ax.set_xlabel('Syringe Diameter (cm)')
    ax.grid(axis='y')
axes[0].set_ylabel('Coefficient of Variation (CV %)')
axes[1].set_ylabel('')

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

print("\nMean CV (%) for 0ml Syringes:")
if not df_0ml.empty:
    print(df_0ml.groupby(['model_type', 'zoom', 'diameter (cm)'])['CV'].mean().unstack(['zoom', 'diameter (cm)']))
else:
    print("No data for 0ml syringes for CV calculation.")

In [None]:
# Restrict to syringes that actually contain liquid (actual volume above 0 ml).
has_liquid = df_combined['actual_volume (ml)'] > 0
df_non_empty = df_combined[has_liquid]

# Absolute-error box plot per model, split by zoom, for the non-empty subset.
plt.figure(figsize=(10, 6))
sns.boxplot(
    data=df_non_empty,
    x='model_type',
    y='abs_error (ml)',
    hue='zoom',
    palette=sns.color_palette("coolwarm", 2),
)
plt.title('Distribution of Absolute Error (ml) for Non-Empty Syringes')
plt.xlabel('Model Type')
plt.ylabel('Absolute Error (ml)')
plt.grid(axis='y')
plt.show()

mae_non_empty = df_non_empty.groupby(['model_type', 'zoom'])['abs_error (ml)'].mean()
print("\nMean Absolute Error (ml) for Non-Empty Syringes:")
print(mae_non_empty)

# The CV box plot for non-empty syringes is produced by an earlier cell and is not repeated here.

In [None]:
# Side-by-side view of how zoom level affects absolute error (left) and CV (right).
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Left panel: absolute error by zoom, all rows.
sns.boxplot(data=df_combined, x='zoom', y='abs_error (ml)', hue='model_type', ax=axes[0], palette=model_palette)
axes[0].set_title('Impact of Zoom on Absolute Error (ml)')
axes[0].set_ylabel('Absolute Error (ml)')
axes[0].set_xlabel('Zoom Level')
axes[0].grid(axis='y')

# Right panel: CV by zoom, restricted to non-empty syringes to avoid the 0 ml
# rows. The filter is computed once and reused for the plot and the axis limit.
df_non_empty = df_combined[df_combined['actual_volume (ml)'] > 0]
sns.boxplot(data=df_non_empty, x='zoom', y='CV', hue='model_type', ax=axes[1], palette=model_palette)
axes[1].set_title('Impact of Zoom on CV (%) for Non-Empty Syringes')
axes[1].set_ylabel('Coefficient of Variation (CV %)')
axes[1].set_xlabel('Zoom Level')

# Cap the y-axis slightly above the 90th percentile for clarity. With no rows
# or an all-NaN CV column the quantile is not finite and matplotlib refuses NaN
# axis limits, so use a fixed default range in that case.
cv_axis_top = np.nan if df_non_empty.empty else df_non_empty['CV'].quantile(0.90) * 1.1
if np.isfinite(cv_axis_top):
    axes[1].set_ylim(0, cv_axis_top)
else:
    axes[1].set_ylim(0, 1)
axes[1].grid(axis='y')

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display # Needed for display() in a script if not in Jupyter

def make_dummy_metrics(rng, columns, n_rows=10):
    """Build a placeholder metrics table with the same columns as the real CSVs.

    Every column is first filled with uniform random floats; the columns the
    plots group or filter on are then overwritten with values from small fixed
    sets so that the grouping and filtering code has categories to work with.

    Parameters:
        rng: a ``numpy.random.Generator``; passing a seeded generator makes the
            placeholder data reproducible.
        columns: list of column names for the table.
        n_rows: number of placeholder rows to generate.

    Returns:
        A ``pandas.DataFrame`` with ``n_rows`` rows and the given columns.
    """
    frame = pd.DataFrame(rng.random((n_rows, len(columns))), columns=columns)
    frame['actual_volume (ml)'] = rng.choice([0, 1, 2, 5, 10], size=n_rows)
    frame['diameter (cm)'] = rng.choice([1.0, 2.0], size=n_rows)
    frame['zoom'] = rng.choice(['1x', '3x'], size=n_rows)
    frame['CV'] = rng.random(n_rows) * 10
    return frame


# Load the datasets
try:
    df_n_raw = pd.read_csv('syringe_volume_estimations_and_metrics_N.csv')
    df_x_raw = pd.read_csv('syringe_volume_estimations_and_metrics_X.csv')
except FileNotFoundError:
    print("Make sure 'syringe_volume_estimations_and_metrics_N.csv' and 'syringe_volume_estimations_and_metrics_X.csv' are in the same directory as the script.")
    # The code below still saves figures under their usual file names, so make
    # it explicit that they will not reflect real measurements.
    print("WARNING: input files not found - continuing with randomly generated placeholder data; "
          "all plots and metrics below are NOT real results.")
    columns = ['video_path', 'actual_volume (ml)', 'min', 'max', 'mean', 'std', 'median', 'SEM', 'CV', 'diameter (cm)', 'zoom']
    # A locally seeded generator keeps the fallback reproducible across runs and
    # leaves NumPy's global random state untouched.
    dummy_rng = np.random.default_rng(0)
    df_n_raw = make_dummy_metrics(dummy_rng, columns)
    df_x_raw = make_dummy_metrics(dummy_rng, columns)


# Add a model type column for easier plotting and aggregation
df_n = df_n_raw.copy()
df_x = df_x_raw.copy()
df_n['model_type'] = 'Nano (N)'
df_x['model_type'] = 'X'

# Combine the dataframes
df_combined = pd.concat([df_n, df_x], ignore_index=True)

# Calculate error metrics (signed, absolute, and relative to the actual volume)
df_combined['error (ml)'] = df_combined['mean'] - df_combined['actual_volume (ml)']
df_combined['abs_error (ml)'] = np.abs(df_combined['error (ml)'])
df_combined['percent_error (%)'] = np.where(
    df_combined['actual_volume (ml)'] == 0,
    np.nan, # Percent error is undefined for a 0 ml actual volume
    (df_combined['error (ml)'] / df_combined['actual_volume (ml)']) * 100
)
df_combined['abs_percent_error (%)'] = np.abs(df_combined['percent_error (%)'])


# Set a consistent color palette for the models
model_palette = {"Nano (N)": "skyblue", "X": "salmon"}

# --- Notebook Starts Here ---

# Show a preview of the merged table, then its column/dtype summary.
head_preview = df_combined.head()
print("Combined Dataframe Head:")
display(head_preview)
print("\nCombined Dataframe Info:")
# info() writes its report to stdout on its own, so it is called bare rather than through display().
df_combined.info()

# Plot 1: mean estimate against actual volume (colour = model, marker = zoom),
# with a dashed y = x reference spanning the observed actual-volume range.
volume_lo = df_combined['actual_volume (ml)'].min()
volume_hi = df_combined['actual_volume (ml)'].max()
identity_span = [volume_lo, volume_hi]

plt.figure(figsize=(10, 7))
sns.scatterplot(
    data=df_combined,
    x='actual_volume (ml)',
    y='mean',
    hue='model_type',
    style='zoom',
    s=100,
    palette=model_palette,
)
plt.plot(identity_span, identity_span, 'k--', lw=2, label='Perfect Prediction (y=x)')
plt.title('Mean Estimated Volume vs. Actual Volume by Model and Zoom')
plt.xlabel('Actual Volume (ml)')
plt.ylabel('Mean Estimated Volume (ml)')
plt.legend(title='Model & Zoom')
plt.grid(True)
plt.savefig('mean_estimated_vs_actual_volume.png', dpi=300, bbox_inches='tight')
plt.close()  # the figure is saved to disk only; closing it releases its memory

# Plot 2: absolute error per model, split by zoom level.
zoom_palette = sns.color_palette("coolwarm", 2)

plt.figure(figsize=(10, 6))
sns.boxplot(
    data=df_combined,
    x='model_type',
    y='abs_error (ml)',
    hue='zoom',
    palette=zoom_palette,
)
plt.title('Distribution of Absolute Error (ml) by Model and Zoom')
plt.xlabel('Model Type')
plt.ylabel('Absolute Error (ml)')
plt.grid(axis='y')
plt.savefig('absolute_error_distribution.png', dpi=300, bbox_inches='tight')
plt.close()

# Numeric companion to the plot: mean absolute error per (model, zoom) group.
mae_by_model_zoom = df_combined.groupby(['model_type', 'zoom'])['abs_error (ml)'].mean()
print("\nMean Absolute Error (ml):")
print(mae_by_model_zoom)

# Plot 3: CV distribution per model and zoom, restricted to rows whose actual
# volume is above 0 ml.
cv_rows = df_combined[df_combined['actual_volume (ml)'] > 0]

plt.figure(figsize=(10, 6))
sns.boxplot(
    data=cv_rows,
    x='model_type',
    y='CV',
    hue='zoom',
    palette=sns.color_palette("coolwarm", 2),
)
plt.title('Distribution of CV (%) for Non-Empty Syringes')
plt.xlabel('Model Type')
plt.ylabel('Coefficient of Variation (CV %)')
# Upper y-limit: 10% above the 95th percentile of CV; a fixed 0-1 range is
# used when the filtered frame has no rows to take a quantile from.
if cv_rows.empty:
    plt.ylim(0, 1)
else:
    plt.ylim(0, cv_rows['CV'].quantile(0.95) * 1.1)
plt.grid(axis='y')
plt.savefig('cv_distribution_non_empty.png', dpi=300, bbox_inches='tight')
plt.close()

mean_cv_by_group = cv_rows.groupby(['model_type', 'zoom'])['CV'].mean()
print("\nMean CV (%) for Non-Empty Syringes:")
print(mean_cv_by_group)

# Rows whose labelled actual volume is 0 ml (empty syringe).
is_empty_syringe = df_combined['actual_volume (ml)'] == 0
df_0ml = df_combined[is_empty_syringe]

# Plot 4: mean estimated volume per diameter for the 0 ml rows, one panel per
# zoom level, bars coloured by model with standard-deviation error bars.
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
fig.suptitle('Mean Estimated Volume for 0ml (Empty Syringe) by Diameter, Model, and Zoom', fontsize=16)

# (zoom value, y-label) per panel; the right panel's label is blank because the
# y-axis is shared with the left one.
panel_specs = [('1x', 'Mean Estimated Volume (ml)'), ('3x', '')]
for panel_ax, (zoom_level, y_label) in zip(axes, panel_specs):
    sns.barplot(
        data=df_0ml[df_0ml['zoom'] == zoom_level],
        x='diameter (cm)',
        y='mean',
        hue='model_type',
        dodge=True,
        palette=model_palette,
        capsize=.1,
        ax=panel_ax,
        estimator=np.mean,
        errorbar='sd',
    )
    panel_ax.set_title(f'Zoom: {zoom_level}')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_xlabel('Syringe Diameter (cm)')
    panel_ax.grid(axis='y')
    # Dashed reference line at 0 ml, the labelled actual volume for these rows.
    panel_ax.axhline(0, color='k', linestyle='--', lw=1)

plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave headroom for the suptitle
plt.savefig('mean_estimated_0ml_syringes.png', dpi=300, bbox_inches='tight')
plt.close()

# Pivoted summary: models as rows, (zoom, diameter) pairs as columns.
print("\nMean Estimated Volume (ml) for 0ml Syringes:")
if df_0ml.empty:
    print("No data for 0ml syringes.")
else:
    mean_by_group = df_0ml.groupby(['model_type', 'zoom', 'diameter (cm)'])['mean'].mean()
    print(mean_by_group.unstack(['zoom', 'diameter (cm)']))


# Plot 5: CV per diameter for the 0 ml rows, one panel per zoom level, bars
# coloured by model with standard-deviation error bars.
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
fig.suptitle('CV (%) for 0ml (Empty Syringe) by Diameter, Model, and Zoom', fontsize=16)

# (zoom value, y-label) per panel; the right panel's label is blank because the
# y-axis is shared with the left one.
panel_specs = [('1x', 'Coefficient of Variation (CV %)'), ('3x', '')]
for panel_ax, (zoom_level, y_label) in zip(axes, panel_specs):
    sns.barplot(
        data=df_0ml[df_0ml['zoom'] == zoom_level],
        x='diameter (cm)',
        y='CV',
        hue='model_type',
        dodge=True,
        palette=model_palette,
        capsize=.1,
        ax=panel_ax,
        estimator=np.mean,
        errorbar='sd',
    )
    panel_ax.set_title(f'Zoom: {zoom_level}')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_xlabel('Syringe Diameter (cm)')
    panel_ax.grid(axis='y')

plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave headroom for the suptitle
plt.savefig('cv_0ml_syringes.png', dpi=300, bbox_inches='tight')
plt.close()

# Pivoted summary: models as rows, (zoom, diameter) pairs as columns.
print("\nMean CV (%) for 0ml Syringes:")
if df_0ml.empty:
    print("No data for 0ml syringes for CV calculation.")
else:
    cv_by_group = df_0ml.groupby(['model_type', 'zoom', 'diameter (cm)'])['CV'].mean()
    print(cv_by_group.unstack(['zoom', 'diameter (cm)']))


# Rows whose actual volume is above 0 ml.
has_liquid = df_combined['actual_volume (ml)'] > 0
df_non_empty = df_combined[has_liquid]

# Plot 6: absolute error per model, split by zoom, for the non-empty subset.
plt.figure(figsize=(10, 6))
sns.boxplot(
    data=df_non_empty,
    x='model_type',
    y='abs_error (ml)',
    hue='zoom',
    palette=sns.color_palette("coolwarm", 2),
)
plt.title('Distribution of Absolute Error (ml) for Non-Empty Syringes')
plt.xlabel('Model Type')
plt.ylabel('Absolute Error (ml)')
plt.grid(axis='y')
plt.savefig('absolute_error_non_empty_syringes.png', dpi=300, bbox_inches='tight')
plt.close()

mae_non_empty = df_non_empty.groupby(['model_type', 'zoom'])['abs_error (ml)'].mean()
print("\nMean Absolute Error (ml) for Non-Empty Syringes:")
print(mae_non_empty)

# Plot 7: effect of zoom level on absolute error (left) and on CV (right).
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
error_ax, cv_ax = axes[0], axes[1]

# Left panel: absolute error by zoom, all rows, coloured by model.
sns.boxplot(
    data=df_combined,
    x='zoom',
    y='abs_error (ml)',
    hue='model_type',
    ax=error_ax,
    palette=model_palette,
)
error_ax.set_title('Impact of Zoom on Absolute Error (ml)')
error_ax.set_ylabel('Absolute Error (ml)')
error_ax.set_xlabel('Zoom Level')
error_ax.grid(axis='y')

# Right panel: CV by zoom, restricted to rows with an actual volume above 0 ml.
cv_rows = df_combined[df_combined['actual_volume (ml)'] > 0]
sns.boxplot(
    data=cv_rows,
    x='zoom',
    y='CV',
    hue='model_type',
    ax=cv_ax,
    palette=model_palette,
)
cv_ax.set_title('Impact of Zoom on CV (%) for Non-Empty Syringes')
cv_ax.set_ylabel('Coefficient of Variation (CV %)')
cv_ax.set_xlabel('Zoom Level')
# Upper y-limit: 10% above the 90th percentile of CV; a fixed 0-1 range is
# used when the filtered frame has no rows to take a quantile from.
if cv_rows.empty:
    cv_ax.set_ylim(0, 1)
else:
    cv_ax.set_ylim(0, cv_rows['CV'].quantile(0.90) * 1.1)
cv_ax.grid(axis='y')

plt.tight_layout()
plt.savefig('zoom_impact_error_cv.png', dpi=300, bbox_inches='tight')
plt.close()

print("\nPython script finished generating plots.")