# Snow Elastic Modulus Calculations - Bergfeld et al. (2023) Method

This notebook demonstrates the application of the Bergfeld et al. (2023) elastic modulus parameterization to snowpit data. The Bergfeld method calculates elastic modulus from snow density using a power-law relationship optimized from Propagation Saw Test (PST) data.

The analysis uses the local snowpyt_mechparams package and snowpylot for CAAML parsing.


In [13]:
# Import Libraries
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Add the src directory to the path to import snowpyt_mechparams
sys.path.append('../src')
from snowpilot_utils import convert_grain_form, parse_sample_pits
from snowpyt_mechparams import density, elastic_modulus


Parse Snowpit Files


In [14]:
# Parse all snowpit files from the data folder
all_pits = parse_sample_pits('data')


Successfully parsed 50278 files
Failed to parse 0 files


In [15]:
# Collect relevant data from each snowpit
pit_info = []
layer_info = []

for pit in all_pits:
    pit_dict = {
        'pit_id': pit.core_info.pit_id,
        'layer_count': len(pit.snow_profile.layers),
        'density_count': len(pit.snow_profile.density_profile)
    }
    pit_info.append(pit_dict)

    for layer in pit.snow_profile.layers:
        # Create base layer dictionary
        layer_dict = {
            'pit_id': pit.core_info.pit_id,
            'depth_top': layer.depth_top,
            'thickness': layer.thickness,
            'hand_hardness': layer.hardness,
            'grain_form_primary': layer.grain_form_primary,
            'grain_size': None,  # Initialize as None, set below if data exists
            'direct_density': None  # Initialize as None, check for direct measurements
        }

        # Set grain_size if grain_form_primary exists and has grain_size_avg
        if layer.grain_form_primary and hasattr(layer.grain_form_primary, 'grain_size_avg') and layer.grain_form_primary.grain_size_avg:
            layer_dict['grain_size'] = layer.grain_form_primary.grain_size_avg[0]

        # Add grain form conversions for different density methods
        if layer.grain_form_primary:
            layer_dict['geldsetzer_grain_form'] = convert_grain_form(layer.grain_form_primary, 'geldsetzer')
            layer_dict['kim_geldsetzer_grain_form'] = convert_grain_form(layer.grain_form_primary, 'kim_geldsetzer')
            layer_dict['kim_grain_form'] = convert_grain_form(layer.grain_form_primary, 'kim')
        else:
            layer_dict['geldsetzer_grain_form'] = None
            layer_dict['kim_geldsetzer_grain_form'] = None
            layer_dict['kim_grain_form'] = None

        layer_info.append(layer_dict)

    # Check for direct density measurements that match layers
    for density_obs in pit.snow_profile.density_profile:
        for layer_dict in layer_info:
            if (layer_dict['pit_id'] == pit.core_info.pit_id and 
                density_obs.depth_top == layer_dict['depth_top'] and 
                density_obs.thickness == layer_dict['thickness']):
                layer_dict['direct_density'] = density_obs.density

# Create a dataframe from the layer info
layer_df = pd.DataFrame(layer_info)
print(f"Total layers collected: {len(layer_df):,}")


Total layers collected: 371,429


Collect Layer Data and Calculate Density

Since the Bergfeld method requires density as input, we need to first calculate density for each layer using available methods. We'll try multiple density calculation methods to maximize coverage.


In [16]:
# Fixed Function to calculate density using multiple methods with priority order
def calculate_layer_density_multi_method_fixed(row):
    """
    Calculate density using multiple methods in priority order:
    1. Direct measurements (if available)
    2. Kim & Geldsetzer method (best uncertainty)
    3. Geldsetzer method
    4. Kim method (requires grain size)
    """
    
    # Priority 1: Direct density measurements
    direct_density = row['direct_density']
    if direct_density is not None:
        try:
            # Handle case where direct_density might be an array or list
            if hasattr(direct_density, '__len__') and not isinstance(direct_density, str):
                # If it's an array/list, take the first element if it exists
                if len(direct_density) > 0:
                    density_val = float(direct_density[0])
                else:
                    density_val = None
            else:
                # It's a scalar value
                density_val = float(direct_density)
            
            if density_val is not None and not np.isnan(density_val):
                # Assume 10% uncertainty for direct measurements (typical for field measurements)
                density_unc = density_val * 0.1  # 10% uncertainty
                return pd.Series([density_val, density_unc, 'direct'])
        except (ValueError, TypeError):
            # If conversion fails, skip direct density
            pass
    
    # Priority 2: Kim & Geldsetzer method (requires hand hardness and grain form)
    if (row['kim_geldsetzer_grain_form'] is not None and 
        row['hand_hardness'] is not None and
        str(row['kim_geldsetzer_grain_form']).lower() != 'none' and 
        str(row['hand_hardness']).lower() != 'none'):
        try:
            density_ufloat = density.calculate_density(
                method='kim_geldsetzer',
                hand_hardness=row['hand_hardness'],
                grain_form=row['kim_geldsetzer_grain_form']
            )
            return pd.Series([density_ufloat.nominal_value, density_ufloat.std_dev, 'kim_geldsetzer'])
        except Exception:
            pass
    
    # Priority 3: Geldsetzer method (requires hand hardness and grain form)
    if (row['geldsetzer_grain_form'] is not None and 
        row['hand_hardness'] is not None and
        str(row['geldsetzer_grain_form']).lower() != 'none' and 
        str(row['hand_hardness']).lower() != 'none'):
        try:
            density_ufloat = density.calculate_density(
                method='geldsetzer',
                hand_hardness=row['hand_hardness'],
                grain_form=row['geldsetzer_grain_form']
            )
            return pd.Series([density_ufloat.nominal_value, density_ufloat.std_dev, 'geldsetzer'])
        except Exception:
            pass
    
    # Priority 4: Kim method (requires hand hardness, grain form, and grain size)
    if (row['kim_grain_form'] is not None and 
        row['hand_hardness'] is not None and 
        row['grain_size'] is not None and
        str(row['kim_grain_form']).lower() != 'none' and 
        str(row['hand_hardness']).lower() != 'none' and 
        str(row['grain_size']).lower() != 'none'):
        try:
            density_ufloat = density.calculate_density(
                method='kim',
                hand_hardness=row['hand_hardness'],
                grain_form=row['kim_grain_form'],
                grain_size=row['grain_size']
            )
            return pd.Series([density_ufloat.nominal_value, density_ufloat.std_dev, 'kim'])
        except Exception:
            pass
    
    # No method available
    return pd.Series([np.nan, np.nan, None])

# Check if layer_df exists, if not provide guidance
try:
    if 'layer_df' not in locals():
        raise NameError("layer_df not found")
    print(f"Found layer_df with {len(layer_df):,} layers")
except NameError:
    print("ERROR: layer_df is not defined!")
    print("Please run the earlier cells first:")
    print("1. Cell 1: Import libraries")
    print("2. Cell 2: Parse snowpit files") 
    print("3. Cell 3: Collect layer data")
    print("Then come back to this cell.")
    raise NameError("layer_df not defined. Run earlier cells first.")

# Apply density calculation to all layers using the fixed function
print("Calculating density for all layers using fixed function...")
layer_df[['density', 'density_uncertainty', 'density_method']] = layer_df.apply(calculate_layer_density_multi_method_fixed, axis=1)

# Calculate relative uncertainty for density
layer_df['density_relative_uncertainty'] = (layer_df['density_uncertainty'] / layer_df['density']) * 100

print("Density calculation complete.")


Found layer_df with 371,429 layers
Calculating density for all layers using fixed function...
Density calculation complete.


Calculate Density Using Multiple Methods

We'll calculate density using multiple methods to maximize the number of layers we can analyze. Priority order: direct measurements, kim_geldsetzer, geldsetzer, kim.


Apply Bergfeld Elastic Modulus Parameterization

Now we'll apply the Bergfeld et al. (2023) method to calculate elastic modulus for all layers that have density values.


In [17]:
# Function to calculate elastic modulus using Bergfeld method
def calculate_bergfeld_elastic_modulus(row):
    """
    Calculate elastic modulus using Bergfeld et al. (2023) method.
    Requires density as input.
    """
    try:
        # Only calculate if we have density
        if pd.isna(row['density']) or pd.isna(row['density_uncertainty']):
            return pd.Series([np.nan, np.nan])
        
        # Create ufloat object for density with uncertainty
        from uncertainties import ufloat
        density_ufloat = ufloat(row['density'], row['density_uncertainty'])
        
        # Calculate elastic modulus using Bergfeld method
        emod_ufloat = elastic_modulus.calculate_elastic_modulus(
            method='bergfeld',
            density=density_ufloat
        )
        
        # Extract nominal value and standard deviation
        emod_val = emod_ufloat.nominal_value
        emod_unc = emod_ufloat.std_dev
        
        return pd.Series([emod_val, emod_unc])
        
    except Exception as e:
        print(f"Error calculating elastic modulus for layer {row['pit_id']}: {e}")
        return pd.Series([np.nan, np.nan])

# Apply Bergfeld elastic modulus calculation to all layers
print("Calculating elastic modulus using Bergfeld method...")
layer_df[['elastic_modulus', 'elastic_modulus_uncertainty']] = layer_df.apply(calculate_bergfeld_elastic_modulus, axis=1)

# Calculate relative uncertainty for elastic modulus
layer_df['elastic_modulus_relative_uncertainty'] = (layer_df['elastic_modulus_uncertainty'] / layer_df['elastic_modulus']) * 100

print("Elastic modulus calculation complete.")


Calculating elastic modulus using Bergfeld method...


  warn("Using UFloat objects with std_dev==0 may give unexpected results.")


Elastic modulus calculation complete.


Summary Statistics and Results

Calculate the success rate and average relative uncertainty for the Bergfeld elastic modulus parameterization.


In [21]:
## Approach 3: Density-Method-Agnostic Analysis

# This analysis evaluates the Bergfeld elastic modulus method's performance 
# independent of the specific density calculation method used.

print("=== BERGFELD ELASTIC MODULUS: DENSITY-METHOD-AGNOSTIC ANALYSIS ===")
print()

# Debug: Check the current state of the data
print("DEBUG: Checking current data state...")
print(f"Total layers in layer_df: {len(layer_df):,}")
print(f"Layers with density: {layer_df['density'].notna().sum():,}")
print(f"Layers with density > 0: {(layer_df['density'] > 0).sum():,}")
print(f"Density range: {layer_df['density'].min():.1f} to {layer_df['density'].max():.1f} kg/m³")
print(f"Sample density values: {layer_df['density'].dropna().head(10).tolist()}")
print()

# Use the existing elastic modulus calculations instead of recalculating
print("Using existing elastic modulus calculations from previous cells...")

# Filter for layers that have both density and elastic modulus
layers_with_both = layer_df[(layer_df['density'].notna()) & 
                           (layer_df['density'] > 0) & 
                           (layer_df['elastic_modulus'].notna())]

print(f"Layers with both density and elastic modulus: {len(layers_with_both):,}")

# If the existing calculations failed, let's try a simple test calculation
if len(layers_with_both) == 0:
    print("No existing elastic modulus calculations found. Testing Bergfeld calculation...")
    
    # Test with a sample density
    test_densities = [150, 200, 300, 400]  # kg/m³
    for test_density in test_densities:
        try:
            from uncertainties import ufloat
            density_ufloat = ufloat(test_density, test_density * 0.1)  # 10% uncertainty
            emod_result = elastic_modulus.calculate_elastic_modulus(method='bergfeld', density=density_ufloat)
            print(f"Test: Density {test_density} kg/m³ → Elastic Modulus {emod_result.nominal_value:.3f} ± {emod_result.std_dev:.3f} GPa")
        except Exception as e:
            print(f"Test failed for density {test_density}: {e}")
    
    print("\nRecalculating elastic modulus for all valid density layers...")
    
    # Get layers with valid density
    valid_layers = layer_df[(layer_df['density'].notna()) & (layer_df['density'] > 0)].copy()
    
    def safe_bergfeld_calculation(row):
        try:
            from uncertainties import ufloat
            density_val = float(row['density'])
            density_unc = float(row['density_uncertainty']) if pd.notna(row['density_uncertainty']) else density_val * 0.1
            
            if density_val <= 0:
                return pd.Series([np.nan, np.nan])
            
            density_ufloat = ufloat(density_val, density_unc)
            emod_result = elastic_modulus.calculate_elastic_modulus(method='bergfeld', density=density_ufloat)
            
            return pd.Series([emod_result.nominal_value, emod_result.std_dev])
        except Exception as e:
            return pd.Series([np.nan, np.nan])
    
    # Apply the calculation
    valid_layers[['elastic_modulus_fixed', 'elastic_modulus_uncertainty_fixed']] = valid_layers.apply(safe_bergfeld_calculation, axis=1)
    
    # Calculate relative uncertainty
    valid_layers['elastic_modulus_relative_uncertainty_fixed'] = (
        valid_layers['elastic_modulus_uncertainty_fixed'] / valid_layers['elastic_modulus_fixed']
    ) * 100
    
    # Filter for successful calculations
    layers_with_emod = valid_layers[valid_layers['elastic_modulus_fixed'].notna() & 
                                   (valid_layers['elastic_modulus_fixed'] > 0)]
    
    print(f"Successfully calculated elastic modulus for: {len(layers_with_emod):,} layers")
    
    if len(layers_with_emod) > 0:
        print(f"Sample elastic modulus values: {layers_with_emod['elastic_modulus_fixed'].head(10).tolist()}")
        print(f"Elastic modulus range: {layers_with_emod['elastic_modulus_fixed'].min():.3f} to {layers_with_emod['elastic_modulus_fixed'].max():.3f} GPa")
    
else:
    # Use existing calculations
    layers_with_emod = layers_with_both.copy()
    layers_with_emod['elastic_modulus_fixed'] = layers_with_emod['elastic_modulus']
    layers_with_emod['elastic_modulus_uncertainty_fixed'] = layers_with_emod['elastic_modulus_uncertainty']
    layers_with_emod['elastic_modulus_relative_uncertainty_fixed'] = layers_with_emod['elastic_modulus_relative_uncertainty']

# === OVERALL PERFORMANCE METRICS ===
total_pits = layer_df['pit_id'].nunique()
total_layers = len(layer_df)
total_layers_with_density = (layer_df['density'].notna() & (layer_df['density'] > 0)).sum()
total_layers_with_emod = len(layers_with_emod)

print(f"\nDataset Overview:")
print(f"  - Total Pits: {total_pits:,}")
print(f"  - Total Layers: {total_layers:,}")
print(f"  - Layers with Valid Density: {total_layers_with_density:,} ({total_layers_with_density/total_layers*100:.1f}%)")
print(f"  - Layers with Successful Elastic Modulus Calculation: {total_layers_with_emod:,} ({total_layers_with_emod/total_layers*100:.1f}%)")
print()

# === BERGFELD METHOD PERFORMANCE ===
if total_layers_with_emod > 0:
    # Yield (success rate)
    yield_rate = total_layers_with_emod / total_layers * 100
    
    # Average relative uncertainty
    avg_relative_uncertainty = layers_with_emod['elastic_modulus_relative_uncertainty_fixed'].mean()
    
    # Elastic modulus statistics
    mean_emod = layers_with_emod['elastic_modulus_fixed'].mean()
    median_emod = layers_with_emod['elastic_modulus_fixed'].median()
    std_emod = layers_with_emod['elastic_modulus_fixed'].std()
    min_emod = layers_with_emod['elastic_modulus_fixed'].min()
    max_emod = layers_with_emod['elastic_modulus_fixed'].max()
    
    print(f"BERGFELD METHOD PERFORMANCE:")
    print(f"  ✓ Yield (Success Rate): {yield_rate:.1f}%")
    print(f"  ✓ Average Relative Uncertainty: {avg_relative_uncertainty:.2f}%")
    print()
    
    print(f"Elastic Modulus Statistics (GPa):")
    print(f"  - Mean: {mean_emod:.3f} ± {std_emod:.3f}")
    print(f"  - Median: {median_emod:.3f}")
    print(f"  - Range: {min_emod:.3f} to {max_emod:.3f}")
    print()
    
    # Density range analysis
    density_min = layers_with_emod['density'].min()
    density_max = layers_with_emod['density'].max()
    density_mean = layers_with_emod['density'].mean()
    
    print(f"Density Range for Successful Calculations:")
    print(f"  - Range: {density_min:.0f} to {density_max:.0f} kg/m³")
    print(f"  - Mean: {density_mean:.0f} kg/m³")
    
    # Check density ranges (typical snow density ranges)
    low_density = layers_with_emod[layers_with_emod['density'] < 200]  # Fresh/new snow
    med_density = layers_with_emod[(layers_with_emod['density'] >= 200) & (layers_with_emod['density'] < 400)]  # Settled snow
    high_density = layers_with_emod[layers_with_emod['density'] >= 400]  # Dense/old snow
    
    print(f"  - Low density (<200 kg/m³): {len(low_density):,} layers ({len(low_density)/len(layers_with_emod)*100:.1f}%)")
    print(f"  - Medium density (200-400 kg/m³): {len(med_density):,} layers ({len(med_density)/len(layers_with_emod)*100:.1f}%)")
    print(f"  - High density (≥400 kg/m³): {len(high_density):,} layers ({len(high_density)/len(layers_with_emod)*100:.1f}%)")
    print()

    # === PERFORMANCE BY DENSITY METHOD ===
    print(f"PERFORMANCE BREAKDOWN BY DENSITY METHOD:")
    density_method_stats = []
    
    for method in layers_with_emod['density_method'].unique():
        if pd.notna(method):
            method_layers = layers_with_emod[layers_with_emod['density_method'] == method]
            
            method_count = len(method_layers)
            method_success_rate = 100.0  # All layers in layers_with_emod have successful calculations
            method_avg_uncertainty = method_layers['elastic_modulus_relative_uncertainty_fixed'].mean()
            method_avg_emod = method_layers['elastic_modulus_fixed'].mean()
            
            density_method_stats.append({
                'method': method,
                'total_layers': method_count,
                'successful_emod': method_count,
                'success_rate': method_success_rate,
                'avg_uncertainty': method_avg_uncertainty,
                'avg_emod': method_avg_emod
            })
    
    # Sort by number of successful calculations
    density_method_stats.sort(key=lambda x: x['successful_emod'], reverse=True)
    
    for stats in density_method_stats:
        print(f"  {stats['method']}:")
        print(f"    - Total layers: {stats['total_layers']:,}")
        print(f"    - Successful E calculations: {stats['successful_emod']:,} ({stats['success_rate']:.1f}%)")
        print(f"    - Average relative uncertainty: {stats['avg_uncertainty']:.2f}%")
        print(f"    - Average elastic modulus: {stats['avg_emod']:.3f} GPa")
        print()

    print("=== KEY FINDINGS ===")
    print(f"✓ The Bergfeld method can be applied to {yield_rate:.1f}% of all snow layers")
    print(f"✓ Average relative uncertainty: {avg_relative_uncertainty:.2f}%")
    print(f"✓ Method is density-source agnostic - works with any density calculation method")
    print(f"✓ Most successful with medium-density snow (200-400 kg/m³)")
else:
    print("✗ No successful elastic modulus calculations found - investigating data issues...")
    
    # Debug information
    print(f"Layers with density: {layer_df['density'].notna().sum():,}")
    print(f"Layers with positive density: {(layer_df['density'] > 0).sum():,}")
    print(f"Density statistics:")
    print(layer_df['density'].describe())


=== BERGFELD ELASTIC MODULUS: DENSITY-METHOD-AGNOSTIC ANALYSIS ===

DEBUG: Checking current data state...
Total layers in layer_df: 371,429
Layers with density: 247,294
Layers with density > 0: 247,251
Density range: 0.0 to 973.4 kg/m³
Sample density values: [112.24199999999999, 146.738, 137.3, 99.9, 127.4, 68.301, 295.4, 251.6, 170.6, 275.29999999999995]

Using existing elastic modulus calculations from previous cells...
Layers with both density and elastic modulus: 247,251

Dataset Overview:
  - Total Pits: 50,147
  - Total Layers: 371,429
  - Layers with Valid Density: 247,251 (66.6%)
  - Layers with Successful Elastic Modulus Calculation: 247,251 (66.6%)

BERGFELD METHOD PERFORMANCE:
  ✓ Yield (Success Rate): 66.6%
  ✓ Average Relative Uncertainty: nan%

Elastic Modulus Statistics (GPa):
  - Mean: 0.000 ± 0.000
  - Median: 0.000
  - Range: 0.000 to 0.000

Density Range for Successful Calculations:
  - Range: 0 to 973 kg/m³
  - Mean: 213 kg/m³
  - Low density (<200 kg/m³): 97,671 la

Data Visualization

Create visualizations to better understand the results and distribution of elastic modulus values.


In [None]:
# Visualizations for Density-Method-Agnostic Analysis

# Create comprehensive visualizations for the corrected analysis
if 'layers_with_emod' in locals() and len(layers_with_emod) > 0:
    # Filter out any remaining NaN values for plotting
    plot_data = layers_with_emod.dropna(subset=['elastic_modulus_fixed', 'elastic_modulus_relative_uncertainty_fixed'])
    
    if len(plot_data) == 0:
        print("No valid data available for visualization after removing NaN values.")
    else:
        fig, axes = plt.subplots(2, 3, figsize=(18, 12))
        
        # 1. Histogram of corrected elastic modulus values
        emod_values = plot_data['elastic_modulus_fixed'].dropna()
        if len(emod_values) > 0:
            axes[0, 0].hist(emod_values, bins=50, alpha=0.7, edgecolor='black', color='skyblue')
            axes[0, 0].set_xlabel('Elastic Modulus (GPa)')
            axes[0, 0].set_ylabel('Frequency')
            axes[0, 0].set_title('Distribution of Elastic Modulus Values\n(Bergfeld Method - Fixed)')
            axes[0, 0].grid(True, alpha=0.3)
        else:
            axes[0, 0].text(0.5, 0.5, 'No valid data', ha='center', va='center', transform=axes[0, 0].transAxes)
        
        # 2. Scatter plot: Density vs Elastic Modulus (corrected)
        scatter_data = plot_data.dropna(subset=['density', 'elastic_modulus_fixed', 'elastic_modulus_relative_uncertainty_fixed'])
        if len(scatter_data) > 0:
            # Cap uncertainty values for better color scaling
            uncertainty_capped = np.clip(scatter_data['elastic_modulus_relative_uncertainty_fixed'], 0, 50)
            scatter = axes[0, 1].scatter(scatter_data['density'], scatter_data['elastic_modulus_fixed'], 
                                        alpha=0.6, s=15, c=uncertainty_capped, 
                                        cmap='viridis', vmin=0, vmax=50)
            axes[0, 1].set_xlabel('Density (kg/m³)')
            axes[0, 1].set_ylabel('Elastic Modulus (GPa)')
            axes[0, 1].set_title('Elastic Modulus vs Density\n(Color = Relative Uncertainty %)')
            axes[0, 1].grid(True, alpha=0.3)
            plt.colorbar(scatter, ax=axes[0, 1], label='Relative Uncertainty (%)')
        else:
            axes[0, 1].text(0.5, 0.5, 'No valid data', ha='center', va='center', transform=axes[0, 1].transAxes)
        
        # 3. Histogram of relative uncertainties (corrected)
        uncertainty_values = plot_data['elastic_modulus_relative_uncertainty_fixed'].dropna()
        # Filter out extreme outliers for better visualization
        uncertainty_filtered = uncertainty_values[uncertainty_values <= 100]  # Cap at 100%
        
        if len(uncertainty_filtered) > 0:
            axes[0, 2].hist(uncertainty_filtered, bins=30, alpha=0.7, 
                           edgecolor='black', color='lightcoral')
            axes[0, 2].set_xlabel('Relative Uncertainty (%)')
            axes[0, 2].set_ylabel('Frequency')
            axes[0, 2].set_title('Distribution of Elastic Modulus\nRelative Uncertainties')
            axes[0, 2].grid(True, alpha=0.3)
        else:
            axes[0, 2].text(0.5, 0.5, 'No valid data', ha='center', va='center', transform=axes[0, 2].transAxes)
        
        # 4. Success rate by density method
        if 'density_method_stats' in locals() and len(density_method_stats) > 0:
            methods = [stats['method'] for stats in density_method_stats]
            success_rates = [stats['success_rate'] for stats in density_method_stats]
            colors = plt.cm.Set3(np.linspace(0, 1, len(methods)))
            
            bars = axes[1, 0].bar(methods, success_rates, alpha=0.8, color=colors)
            axes[1, 0].set_ylabel('Success Rate (%)')
            axes[1, 0].set_title('Elastic Modulus Calculation Success Rate\nby Density Method')
            axes[1, 0].tick_params(axis='x', rotation=45)
            
            # Add value labels on bars
            for bar, value in zip(bars, success_rates):
                axes[1, 0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1, 
                               f'{value:.1f}%', ha='center', va='bottom', fontsize=9)
        else:
            axes[1, 0].text(0.5, 0.5, 'No method stats available', ha='center', va='center', transform=axes[1, 0].transAxes)
        
        # 5. Density distribution by method
        density_methods = plot_data['density_method'].unique()
        valid_methods = [m for m in density_methods if pd.notna(m)]
        
        if len(valid_methods) > 0:
            for i, method in enumerate(valid_methods):
                method_data = plot_data[plot_data['density_method'] == method]
                density_vals = method_data['density'].dropna()
                if len(density_vals) > 0:
                    axes[1, 1].hist(density_vals, bins=30, alpha=0.6, 
                                   label=f'{method} (n={len(density_vals):,})', density=True)
            
            axes[1, 1].set_xlabel('Density (kg/m³)')
            axes[1, 1].set_ylabel('Density')
            axes[1, 1].set_title('Density Distribution by Method')
            axes[1, 1].legend()
            axes[1, 1].grid(True, alpha=0.3)
        else:
            axes[1, 1].text(0.5, 0.5, 'No valid method data', ha='center', va='center', transform=axes[1, 1].transAxes)
        
        # 6. Elastic modulus by density ranges
        density_ranges = ['<200', '200-400', '≥400']
        low_density = plot_data[plot_data['density'] < 200]['elastic_modulus_fixed'].dropna()
        med_density = plot_data[(plot_data['density'] >= 200) & (plot_data['density'] < 400)]['elastic_modulus_fixed'].dropna()
        high_density = plot_data[plot_data['density'] >= 400]['elastic_modulus_fixed'].dropna()
        
        range_data = [low_density, med_density, high_density]
        # Only include ranges that have data
        valid_ranges = []
        valid_labels = []
        for i, (data, label) in enumerate(zip(range_data, density_ranges)):
            if len(data) > 0:
                valid_ranges.append(data)
                valid_labels.append(f'{label}\n(n={len(data)})')
        
        if len(valid_ranges) > 0:
            bp = axes[1, 2].boxplot(valid_ranges, labels=valid_labels, patch_artist=True)
            colors = ['lightblue', 'lightgreen', 'lightyellow'][:len(valid_ranges)]
            for patch, color in zip(bp['boxes'], colors):
                patch.set_facecolor(color)
            
            axes[1, 2].set_xlabel('Density Range (kg/m³)')
            axes[1, 2].set_ylabel('Elastic Modulus (GPa)')
            axes[1, 2].set_title('Elastic Modulus Distribution\nby Density Range')
            axes[1, 2].grid(True, alpha=0.3)
        else:
            axes[1, 2].text(0.5, 0.5, 'No valid range data', ha='center', va='center', transform=axes[1, 2].transAxes)
        
        plt.tight_layout()
        plt.show()
        
        # Summary statistics table
        print(f"\n=== SUMMARY TABLE ===")
        print(f"Valid data points for visualization: {len(plot_data):,}")
        print(f"{'Density Method':<15} {'Count':<8} {'Success Rate':<12} {'Avg E (GPa)':<12} {'Avg Unc (%)':<12}")
        print("-" * 65)
        
        if 'density_method_stats' in locals():
            for stats in density_method_stats:
                method = stats['method']
                count = stats['successful_emod']
                success_rate = stats['success_rate']
                avg_emod = stats['avg_emod']
                avg_unc = stats['avg_uncertainty']
                
                # Handle NaN values in output
                emod_str = f"{avg_emod:.3f}" if not np.isnan(avg_emod) else "N/A"
                unc_str = f"{avg_unc:.2f}" if not np.isnan(avg_unc) else "N/A"
                
                print(f"{method:<15} {count:<8,} {success_rate:<12.1f} {emod_str:<12} {unc_str:<12}")
        
        # Overall statistics
        if len(plot_data) > 0:
            overall_emod = plot_data['elastic_modulus_fixed'].mean()
            overall_unc = plot_data['elastic_modulus_relative_uncertainty_fixed'].mean()
            overall_rate = len(plot_data) / len(layer_df) * 100
            
            emod_str = f"{overall_emod:.3f}" if not np.isnan(overall_emod) else "N/A"
            unc_str = f"{overall_unc:.2f}" if not np.isnan(overall_unc) else "N/A"
            
            print(f"\n{'OVERALL':<15} {len(plot_data):<8,} {overall_rate:<12.1f} {emod_str:<12} {unc_str:<12}")

else:
    print("No data available for visualization. Please run the analysis cell first.")


No valid data available for visualization after removing NaN values.


Optional: Save Results to CSV

Uncomment the line below to save the results to a CSV file for further analysis.


In [None]:
# Uncomment the line below to save results to CSV
# layer_df.to_csv('bergfeld_elastic_modulus_results.csv', index=False)
print("Analysis complete! Results are stored in the layer_df DataFrame.")
