# Snow Density Calculations

This notebook demonstrates how to estimate snow layer density from common snow pit measurements (hand hardness and grain form). It uses snowpylot to parse CAAML snow pit files and the local snowpyt_mechparams package to calculate density.


In [4]:
# Import Libraries
import os
import xml.etree.ElementTree as ET
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch
import glob

# Import snowpylot for CAAML parsing
from snowpylot import caaml_parser

# Add the src directory to the path to import snowpyt_mechparams
sys.path.append('../src')
from snowpyt_mechparams import density


Parse Snowpit Files

In [None]:
# Parse every .xml file found in `folder_path` with snowpylot's caaml_parser.
# Successfully parsed pits are collected in `all_pits`; files that raise are
# recorded in `failed_files` as (file name, error message) and reported, so a
# single bad file does not abort the whole run.
all_pits = []
failed_files = []

folder_path = 'data'

# os.listdir returns entries in arbitrary order; sort so that the order of
# all_pits (and of everything built from it) is the same on every run.
xml_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.xml'))
print(f"Found {len(xml_files)} XML files to process")

for file in xml_files:
    file_path = os.path.join(folder_path, file)
    try:
        pit = caaml_parser(file_path)
    except Exception as e:
        # Deliberately broad: any parsing problem is logged and skipped.
        failed_files.append((file, str(e)))
        print(f"Warning: Failed to parse {file}: {e}")
    else:
        all_pits.append(pit)

print(f"Successfully parsed {len(all_pits)} files")
print(f"Failed to parse {len(failed_files)} files")


Found 46063 XML files to process
Successfully parsed 46063 files
Failed to parse 0 files


Collect the relevant data for each pit and convert each layer's primary grain form to the code needed for the Geldsetzer table lookup

In [13]:
def convert_grain_form(grain_form_obj):
    """
    Map a grain form object to the grain form code used for the Geldsetzer table.

    The sub-grain class code takes precedence when it is one of "PPgp",
    "RGmx" or "FCmx"; otherwise the basic grain class code is used.

    Parameters:
    grain_form_obj: Grain form object from CAAML data (may be None)

    Returns:
    str: Grain form code for Geldsetzer table lookup, or None if not mappable
    """
    # Nothing to map without a grain form object
    if grain_form_obj is None:
        return None

    # Prefer the sub-grain class code, but only for the targeted codes
    sub_code = getattr(grain_form_obj, 'sub_grain_class_code', None)
    if sub_code and sub_code in ("PPgp", "RGmx", "FCmx"):
        return sub_code

    # Fall back to the basic grain class code; missing or empty maps to None
    basic_code = getattr(grain_form_obj, 'basic_grain_class_code', None)
    return basic_code if basic_code else None


# Build one summary record per snowpit and one record per layer
pit_info = []
layer_info = []

for pit in all_pits:
    current_pit_id = pit.core_info.pit_id
    pit_layers = pit.snow_profile.layers

    pit_info.append({
        'pit_id': current_pit_id,
        'layer_count': len(pit_layers),
    })

    for layer in pit_layers:
        primary_form = layer.grain_form_primary
        layer_info.append({
            'pit_id': current_pit_id,
            'depth_top': layer.depth_top,
            'thickness': layer.thickness,
            'hand_hardness': layer.hardness,
            'wetness': layer.wetness,
            'layer_of_concern': layer.layer_of_concern,
            'grain_form_primary': primary_form,
            'grain_form_secondary': layer.grain_form_secondary,
            # Geldsetzer code is only derived when a primary grain form exists
            'geldsetzer_grain_form': (
                convert_grain_form(primary_form) if primary_form else None
            ),
        })

# Turn the collected records into dataframes (one row per pit / per layer)
pit_df = pd.DataFrame(pit_info)
layer_df = pd.DataFrame(layer_info)


In [14]:
# Dataset Summary: overall counts, then the subset of layers usable for density
n_pits = len(all_pits)
n_layers = len(layer_df)

print("=== Dataset Summary ===")
print(f"Total number of snowpits: {n_pits}")
print(f"Total number of layers: {n_layers}")

# Keep only layers that have both a hand hardness and a primary grain form
has_hardness = layer_df['hand_hardness'].notna()
has_grain_form = layer_df['grain_form_primary'].notna()
layers_with_data = layer_df[has_hardness & has_grain_form]

n_complete = len(layers_with_data)
print(f"Layers with both hand hardness and grain form data: {n_complete}")
print(f"Percentage of layers with complete data: {n_complete / n_layers * 100:.1f}%")


=== Dataset Summary ===
Total number of snowpits: 46063
Total number of layers: 340803
Layers with both hand hardness and grain form data: 257379
Percentage of layers with complete data: 75.5%


Implement Geldsetzer Method

In [None]:
# Calculate density using Geldsetzer method for each layer
def calculate_layer_density(row):
    """
    Calculate the density of one layer with the Geldsetzer method.

    Parameters:
    row: Mapping-like row providing 'geldsetzer_grain_form' and 'hand_hardness'

    Returns:
    The value returned by density.calculate_density, or NaN when the layer has
    no mapped grain form or the calculation raises ValueError
    """
    try:
        grain_code = row['geldsetzer_grain_form']
        if pd.isna(grain_code):
            # No mapped grain form, so there is nothing to look up
            result = np.nan
        else:
            result = density.calculate_density(
                method='geldsetzer',
                hand_hardness=row['hand_hardness'],
                grain_form=grain_code,
            )
    except ValueError:
        # Density cannot be calculated for this layer
        result = np.nan
    return result

# Attach the per-layer density as a new column (assign works on a copy, so the
# filtered view of layer_df is never modified and no SettingWithCopyWarning occurs)
layers_with_data = layers_with_data.assign(
    density=lambda frame: frame.apply(calculate_layer_density, axis=1)
)

# Count layers with and without a calculated density once, then report
has_density = layers_with_data['density'].notna()
n_calculated = has_density.sum()
n_not_calculated = (~has_density).sum()

print("=== Density Calculation Results ===")
print(f"Layers with data and calculated density: {n_calculated}")
print(f"Layers with data where density could not be calculated: {n_not_calculated}")
print(f"Success rate of layers with data: {n_calculated / len(layers_with_data) * 100:.1f}%")
print(f"Success rate of all layers: {n_calculated / len(layer_df) * 100:.1f}%")


=== Density Calculation Results ===
Layers with calculated density: 183950
Layers with data where density could not be calculated: 73429
Success rate of layers with data: 71.5%
Success rate of all layers: 54.0%
