# Snow Density Calculations

This notebook demonstrates methods for calculating snow density from common snow pit measurements (hand hardness and grain form). It uses the local `snowpyt_mechparams` package for the density calculations and `snowpylot` for parsing CAAML snow pit files.


In [10]:
# Import Libraries
import os
import xml.etree.ElementTree as ET
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch
import glob

# Import snowpylot for CAAML parsing
from snowpylot import caaml_parser

# Add the src directory to the path to import snowpyt_mechparams
sys.path.append('../src')
from snowpyt_mechparams import density


## Parse Snowpit Files

In [11]:
all_pits = []

folder_path = 'data'

# os.listdir() yields names in arbitrary order and also returns
# sub-directories and hidden entries (e.g. '.ipynb_checkpoints', '.DS_Store').
# Sort the names so the resulting rows are reproducible between runs, and only
# hand regular, non-hidden files to the CAAML parser.
for file in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, file)  # portable path construction
    if file.startswith('.') or not os.path.isfile(file_path):
        continue
    all_pits.append(caaml_parser(file_path))


In [12]:
# Gather one summary record per snowpit and one record per layer

# Per-pit records: identifier plus the number of layers in its profile
pit_info = [
    {
        'pit_id': pit.core_info.pit_id,
        'layer_count': len(pit.snow_profile.layers),
    }
    for pit in all_pits
]

# Per-layer records, each tagged with the id of the pit it belongs to
layer_info = [
    {
        'pit_id': pit.core_info.pit_id,
        'depth_top': layer.depth_top,
        'thickness': layer.thickness,
        'hand_hardness': layer.hardness,
        'wetness': layer.wetness,
        'layer_of_concern': layer.layer_of_concern,
        'grain_form_primary': layer.grain_form_primary,
        'grain_form_secondary': layer.grain_form_secondary,
    }
    for pit in all_pits
    for layer in pit.snow_profile.layers
]

# Build the dataframes in one shot from the collected records
pit_df = pd.DataFrame(pit_info)
layer_df = pd.DataFrame(layer_info)


In [13]:
# Dataset Summary
print("=== Dataset Summary ===")
print(f"Total number of snowpits: {len(all_pits)}")
total_layers = len(layer_df)
print(f"Total number of layers: {total_layers}")

# Keep only the layers for which both hand hardness and primary grain form
# were recorded
has_hardness = layer_df['hand_hardness'].notna()
has_grain_form = layer_df['grain_form_primary'].notna()
layers_with_data = layer_df.loc[has_hardness & has_grain_form]
complete_layers = len(layers_with_data)
print(f"Layers with both hand hardness and grain form data: {complete_layers}")
print(f"Percentage of layers with complete data: {complete_layers / total_layers * 100:.1f}%")


=== Dataset Summary ===
Total number of snowpits: 48044
Total number of layers: 354680
Layers with both hand hardness and grain form data: 266199
Percentage of layers with complete data: 75.1%


In [None]:
# Inspect which grain form and hand hardness values actually occur in the data
TOP_N = 20  # number of most frequent values to show for each column

print("=== Grain Form Analysis ===")
print("Unique grain form values in data:")
unique_grain_forms = layers_with_data['grain_form_primary'].value_counts()
grain_form_top = unique_grain_forms.head(TOP_N)
print(grain_form_top)

print("\nUnique hand hardness values in data:")
unique_hardness = layers_with_data['hand_hardness'].value_counts()
hardness_top = unique_hardness.head(TOP_N)
print(hardness_top)


## Implement Geldsetzer Method

In [None]:
def convert_grain_form(grain_form_str):
    """
    Convert a grain form value to the code needed for the Geldsetzer table.

    Matching is done in two passes:

    1. A case-insensitive exact match (after trimming whitespace) against the
       codes the table supports, so sub-class codes such as 'PPgp', 'RGmx'
       and 'FCmx' keep their sub-class whatever the input casing.
    2. A keyword/substring match on the upper-cased text, which collapses any
       other variant (e.g. 'FCxr', 'Rounded grains') onto its basic class.

    Parameters:
    grain_form_str: Grain form from CAAML data. Non-string values are
        converted with str() before matching; None is treated as unmappable.

    Returns:
    str: Grain form code for Geldsetzer table lookup, or None if not mappable
    """
    if grain_form_str is None:
        return None

    # Codes supported by the lookup table, in their canonical spelling
    supported_codes = ('PP', 'PPgp', 'DF', 'RG', 'RGmx', 'FC', 'FCmx', 'DH')
    canonical_by_upper = {code.upper(): code for code in supported_codes}

    # Normalise to text first so unhashable inputs cannot break the dict lookup
    text = grain_form_str if isinstance(grain_form_str, str) else str(grain_form_str)
    grain_form_upper = text.strip().upper()

    # Pass 1: exact match, ignoring case and surrounding whitespace
    if grain_form_upper in canonical_by_upper:
        return canonical_by_upper[grain_form_upper]

    # Pass 2: keyword match onto the basic class (order matters: first hit wins)
    keyword_rules = (
        ('PP', ('PP', 'PRECIP')),
        ('DF', ('DF', 'DECOMP')),
        ('RG', ('RG', 'ROUND')),
        ('FC', ('FC', 'FACET')),
        ('DH', ('DH', 'DEPTH')),
    )
    for code, keywords in keyword_rules:
        if any(keyword in grain_form_upper for keyword in keywords):
            return code

    # A bare "hoar" is taken to mean depth hoar, but surface hoar is a
    # different grain class that the table does not cover, so it must not be
    # folded into 'DH'.
    if 'HOAR' in grain_form_upper and 'SURFACE' not in grain_form_upper:
        return 'DH'

    return None  # Cannot map to Geldsetzer table

# Map every primary grain form onto its Geldsetzer code; assign() returns a
# new frame, so the filtered frame from the earlier cell is not modified
geldsetzer_codes = layers_with_data['grain_form_primary'].apply(convert_grain_form)
layers_with_data = layers_with_data.assign(geldsetzer_grain_form=geldsetzer_codes)

# Report how many layers could be mapped
print("=== Grain Form Conversion Results ===")
conversion_success = layers_with_data['geldsetzer_grain_form'].notna().sum()
conversion_failed = len(layers_with_data) - conversion_success
conversion_rate = conversion_success / len(layers_with_data) * 100
print(f"Layers with mappable grain forms: {conversion_success}")
print(f"Layers with unmappable grain forms: {conversion_failed}")
print(f"Conversion success rate: {conversion_rate:.1f}%")

print("\nMapped grain form distribution:")
mapped_distribution = layers_with_data['geldsetzer_grain_form'].value_counts().dropna()
print(mapped_distribution)

# Calculate density using Geldsetzer method for each layer


TypeError: tuple indices must be integers or slices, not str

In [None]:
# Calculate density using Geldsetzer method for each layer
def calculate_layer_density(row):
    """
    Estimate one layer's density with the Geldsetzer method.

    Parameters:
    row: Mapping-like record (e.g. a DataFrame row) providing the
        'geldsetzer_grain_form' and 'hand_hardness' entries.

    Returns:
    The value returned by density.calculate_density, or NaN when the mapped
    grain form is missing or the calculation raises ValueError.
    """
    try:
        grain_code = row['geldsetzer_grain_form']
        # Only calculate when a mapped grain form is available
        if not pd.isna(grain_code):
            return density.calculate_density(
                method='geldsetzer',
                hand_hardness=row['hand_hardness'],
                grain_form=grain_code,
            )
    except ValueError:
        # Best effort: a ValueError is treated as "density cannot be
        # calculated for this layer", so fall through to NaN
        pass
    return np.nan

# Compute a density per layer and attach it as a new column; assign() returns
# a new frame, which also avoids SettingWithCopyWarning
layer_densities = layers_with_data.apply(calculate_layer_density, axis=1)
layers_with_data = layers_with_data.assign(density=layer_densities)

# Summarise how many layers received a density value
density_ok = layers_with_data['density'].notna().sum()
density_missing = layers_with_data['density'].isna().sum()
print("=== Density Calculation Results ===")
print(f"Layers with calculated density: {density_ok}")
print(f"Layers where density could not be calculated: {density_missing}")
print(f"Success rate: {density_ok / len(layers_with_data) * 100:.1f}%")


=== Density Calculation Results ===
Layers with calculated density: 0
Layers where density could not be calculated: 266199
Success rate: 0.0%
