In [21]:
# Import Libraries
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from uncertainties import ufloat
import warnings


# Add the src directory to the path to import snowpyt_mechparams
sys.path.append('../src')
from snowpilot_utils import parse_sample_pits


In [22]:
# Parse all snowpit files from the data folder.
# The folder is named once so the location being read is easy to spot.
data_folder = 'data'
all_pits = parse_sample_pits(data_folder)


Successfully parsed 50278 files
Failed to parse 0 files


In [28]:
# Collect Data
# Flatten the parsed pits into two record lists: one dict per pit and one
# dict per snow layer. Both are turned into DataFrames at the end of the cell.
# `density_info` and `stability_test_info` are created empty here and are not
# populated in this cell.
pit_info, layer_info = [], []
density_info, stability_test_info = [], []

for pit in all_pits:
    # Short aliases for the nested objects read several times below
    core = pit.core_info
    location = core.location
    tests = pit.stability_tests

    pit_info.append({
        # User info
        'pit_id': core.pit_id,
        'user_id': core.user.user_id,
        # Location
        'country': location.country,
        'slope_angle': location.slope_angle,
        'pit_near_avalanche': location.pit_near_avalanche,
        'pit_near_avalanche_location': location.pit_near_avalanche_location,
        # Stability tests: a missing/empty result collection counts as zero
        'num_ct': len(tests.CT) if tests.CT else 0,
        'num_ect': len(tests.ECT) if tests.ECT else 0,
        'num_pst': len(tests.PST) if tests.PST else 0
    })

    for layer in pit.snow_profile.layers:
        # Grain form and average grain size are both read from the layer's
        # primary grain-form object; when that object is absent (or falsy)
        # both values are recorded as None.
        primary_grain = getattr(layer, 'grain_form_primary', None)
        if primary_grain:
            grain_form = getattr(primary_grain, 'grain_form', None)
            grain_size = getattr(primary_grain, 'grain_size_avg', None)
        else:
            grain_form = None
            grain_size = None

        layer_info.append({
            'pit_id': core.pit_id,
            'hardness': layer.hardness,
            'primary_grain_form': grain_form,
            'primary_grain_size': grain_size
        })

# Build each DataFrame once from its full list of records
pit_df = pd.DataFrame(pit_info)
layer_df = pd.DataFrame(layer_info)


In [29]:
## Pit Properties
# Print headline statistics for the pit-level table `pit_df`.


def _percent_of(count, total):
    """Return `count` expressed as a percentage of `total`."""
    return (count / total) * 100


# Total pits
total_pits = len(pit_df)
print(f"Total pits: {total_pits}")

# Unique users
unique_users = pit_df['user_id'].nunique()
print(f"Unique users: {unique_users}")

# Unique Countries
unique_countries = pit_df['country'].nunique()
print(f"Unique countries: {unique_countries}")

# Stability test results, summed over the per-pit counts
total_ct = pit_df['num_ct'].sum()
print(f"Total CT results: {total_ct}")

total_ect = pit_df['num_ect'].sum()
print(f"Total ECT results: {total_ect}")

total_pst = pit_df['num_pst'].sum()
print(f"Total PST results: {total_pst}")

# Count and share of pits flagged as dug near an avalanche
near_avalanche_flags = pit_df['pit_near_avalanche']
pits_near_avalanche = near_avalanche_flags.sum()
pct_near_avalanche = _percent_of(pits_near_avalanche, total_pits)
print(f"Pits near avalanche: {pits_near_avalanche} ({pct_near_avalanche:.2f}%)")

# Count and share of pits whose recorded avalanche location is 'crown'
avalanche_location = pit_df['pit_near_avalanche_location']
pits_crown = (avalanche_location == 'crown').sum()
pct_crown = _percent_of(pits_crown, total_pits)
print(f"Pits at avalanche crown: {pits_crown} ({pct_crown:.2f}%)")

# Count and share of pits whose recorded avalanche location is 'flank'
pits_flank = (avalanche_location == 'flank').sum()
pct_flank = _percent_of(pits_flank, total_pits)
print(f"Pits at avalanche flank: {pits_flank} ({pct_flank:.2f}%)")

# Count and share of pits that have a (non-missing) slope angle
has_slope_angle = pit_df['slope_angle'].notna()
pits_with_slope = has_slope_angle.sum()
pct_with_slope = _percent_of(pits_with_slope, total_pits)
print(f"Pits with slope angle measurement: {pits_with_slope} ({pct_with_slope:.2f}%)")


Total pits: 50278
Unique users: 5381
Unique countries: 35
Total CT results: 51599
Total ECT results: 47684
Total PST results: 6213
Pits near avalanche: 1568 (3.12%)
Pits at avalanche crown: 795 (1.58%)
Pits at avalanche flank: 399 (0.79%)
Pits with slope angle measurement: 45515 (90.53%)


In [None]:
## Layer Info
# Report how many layers in `layer_df` carry each recorded property, as a
# raw count and as a percentage of all layers.

# Total layers
total_layers = len(layer_df)
print(f"Total layers: {total_layers}")

# Boolean masks marking the layers where each property is present (non-missing)
has_hardness = layer_df['hardness'].notna()
has_grain_form = layer_df['primary_grain_form'].notna()
has_grain_size = layer_df['primary_grain_size'].notna()

# Layers with hand hardness
layers_with_hardness = has_hardness.sum()
pct_with_hardness = (layers_with_hardness / total_layers) * 100
print(f"Layers with hand hardness: {layers_with_hardness} ({pct_with_hardness:.2f}%)")

# Layers with grain form
layers_with_grain_form = has_grain_form.sum()
pct_with_grain_form = (layers_with_grain_form / total_layers) * 100
print(f"Layers with primary grain form: {layers_with_grain_form} ({pct_with_grain_form:.2f}%)")

# Layers with grain size
layers_with_grain_size = has_grain_size.sum()
pct_with_grain_size = (layers_with_grain_size / total_layers) * 100
print(f"Layers with primary grain size: {layers_with_grain_size} ({pct_with_grain_size:.2f}%)")


Total layers: 371429
Layers with hand hardness: 336888 (90.70%)
Layers with primary grain form: 303726 (81.77%)
Layers with primary grain size: 176044 (47.40%)
