Summary Stats
* Number of pits
* Number of layers
* Information about the layers
* Types of tests
* Information about the tests
* How many have density?


In [1]:
# Import libraries
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
from caaml_parser import caaml_parser

pit3
SnowPit: 
 caamlVersion: {http://caaml.org/Schemas/SnowProfileIACS/v6.0.3} 
 pitID: 72805 
 Date: 2025-02-23 
 User:
    OperationID: None
    OperationName: None
    Professional: None
    ContactPersonID: None
    ContactPersonName: None
 Location:
    Latitude: 45.8174
    Longitude: -110.8966
    Elevation: ['2134', 'm']
    Aspect: ['SE', None]
    SlopeAngle: ['32', 'deg']
    Country: US
    Region: MT
 Snow Profile: 
    measurementDirection: top down
    profileDepth: [70.0, 'cm']
    weatherConditions: 
	 skyCond: OVC
	 precipTI: -SN
	 airTempPres: [5.0, 'degC']
	 windSpeed: C
	 windDir: SE
    hS: [168.0, 'cm']
    surfCond: 
	 windLoading: yes
	 penetrationFoot: [20.0, 'cm']
	 penetrationSki: [5.0, 'cm']
    Layers:
    Layer 1: 
	 depthTop: ['0', 'cm']
	 thickness: ['20', 'cm']
	 hardness: F
	 hardnessTop: None
	 hardnessBottom: None
	 grainFormPrimary: 
		 grainForm: DF
		 grainSizeAvg: [8.0, 'mm']
		 grainSizeMax: [3.0, 'mm']
		 basicGrainClass_code: DF
		 basicGrai

In [2]:
# Load and parse xml files

folder_path = "snowpits/wumph_pits" # wumph data set pits exported on 2/19/25

# Create a list of all CAAML files in the folder.
# os.listdir() returns names in arbitrary, platform-dependent order, so the
# names are sorted to keep the file order (and anything indexed or tabulated
# from this list) reproducible between runs and machines.
caaml_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.xml'))

In [9]:

# Parse all pits and summarize available info


def _count_or_zero(measurements):
    """Return len(measurements), or 0 when the profile is None.

    Uses an identity check (`is not None`) rather than `!= None` so the test
    stays a plain boolean even for objects that overload comparison operators.
    """
    return len(measurements) if measurements is not None else 0


def _summarize_pit(pit):
    """Build one summary row (a dict) from a parsed pit object.

    Reads the layer list, the temperature/density profiles, the stability
    test lists and the wumph data attributes of `pit`; the dict keys become
    the DataFrame column names.
    """
    profile = pit.snowProfile
    tests = pit.stabilityTests
    wumph = pit.wumphData
    return {
        'PitID': pit.pitID,
        'numLayers': len(profile.layers),
        'tempMeasurements': _count_or_zero(profile.tempProfile),
        'densityMeasurements': _count_or_zero(profile.densityProfile),
        'ECT_qty': len(tests.ECT),
        'CT_qty': len(tests.CT),
        'PST_qty': len(tests.PST),
        'RBT_qty': len(tests.RBT),
        'whumpfCracking': wumph.wumphCracking,
        'whumpfNoCracking': wumph.wumphNoCracking,
        'crackingNoWhumpf': wumph.crackingNoWhumpf,
        'whumpfNearPit': wumph.whumpfNearPit,
        'whumpfDepthWeakLayer': wumph.whumpfDepthWeakLayer,
        'whumpfTriggeredRemoteAva': wumph.whumpfTriggeredRemoteAva,
        'whumpfSize': wumph.whumpfSize
    }


pitList = []
pitDict = {}

for file in caaml_files:
    # os.path.join avoids doubled or missing separators in the path
    file_path = os.path.join(folder_path, file)
    pit = caaml_parser(file_path)

    pitDict = _summarize_pit(pit)
    pitList.append(pitDict)

# One row per pit; built once from the list of dicts
df = pd.DataFrame(pitList)
print(df)


print(df['PitID'].count())
   

     PitID  numLayers  tempMeasurements  densityMeasurements  ECT_qty  CT_qty  \
0    25670          6                 0                    0        1       0   
1    25671          4                 0                    0        1       0   
2    25672          5                 0                    0        1       0   
3    25680          2                 0                    0        1       0   
4    25683          4                 0                    0        0       0   
..     ...        ...               ...                  ...      ...     ...   
301  69403          4                 0                    0        1       0   
302  71246          8                 0                    0        1       0   
303  71923          5                 0                    0        4       0   
304  72292          5                 0                    0        1       1   
305  72324          5                 0                    0        3       0   

     PST_qty  RBT_qty whump

In [11]:
# Summary statistics over the per-pit table `df`.
# Each entry pairs the printed label with the column it reads and the
# reduction applied to that column. Values are computed one at a time,
# right before they are printed.
_n_rows = lambda col: col.count()
_total = lambda col: col.sum()
_n_nonzero = lambda col: (col != 0).sum()
_n_true = lambda col: (col == 'true').sum()

_summary_rows = [
    ("Count of Pits", 'PitID', _n_rows),
    ("Sum of Layers", 'numLayers', _total),
    ("Count of Pits with Density Info", 'densityMeasurements', _n_nonzero),
    ("Num of Density Measurements", 'densityMeasurements', _total),
    ("Count of Pits with Temp Info", 'tempMeasurements', _n_nonzero),
    ("Num of Temp Measurements", 'tempMeasurements', _total),
    ("Num of ECT Results", 'ECT_qty', _total),
    ("Number of CT Results", 'CT_qty', _total),
    ("Number of PST Results", 'PST_qty', _total),
    ("Number of RBT Results", 'RBT_qty', _total),
    ("Count of wumpfCracking = true", 'whumpfCracking', _n_true),
]

for _label, _column, _reduce in _summary_rows:
    print(_label)
    print(_reduce(df[_column]))

Count of Pits
306
Sum of Layers
1903
Count of Pits with Density Info
7
Num of Density Measurements
31
Count of Pits with Temp Info
27
Num of Temp Measurements
218
Sum of ECT Results
418
Number of CT Results
78
Number of PST Results
81
Number of RBT Results
1
Count of wumpfCracking = true
179


In [3]:
# Print example pit
# Parse the first file listed in caaml_files and print the resulting object.
example_path = f"{folder_path}/{caaml_files[0]}"
pit1 = caaml_parser(example_path)
print(pit1)

SnowPit: 
 caamlVersion: {http://caaml.org/Schemas/SnowProfileIACS/v6.0.3} 
 pitID: 25670 
 Date: 2020-11-12 
 User:
    OperationID: None
    OperationName: None
    Professional: None
    ContactPersonID: None
    ContactPersonName: None
 Location:
    Latitude: None
    Longitude: None
    Elevation: ['3642', 'm']
    Aspect: ['SW', None]
    SlopeAngle: ['25', 'deg']
    Country: US
    Region: CO
 Snow Profile: 
    measurementDirection: top down
    profileDepth: [79.0, 'cm']
    weatherConditions: 
	 skyCond: SCT
	 precipTI: Nil
	 airTempPres: [-9.5, 'degC']
	 windSpeed: M
	 windDir: SW
    hS: [79.0, 'cm']
    surfCond: 
	 windLoading: previous
	 penetrationFoot: None
	 penetrationSki: [20.0, 'cm']
    Layers:
    Layer 1: 
	 depthTop: ['0', 'cm']
	 thickness: ['19', 'cm']
	 hardness: F+
	 hardnessTop: None
	 hardnessBottom: None
	 grainFormPrimary: 
		 grainForm: PP
		 grainSizeAvg: [1.5, 'mm']
		 grainSizeMax: None
		 basicGrainClass_code: PP
		 basicGrainClass_name: Precipit