Summary Stats
* Number of pits
* Number of layers
* Information about the layers
* Types of tests
* Information about the tests
* How many layers have density?


In [4]:
# Import libraries
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
from caaml_parser import caaml_parser

In [6]:
# Locate the CAAML xml files to analyze

folder_path = "snowpits/wumph_pits" # wumph data set pits exported on 2/19/25

# Create a list of all CAAML files in the folder.
# os.listdir returns names in arbitrary order, so the list is sorted to make
# caaml_files[0] and any iteration over the list reproducible between runs.
caaml_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.xml'))

In [7]:
# Parse the first listed file and print it as a sample of the parser's output
example_path = f"{folder_path}/{caaml_files[0]}"
pit1 = caaml_parser(example_path)
print(pit1)

SnowPit: 
 caamlVersion: {http://caaml.org/Schemas/SnowProfileIACS/v6.0.3} 
 pitID: 25670 
 Date: 2020-11-12 
 User:
    OperationID: None
    OperationName: None
    Professional: None
    ContactPersonID: None
    ContactPersonName: None
 Location:
    Latitude: None
    Longitude: None
    Elevation: ['3642', 'm']
    Aspect: ['SW', None]
    SlopeAngle: ['25', 'deg']
    Country: US
    Region: CO
 Snow Profile: 
    measurementDirection: top down
    profileDepth: [79.0, 'cm']
    hS: [79.0, 'cm']
    surfCond: 
	 windLoading: previous
	 penetrationFoot: None
	 penetrationSki: [20.0, 'cm']
    Layers:
    Layer 1: 
	 depthTop: ['0', 'cm']
	 thickness: ['19', 'cm']
	 hardness: F+
	 hardnessTop: None
	 hardnessBottom: None
	 grainFormPrimary: 
		 grainForm: PP
		 grainSizeAvg: [1.5, 'mm']
		 grainSizeMax: None
		 basicGrainClass_code: PP
		 basicGrainClass_name: Precipitation particles
		 subGrainClass_code: None
		 subGrainClass_name: None
	 grainFormSecondary: 
		 grainForm: DF
		

In [11]:

# Parse all pits and summarize available info
#
# Builds one summary row (a dict) per CAAML file and collects the rows in
# pitList, then turns the list into the DataFrame df (one row per pit).

pitList = []
pitDict = {}  # placeholder; rebound to each pit's summary row inside the loop

for file in caaml_files:
    file_path = os.path.join(folder_path, file)
    pit = caaml_parser(file_path)

    pitDict = {
        'PitID': pit.pitID,
        'numLayers': len(pit.snowProfile.layers),
        # Identity test: "is not None" is the idiomatic None check and, unlike
        # "!= None", cannot be affected by a custom __eq__ on the density value.
        'layers_w_Density': sum(1 for layer in pit.snowProfile.layers if layer.density is not None),
        # Number of entries recorded under each stability-test attribute
        'ECT_qty': len(pit.stabilityTests.ECT),
        'CT_qty': len(pit.stabilityTests.CT),
        'PST_qty': len(pit.stabilityTests.PST),
        'RBT_qty': len(pit.stabilityTests.RBT),
        # Whumpf fields are copied as-is from pit.wumphData; note that the
        # attribute names mix the spellings "wumph" and "whumpf".
        'whumpfCracking': pit.wumphData.wumphCracking,
        'whumpfNoCracking': pit.wumphData.wumphNoCracking,
        'crackingNoWhumpf': pit.wumphData.crackingNoWhumpf,
        'whumpfNearPit': pit.wumphData.whumpfNearPit,
        'whumpfDepthWeakLayer': pit.wumphData.whumpfDepthWeakLayer,
        'whumpfTriggeredRemoteAva': pit.wumphData.whumpfTriggeredRemoteAva,
        'whumpfSize': pit.wumphData.whumpfSize
    }
    pitList.append(pitDict)

# Build the frame once from the list of row dicts
df = pd.DataFrame(pitList)
print(df)


# Number of pits that have a PitID value
print(df['PitID'].count())
   

     PitID  numLayers  layers_w_Density  ECT_qty  CT_qty  PST_qty  RBT_qty  \
0    25670          6                 0        1       0        0        0   
1    25671          4                 0        1       0        0        0   
2    25672          5                 0        1       0        0        0   
3    25680          2                 0        1       0        0        0   
4    25683          4                 0        0       0        0        0   
..     ...        ...               ...      ...     ...      ...      ...   
301  69403          4                 0        1       0        0        0   
302  71246          8                 0        1       0        0        0   
303  71923          5                 0        4       0        0        0   
304  72292          5                 0        1       1        0        0   
305  72324          5                 0        3       0        0        0   

    whumpfCracking whumpfNoCracking crackingNoWhumpf whumpfNear

In [10]:
# Report dataset-wide totals from the per-pit summary table df.

# Pit count: number of non-null PitID entries
print("Count of Pits")
print(df['PitID'].count())

# Column sums, printed as a heading line followed by the total
for heading, column in [
    ("Sum of Layers", 'numLayers'),
    ("Layers w/Density info", 'layers_w_Density'),
    ("Sum of ECT Results", 'ECT_qty'),
    ("Number of CT Results", 'CT_qty'),
    ("Number of PST Results", 'PST_qty'),
    ("Number of RBT Results", 'RBT_qty'),
]:
    print(heading)
    print(df[column].sum())

# Pits whose whumpfCracking value equals the string 'true'
print("Count of wumpfCracking = true")
print(df['whumpfCracking'].eq('true').sum())

Count of Pits
306
Sum of Layers
1903
Layers w/Density info
0
Sum of ECT Results
418
Number of CT Results
78
Number of PST Results
81
Number of RBT Results
1
Count of wumpfCracking = true
179
