Summary Stats
* Number of pits
* Number of layers
* Information about the layers
* Types of tests
* Information about the tests
* How many have density?


In [1]:
# Import libraries
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
from caaml_parser import caaml_parser

In [2]:
# Load and parse xml files

folder_path = "snowpits/wumph_pits" # wumph data set pits exported on 2/19/25

# Create a list of all CAAML files in the folder
caaml_files = [f for f in os.listdir(folder_path) if f.endswith('.xml')] # List of all CAAML files in the folder

In [6]:

# Parse all pits and summarize available info

pitList=[]
pitDict={}

for file in caaml_files:
    file_path = folder_path + '/' + file
    pit = caaml_parser(file_path)


    numPrimaryGrainForm = 0;
    numPrimaryGrainSize = 0;
    for layer in pit.snowProfile.layers:
        if layer.grainFormPrimary is not None:
            numPrimaryGrainForm += 1
            if layer.grainFormPrimary.grainSizeAvg is not None:
                numPrimaryGrainSize += 1

    pitDict = {
        'PitID': pit.pitID,
        'num Layers': len(pit.snowProfile.layers),
        'num Layers wPrimary Grain Form': numPrimaryGrainForm,
        'num Layers wPrimary Grain Size': numPrimaryGrainSize,
        'tempMeasurements': len(pit.snowProfile.tempProfile) if pit.snowProfile.tempProfile != None else 0,
        'densityMeasurements': len(pit.snowProfile.densityProfile) if pit.snowProfile.densityProfile != None else 0,
        'ECT_qty': len(pit.stabilityTests.ECT),
        'CT_qty': len(pit.stabilityTests.CT),
        'PST_qty': len(pit.stabilityTests.PST),
        'RBT_qty': len(pit.stabilityTests.RBT),
        'whumpfCracking': pit.wumphData.wumphCracking,
        'whumpfNoCracking': pit.wumphData.wumphNoCracking,
        'crackingNoWhumpf': pit.wumphData.crackingNoWhumpf,
        'whumpfNearPit': pit.wumphData.whumpfNearPit,
        'whumpfDepthWeakLayer': pit.wumphData.whumpfDepthWeakLayer,
        'whumpfTriggeredRemoteAva': pit.wumphData.whumpfTriggeredRemoteAva,
        'whumpfSize': pit.wumphData.whumpfSize
    }
    pitList.append(pitDict)

df = pd.DataFrame(pitList)
print(df)


print(df['PitID'].count())
   

     PitID  num Layers  num Layers wPrimary Grain Form  \
0    25670           6                               6   
1    25671           4                               4   
2    25672           5                               5   
3    25680           2                               2   
4    25683           4                               4   
..     ...         ...                             ...   
301  69403           4                               3   
302  71246           8                               8   
303  71923           5                               5   
304  72292           5                               5   
305  72324           5                               5   

     num Layers wPrimary Grain Size  tempMeasurements  densityMeasurements  \
0                                 6                 0                    0   
1                                 4                 0                    0   
2                                 4                 0                

In [10]:
summary_info = {
    'Pits': int(df['PitID'].count()),
    'Layers': int(df['num Layers'].sum()),
    'Layers wPrimary Grain Form': int(df['num Layers wPrimary Grain Form'].sum()),
    'Layers wPrimary Grain Size': int(df['num Layers wPrimary Grain Size'].sum()),
    'Pits with Density Info': int((df['densityMeasurements']!=0).sum()),
    'Density Measurements': int(df['densityMeasurements'].sum()),
    'Pits with Temp Info': int((df['tempMeasurements']!=0).sum()),
    'Temp Measurements': int(df['tempMeasurements'].sum()),
    'ECT Results': int(df['ECT_qty'].sum()),
    'CT Results': int(df['CT_qty'].sum()),
    'PST Results': int(df['PST_qty'].sum()),
    'RBT Results': int(df['RBT_qty'].sum()),
    'Pits where wumpfCracking = true': int((df['whumpfCracking']=='true').sum()),
    'Pits where whumpfNoCracking = true': int((df['whumpfNoCracking']=='true').sum()),
    'Pits where crackingNoWhumpf = true': int((df['crackingNoWhumpf']=='true').sum()),
    'Pits where whumpfNearPit = true': int((df['whumpfNearPit']=='true').sum()),
    'Pits where whumpfDepthWeakLayer = true': int((df['whumpfDepthWeakLayer']=='true').sum()),
    'Pits where whumpfTriggeredRemoteAva = true': int((df['whumpfTriggeredRemoteAva']=='true').sum()),
    'Pits where whumpfSize is specified': int((df['whumpfSize'] != None).sum())
}

for key,value in summary_info.items():
    print(key + ": " + str(value))

Pits: 306
Layers: 1903
Layers wPrimary Grain Form: 1840
Layers wPrimary Grain Size: 1143
Pits with Density Info: 7
Density Measurements: 31
Pits with Temp Info: 27
Temp Measurements: 218
ECT Results: 418
CT Results: 78
PST Results: 81
RBT Results: 1
Pits where wumpfCracking = true: 179
Pits where whumpfNoCracking = true: 105
Pits where crackingNoWhumpf = true: 24
Pits where whumpfNearPit = true: 235
Pits where whumpfDepthWeakLayer = true: 150
Pits where whumpfTriggeredRemoteAva = true: 21
Pits where whumpfSize is not None: 306


In [5]:
# Print example pit
pit1 = caaml_parser(folder_path + '/' + caaml_files[0])
print(pit1)

SnowPit: 
 caamlVersion: {http://caaml.org/Schemas/SnowProfileIACS/v6.0.3} 
 pitID: 25670 
 Date: 2020-11-12 
 User:
    OperationID: None
    OperationName: None
    Professional: None
    ContactPersonID: None
    ContactPersonName: None
 Location:
    Latitude: None
    Longitude: None
    Elevation: ['3642', 'm']
    Aspect: ['SW', None]
    SlopeAngle: ['25', 'deg']
    Country: US
    Region: CO
 Snow Profile: 
    measurementDirection: top down
    profileDepth: [79.0, 'cm']
    weatherConditions: 
	 skyCond: SCT
	 precipTI: Nil
	 airTempPres: [-9.5, 'degC']
	 windSpeed: M
	 windDir: SW
    hS: [79.0, 'cm']
    surfCond: 
	 windLoading: previous
	 penetrationFoot: None
	 penetrationSki: [20.0, 'cm']
    Layers:
    Layer 1: 
	 depthTop: ['0', 'cm']
	 thickness: ['19', 'cm']
	 hardness: F+
	 hardnessTop: None
	 hardnessBottom: None
	 grainFormPrimary: 
		 grainForm: PP
		 grainSizeAvg: [1.5, 'mm']
		 grainSizeMax: None
		 basicGrainClass_code: PP
		 basicGrainClass_name: Precipit