# Summary Statistics and Comparisons by Snow Year

In [1]:
# Import libraries
import os
import pandas as pd
import matplotlib.pyplot as plt
from snowpylot.caaml_parser import caaml_parser

In [2]:
def parse_pits(folder_path):
    """
    Parse every CAAML (.xml) file found directly inside a folder.

    Parameters
    ----------
    folder_path : str or path-like
        Folder to scan (non-recursively) for files whose names end in ".xml".

    Returns
    -------
    list
        One object returned by `caaml_parser` per .xml file, ordered by file name.
        An empty list is returned when the folder holds no .xml files.
    """

    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # reproducible across runs and machines.
    xml_names = sorted(name for name in os.listdir(folder_path) if name.endswith(".xml"))

    # os.path.join also accepts path-like folders, unlike string concatenation
    return [caaml_parser(os.path.join(folder_path, name)) for name in xml_names]

In [4]:
# Define the season folders, parse every pit, and list each season's CAAML files

# One folder per snow season. Keeping them in a single mapping avoids the
# copy-paste mistake of pointing several seasons at the same folder.
season_folders = {
    "2019-2020": "snowpits/by_season/2019-2020",
    "2020-2021": "snowpits/by_season/2020-2021",
    "2021-2022": "snowpits/by_season/2021-2022",
    "2022-2023": "snowpits/by_season/2022-2023",
    "2023-2024": "snowpits/by_season/2023-2024",
}

# Parsed pits, one list per season
pits_19_20 = parse_pits(season_folders["2019-2020"])
pits_20_21 = parse_pits(season_folders["2020-2021"])
pits_21_22 = parse_pits(season_folders["2021-2022"])
pits_22_23 = parse_pits(season_folders["2022-2023"])
pits_23_24 = parse_pits(season_folders["2023-2024"])

# Names of the CAAML (.xml) files in each season's own folder
caaml_files_by_season = {
    season: [f for f in os.listdir(folder) if f.endswith(".xml")]
    for season, folder in season_folders.items()
}

caaml_files_19_20 = caaml_files_by_season["2019-2020"]
caaml_files_20_21 = caaml_files_by_season["2020-2021"]
caaml_files_21_22 = caaml_files_by_season["2021-2022"]
caaml_files_22_23 = caaml_files_by_season["2022-2023"]
caaml_files_23_24 = caaml_files_by_season["2023-2024"]

# Kept for backward compatibility: this cell has always finished with
# `folder_path` set to the 2019-2020 folder, and code that pairs it with
# caaml_files_19_20 relies on that value.
folder_path = season_folders["2019-2020"]

In [5]:
# Summarize the 2019-2020 pits: one row of counts per pit

def _summarize_pit(pit):
    """
    Build one summary row (a dict of identifiers and counts) for a parsed pit.

    Parameters
    ----------
    pit : object returned by `caaml_parser`
        Must expose `coreInfo`, `snowProfile` and `stabilityTests` as read below.

    Returns
    -------
    dict
        Pit ID, operation name, username, and the number of layers, layers with
        a primary grain form / grain size, temperature and density measurements,
        and ECT / CT / PST / RBlock stability test results.
    """
    profile = pit.snowProfile

    # Layers that report a primary grain form
    layers_with_form = [
        layer for layer in profile.layers if layer.grainFormPrimary is not None
    ]
    # A grain size can only be counted for layers that have a primary grain form
    num_with_size = sum(
        1
        for layer in layers_with_form
        if layer.grainFormPrimary.grainSizeAvg is not None
    )

    return {
        "PitID": pit.coreInfo.pitID,
        "Operation Name": pit.coreInfo.user.operationName,
        "SnowPilot Username": pit.coreInfo.user.username,
        "num Layers": len(profile.layers),
        "num Layers wPrimary Grain Form": len(layers_with_form),
        "num Layers wPrimary Grain Size": num_with_size,
        # A missing profile (None) counts as zero measurements
        "tempMeasurements": len(profile.tempProfile)
        if profile.tempProfile is not None
        else 0,
        "densityMeasurements": len(profile.densityProfile)
        if profile.densityProfile is not None
        else 0,
        "ECT_qty": len(pit.stabilityTests.ECT),
        "CT_qty": len(pit.stabilityTests.CT),
        "PST_qty": len(pit.stabilityTests.PST),
        "RBT_qty": len(pit.stabilityTests.RBlock),
    }


# Reuse the pits already parsed into `pits_19_20` instead of re-parsing every
# file; this also removes the dependence on a leftover `folder_path` value.
pitList = [_summarize_pit(pit) for pit in pits_19_20]

df = pd.DataFrame(pitList)


KeyError: 'PPhl'

In [None]:
# Summarize the information available across all pits in `df`

# Pit and layer counts
pit_count = int(df["PitID"].count())
layer_count = int(df["num Layers"].sum())
grain_form_layers = int(df["num Layers wPrimary Grain Form"].sum())
grain_size_layers = int(df["num Layers wPrimary Grain Size"].sum())

# Density and temperature: pits with at least one measurement, then totals
density_per_pit = df["densityMeasurements"]
temp_per_pit = df["tempMeasurements"]
pits_with_density = int((density_per_pit != 0).sum())
pits_with_temp = int((temp_per_pit != 0).sum())

summary_info = {
    "Pits": pit_count,
    "Layers": layer_count,
    "Layers wPrimary Grain Form": grain_form_layers,
    "Layers wPrimary Grain Size": grain_size_layers,
    "Pits with Density Info": pits_with_density,
    "Density Measurements": int(density_per_pit.sum()),
    "Pits with Temp Info": pits_with_temp,
    "Temp Measurements": int(temp_per_pit.sum()),
    # Stability test result totals
    "ECT Results": int(df["ECT_qty"].sum()),
    "CT Results": int(df["CT_qty"].sum()),
    "PST Results": int(df["PST_qty"].sum()),
    "RBT Results": int(df["RBT_qty"].sum()),
}

# One "label: count" line per summary entry
for label, count in summary_info.items():
    print(f"{label}: {count}")