## Snowpit Layer Information - RQ1

Create tables of values and counts for the following properties of the layers in a group of snow pits.
 

 **Done**
* grainFormPrimary
* grainFormSecondary
* hardness

**Possible**
* depthTop
* thickness
* wetness

Snow Pit Group 1: 200 snow pits from Montana

folder: snowpits_200_MT


In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from xml.dom import minidom
from Layer import Layer

# Path to the folder holding the snow pit files; it is a relative path, so it
# is resolved against the notebook's current working directory.
folder_path = "snowpits_200_MT" # The latest 200 snow pits from MT

In [2]:
# Iterate through the CAAML (.xml) files in folder_path and build layersList:
# one Layer object for every <caaml:Layer> element found in any of the files.

# caaml_parser in process

# Child elements read from each <caaml:Layer>, listed in the positional order
# in which they are passed to the Layer constructor below.
LAYER_FIELDS = (
    'depthTop',
    'thickness',
    'grainFormPrimary',
    'grainFormSecondary',
    'hardness',
    'wetness',
)


def _node_text(node):
    """Return the value of ``node``'s first child, or None if it has no children.

    An empty element (for example ``<caaml:wetness/>``) has ``firstChild`` set
    to None, so reading ``.nodeValue`` from it directly raises AttributeError.
    """
    first_child = node.firstChild
    return first_child.nodeValue if first_child is not None else None


layersList = []  # Layer objects for all the layers in all the CAAML files

# Sorted so that layersList has the same order on every run; the directory
# listing itself comes back in no particular order.
caaml_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.xml'))

for file in caaml_files:
    # Parse the file
    file_path = os.path.join(folder_path, file)
    doc = minidom.parse(file_path)
    root = doc.documentElement

    # Get all Layer nodes (matched on the qualified tag name, prefix included)
    layers = root.getElementsByTagName('caaml:Layer')

    for layer in layers:
        # Every field starts as None and stays None when the corresponding
        # element is missing or empty.
        values = dict.fromkeys(LAYER_FIELDS)

        for node in layer.childNodes:
            # Children whose localName is not one of the fields (including
            # non-element nodes) are ignored. If a field occurs more than
            # once, the last occurrence wins.
            if node.localName in values:
                values[node.localName] = _node_text(node)

        # Create layer object
        layer_i = Layer(*(values[name] for name in LAYER_FIELDS))
        layersList.append(layer_i)
    

AttributeError: 'Layer' object has no attribute 'set_grainClass'

In [None]:
# Spot-check one parsed layer. Index 1 selects the second entry of layersList.
layer1 = layersList[1]

# Printed form of the object
print(layer1)

# Raw instance attributes as a dict
layer1_attributes = vars(layer1)
print(layer1_attributes)

# Value returned by the depthTop accessor
layer1_depth_top = layer1.get_depthTop()
print(layer1_depth_top)

# Disabled checks for the grain-form class accessors:
#print(layer1.get_grainFormPrimaryClass())
#print(layer1.get_grainFormPrimarySubClass())

In [None]:
# Build one row per layer: each Layer's instance attributes (via vars) become
# the columns of the dataframe.
layer_records = list(map(vars, layersList))
df = pd.DataFrame(layer_records)
print(df)

In [None]:
# Count how often each primary grain form occurs and lay the result out as a
# two-column table (grain form, count).
grain_form_table = (
    df['grainFormPrimary']
    .value_counts()
    .rename_axis('Grain Form')
    .reset_index(name='Count')
)

# Share of each grain form among the counted layers, in percent (1 decimal)
primary_total = grain_form_table['Count'].sum()
grain_form_table['Percentage'] = (
    grain_form_table['Count'].div(primary_total).mul(100).round(1)
)

# Show the table without the row index
print("\nPrimary Grain Form Distribution:")
print(grain_form_table.to_string(index=False))

# Bar chart of the counts, one bar per grain form
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(grain_form_table['Grain Form'], grain_form_table['Count'])
plt.xticks(rotation=45, ha='right')
ax.set_title('Distribution of Primary Grain Forms for 200 MT Snow Pits')
ax.set_xlabel('Grain Form')
ax.set_ylabel('Count')
fig.tight_layout()
plt.show()

In [None]:
# Count how often each hardness value occurs and lay the result out as a
# two-column table (hardness, count).
hardness_counts = df['hardness'].value_counts()
hardness_table = hardness_counts.rename_axis('Hardness').reset_index(name='Count')

# Share of each hardness value among the counted layers, in percent (1 decimal)
hardness_total = hardness_table['Count'].sum()
hardness_share = hardness_table['Count'] / hardness_total * 100
hardness_table['Percentage'] = hardness_share.round(1)

# Show the table without the row index
print("\nHardness Distribution:")
print(hardness_table.to_string(index=False))

# Bar chart of the counts, one bar per hardness value
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(hardness_table['Hardness'], hardness_table['Count'])
plt.xticks(rotation=45, ha='right')
ax.set(
    title='Distribution of Hardness for 200 MT Snow Pits',
    xlabel='Hardness',
    ylabel='Count',
)
fig.tight_layout()
plt.show()

In [None]:
# Cross-tabulate primary grain form (rows) against hardness (columns); each
# cell holds the number of layers with that combination.
crosstab = pd.crosstab(df['grainFormPrimary'], df['hardness'])

# Heatmap of the counts. fmt="d" writes the annotations as plain integers;
# seaborn's default format (".2g") would show counts of 100 or more in
# scientific notation (e.g. 1.2e+02).
sns.heatmap(crosstab, annot=True, fmt="d", cmap="YlGnBu")
plt.title('Primary Grain Form vs. Hardness for 200 MT Snow Pits')
plt.tight_layout()
plt.show()

In [None]:
# Create a value counts table for grainFormSecondary.
# NOTE: this rebinds grain_form_table, replacing the primary grain form table
# built in an earlier cell.
# value_counts() leaves out missing values by default, so the counts and
# percentages only cover layers that have a secondary grain form.
grain_form_table = df['grainFormSecondary'].value_counts().reset_index()
grain_form_table.columns = ['Grain Form', 'Count']

# Add percentage column (share of the counted layers, rounded to 1 decimal)
grain_form_table['Percentage'] = (grain_form_table['Count'] / grain_form_table['Count'].sum() * 100).round(1)

# Display the table (heading corrected: this is the secondary grain form)
print("\nSecondary Grain Form Distribution:")
print(grain_form_table.to_string(index=False))

# Create a bar plot of the counts, one bar per secondary grain form
plt.figure(figsize=(10, 6))
plt.bar(grain_form_table['Grain Form'], grain_form_table['Count'])
plt.xticks(rotation=45, ha='right')
plt.title('Distribution of Secondary Grain Forms for 200 MT Snow Pits')
plt.xlabel('Grain Form')
plt.ylabel('Count')
plt.tight_layout()
plt.show()