### process_aco_dm_form.ipynb
The following script is used to process incoming device_magic forms for ACO Snow Surveys.

It is scripted to automatically parse data for multiple survey locations (Cruickshank, Englishman, Tsitika, Metro_Van) and perform the following operations separately for each:

1. Extract and rename desired columns from the DM form.
2. Generate UTM coordinates for each sample, using provided GNSS data.
3. Output a summary report for each survey location that includes summary statistics for each plot_id.
4. Output a detailed spreadsheet for each survey location that includes XXX.
5. Output a less-detailed spreadsheet that contains data necessary for use in the XXX model.

In [71]:
#import libraries
import numpy as np
import pandas as pd
import os

**USER INPUTS:**

In [72]:
# input flight number (AUTOMATE?)
flt_no = '1'

# provide target .csv file for DM form
path = r"G:\ACO\2024"
file = "CRU_24_P01.csv"

**CODE:**

In [121]:
# read file
df = pd.read_csv(file)

# set strings for each study area
study_area = ['Cruickshank', 'Englishman', 'Metro Vancouver', 'Russell Creek']
study_area_abr = ['CRU', 'EGM' , 'MV', 'TSI']

 # select columns to keep and new column names
cols2keep = np.array(["Survey_Start_Time",
      "username", 
      "Study_Area", 
      "Other_Study_Area", 
      "User_s_", 
      "Plot_ID", 
      "Tube_Name", 
      "Pre_Survey_Notes", 
      "GNSS_Used_", 
      "GNSS_Setup", 
      "Other_GNSS_Setup",
      "GNSS_Status",
      "GNSS_Height_Rover_to_Snow__cm_", 
      "Tare_Weight__g_",
      "Tare_Weight__cm_", 
      "Type_of_Plot", 
      "Point_Observation.Cardinal_Direction", 
      "Point_Observation.Distance_From_Centre__m_",
      "Point_Observation.Custom_Distance_From_Centre__m_",
      "Point_Observation.Plot_Features", 
      "Point_Observation.Sample_Type", 
      "Point_Observation.Depth__cm_", 
      "Point_Observation.Depth__cm_:timestamp", 
      "Point_Observation.Depth_Final__cm_",
      "Point_Observation.Core_Length__cm_", 
      "Point_Observation.Plug__cm_", 
      "Point_Observation.SWE_cm",
      "Point_Observation.Mass___Tube__g_",
      "Point_Observation.Multi_Part_Core_",
      "Point_Observation.Multi_Part_Core_Section_Number", 
      "Point_Observation.Additional_Measurements.Multi_Part_Core_Section_Number_copy",
      "Point_Observation.Additional_Measurements.New_Depth__cm_", 
      "Point_Observation.Additional_Measurements.New_Depth__cm_:timestamp", 
      "Point_Observation.Additional_Measurements.Core_Section_Length__cm_",
      "Point_Observation.Additional_Measurements.Plug_", 
      "Point_Observation.Additional_Measurements.SWE",
      "Point_Observation.Additional_Measurements.Mass___Tube", 
      "Point_Observation.Core_Length_Final__cm_",
      "Point_Observation.Core_Features", 
      "Point_Observation.Depth_of_Saturation", 
      "Point_Observation.Mass_Final__g_", 
      "Point_Observation.SWE_Final__cm_",    
      "Point_Observation.Depth_Max",
      "Point_Observation.Retrieval____", 
      "Point_Observation.SWE__cm_", 
      "Point_Observation.Density", 
      "Point_Observation.Density_MetroVan",
      "Point_Observation.Sample_Rating", 
      "Point_Observation.Point_Observation_Notes", 
      "Snow_Pit_Measurement.Distance_from_centre_of_plot__m_", 
      "Snow_Pit_Measurement.Depth_above_ground__cm_", 
      "Snow_Pit_Measurement.Temperature___C_",
      "Snow_Pit_Measurement.Density_Notes", 
      "Survey_End_TIme"])

new_colnames = np.array(["plot_datetime",
    "user_name",
    "study_area",
    "other_study_area",
    "users",
    "plot_id",
    "tube_name",
    "pre_survey_notes",
    "gnss_unit",
    "gnss_setup",
    "other_gnss_setup",
    "gnss_status",
    "rover_height",
    "tare_weight_g",
    "tare_weight_cm",
    "plot_type",
    "cardinal",
    "distance_m",
    "custom_distance",
    "plot_features",
    "sample_type",
    "depth_cm",
    "depth_timestamp",
    "depth_final_cm",
    "core_length_cm",
    "plug_cm",
    "swe_cm",
    "mass_tube_g_drop",
    "multi_core",
    "multi_part_core_num",
    "multi_core_num_copy",
    "depth_cm_new",
    "depth_cm_timestamp_new",
    "core_section_length_cm",
    "plug_cm_copy",
    "core_swe",
    "mass_tube_g_new",
    "core_length_final",
    "core_features",
    "depth_of_saturation",
    "mass_final_g",
    "swe_final_cm",
    "depth_max",
    "retrieval",
    "swe_cm_drop",
    "density_drop",
    "density",
    "sample_rating",
    "notes",
    "snow_pit_distance_from_centre",
    "snow_pit_depth_above_ground",
    "snow_pit_temperature",
    "snow_pit_density",
    "survey_end"])

# find columns to keep that exist in spreadsheet
ix = np.in1d(cols2keep, df.columns, assume_unique=True)

 # extract and rename these columns
df = df[cols2keep[ix]].set_axis(new_colnames[ix], axis='columns')

#  add ACO flight no.
df.insert(0, 'aco_flight_number', str(flt_no))

# fill nan depths with 0
df[['depth_final_cm', 'depth_max']] = df[['depth_final_cm', 'depth_max']].fillna(value=0)

# calculate snow depth (for multi-cores?)
df.insert(df.columns.get_loc('multi_core'), 'snow_depth',  df['depth_final_cm'] + df['depth_max'])

In [110]:
# NEED TO KNOW WHAT VARABLES ARE TO BE INCLUDED (SOME ARE READ ABOVE AND THEN DROPPED HERE...? a MV VS. OTHER SITE THING?)

# df = new_df.drop(columns=['submissionid', 
#                                 'submissiondatetime', 
#                                 'deviceid', 
#                                 'user_name', 
#                                 'Sampling_Design',
#                                 'Point_Observation.Abbreviations', 
#                                 'Point_Observation.Picture', 
#                                 'Point_Observation.Number_of_probe_extensions_used_',
#                                 'mass_tube_g_drop',
#                                 'mass_final_g',
#                                 'swe_cm_drop',
#                                 'density_drop'])


Index(['aco_flight_number', 'plot_datetime', 'user_name', 'study_area',
       'other_study_area', 'users', 'plot_id', 'tube_name', 'pre_survey_notes',
       'gnss_unit', 'gnss_setup', 'other_gnss_setup', 'gnss_status',
       'rover_height', 'tare_weight_g', 'plot_type', 'cardinal', 'distance_m',
       'custom_distance', 'plot_features', 'sample_type', 'depth_cm',
       'depth_timestamp', 'depth_final_cm', 'core_length_cm', 'plug_cm',
       'mass_tube_g_drop', 'snow_depth', 'multi_core', 'multi_part_core_num',
       'multi_core_num_copy', 'depth_cm_new', 'depth_cm_timestamp_new',
       'core_section_length_cm', 'plug_cm_copy', 'mass_tube_g_new',
       'core_length_final', 'core_features', 'depth_of_saturation',
       'mass_final_g', 'depth_max', 'retrieval', 'swe_cm_drop', 'density_drop',
       'sample_rating', 'notes', 'snow_pit_distance_from_centre',
       'snow_pit_depth_above_ground', 'snow_pit_temperature',
       'snow_pit_density', 'survey_end'],
      dtype='object')

In [126]:
# loop study areas and export cleaned spreadsheet and summary spreadheet for each
for ii in df["study_area"].unique():
    ix = study_area.index(ii)
    
    # get data for study area
    df_area = df[df['study_area'] == ii]
    
    # export cleaned spreadsheet
    df_area.to_csv(study_area_abr[ix] + '_trip' + str(flt_no) + '_DMform_clean.csv', index=False)
    
    # get summary statistics
    df_area_summary = df_area.groupby(['aco_flight_number','plot_id']).agg({
    "snow_depth": ["mean", "median", "std", "count"],
    "density": ["mean", "median", "std", "count"],
    "swe_cm": ["mean", "median", "std", "count"]})
    
    #check if directory exists
    # if not os.path.isdir(path + r"\Cruickshank\4_field_data\plots\working\P" + str(flt_no)):
    #if the folder path is not presnet, then create it
    # os.makedirs(path + r"\Cruickshank\4_field_data\plots\working\P" + str(flt_no))
    
    #Export the filtered summary stats to created folder
    df_area_summary.to_csv(study_area_abr[ix] + '_trip' + str(flt_no) + '_DMform_summary.csv', index=True)
    

KeyError: "Column(s) ['density', 'swe_cm'] do not exist"