### process_aco_dm_form.ipynb
This notebook processes incoming Device Magic (DM) forms for ACO Snow Surveys.

It automatically parses data for multiple survey locations (Cruickshank, Englishman, Tsitika, Metro_Van) and performs the following operations separately for each:

1. Extract and rename desired columns from the DM form.
2. Generate UTM coordinates for each sample, using provided GNSS data.
3. Output a summary report for each survey location that includes summary statistics for each plot_id.
4. Output a detailed spreadsheet for each survey location that includes XXX.
5. Output a less-detailed spreadsheet that contains data necessary for use in the XXX model.

In [1]:
#import libraries
import numpy as np
import pandas as pd
import utm
import warnings
import os

**USER INPUTS:**

In [2]:
# input flight number (AUTOMATE?)
# kept as a string; it is written to every row as the 'aco_flight_number' column
flt_no = '1'

# provide target .csv file for DM form
# NOTE(review): `path` is not referenced by the visible code; `file` is passed to
# pd.read_csv as given, so it is resolved relative to the working directory - confirm
path = r"G:\ACO\2024"
file = "CRU_24_P01.csv"

**CODE:**

In [38]:
# read the DM form export and record which column names it contains
# NOTE(review): `path` from the user-inputs cell is not applied here, so `file`
# is resolved relative to the current working directory - confirm this is intended
df_file = pd.read_csv(file)
file_cols = df_file.columns

# set strings for each study area (full names and abbreviations, paired by position)
# NOTE(review): 'Russell Creek' is paired with 'TSI' - confirm this pairing
study_area = ['Cruickshank', 'Englishman', 'Metro Vancouver', 'Russell Creek']
study_area_abr = ['CRU', 'EGM' , 'MV', 'TSI']

# read in fieldnames spreadsheet (contains python colnames and current and historical DMform names for both .csv and google sheets)
df_cols = pd.read_csv('form_fieldnames .csv')

# rows with read_flag == 1 are the fields carried into the processed dataframe
ix_keep = df_cols['read_flag'] == 1

# initialize dataframe with one (empty) column per kept post-processing name
df = pd.DataFrame(columns=df_cols['post_process'][ix_keep])

# loop fields that we want to keep
for ii in df_cols['post_process'][ix_keep].index:
    # flag which cells of this fieldnames row match a column present in the file
    # (any historical naming convention); computed once and reused below
    in_file = np.isin(df_cols.iloc[ii, :], file_cols)
    if not in_file.any():
        # the file has no data for this field; its column stays empty
        continue

    matches = np.unique(df_cols.iloc[ii, in_file])
    if len(matches) == 1:
        # take that data and enter it into new df with post-processing column name
        df[df_cols['post_process'][ii]] = df_file[matches[0]]
    else:
        # several naming conventions are present in the same file: assigning a
        # multi-column frame to one column would raise, so take the first
        # non-null value per row across the matching columns instead
        df[df_cols['post_process'][ii]] = df_file[matches].bfill(axis=1).iloc[:, 0]
        
#  add ACO flight no. as the first column (same string value on every row)
df.insert(0, 'aco_flight_number', str(flt_no))

# fill nan depths with 0 (WHY?)
# df[['depth_final_cm', 'depth_max']] = df[['depth_final_cm', 'depth_max']].fillna(value=0)

# populate snow_depth (== depth values from both density and depth surveys combined in one field)
# The sum must be taken per row: np.nansum over the added columns collapses to a
# single scalar, and adding the columns first turns any row with one missing
# depth into NaN. Summing across the two columns skips NaN, and min_count=1
# keeps rows where neither depth was recorded as NaN rather than 0.
# pd.to_numeric makes columns that were never filled from the file (object
# dtype) numeric, and raises if a depth column holds non-numeric text.
depth_parts = df[['depth_final_cm', 'depth_max']].apply(pd.to_numeric)
df.insert(df.columns.get_loc('multicore'), 'snow_depth', depth_parts.sum(axis=1, min_count=1))

# loop study areas and export cleaned spreadsheet and summary spreadsheet for each
# statistics reported for every summarised field
summary_stats = ["mean", "median", "std", "count"]
# NOTE(review): only one SWE field is summarised - confirm whether a second
# SWE field was meant to be included alongside swe_final_swescale
summary_fields = ["snow_depth", "density_gscale", "density_swescale", "swe_final_swescale"]

for ii in df["study_area"].unique():
    # position of this study area in the name list (pairs with study_area_abr);
    # raises ValueError when the form contains a name that is not listed
    ix = study_area.index(ii)

    # get data for study area
    df_area = df.loc[df['study_area'] == ii]

    # export cleaned spreadsheet
    # df_area.to_csv(study_area_abr[ix] + '_trip' + str(flt_no) + '_DMform_clean.csv', index=False)
    # NOTE(review): fixed test filename, so each study area overwrites the previous one
    df_area.to_csv('test_output_DMform_clean.csv', index=False)

    # get summary statistics per flight and plot, with RuntimeWarnings silenced
    # while the aggregation runs
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        by_plot = df_area.groupby(['aco_flight_number', 'plot_id'])
        df_area_summary = by_plot.agg({field: summary_stats for field in summary_fields})

    # Export the summary stats
    # df_area_summary.to_csv(study_area_abr[ix] + '_trip' + str(flt_no) + '_DMform_summary.csv', index=True)
    df_area_summary.to_csv('test_output_summary.csv', index=True)
    
# template = "templates\cardinal_dir_template.csv"
# cardinal direction template (not used further in the visible code)
file_template = "cardinal_dir_template.csv"
template = pd.read_csv(file_template, dtype={"cardinal": "string", "distance_m": "float64"})

# When known - SPECIFY COLUMNS HERE
# plot coordinates: one easting/northing pair per plot_id is expected
file_gnss = "GNSS_cru_phase1_points.csv"
gnss = pd.read_csv(file_gnss, usecols=['plot_id', 'Easting_m', 'Northing_m'])

#  find entries that appear to be cardinal plots, but don't have the plot type entered (and assign plot type)
missing_plot_type = df['plot_type'].isna() & df['cardinal_dir'].notna()
df.loc[missing_plot_type, 'plot_type'] = 'Cardinal 10 m'

# split the samples: cardinal plots go to df_card, everything else to df_not_cardinal.
# The two names are bound separately so that df_card really holds the cardinal
# rows (a chained assignment would bind both names to the non-cardinal rows).
is_cardinal = df["plot_type"] == "Cardinal 10 m"
df_not_cardinal = df[~is_cardinal]
df_card = df[is_cardinal]

# add eastings and northings according to plot id; processing continues on cardinal plots only
# NOTE(review): the default inner join drops samples whose plot_id is missing from gnss
df = df_card.merge(gnss, on='plot_id')

# adjust eastings and northings according to sample distance from centre
# angle in degrees for each cardinal direction, measured counter-clockwise from east
cardinal_ang = dict(E=0, NE=45, N=90, NW=135, W=180, SW=225, S=270, SE=315)

# look up the angle of every sample; directions not in the table yield no angle
ang = df["cardinal_dir"].apply(lambda direction: cardinal_ang.get(direction))

# unit offsets (rounded to 3 decimals) along east and north for each sample
theta = np.deg2rad(ang)
unit_east = np.round(np.cos(theta), 3)
unit_north = np.round(np.sin(theta), 3)

# shift the plot coordinates by the sample's distance from the centre
df['Easting_m'] = df['Easting_m'] + unit_east * df['distance_m']
df['Northing_m'] = df['Northing_m'] + unit_north * df['distance_m']

# fill any "0" coords (e.g. CRU plot R1O) with 999999
for coord in ('Easting_m', 'Northing_m'):
    df.loc[df[coord] < 100, coord] = 999999

# get lat/lons?
# print(np.unique(df['Easting_m']))
# conversion uses zone number 10 and zone letter 'U' for every sample
# NOTE(review): rows filled with 999999 are converted too - confirm that is wanted
lat, lon = utm.to_latlon(df['Easting_m'], df['Northing_m'], 10, 'U')
df['lat'] = lat
df['lon'] = lon

In [55]:
# CHECKS
# Each check below is a bare expression: its value is computed but not stored or
# printed, so only the per-plot loop at the end produces output.
tmp = pd.DataFrame(columns=['p', 'd', 'n']) # Note that there is no row data inserted.

# distance to centre not entered (True where distance_m is NaN)
np.isnan(df['distance_m'])

# depth values for all entries? (count of rows with NaN snow_depth)
np.sum(np.isnan(df['snow_depth']))

# coords for all entries (count of rows with NaN easting)
np.sum(np.isnan(df['Easting_m']))

# flag filled coordinate values (count of rows holding the 999999 placeholder)
np.sum(df['Easting_m'] == 999999)

# at least 1 measurement for each cardinal dir (??)
# prints each plot_id followed by True when its number of distinct cardinal_dir
# values differs from 9
c = 0
for ii in np.unique(df['plot_id']):
    print(ii, len(np.unique(df.loc[df['plot_id'] == ii, 'cardinal_dir'])) != 9)    


E3S False
E5F False
E5O False
N1A False
N3O True
R1O True
S1A False
S2F False
S3F False
S4F False
S5O False
