### Summary of survey results and meta data

Meta data includes dimensional data (beach length and surface area), the number of participants, the time to complete the survey (including counting, sorting and data entry), and the weights of plastic items, micro-plastics and the total weight.

This notebook aggregates the daily survey results into pieces per meter, 'total pieces' and 'number of different categories' for each survey.

Those values are added to the meta data for each survey. The resulting charts and data put the number and variety of objects identified in the context of time, manpower, surface area and weight.

In [1]:
import collections
import csv
import datetime
import json
import os

import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import cm
from matplotlib import colors as mcolors
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.ticker import MultipleLocator

from utilities.utility_functions import *
from utilities.style_title_suptitle import *
from utilities.summary_qc_utilities import *

In [2]:
# Shorthand for label-based slicing of (multi-)indexed frames.
idx = pd.IndexSlice

# Everything below is resolved relative to the directory the notebook runs in.
here = os.getcwd()

# Sub-folders the notebook expects to find next to it.
folders = ['data', 'charts', 'utilities']

# Ensure those folders exist under `here` (helper from the utilities package;
# it is called before any path to them is used).
check_for_folders(folders, here)

# Lookup of folder paths keyed by folder name, e.g. my_folders['charts'].
my_folders = make_folders(folders, here)

# All charts from this notebook are saved in their own folder under charts.
project_name = 'summary_dims_daily_totals/'
project_directory = make_project_folder(my_folders['charts'], project_name)

# Show where the charts will be written.
print(project_directory)

Directory already in place
/home/mwshovel/dev/analyst/survey_qc/survey_qc/charts/summary_dims_daily_totals/


In [3]:
# Read the waterbody groups JSON from the local data folder.
# The path is built from `here` (the working directory stored earlier) rather
# than a user-specific absolute path, so the notebook runs on any checkout.
# Assumes the notebook is started from the project root that holds `data/`.
waterbody_groups = json_file_get(os.path.join(here, 'data', 'waterbody_groups.json'))

In [4]:
# Transform the waterbody groups from an array of records into a dictionary
# that maps each 'location' to its list of 'beaches'.
water_body_names = {group['location']: group['beaches'] for group in waterbody_groups}

# Read in the .csv data from the local data folder. The folder is resolved
# from `here` (the working directory) instead of a user-specific absolute
# path. Assumes the notebook is started from the project root that holds `data/`.
data_dir = os.path.join(here, 'data')
survey_results = pd.read_csv(os.path.join(data_dir, 'survey_results.csv'))
dims_df = pd.read_csv(os.path.join(data_dir, 'dims_data.csv'))

# Make a timestamp column from the string date.
survey_results['py_date'] = pd.to_datetime(survey_results['date'])

# Key the daily survey results to the daily dimensional data.
# The return value is not used, so this presumably updates dims_df in place
# -- TODO confirm against utilities.
add_survey_summary(survey_results, dims_df)

# Pieces per unit of surface area.
# NOTE(review): a zero or missing 'area' produces inf/NaN here -- confirm the
# dimensional data never contains one.
dims_df['pcs_m_2'] = dims_df['quantity']/dims_df['area']

In [5]:
# These columns from dims_df will be the index for a summary DataFrame.
summary_survey_index = [
    'length',
    'area',
    'quantity',
    'time_minutes',
    'total_w',
    'mac_plast_w',
    'mic_plas_w',
    'pcs_m',
    'pcs_m_2',
]

# These are the values for each variable that need to be reported.
summary_survey_columns = [
    'min',
    'max',
    'average',
    'median',
]

# The columns of interest must be the same as summary_survey_index, so they
# are derived from it instead of being typed out a second time. A copy is
# used so that the two lists remain independent objects.
columns_of_interest = list(summary_survey_index)

In [6]:
# Build the summary table from dims_df using the index, the reported values
# and the columns of interest chosen above.
survey_data_summary = add_data_to_report_summary(
    dims_df,
    summary_survey_index,
    summary_survey_columns,
    columns_of_interest,
)

# Name each frame and hand both to files_to_csv together with the data folder
# (presumably one .csv per key -- confirm against utilities).
df_names = dict(
    all_summary_data=dims_df,
    survey_data_summary=survey_data_summary,
)
files_to_csv(df_names, my_folders['data'])

In [None]:
# Assign the variables for one chart. Everything a reader may want to tune
# for this figure is set here; the following cell turns these values into
# style objects and the plotting kwargs.

# the location of interest (used as a key of water_body_names below):
water_body = 'Lac Léman'
file_name = '{}-common-2015-2019'.format('lac-leman')

# dates for slicing data
start_date = '2015-11-15'
end_date = '2019-12-31'

# dates for the figure title
min_date = "Nov-2015"
max_date = "Dec-2019"

# legend and title contents
chart_title = 'All objects found'
figure_title = water_body
legend_title = 'Object description: quantity'

# Axis labels
y_axis = axis_label_props(label='Number of objects found')

x_axis = axis_label_props(label='*Identified items equal to less than 2% of the total.\nCaps and lids are plastic unless noted.')


# provide a color map https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
color_map = 'PuBuGn'

# choose whether to use a tight layout or not:
tight_layout = False

# Get the relevant data: keep only the records whose 'location' is one of the
# beaches listed for this water body.
# This used to read from `survey_data`, a name that is never assigned in this
# notebook and raised NameError on a fresh kernel; the survey records loaded
# from survey_results.csv are held in `survey_results`.
df_location = survey_results[survey_results['location'].isin(water_body_names[water_body])]


In [None]:
# Turn the chart variables assigned in the previous cell into the style
# objects and the single `kwargs` dictionary that carries them.

# define the date range
date_range = start_end_date(start_date, end_date, "%Y-%m-%d")

# apply the date range to the df
a_df = get_data_by_date_range(df_location, date_range)


# define the style and position for the chart title and the figure title
title_style = title_styles(fs=12,
                           ff='sans-serif',
                           fw='normal',
                           color='black',
                          )

sup_title_style = title_styles(fs=14,
                               ff='sans-serif',
                               fw='roman',
                               color='black'
                              )

the_title_position = title_position(pad=15,
                                    loc='left',
                                   )

sup_title_position = the_sup_title_position(x=0.13,
                                            va='baseline',
                                            ha='left',
                                            y=0.98
                                           )

# set the title and figure title
the_title = title_content(label=chart_title)

the_sup_title = title_content(label=figure_title)

# set the legend style and the location of the bounding box
the_legend_style = legend_style(title=legend_title)

# Set the legend title alignment.
# This is kept under its own name: rebinding `legend_title` here replaced the
# title string with the alignment object, so re-running this cell passed the
# wrong value to legend_style(title=...) above.
legend_title_alignment = legend_t_align()

# adjust the distance from the top of the chart to the plotting area
subplot_params = adjust_subplot_params()

# file save parameters
save_this = file_params(project_directory, file_name, ['.svg', '.jpeg'])

# NOTE(review): `code_dict` is not assigned in any cell above; presumably it
# is supplied by one of the `utilities` star imports -- confirm.
kwargs = {
    "min_date": min_date,
    "max_date": max_date,
    "a_df": a_df,
    # threshold used for grouping items (2%, see the x-axis note)
    "percent": .02,
    "date_range": date_range,
    "title_style": title_style,
    "the_title_position": the_title_position,
    "the_title": the_title,
    "sup_title_style": sup_title_style,
    "sup_title_position": sup_title_position,
    "the_sup_title": the_sup_title,
    "code_dict": code_dict,
    "the_legend_style": the_legend_style,
    # same value as before: the result of legend_t_align()
    "legend_title": legend_title_alignment,
    "color_map": color_map,
    "y_axis": y_axis,
    "x_axis": x_axis,
    "subplot_params": subplot_params,
    "save_this": save_this,
    "tight_layout": tight_layout,
}