In [1]:
# sys, file and nav packages:
import os
import datetime as dt
import csv

# math packages:
import pandas as pd
import numpy as np
from scipy import stats
import datetime as dt 
from statsmodels.distributions.empirical_distribution import ECDF
from scipy.stats import kendalltau, pearsonr, spearmanr
import math

# charting:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import ticker
import matplotlib.ticker as mtick
import seaborn as sns
from matplotlib.gridspec import GridSpec
from IPython.display import display, Markdown, Latex, HTML
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap, Colormap

# mapping
import geopandas as gpd

# home brew utilities
import utilities.utility_functions as ut

# documenting
from IPython.display import Markdown as md


# returns the p_value for each test
def kendall_pval(x,y):
    """Return only the p-value of Kendall's tau computed for ``x`` and ``y``."""
    _statistic, p_value = kendalltau(x, y)
    return p_value

def pearsonr_pval(x,y):
    """Return only the p-value of the Pearson correlation between ``x`` and ``y``."""
    _statistic, p_value = pearsonr(x, y)
    return p_value

def spearmanr_pval(x,y):
    """Return only the p-value of the Spearman rank correlation between ``x`` and ``y``."""
    _statistic, p_value = spearmanr(x, y)
    return p_value


def make_ecdf(somdata, numsamps):
    """Return the sorted ``pcs_m`` values of ``somdata`` and a list of cumulative fractions.

    The fractions are ``0/numsamps, 1/numsamps, ..., (numsamps-1)/numsamps``;
    they are built from ``numsamps`` alone, not from the length of ``somdata``.
    """
    ordered = somdata.pcs_m.sort_values()
    fractions = list(np.arange(numsamps) / numsamps)
    return ordered, fractions


def assign_a_level(x, a_list, labels):
    """Pick ``labels[0]`` when ``x`` is a member of ``a_list``, otherwise ``labels[1]``."""
    return labels[0] if x in a_list else labels[1]

def count_k(a_string, limit):
    """Shorten ``a_string`` to whole words that fit within ``limit`` characters.

    Words (split on single spaces) are kept while the running length, counting
    one separator per word, stays below ``limit``. At the first word that does
    not fit, ``...`` is attached to the last kept word when that still fits the
    limit, and everything after that point is dropped.

    Parameters
    ----------
    a_string : str
        The text to shorten.
    limit : int
        The character budget.

    Returns
    -------
    str
        The kept words joined by single spaces; an empty string when not even
        the first word fits.
    """
    total = 0
    new_words = []
    for word in a_string.split(" "):
        if total + len(word) + 1 >= limit:
            # Only mark the cut on a word that was actually kept. The previous
            # lookup by position picked the LAST word of the string when the
            # very first word overflowed and then deleted from an empty list.
            if new_words:
                thisnewword = F"{new_words[-1]}..."
                if (len(thisnewword) + total) <= limit:
                    new_words[-1] = thisnewword
            # stop here so that no later word is appended behind the cut
            break
        total += len(word) + 1
        new_words.append(word)

    return " ".join(new_words)

# convenience functions for tables

def make_table_grids(anax):
    """Turn off the grid and hide all four spines of ``anax``; returns the same axes."""
    anax.grid(False)
    for side in ("top", "right", "bottom", "left"):
        anax.spines[side].set_visible(False)
    return anax

def table_fonts(a_table, size=12):
    """Switch off automatic font sizing on ``a_table`` and apply the fixed font ``size``."""
    automatic = False
    a_table.auto_set_font_size(automatic)
    a_table.set_fontsize(size)

# keyword arguments for tables and for hiding the ticks of a table axes
table_k = {'loc': "top left", 'bbox': (0, 0, 1, 1), 'colWidths': [.5, .5], 'cellLoc': 'center'}
tablecenter_k = {'loc': "top left", 'bbox': (0, 0, 1, 1), 'cellLoc': 'center'}
tabtickp_k = {
    'axis': 'both',
    'which': 'both',
    'bottom': False,
    'top': False,
    'left': False,
    'right': False,
    'labelleft': False,
    'labelbottom': False,
}

# keyword arguments for chart titles, axis labels and rotated tick labels
title_k = dict(loc='left', pad=14, linespacing=1.5, fontsize=12)
title_k14 = dict(loc='left', pad=16, linespacing=1.5, fontsize=14)
title_k20 = dict(loc='left', pad=10, linespacing=1.5, fontsize=12, color='dodgerblue')
title_k17 = dict(loc='left', pad=10, linespacing=1.5, fontsize=12, color='salmon')
titler_k20 = dict(loc='right', pad=10, linespacing=1.5, fontsize=12, color='dodgerblue')
titler_k17 = dict(loc='right', pad=10, linespacing=1.5, fontsize=12, color='salmon')
xlab_k = dict(labelpad=10, fontsize=12)
ylab_k = dict(labelpad=14, fontsize=14)
titler_k = dict(loc='right', pad=10, linespacing=1.5, fontsize=12)
label45r = dict(rotation=45, ha='right')
label45c = dict(rotation=45, ha='center')

# use these to format date axis in charts
# weekday locators tick on weekday 1, repeated every `interval` weeks
weeks = mdates.WeekdayLocator(byweekday=1, interval=4)
onedayweek = mdates.DayLocator(bymonthday=1, interval=1)
# was interval=4, i.e. an exact copy of `weeks`; the name asks for a two week step
everytwoweeks = mdates.WeekdayLocator(byweekday=1, interval=2)

# month locators: quarterly, every second month, every month
months = mdates.MonthLocator(bymonth=[3,6,9,12])
bimonthly = mdates.MonthLocator(bymonth=[1,3,5,7,9,11])
allmonths = mdates.MonthLocator()
# tick label formats: day of the month, abbreviated month name
wks_fmt = mdates.DateFormatter('%d')
mths_fmt = mdates.DateFormatter('%b')

# map marker size:
markerSize = 100
# unpack the six values returned by the project helper; the names suggest they are
# folder paths (data, definitions, output) — the helper is not visible here, TODO confirm
survey_data, location_data, code_defs, stat_ent, geo_data, output = ut.make_local_paths()

In [2]:

# def make_group_map(a_dict_of_lists):
#     wiw = {}
#     for group in a_dict_of_lists:
#         keys = a_dict_of_lists[group]
#         a_dict = {x:group for x in keys}
#         wiw.update(**a_dict)
#     return wiw

# these_groups ={k:ut.json_file_get(F"{output}/code_groups/{v}") for k,v in som_names.items()}
# these_groups.update({"fragmented plastics":["G79", "G78", "G75"]})
# group_names = list(these_groups.keys())

# # collect the codes
# accounted = [v for k,v in these_groups.items()]
# accounted = [item for a_list in accounted for item in a_list]

In [3]:
# reporting period: fixed start, end is the day the notebook is run
start_date = '2020-04-01'
end_date = dt.date.today().isoformat()

# "MM/YYYY" labels cut out of the ISO date strings
startyearmonth = F"{start_date[5:7]}/{start_date[:4]}"
endyearmonth = F"{end_date[5:7]}/{end_date[:4]}"

# decide which data to use
aggregated = False

# city of interest, label of its catchment area and the water features in that catchment
# (earlier settings kept for reference)
# group_names = list(these_groups.keys())
# lake = 'Lac Léman'
# lavey_locs= ['lavey-les-bains-2','lavey-les-bains', 'lavey-la-source']
coi = 'Zürich'
bassin_label = 'Limmat'
bassin = ['Limmat', 'Linthkanal', 'Escherkanal', 'Seez', 'Zurichsee', 'Sihl', 'Jona']

# quantile that defines a significant event, and its complement
sig = .9
one_minus_sig = 1 - sig

# explanatory variables
expv = ['population', 'streets', 'buildings', 'rivs']

# folder name of the project and the folder itself, used to store the outputs
name_of_project = 'laveysummary'
project_directory = ut.make_project_folder(output, name_of_project)

# aggregated survey data (not in use)
# dfAgg = pd.read_csv(F"{survey_data}/results_with_zeroes_aggregated_parent.csv")

# one record per file written by this notebook
files_generated = list()

# method to save
def add_output(a_name, a_tag, atype="table", fignum=0, a_list=files_generated, data=None):
    """Write one output into the project directory and record it in ``a_list``.

    Parameters
    ----------
    a_name : str
        File name. Either relative to ``project_directory`` or already
        starting with ``project_directory`` (both forms are used by callers).
    a_tag : str
        Short description stored with the record.
    atype : str
        ``'data'`` writes ``data`` with ``to_csv(index=False)``; any other
        value saves the current matplotlib figure at 300 dpi.
    fignum : int
        Number stored with the record.
    a_list : list
        The list that receives the record; defaults to ``files_generated``.
    data : object with a ``to_csv`` method, optional
        Required when ``atype == 'data'``.

    Raises
    ------
    ValueError
        If ``atype == 'data'`` and no ``data`` is given.
    """
    prefix = F"{project_directory}/"
    name = str(a_name)
    # do not prefix twice: the data exports already pass the full path
    tableonefile = name if name.startswith(prefix) else F"{prefix}{name}"
    # use the list that was passed in (previously the argument was ignored)
    a_list.append({'tag':a_tag, 'number':fignum, 'file':tableonefile,'type':atype})
    if atype == 'data':
        if data is None:
            raise ValueError("add_output: atype='data' requires the data argument")
        # write to the recorded path so that the record and the file agree
        data.to_csv(tableonefile, index=False)
    else:
        plt.savefig(tableonefile, dpi=300)

# save files
# survey_csv = F"{project_directory}/survey_data.csv"

# files_generated.append(survey_csv)
# useThis.to_csv(survey_csv, index=False)
# data_num = 1
# figname = F"{project_directory}/survey_data.csv"
# atype = "data"
# data = useThis
# atag = 'All survey data'
# add_output(figname, atag, fignum=data_num, atype=atype, data=data)

In [4]:
# non aggregated survey data
dfSurveys = pd.read_csv(F"{survey_data}/results_with_zeroes.csv")
dfSurveys['date'] = pd.to_datetime(dfSurveys['date'])
# take an explicit copy of the filtered rows: a column is added right after,
# and assigning to a slice of the raw frame raises SettingWithCopyWarning
dfSurveys = dfSurveys[dfSurveys.date >= start_date].copy()
dfSurveys['groupname'] = 'nogroup'

# beach data, indexed by location slug
dfBeaches = (
    pd.read_csv(F"{location_data}/beaches_with_ranks.csv")
    .set_index('slug')
    .rename(columns={"NUMPOINTS":"intersects"})
)

# code definitions, indexed by code
dfCodes = pd.read_csv(F"{code_defs}/mlw_codes.csv", index_col='code')

# code group name -> JSON file with the codes of that group; the files are read
# from F"{output}/code_groups/" by CatchmentArea.make_code_groups.
# The order of the entries matters: CatchmentArea.make_group_map walks the groups
# in this order and a code listed in several groups keeps the LAST group name.
group_names_locations = {
    "waste water": "wastewater.json" ,
    "less than 5mm":"codeListMicros.json",
    "construction":"construction.json",
    "food":"foodstuff.json",
    "agg-con-trans":"cat.json",
    "agriculture":"ag.json",
    "tobacco":"tobac.json",
    "recreation":"recreation.json",    
    "packaging":"packaging.json",
    "personal items":"pi.json",    
}
# an extra code group defined in the notebook (passed on as `new_code_group`)
frag_plas = {"fragmented plastics":["G79", "G78", "G75"]}
# columns that identify one survey and one code group
group_cols = ['loc_date', 'date','location','water_name', 'groupname']
# the two regional levels: the municipality and the label of its catchment area
levels={'muni':coi, 'catchment':bassin_label}

# defaults of PreprocessData: the grouping columns and, per target code,
# the codes whose pcs_m and quantity are summed into that target code
these_cols = ['loc_date', 'location', 'water_name', 'date']
foams={'G82':['G82', 'G912'], 'G81':['G81', 'G911'], 'G74':['G74', 'G910', 'G909']}

class PreprocessData:
    """Restricts the survey data to a date range and merges related codes.

    After construction:

    * ``data`` holds the rows with ``start_date <= date <= end_date`` (a copy),
    * ``code_maps`` holds one aggregated frame per key of ``foams``,
    * ``processed`` holds the merged frame with ``groupname``, ``population``
      and ``loc_date`` columns set.
    """
    def __init__(self, data, beaches, these_cols=these_cols, foams=foams, start_date=start_date, end_date=end_date):
        in_window = (data.date >= start_date) & (data.date <= end_date)
        self.data = data.loc[in_window].copy()
        self.these_cols = these_cols
        self.foams = foams
        self.beaches = beaches
        self.code_maps = self.make_code_maps(self.data, self.these_cols, self.foams)
        self.processed = self.add_exp_group_pop_locdate()

    def make_code_maps(self, data, these_cols, these_codes):
        """Sum ``pcs_m`` and ``quantity`` of each set of member codes under its target code.

        Returns a dict ``{target code: frame grouped by these_cols}``; the
        ``code`` column of every frame is set to the target code.
        """
        merged = {}
        for target, members in these_codes.items():
            member_rows = data[data.code.isin(members)]
            totals = member_rows.groupby(these_cols, as_index=False).agg({'pcs_m':'sum', 'quantity':'sum'})
            totals['code'] = target
            merged[target] = totals
        return merged

    def agg_foams(self):
        """Replace the rows of all member codes by the aggregated rows of ``code_maps``."""
        absorbed = [code for members in self.foams.values() for code in members]
        untouched = self.data[~self.data.code.isin(absorbed)].copy()
        return pd.concat([untouched, *self.code_maps.values()])

    def add_exp_group_pop_locdate(self):
        """Add a placeholder ``groupname``, the beach ``population`` and the ``loc_date`` tuple."""
        result = self.agg_foams()
        result['groupname'] = 'groupname'
        # one lookup per row in the beaches frame, which is indexed by location
        result['population'] = result.location.map(lambda slug: self.beaches.loc[slug]['population'])
        result['loc_date'] = list(zip(result.location, result.date))
        return result

In [5]:
# run the preprocessing on a copy of the survey data and keep the processed frame
# NOTE(review): `a` is rebound to a CatchmentArea instance in a later cell; `b` is the frame that cell consumes
a = PreprocessData(dfSurveys.copy(), dfBeaches)
b = a.processed
# total quantity after merging the codes, to compare with the raw total in the next cell
b.quantity.sum()

46773

In [6]:
# total quantity of the unprocessed survey data; the displayed value equals the processed total above
dfSurveys.quantity.sum()

46773

In [7]:
# keyword arguments handed to CatchmentArea; every key is read in its __init__
clas_kwargs = {
    'code_group_data': group_names_locations,
    'new_code_group': frag_plas,
    'levels': levels,
    'catchment_features': bassin,
    'end_date': end_date,
    'start_date': start_date,
}

class CatchmentArea:
    """Aggregates survey results for a municipality and for its catchment area.

    Parameters
    ----------
    data : pandas.DataFrame
        Survey results; the columns ``location``, ``code`` and ``quantity``
        are read here, ``pcs_m``, ``water_name``, ``loc_date`` and ``date``
        by the aggregation methods.
    these_beaches : pandas.DataFrame
        Beach data indexed by location, with ``city`` and ``water_name`` columns.
    **kwargs
        Required keys: ``start_date``, ``end_date``, ``levels`` (a dict with
        ``'muni'`` and ``'catchment'``), ``catchment_features`` (the water
        names of the catchment), ``code_group_data`` (group name -> JSON file
        name) and ``new_code_group`` (group name -> list of codes).

    After construction ``bassin_data`` / ``muni_data`` hold the labelled
    results of each level and ``bassin_code_totals`` / ``muni_code_totals``
    the quantity per code with its share of the level total.
    """
    def __init__(
        self,
        data,
        these_beaches,
        **kwargs):
        self.data = data
        self.beaches = these_beaches
        self.start_date = kwargs['start_date']
        self.end_date = kwargs['end_date']
        self.levels = kwargs['levels']
        self.catchment = self.levels['catchment']
        self.muni = self.levels['muni']
        self.locations_in_use = self.data.location.unique()
        self.muni_beaches = self.get_locations_by_region(self.locations_in_use, self.beaches[self.beaches.city == self.muni].index)
        self.catchment_features = kwargs['catchment_features']
        self.bassin_beaches = self.get_locations_by_region(self.locations_in_use, self.beaches[self.beaches.water_name.isin(self.catchment_features)].index)
        self.codes_in_use = data.code.unique()
        self.group_names_locations = kwargs['code_group_data']
        self.new_code_group = kwargs['new_code_group']
        self.code_groups = self.make_code_groups()
        self.code_group_map = self.make_group_map(self.code_groups)
        self.bassin_data = self.assign_regional_labels_to_data(self.assign_code_groups_to_results(data[data.location.isin(self.bassin_beaches)].copy(), self.code_group_map), self.levels, these_beaches)
        self.muni_data = self.assign_regional_labels_to_data(self.assign_code_groups_to_results(data[data.location.isin(self.muni_beaches)].copy(), self.code_group_map), self.levels, these_beaches)
        self.bassin_code_totals = self.code_totals_regional(self.bassin_data)
        self.muni_code_totals = self.code_totals_regional(self.muni_data)

    def make_group_map(self,a_dict_of_lists):
        """Invert ``{group: [codes]}`` into ``{code: group}``.

        Groups are walked in dict order; a code listed in several groups
        keeps the last one. Codes may be any hashable value (the previous
        keyword expansion only accepted strings).
        """
        wiw = {}
        for group, keys in a_dict_of_lists.items():
            wiw.update({x: group for x in keys})
        return wiw

    def make_code_groups(self):
        """Load the code groups from JSON, add the extra group and collect the remaining codes.

        Every code in use that belongs to no group ends up in ``'the rest'``.
        """
        these_groups ={k:ut.json_file_get(F"{output}/code_groups/{v}") for k,v in self.group_names_locations.items()}
        these_groups.update(self.new_code_group)
        # a set makes the membership test below constant time per code
        accounted = {item for a_list in these_groups.values() for item in a_list}
        the_rest = [x for x in self.codes_in_use if x not in accounted]
        these_groups.update({'the rest':the_rest})
        return these_groups

    def assign_code_groups_to_results(self, data, code_group_map):
        """Return a copy of ``data`` with ``groupname`` looked up from ``code_group_map``.

        Raises ``KeyError`` for a code that is missing from the map.
        """
        data = data.copy()
        data['groupname'] = data.code.map(lambda x: code_group_map[x])
        return data

    def tag_regional_label(self,x, levels):
        """Return the municipality name if ``x`` is one of its beaches, else the catchment label.

        ``levels`` is accepted for compatibility and not used.
        """
        if x in self.muni_beaches:
            a_label = self.muni
        else:
            a_label = self.catchment
        return a_label

    def assign_regional_labels_to_data(self, data, levels, these_beaches):
        """Return a copy of ``data`` with ``region`` and ``city`` columns added.

        The region is derived from ``self.levels``; the ``levels`` argument
        is accepted for compatibility and not used.
        """
        data = data.copy()
        data['region'] = data.location.map(lambda x: self.tag_regional_label(x, self.levels))
        data['city'] = data.location.map(lambda x: these_beaches.loc[x]['city'])
        return data

    def code_totals_regional(self, data):
        """Return the quantity per code and its fraction of the total (``'% of total'``)."""
        data = data.groupby('code', as_index=False).quantity.sum()
        a_total = data.quantity.sum()
        data['% of total'] = data.quantity/a_total
        return data

    def code_pcsm_regional(self,data):
        """Return the median ``pcs_m`` per code."""
        return data.groupby('code').pcs_m.median()

    def survey_totals_regional(self, data):
        """Return the summed ``pcs_m`` and ``quantity`` of every survey."""
        return data.groupby(['region','water_name','loc_date', 'location', 'date'], as_index=False).agg({'pcs_m':'sum', 'quantity':'sum'})

    def get_locations_by_region(self, locations_in_use, locations_of_interest):
        """Return the ``locations_of_interest`` that occur in ``locations_in_use``, in their original order."""
        in_use = set(locations_in_use)
        return [x for x in locations_of_interest if x in in_use]

In [8]:
a = CatchmentArea(b, dfBeaches, **clas_kwargs)

In [9]:
class Table:
    def __init__

Unnamed: 0,date,code,pcs_m,quantity,location,loc_date,water_name,groupname,population,region,city
21952,2021-02-18,G3,0.05,4,pfafikon-bad,"(pfafikon-bad, 2021-02-18 00:00:00)",Zurichsee,packaging,16391,Limmat,Freienbach
21953,2021-02-18,G941,0.05,4,pfafikon-bad,"(pfafikon-bad, 2021-02-18 00:00:00)",Zurichsee,packaging,16391,Limmat,Freienbach
21954,2021-02-18,G33,0.01,1,pfafikon-bad,"(pfafikon-bad, 2021-02-18 00:00:00)",Zurichsee,food,16391,Limmat,Freienbach
21955,2021-02-18,G96,0.01,1,pfafikon-bad,"(pfafikon-bad, 2021-02-18 00:00:00)",Zurichsee,waste water,16391,Limmat,Freienbach
21956,2021-02-18,G67,0.15,12,pfafikon-bad,"(pfafikon-bad, 2021-02-18 00:00:00)",Zurichsee,agg-con-trans,16391,Limmat,Freienbach
...,...,...,...,...,...,...,...,...,...,...,...
315,2020-10-13,G74,0.00,0,zurichsee_wollishofen_langendorfm,"(zurichsee_wollishofen_langendorfm, 2020-10-13...",Zurichsee,construction,415367,Zürich,Zürich
316,2020-11-12,G74,0.00,0,zurichsee_wollishofen_langendorfm,"(zurichsee_wollishofen_langendorfm, 2020-11-12...",Zurichsee,construction,415367,Zürich,Zürich
317,2020-12-10,G74,0.00,0,zurichsee_wollishofen_langendorfm,"(zurichsee_wollishofen_langendorfm, 2020-12-10...",Zurichsee,construction,415367,Zürich,Zürich
318,2021-01-10,G74,0.00,0,zurichsee_wollishofen_langendorfm,"(zurichsee_wollishofen_langendorfm, 2021-01-10...",Zurichsee,construction,415367,Zürich,Zürich


In [10]:
# scatter chart of all project surveys
# use all the surveys, not aggregated
# NOTE(review): `useThis` is not assigned in any visible cell, which is the NameError shown
# in this cell's output; `lavey_locs` (only present as a commented-out line) and
# `bassin_locs` are not assigned either — confirm which objects replace them
allsurveys =  useThis.groupby(['loc_date', 'location', 'date', 'water_name'], as_index=False).pcs_m.sum()

# survey_totals = useThis.groupby(['loc_date','location','water_name', 'date','population'], as_index=False).agg({"pcs_m":"sum", "quantity":"sum"})
# median survey total per location
allsurveys_pcs_median = allsurveys.groupby('location').pcs_m.median()

# identify lakes v/s rivers
# one lookup per row: the `water` value of the beach with that index
allsurveys['type'] = allsurveys.location.map(lambda x: dfBeaches[dfBeaches.index == x]['water'].values[0])

# group the data for the regional levels
lavey = allsurveys[(allsurveys.location.isin(lavey_locs))].groupby(['loc_date', 'location', 'date', 'water_name'], as_index=False).pcs_m.sum()
bassin_versant = allsurveys[(allsurveys.location.isin(bassin_locs))].groupby(['loc_date', 'location', 'date', 'water_name'], as_index=False).pcs_m.sum()

# count the number of rivers and lakes from all the samples
# the `type` values are read as 'r' and 'l'; a missing value raises KeyError here
v_counts = allsurveys['type'].value_counts()
rivercount = int(v_counts['r'])
lakecount = int(v_counts['l'])

# make a table of key statistics for the different levels:
# describe()[1:] skips the first entry of the summary; each remaining value becomes a one-cell row
a_sum = pd.DataFrame(allsurveys.pcs_m.describe()[1:].round(2)).T
a_sum_table = [[x] for x in a_sum.values[0]]
rowLabels = [x for x in list(a_sum.columns)]

# the bassin versant
bassin_sum = pd.DataFrame(bassin_versant.pcs_m.describe()[1:].round(2)).T
bassin_table = [[x] for x in bassin_sum.values[0]]
bassinLabels = [x for x in list(bassin_sum.columns)]

# the municipality
lavey_sum = pd.DataFrame(lavey.pcs_m.describe()[1:].round(2)).T
lavey_table = [[x] for x in lavey_sum.values[0]]
laveyLabels = [x for x in list(lavey_sum.columns)]

NameError: name 'useThis' is not defined

In [None]:
# adjust table kwargs
# this rebinds the module level `tablecenter_k` (loc was "top left") for all later cells
tablecenter_k = dict(loc="center", bbox=(0,0,1,1), cellLoc='center')

fig = plt.figure(constrained_layout = False, figsize=(12,6))
# NOTE(review): `figure_num` is incremented but not initialised in any visible cell
figure_num += 1

# declare a grid
gs = GridSpec(1, 7, figure=fig)

# put an ax on it
# the table takes the last two of the seven columns
ax1 = fig.add_subplot(gs[5:])

# the context matters for the row and column labels
# NOTE(review): `is_french`, `summary_row_fr` and `french_pcm` are not assigned in any visible cell;
# in the else branch `rowLabels` keeps the value assigned in the summary statistics cell
if is_french:
    rowLabels = summary_row_fr
    col_label = [french_pcm]
else:
    col_label = ['pieces per meter']

# define the table
# one column with the key values of the catchment area, header and row labels shaded
a_table = mpl.table.table(
    cellText=bassin_table,
    rowLabels=rowLabels,
    rowColours=['antiquewhite' for i in rowLabels],
    colLabels=col_label,
    colColours=['antiquewhite' for col in np.arange(1)],
    
    ax=ax1,
    **tablecenter_k)

def table_format(a_table, ax, size=12):
    """Apply the fixed font ``size`` to ``a_table`` and strip grid, spines and ticks from ``ax``."""
    table_fonts(a_table, size=size)
    make_table_grids(ax)
    ax.tick_params(**tabtickp_k)

# the table is already attached to ax1: mpl.table.table(ax=ax1, ...) adds it,
# so it is only formatted here (a second add_table would draw it twice)
table_format(a_table, ax1)

# scatter plot: all other surveys, the catchment area, the municipality
ax2 = fig.add_subplot(gs[0:5])
in_bassin = allsurveys.location.isin(bassin_locs)
sns.scatterplot(data=allsurveys[~in_bassin], x='date',  y='pcs_m', alpha=0.5, label="tous les autres prélèvements", color='darkslategray', edgecolor='darkslategray', linewidth=.1,s=70,ax=ax2)
sns.scatterplot(data=allsurveys[in_bassin], x='date',  y='pcs_m', alpha=0.5, label=bassin_label, color='dodgerblue', edgecolor='dodgerblue', linewidth=.1,s=70,ax=ax2)
sns.scatterplot(data=lavey, x='date',  y='pcs_m', alpha=1, label=coi, color='red', edgecolor='red', linewidth=1,s=100, ax=ax2)
# format scatter: one tick per month, labelled with the abbreviated month name
ax2.xaxis.set_major_formatter(mths_fmt)
ax2.xaxis.set_major_locator(allmonths)
ax2.set_xlabel("")
# context
# NOTE(review): the label of the first scatter series is French in both contexts
if is_french:
    ax1.set_title(F"{bassin_label}: valeurs clés", loc='right', ha='right', pad=10)
    ax2.set_title(F"Figure {figure_num}: {french_srs} {startyearmonth} - {endyearmonth}, lacs={lakecount}, rivières={rivercount}.", loc='left', pad=10)
    ax2.set_ylabel(F"{french_pcm}", **ylab_k)
else:
    ax1.set_title(F"{bassin_label}", loc='left', pad=10)
    ax2.set_ylabel("Pieces per meter", **ylab_k)
    ax2.set_title(F"Figure {figure_num}: survey totals {startyearmonth} - {endyearmonth}, lakes={lakecount}, rivers={rivercount}", loc='left', pad=10)

plt.tight_layout()

# tag the output:
figname = F"figure{figure_num}.jpg"
atype = "figure"
tag =  'all surveys: scatter plot, key values table'
add_output(figname, tag, fignum=figure_num, atype=atype)

plt.show()
plt.close()

In [None]:
# a single axes that only holds the key values table of the municipality
fig, ax1 = plt.subplots(figsize=(4,8))
figure_num += 1

# the context matters for the row and column labels
# count the number of samples for this summary
# NOTE(review): this label is French in both contexts
n_samples = F"Nombre de recensements: {lavey.loc_date.nunique()}"

# in the else branch `rowLabels` keeps whatever an earlier cell assigned
if is_french:
    rowLabels = summary_row_fr
    col_label = [french_pcm]
    title = F"{coi}, valeurs clés"
else:
    col_label = ['pieces per meter']
    title = F"{coi}, key values"

# define the table
a_table = mpl.table.table(
    cellText=lavey_table,
    rowLabels=rowLabels,
    rowColours=['antiquewhite' for i in rowLabels],
    colLabels=col_label,
    colColours=['antiquewhite' for col in np.arange(1)],
    
    ax=ax1,
    **tablecenter_k)

# NOTE(review): same body as the `table_format` defined in the previous figure cell; this definition replaces it
def table_format(a_table, ax, size=12):
    """Apply the fixed font ``size`` to ``a_table`` and strip grid, spines and ticks from ``ax``."""
    table_fonts(a_table, size=size)
    make_table_grids(ax)
    ax.tick_params(**tabtickp_k)

# the table is already attached to ax1: mpl.table.table(ax=ax1, ...) adds it,
# so it is only formatted here (a second add_table would draw it twice)
table_format(a_table, ax1)
ax1.set_title(F"Figure {figure_num}: {title}", pad=10, ha='right', loc='right')
# the number of surveys is shown below the table
ax1.set_xlabel(n_samples, **ylab_k)

plt.tight_layout()

# tag the output:
figname = F"figure{figure_num}.jpg"
atype = "figure"
tag =  F"key values {coi}"
add_output(figname, tag, fignum=figure_num, atype=atype)

plt.show()
plt.close()

In [None]:
# section heading for the most common objects, as markdown with an inline colour style
if is_french:
    sommarkdown = """#### <span style="color:#008891"> Les objets les plus communs: toutes les enquêtes </span>\n    
    """
else:
    sommarkdown = """#### <span style="color:#008891">Most common objects: all surveys</span>\n    
     """

In [None]:
# render the heading assembled in the previous cell
display(Markdown(sommarkdown))

In [None]:
# get the total quantity and pieces per meter for each code
# NOTE(review): `newdfx`, `bassin_locs`, `lavey_locs` and `a_group_map` are not assigned in any visible cell
# code quantity:
allcodevals = pd.DataFrame(newdfx.groupby('code').quantity.sum())
bassincodevals = pd.DataFrame(newdfx[newdfx.location.isin(bassin_locs)].groupby('code').quantity.sum())
lavey_codevals = pd.DataFrame(newdfx[(newdfx.location.isin(lavey_locs))].groupby('code').quantity.sum())

# code median pcs/m
bassin_pcsm = newdfx[(newdfx.location.isin(bassin_locs))].groupby('code').pcs_m.median()
lavey_pcsm = newdfx[(newdfx.location.isin(lavey_locs))].groupby('code').pcs_m.median()
all_pcsm = newdfx.groupby('code').pcs_m.median()

# make a df of all codes for each regional level add description, material, group and percent of regional total:
# the three frames are changed in place; they are indexed by code
for a_df in [allcodevals, lavey_codevals, bassincodevals]:
    # add description and material from the codes df
    a_df['description'] = a_df.index.map(lambda x: dfCodes.loc[x].description)
    a_df['material'] = a_df.index.map(lambda x: dfCodes.loc[x].material)
    a_df['group'] = a_df.index.map(lambda x: a_group_map[x])
    a_total = a_df.quantity.sum()
    # fraction (0-1) of the level total, rounded to two decimals
    a_df['p_total'] = a_df.quantity/a_total
    a_df['p_total'] = a_df['p_total'].round(2)
    # NOTE(review): the sorted frame is not assigned, so this line has no effect; the frames stay in code order
    a_df.sort_values(by='quantity',ascending=False)
    a_df.rename(columns={'p_total':'% of total'}, inplace=True)

# export the code totals of the catchment area
data_num += 1
figname = F"{project_directory}/bassin_code_totals.csv"
tag = F'{bassin_label}: code totals'
atype='data'
# the codes are the index of the frame and add_output writes with index=False:
# move them to a column so that they end up in the file
add_output(figname, tag, fignum=data_num, atype=atype, data=bassincodevals.reset_index())

# export the median pcs/m per code of the catchment area
data_num += 1
figname = F"{project_directory}/bassin_code_pcsm_median.csv"
tag = F'{bassin_label}: code median pcs/m'
atype='data'
# numbered with data_num like the export above (was figure_num); codes kept as a column
add_output(figname, tag, fignum=data_num, atype=atype, data=bassin_pcsm.reset_index())

# make a top ten table for each region:
def _top_ten_by_quantity(code_values, code_medians):
    """Return (codes with quantity > 0, the ten largest with ``pcs_m`` added, the display table)."""
    found = code_values[code_values.quantity > 0]
    table_data = found[['description', 'material', 'quantity', '% of total', 'group']].copy()
    top_ten = table_data.sort_values(by='quantity', ascending=False).head(10).copy()
    # median pcs/m of each code, looked up by the code in the index
    top_ten['pcs_m'] = top_ten.index.map(lambda x: code_medians.loc[x])
    display_table = top_ten[['description', 'material', 'quantity','% of total',  'pcs_m', 'group']].reset_index()
    return table_data, top_ten, display_table

lavey_tabledata, top_ten_lavey, top_ten_lavey_table = _top_ten_by_quantity(lavey_codevals, lavey_pcsm)

bassin_tabledata, top_ten_bassin, top_ten_bassin_table = _top_ten_by_quantity(bassincodevals, bassin_pcsm)

all_tabledata, top_ten_all, top_ten_all_table = _top_ten_by_quantity(allcodevals, all_pcsm)

In [None]:
# get the group total and percentage of total for each code group and level:
in_lavey = useThis.location.isin(lavey_locs)
in_bassin = useThis.location.isin(bassin_locs)

# the municipality
lavey_groups = useThis[in_lavey].groupby('groupname', as_index=False).quantity.sum()
# sum per group and survey, then the median of those sums per group
lavey_group_pcsm = useThis[in_lavey].groupby(['groupname','loc_date', 'date', 'location']).pcs_m.sum()
lavey_group_pcsm = lavey_group_pcsm.groupby('groupname').median()
atotal = lavey_groups.quantity.sum()
lavey_groups['p_total'] = (lavey_groups.quantity/atotal)*100
lavey_groups['p_total'] = lavey_groups['p_total'].round(2)
lavey_groups.sort_values(by='p_total', ascending=True, inplace=True)
# group names ordered by their share in the municipality
anorder = lavey_groups.groupname.values

# the catchment area
bassin_groups = useThis[in_bassin].groupby('groupname', as_index=False).quantity.sum()
atotal = bassin_groups.quantity.sum()
bassin_groups['p_total'] = (bassin_groups.quantity/atotal)*100
bassin_groups['p_total'] = bassin_groups['p_total'].round(2)
bassin_groups.sort_values(by='p_total', ascending=True, inplace=True)

# all the data: no location filter here (the municipal filter had been copied over,
# which made this frame a duplicate of lavey_groups)
all_groups = useThis.groupby('groupname', as_index=False).quantity.sum()
atotal = all_groups.quantity.sum()
all_groups['p_total'] = (all_groups.quantity/atotal)*100
all_groups['p_total'] = all_groups['p_total'].round(2)
all_groups.sort_values(by='p_total', ascending=True, inplace=True)

In [None]:
# pcs/m per survey and code group: all data, the municipality, the catchment area
survey_group_keys = ['loc_date', 'date','location','water_name', 'groupname']
municipal_rows = useThis[useThis.location.isin(lavey_locs)]
catchment_rows = useThis[useThis.location.isin(bassin_locs)]

boxes = useThis.groupby(survey_group_keys, as_index=False).pcs_m.sum()
boxes_l = municipal_rows.groupby(survey_group_keys, as_index=False).pcs_m.sum()
boxes_b = catchment_rows.groupby(survey_group_keys, as_index=False).pcs_m.sum()

# code groups of the municipality ordered by their median pcs/m, smallest first
get_an_order_l = boxes_l.groupby('groupname').pcs_m.median()
an_order_of_boxes_l = get_an_order_l.sort_values(ascending=True)
a_box_order_l = an_order_of_boxes_l.index


In [None]:
# get the significant value for each code group
# the `sig` quantile of pcs/m per code group, for all data and for the catchment area
sig_vals = boxes.groupby('groupname').pcs_m.quantile(sig)
sig_vals_b = boxes_b.groupby('groupname').pcs_m.quantile(sig)
# get the significant value for each code
# sig_vals_code = useThis.groupby('code').pcs_m.median()
# sig_vals_bassin  = useThis[useThis.location.isin(bassin_locs)].groupby('code').pcs_m.median()
# bassin_pcsm = newdfx[(newdfx.location.isin(bassin_locs))].groupby('code').pcs_m.median()


# map the significant value to the results
boxes['limit'] = boxes.groupname.map(lambda x: sig_vals[x])

# create a boolean for significant
boxes['significant'] = boxes.pcs_m >= boxes.limit

# map to number of samples, and significant values
survey_totals = useThis.groupby(['loc_date','location','water_name', 'date','population'], as_index=False).agg({"pcs_m":"sum", "quantity":"sum"})
survey_totals.reset_index(inplace=True)

# number of samples per lake
tries = survey_totals.groupby(['water_name']).loc_date.nunique()

# number of locations per lake
num_locations = survey_totals.groupby('water_name').location.nunique()

# fails: number of locations where object group has been identified
num_with = useThis[useThis.quantity > 0].groupby(['water_name', 'groupname']).location.nunique()

# fails: number of samples with the object group
samps_with = useThis[useThis.quantity > 0].groupby(['water_name', 'groupname']).loc_date.nunique()

# median pcs_m
median_pcs = boxes.groupby(['water_name', 'groupname']).pcs_m.median()

# mean pcs_m
mean_pcs = boxes.groupby(['water_name', 'groupname']).pcs_m.mean()

# significant values
# determine whether or not the event was greater than or equal to the `sig` quantile

# map limit to data
# NOTE(review): the next two assignments repeat the `limit` and `significant` columns created above
boxes['limit'] = boxes.groupname.map(lambda x: sig_vals[x])

# create boolean
boxes['significant'] = boxes.pcs_m >= boxes.limit

# make a df of tests and test failures
# the sum of the boolean column counts the significant surveys per water feature and code group
fails = boxes.groupby(['water_name', 'groupname'], as_index=False).significant.sum()

# get the number of samples for the lake
fails['samples'] = fails.water_name.map(lambda x: tries[x])

# display the ratio of significant values to samples
fails['frequency_s'] = fails.significant.astype('str') + '/' + fails.samples.astype('str')

# the number of locations
fails['locations'] = fails.water_name.map(lambda x: num_locations[x])

def locations_with(x,y,somdata):
    """Return ``somdata[x][y]``, or 0 when either key is missing.

    Only lookup failures (``KeyError``, ``IndexError``) are mapped to 0;
    any other error, including a keyboard interrupt, is propagated instead
    of being silently turned into a zero.
    """
    try:
        has = somdata[x][y]
    except LookupError:
        has = 0
    return has
        
# the number of locations where the object group has been identified
# (0 when the water feature / code group pair is missing from the lookup)
fails['loc_with'] = fails.apply(lambda x:locations_with(x['water_name'], x['groupname'], num_with), axis=1)

# the number of samples where the object group has been identified
fails['samp_with'] = fails.apply(lambda x:locations_with(x['water_name'], x['groupname'], samps_with), axis=1)

# samples frequency of failure
# text of the form "found/total"
fails['frequency'] = fails.samp_with.astype('str') + '/' + fails.samples.astype('str')

# locations frequency of failure
fails['frequency_l'] = fails.loc_with.astype('str') + '/' + fails.locations.astype('str')

# median/mean pcs_m:
fails['median pcs/m'] = fails.apply(lambda x:locations_with(x['water_name'], x['groupname'], median_pcs), axis=1)
fails['mean pcs/m'] = fails.apply(lambda x:locations_with(x['water_name'], x['groupname'], mean_pcs), axis=1)

# likelihood
# share of locations with the group times samples with the group over (samples + 1)
# NOTE(review): only the sample ratio has the +1 in the denominator — confirm this is intended
fails['likelihood'] = ((fails.loc_with/fails.locations)*(fails.samp_with/(fails.samples+1)))

def make_minimum_likelihood(x,samples=100):
    """Give a likelihood of exactly zero a small positive floor.

    An item that was never found is still assumed to have some chance of
    being found, so a zero is replaced by 1/samples. Any other value is
    returned unchanged.

    Parameters
    ----------
    x : the likelihood to check.
    samples : denominator of the floor value (default 100).
    """
    return (1/samples) if x == 0 else x


# replace zero likelihoods with the floor value from make_minimum_likelihood
fails['likelihood'] = fails.apply(lambda row: make_minimum_likelihood(row['likelihood']), axis=1)

# the columns kept for the summary table and their display names
table_columns = ['water_name','frequency_l', 'frequency',  'frequency_s', 'likelihood','median pcs/m', 'groupname']
table_labels = {'water_name':'name', 'frequency_l':"# locations", 'frequency':"# samples", 'frequency_s':'# significant'}
table_data = fails[table_columns].rename(columns=table_labels)

# median pcs_m per code group over all of boxes
national_median = boxes.groupby('groupname')['pcs_m'].median()

# median pcs_m per code group for boxes_l, next to the value from lavey_group_pcsm
this_data = boxes_l.groupby('groupname')['pcs_m'].median().to_frame()
this_data['catchment area'] = this_data.index.map(lambda group: lavey_group_pcsm[group])
if is_french:
    # French column names, and the French group names as the index
    this_data = this_data.rename(columns={'catchment area':'bassin versant', 'pcs_m':'résultats locaux'})
    this_data['groupname'] = this_data.index.map(french_names)
    this_data = this_data.set_index('groupname')

In [None]:

# sum the % of total of the most common objects
ptotal = top_ten_bassin_table['% of total'].sum()*100
psum = top_ten_bassin_table.quantity.sum()
toptenqty = bassincodevals.quantity.sum()

# display copy: "% of total" becomes text such as "12.3%"
top_ten_table_p = top_ten_bassin_table.copy()
top_ten_table_p['% of total'] = (top_ten_table_p['% of total']*100).round(1).astype('str') + "%"

# pick the columns (and language) of the table that is drawn
if is_french:
    bassin_tabledatacopy = top_ten_table_p.copy()
    bassin_tabledatacopy.reset_index(inplace=True)
    # count_k shortens long text to at most `limit` characters
    bassin_tabledatacopy['description'] = bassin_tabledatacopy.code.map(lambda x: count_k(fr_defs_codes[x], limit))
    bassin_tabledatacopy['group'] = bassin_tabledatacopy.group.map(lambda x: count_k(french_names[x], limit))
    bassin_tabledatacopy.rename(columns=french_columns, inplace=True)
    thetabledata = bassin_tabledatacopy[['code','description', 'matériel', 'quantité', '% du total','groupe']].copy()
else:
    the_top_ten_table = top_ten_table_p[['code','description', 'material', 'quantity', '% of total','group']].copy()
    # the original line assigned True to the reset_index attribute instead of
    # calling it; drop=True keeps the six columns that colWidths expects
    the_top_ten_table.reset_index(drop=True, inplace=True)
    thetabledata = the_top_ten_table.copy()


# make adjustments to table kwargs:
tablecenter_k = dict(loc="center", bbox=(0,0,1,1), colWidths=[9, 42, 8, 8, 9,13], cellLoc='center')

# plot the table: the figure height scales with the number of rows
fig, ax = plt.subplots(figsize=(15, len(thetabledata)*.75))
figure_num += 1
ax = make_table_grids(ax)
a_table = mpl.table.table(
    cellText=thetabledata.values,
    colLabels=thetabledata.columns,
    colColours=['antiquewhite' for col in list(thetabledata.columns)],
    ax=ax,
    **tablecenter_k)

# set parameters
table_fonts(a_table, size=12)
ax.tick_params(**tabtickp_k)

# add the table
ax.add_table(a_table)

if is_french:
    ax.set_title(F"Figure {figure_num}: top {len(thetabledata)} codes {bassin_label}, {np.round(ptotal, 2)}% de {'{:,}'.format(toptenqty)} objets", **title_k14)
else:
    ax.set_title(F"Figure {figure_num}: top {len(thetabledata)} codes {bassin_label}, {np.round(ptotal, 2)}% of {'{:,}'.format(toptenqty)} objects", **title_k14)


plt.tight_layout()
# register the figure in the notebook's list of outputs
figname = F"figure{figure_num}.jpg"
tag = F'{bassin_label}: 10 most common objects table'
add_output(figname, tag, fignum=figure_num)
plt.show()
plt.close()

In [None]:
# compare the pcs/m of the top ten codes across the regional levels:
# one column from lavey_pcsm, one from the catchment table, one from all_pcsm
heat_columns = [coi, bassin_label, 'tous les bassins versants']

top_ten_bassin_table[coi] = top_ten_bassin_table['code'].map(lambda code: lavey_pcsm[code])
top_ten_bassin_table['tous les bassins versants'] = top_ten_bassin_table['code'].map(lambda code: all_pcsm[code])
# later cells test membership against this index, so the table is indexed in place
top_ten_bassin_table.set_index('code', inplace=True)

# the heat map data: the three result columns, indexed by the code description
top_ten_tablex = (
    top_ten_bassin_table
    .rename(columns={'pcs_m':bassin_label})
    .assign(description=top_ten_bassin_table.index.map(lambda code: fr_defs_codes[code]))
    [heat_columns + ['description']]
    .set_index('description')
)
top_ten_tablex.columns = heat_columns

fig, ax = plt.subplots(figsize=(17,8))
figure_num += 1
title=F"Figure {figure_num}: {bassin_label} les top-ten objets pièces par mètre"
sns.heatmap(
    data=top_ten_tablex,
    cmap='YlOrRd',
    linewidth=0.1,
    linecolor='darkred',
    annot=True,
    ax=ax,
    annot_kws={"fontsize":14},
)

# no axis labels; column names go on top, row labels stay horizontal
ax.set(ylabel="", xlabel="")
ax.xaxis.set_ticks_position('top')
ax.tick_params(axis='x', which='both',labelsize=14)
yticklabs = ax.get_yticklabels()
ax.set_yticklabels(yticklabs, fontsize=12, rotation=0)
ax.set_title(title, **title_k14)

plt.tight_layout()
# register the figure in the notebook's list of outputs
figname = F"figure{figure_num}.jpg"
tag = 'top-ten regional-local-national median'
add_output(figname, tag, fignum=figure_num)
plt.show()

plt.close()

In [None]:
# codes whose value in lavey_pcsm is above the value in bassin_pcsm,
# leaving out the codes already listed in the top ten table
exceeds_catchment_median = [
    code for code in lavey_pcsm.index
    if lavey_pcsm[code] > bassin_pcsm[code] and code not in top_ten_bassin_table.index
]

# keep only those codes from the bassin table data
is_of_interest = bassin_tabledata.index.isin(exceeds_catchment_median)
codes_of_interest = bassin_tabledata.loc[is_of_interest].copy()

# one column for the coi results, one for the bassin results, one for the description
codes_of_interest[coi] = codes_of_interest.index.map(lambda code: lavey_pcsm.loc[code])
codes_of_interest[bassin_label] = codes_of_interest.index.map(lambda code: bassin_pcsm.loc[code])
codes_of_interest['description'] = codes_of_interest.index.map(lambda code: fr_defs_codes[code])

# for charting: largest coi value first, indexed by description
codes_of_interest = (
    codes_of_interest[[coi, bassin_label, 'description']]
    .sort_values(by=coi, ascending=False)
    .set_index('description')
)

In [None]:

# heat map of the objects of interest: coi value next to the catchment value
codes_of_interest.columns = [coi, bassin_label]

# the figure height scales with the number of rows
fig, ax = plt.subplots(figsize=(13,len(codes_of_interest)*.75))
figure_num += 1
title=F"Figure {figure_num}: {coi} objets d'intérêt, supérieure à la médiane de {bassin_label}"
sns.heatmap(
    data=codes_of_interest,
    cmap='YlOrRd',
    linewidth=0.1,
    linecolor='darkred',
    annot=True,
    ax=ax,
    annot_kws={"fontsize":14},
)

# no axis labels; column names go on top, row labels stay horizontal
ax.set(ylabel="", xlabel="")
ax.xaxis.set_ticks_position('top')
ax.tick_params(axis='x', which='both',labelsize=14)
yticklabs = ax.get_yticklabels()
ax.set_yticklabels(yticklabs, fontsize=14, rotation=0)
ax.set_title(title, ha='right', loc='right', fontsize=14, pad=14)

plt.tight_layout()
# register the figure in the notebook's list of outputs
figname = F"figure{figure_num}.jpg"
tag = 'objects of interest'
add_output(figname, tag, fignum=figure_num)
plt.show()

plt.close()

In [None]:
sns.set_style('whitegrid')

# label each record with coi when its location is in lavey_locs, else with bassin_label
boxes_b['level'] = boxes_b.location.map(lambda x: assign_a_level(x, lavey_locs, [coi, bassin_label]))

# assign a regional level to each set of data
bassin_groups['level'] = bassin_label
lavey_groups['level'] = coi

# merge the data sets (DataFrame.append was removed in pandas 2.0; concat stacks
# the frames in the same order and keeps the original index labels)
these_groups = pd.concat([lavey_groups, bassin_groups])

# y-axis format for the English bar chart, e.g. "25%"
a_form = mtick.FormatStrFormatter('%.0f%%')

# left: % of total per code group, right: distribution of pcs_m per code group
fig, ax = plt.subplots(1,2, figsize=(14, 10))
an_order = [coi, bassin_label]
figure_num += 1
if is_french:
    # NOTE(review): these two lines overwrite groupname in place, so running
    # this cell twice would look up French names in french_names - confirm
    these_groups['groupname'] = these_groups['groupname'].map(lambda x: french_names[x])
    boxes_b['groupname'] = boxes_b['groupname'].map(lambda x: french_names[x])

    sns.barplot(data=these_groups, x='groupname', hue='level', hue_order=an_order, palette={coi:'salmon', bassin_label:'dodgerblue'}, y='p_total', ax=ax[0])
    ax[0].set_ylabel(F"{french_pct} niveau régional", **ylab_k)
    ax[0].set_title(F"groupe de codes % du total, pour chaque niveau régional", **title_k)
    ax[0].set_xlabel("")
    ax[0].tick_params(axis='x', which='both',labelsize=12, rotation=90)
    ax[0].legend(loc='upper left')

    sns.boxplot(data=boxes_b[['groupname','pcs_m', 'level']], x='groupname', y='pcs_m', hue='level',  hue_order=an_order, palette={coi:'salmon', bassin_label:'dodgerblue'}, ax=ax[1],  showfliers=False)
    ax[1].set_ylabel(F"{french_pcm}", **ylab_k)
    ax[1].set_title(F"distribution {french_bg}, {french_nooutliers}", **title_k)
    ax[1].set_xlabel("")
    ax[1].tick_params(axis='x', which='both',labelsize=12, rotation=90)

else:
    sns.barplot(data=these_groups, x='groupname', hue='level', palette=['red', 'blue'], y='p_total', dodge=False, ax=ax[0])
    # label typo fixed: "regaional" -> "regional"
    ax[0].set_ylabel("Percent total of all objects at regional level", **ylab_k)
    ax[0].set_title(F"code group % of total, for each regional level", **title_k)
    ax[0].yaxis.set_major_formatter(a_form)
    ax[0].set_xlabel("")
    ax[0].tick_params(axis='x', which='both',labelsize=12, rotation=90)
    ax[0].legend(loc='upper left')

    sns.boxplot(data=boxes_l[['groupname','pcs_m']], x='groupname', y='pcs_m', hue='groupname', palette=grouppalette, order=a_box_order_l, dodge=False, ax=ax[1], showfliers=False)
    ax[1].set_ylabel("Pieces per meter", labelpad=10, **ylab_k)
    # title typo fixed: "disribution" -> "distribution"
    ax[1].set_title(F"distribution of groups, outliers not shown", **title_k)
    ax[1].set_xlabel("")
    ax[1].tick_params(axis='x', which='both',labelsize=12, rotation=90)



fignum=figure_num

if is_french:
    suptitle =F"Figure {figure_num} résultats regroupés par activité ou utilisation : % du total et répartition en pcs/m"
else:
    suptitle = F"Figure {figure_num} results grouped by activity or use: % of total and distribution in pcs/m"

plt.suptitle(suptitle, x=0, y=.99, ha='left', fontsize=16, linespacing=2)


plt.tight_layout()
# leave room above the axes for the suptitle
plt.subplots_adjust(top=.88)
# register the figure in the notebook's list of outputs
figname = F"figure{figure_num}.jpg"
atype='figure'
tag = 'code groups: percentage of total, distribution, regional results'
add_output(figname, tag, fignum=fignum, atype=atype)
plt.show()
plt.close()

In [None]:
# median pcs_m per code group (rows) and regional level (columns)
a = (
    boxes_b
    .groupby(['level', 'groupname'], as_index=False)['pcs_m']
    .median()
    .pivot(columns='level', index='groupname')
)
# the pivot leaves two-level column labels; keep only the level names
a.columns = a.columns.get_level_values(1)

In [None]:
# heat map of the median pcs/m per code group, one column per regional level
fig, ax = plt.subplots(figsize=(9,10))
figure_num += 1
title = F"Figure {figure_num} groupes de codes: pièces médianes par mètre" if is_french else "Local median compared to catchment area"

sns.heatmap(
    data=a,
    cmap='YlOrRd',
    linewidth=0.1,
    linecolor='darkred',
    annot=True,
    annot_kws={"fontsize":14},
    ax=ax,
)

# no axis labels; column names go on top, row labels stay horizontal
ax.set(ylabel="", xlabel="")
ax.xaxis.set_ticks_position('top')
ax.tick_params(axis='x', which='both',labelsize=12)
yticklabs = ax.get_yticklabels()
ax.set_yticklabels(yticklabs, fontsize=14, rotation=0)
ax.set_title(title, **title_k14)
plt.tight_layout()

# register the figure in the notebook's list of outputs
figname = F"figure{figure_num}.jpg"
atype='figure'
tag = F"code groups: {coi} and {bassin_label} median pcs/m"
add_output(figname, tag, fignum=figure_num, atype=atype)
plt.show()
plt.close()

In [None]:
# table of every object recorded in lavey_tabledata
this_tabledata = lavey_tabledata.reset_index()

# display copy: "% of total" becomes text such as "12.3%"
this_tabledata_p = lavey_tabledata.copy()
a_total = this_tabledata.quantity.sum()
this_tabledata_p['% of total'] = (this_tabledata_p['% of total']*100).round(1).astype('str') + "%"

# NOTE(review): only the French branch swaps in the formatted six-column
# table; otherwise the unformatted reset_index() frame is drawn - confirm
# that this is intended.
if is_french:
    tabledatacopy = this_tabledata_p.copy()
    tabledatacopy.reset_index(inplace=True)
    tabledatacopy.sort_values(by='group', inplace=True)
    # count_k shortens long text to at most `limit` characters
    tabledatacopy['description'] = tabledatacopy.code.map(lambda x: count_k(fr_defs_codes[x], limit))
    tabledatacopy['group'] = tabledatacopy.group.map(lambda x: count_k(french_names[x], limit))
    tabledatacopy.rename(columns=french_columns, inplace=True)
    thetabledata = tabledatacopy[['code','description', 'matériel', 'quantité', '% du total','groupe']].copy()
    this_tabledata = thetabledata
tablecenter_k = dict(loc="center", bbox=(0,0,1,1), colWidths=[9, 44, 8, 8, 9,11], cellLoc='center')

# plot the table: the figure height scales with the number of rows
# (the leftover debug print of the column count was removed)
fig, ax = plt.subplots(figsize=(18, len(this_tabledata)*.75))
figure_num += 1
ax = make_table_grids(ax)
a_table = mpl.table.table(
    cellText=this_tabledata.values,
    colLabels=this_tabledata.columns,
    colColours=['antiquewhite' for col in list(this_tabledata.columns)],
    ax=ax,
    **tablecenter_k)

# set parameters
table_fonts(a_table, size=12)
ax.tick_params(**tabtickp_k)

# add the table
ax.add_table(a_table)


if is_french:
    ax.set_title(F"Figure {figure_num}: {coi} tous {a_total} objets trouvés", **title_k14)
else:
    ax.set_title(F"Figure {figure_num}: {coi} all objects found", **title_k14)


plt.tight_layout()

# register the figure in the notebook's list of outputs
figname = F"figure{figure_num}.jpg"
tag = F"{coi}: all objects table"
add_output(figname, tag, fignum=figure_num)

plt.show()
plt.close()

In [None]:
# map number of sample per locations
# the number of distinct loc_date values per location
tries_l = boxes.groupby('location')['loc_date'].nunique()

# per location and code group: the number of distinct loc_date values with pcs_m above zero
fails_l = (
    boxes[boxes['pcs_m'] > 0]
    .groupby(['location', 'groupname'])['loc_date']
    .nunique()
)

In [None]:
# one row per location, water feature and code group, holding the median pcs_m
beach_keys = ['location', 'water_name','groupname']
fails_beach = boxes.groupby(beach_keys, as_index=False)['pcs_m'].median()

# tries: samples at the location; fails: samples where the group was found (0 if never)
fails_beach['tries'] = fails_beach['location'].map(lambda beach: tries_l[beach])
fails_beach['fails'] = fails_beach.apply(
    lambda row: locations_with(row['location'], row['groupname'], fails_l),
    axis=1,
)

# the ratio of failures to samples
fails_beach['likelihood'] = fails_beach['fails'].div(fails_beach['tries'])

In [None]:
# get the fail rate per beach (how many times the quantity was greater than zero)
# total tries and fails per code group, over the rows whose water_name is in bassin
bassin_rows = fails_beach[fails_beach['water_name'].isin(bassin)]
fails_beach_bassin = bassin_rows.groupby('groupname').agg({'tries': 'sum', 'fails': 'sum'})
fails_beach_bassin['group_rates'] = fails_beach_bassin['fails'].div(fails_beach_bassin['tries'])

# the rows for the locations listed in lavey_locs
fails_beach_lavey = fails_beach[fails_beach['location'].isin(lavey_locs)]

In [None]:
# the number of significant records per location
a = boxes.groupby('location')['significant'].sum()

# add that count and the median pcs/m to the beach table, then write it to disk
these_beaches['significant'] = these_beaches.index.map(lambda beach: a.loc[beach])
these_beaches['median pcs/m'] = these_beaches.index.map(lambda beach: allsurveys_pcs_median[beach])
these_beaches.to_csv(F"{project_directory}/these_beaches.csv")

In [None]:
# the rows whose water_name is in bassin, ordered by the population in these_beaches
in_bassin = fails_beach['water_name'].isin(bassin)
this_data = fails_beach[in_bassin].copy()
this_data['pop'] = this_data['location'].map(lambda beach: these_beaches.loc[beach]['population'])
this_data = this_data.sort_values(by='pop')

# the locations in that order, each listed once
this_order = this_data['location'].unique()

In [None]:
# heat map of pcs_m per code group (rows) and location (columns)
heat_map_palette = 'YlOrRd'
linecolor = 'white'

fig,axx = plt.subplots(figsize=(19,12))
figure_num += 1

a = this_data[['location', 'groupname',  'pcs_m']].copy()

if is_french:
    a['groupname'] = a['groupname'].map(lambda group: french_names[group])
    title = F"Figure {figure_num}: {bassin_label} groupes de codes, pièces par mètre médiane pour chaque lieu."
else:
    title = "code groups, median pieces per meter for each location."

# one column per location, arranged in the order held by this_order
a = a.pivot(columns='location', index='groupname')
other_locs = [loc for loc in a.columns.get_level_values(1) if loc not in lavey_locs]
a = a.reindex(this_order, axis=1, level=1)

sns.heatmap(
    a,
    cmap=heat_map_palette,
    ax=axx,
    annot=True,
    annot_kws={'fontsize':12},
    linewidths=.5,
    linecolor=linecolor,
)

# drop the first six characters of every x tick label
# (presumably the "pcs_m-" prefix that comes from the two-level columns - confirm)
labels = [tick.get_text() for tick in axx.get_xticklabels()]
newlabels = [label[6:] for label in labels]

axx.set_xticklabels(newlabels, fontsize=12)
axx.set_ylabel(" ")
axx.set_xlabel(" ")
axx.tick_params(axis='both', labelsize=12, pad=15)
axx.set_title(title, **title_k14)


plt.tight_layout()
# register the figure in the notebook's list of outputs
atype="figure"
figname = F"figure{figure_num}.jpg"
tag = F"{bassin_label} all locations median pcs/m"
add_output(figname,tag, fignum=figure_num, atype=atype)
plt.show()
plt.close()

### Figures and data produced by this notebook:



In [None]:
# table of the outputs registered in files_generated
files_df = pd.DataFrame(files_generated).rename(columns={'tag':'description'})

# keep the three display columns; a single sort by type then number replaces
# the earlier sort by type followed by a second sort on both keys
files_df = files_df[['type','number', 'description']]
files_df = files_df.sort_values(by=['type','number'])
tablecenter_k = dict(loc="top left", bbox=(0,0,1,1), cellLoc='center', colWidths=[20,10,70], fontsize=12)
tablecenter_kx = dict(loc="top left", bbox=(0,0,1,1), cellLoc='center', fontsize=12)

with sns.axes_style('white', {'xtick.color':'white', 'ytick.color':'white'}):
    # the figure height scales with the number of rows
    fig, axs = plt.subplots(figsize=(12,(len(files_df)*.75)), frameon=False)
    figure_num += 1
    sns.despine(fig=fig, top=True, left=True, right=True, bottom=True)

    # the axes created in this cell is axs; the original passed ax1,
    # a name this cell never defines
    make_table_grids(axs)

    a_table = axs.add_table(mpl.table.table(
        cellText=files_df.values,
        colLabels=files_df.columns,
        colColours=['antiquewhite' for col in files_df.columns],
        ax=axs,
        **tablecenter_k))


    table_fonts(a_table)

    axs.tick_params(**tabtickp_k)
    # register this table in the notebook's list of outputs
    figname = F"figure{figure_num}.jpg"
    tag = F"{coi}: output table"
    add_output(figname, tag, fignum=figure_num)
    plt.show()
    plt.close()

#### Hopefully that just worked for you

If not, contact analyst@hammerdirt.ch