In [1]:
# -*- coding: utf-8 -*-
# This is a report using the data from IQAASL.
# IQAASL was a project funded by the Swiss Confederation
# It produces a summary of litter survey results for a defined region.
# These charts serve as the models for the development of plagespropres.ch
# The data is gathered by volunteers.
# Please remember all copyrights apply, please give credit when applicable
# The repo is maintained by the community effective January 01, 2022
# There is ample opportunity to contribute, learn and teach
# contact dev@hammerdirt.ch

# Dies ist ein Bericht, der die Daten von IQAASL verwendet.
# IQAASL war ein von der Schweizerischen Eidgenossenschaft finanziertes Projekt.
# Es erstellt eine Zusammenfassung der Ergebnisse der Littering-Umfrage für eine bestimmte Region.
# Diese Grafiken dienten als Vorlage für die Entwicklung von plagespropres.ch.
# Die Daten werden von Freiwilligen gesammelt.
# Bitte denken Sie daran, dass alle Copyrights gelten, bitte geben Sie den Namen an, wenn zutreffend.
# Das Repo wird ab dem 01. Januar 2022 von der Community gepflegt.
# Es gibt reichlich Gelegenheit, etwas beizutragen, zu lernen und zu lehren.
# Kontakt dev@hammerdirt.ch

# Il s'agit d'un rapport utilisant les données de IQAASL.
# IQAASL était un projet financé par la Confédération suisse.
# Il produit un résumé des résultats de l'enquête sur les déchets sauvages pour une région définie.
# Ces tableaux ont servi de modèles pour le développement de plagespropres.ch
# Les données sont recueillies par des bénévoles.
# N'oubliez pas que tous les droits d'auteur s'appliquent, veuillez indiquer le crédit lorsque cela est possible.
# Le dépôt est maintenu par la communauté à partir du 1er janvier 2022.
# Il y a de nombreuses possibilités de contribuer, d'apprendre et d'enseigner.
# contact dev@hammerdirt.ch

# sys, file and nav packages:
import datetime as dt
from datetime import date, datetime, time
from babel.dates import format_date, format_datetime, format_time, get_month_names
import locale

# math packages:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.distributions.empirical_distribution import ECDF
from math import pi

# charting:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import ticker
from matplotlib import colors
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.gridspec import GridSpec
import seaborn as sns

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import BasicTicker, ColorBar, LinearColorMapper, PrintfTickFormatter
from bokeh.sampledata.unemployment1948 import data

# the module that has all the methods for handling the data
import resources.featuredata as featuredata

# home-brew utilities
import resources.chart_kwargs as ck
import resources.sr_ut as sut

# images and display
from IPython.display import Markdown as md

# chart style: seaborn "whitegrid" for every matplotlib chart in the notebook
sns.set_style("whitegrid")

# colors for gradients, taken from the chart_kwargs module
cmap2 = ck.cmap2
colors_palette = ck.colors_palette

# border and row shading for tables
a_color = "saddlebrown"
table_row = "saddlebrown"

## !! Begin notebook variables !!
# The comments in this cell mention German and English variants ("en" or "de").
# Change both date_lang and language when switching.
# NOTE: locale.setlocale raises locale.Error when the requested locale
# is not installed on the machine running the notebook.
date_lang =  'de_DE.utf8'
locale.setlocale(locale.LC_ALL, date_lang)

# the date format of the survey data is defined in the featuredata module
date_format = featuredata.date_format

# the language setting uses lower case: en or de
# changing the language may require changing the unit label
language = "de"
unit_label = "p/100 m"

# the standard date format is "%Y-%m-%d"; if the date column is
# not in this format it will not work.
# these dates cover the duration of the IQAASL project
start_date = "2020-03-01"
end_date ="2021-05-31"
start_end = [start_date, end_date]

# the fail rate used to calculate the most common codes is
# 50%, it can be changed:
fail_rate = 50

# Changing these variables produces different reports
# The map image for the area of interest.
# NOTE(review): this path points at the Aare map although the feature below
# is 'all' — confirm that this is the intended image.
bassin_map = "resources/maps/survey_areas/aare_scaled.jpeg"

# the label for the aggregation of all data in the region
top = "Alle Erhebungsgebiete"

# define the feature level and components
# here the feature of interest is every survey area: slug 'all' at level 'all'.
# the label for charting is called 'name'
this_feature = {'slug':'all', 'name':"Alle Erhebungsgebiete", 'level':'all'}

# these are the smallest aggregated components
# choices are water_name_slug (lake or river), city or location at the scale of a river bassin;
# water body or lake may be the most appropriate
this_level = 'river_bassin'

# identify the lakes of interest for the survey area
lakes_of_interest = ["neuenburgersee", "thunersee", "bielersee", "brienzersee"]

# !! End notebook variables !!

## data
# Survey location details (GPS, city, land use), indexed by location slug
dfBeaches = pd.read_csv("resources/beaches_with_land_use_rates.csv")
dfBeaches.set_index("slug", inplace=True)

# Survey dimensions and weights
dfDims = pd.read_csv("resources/corrected_dims.csv")

# code definitions, indexed by code
# NOTE(review): this path has no file extension — confirm the file name
dxCodes = pd.read_csv("resources/codes_with_group_names")
dxCodes.set_index("code", inplace=True)

# columns that need to be renamed. Setting the language will automatically
# change column names, code descriptions and chart annotations
columns = {
    "% to agg": "% agg",
    "% to recreation": "% recreation",
    "% to woods": "% woods",
    "% to buildings": "% buildings",
    "p/100m": "p/100 m",
}

# key word arguments to construct feature data
# !Note the water type allows the selection of river or lakes
# if None then the data is aggregated together. This selection
# is only valid for survey-area reports or other aggregated data
# that may have survey results from both lakes and rivers.
fd_kwargs = {
    "filename": "resources/checked_sdata_eos_2020_21.csv",
    "feature_name": this_feature['slug'],
    "feature_level": this_feature['level'],
    "these_features": this_feature['slug'],
    "component": this_level,
    "columns": columns,
    # use the notebook variable so that changing `language` in the
    # variables section is enough to switch the report language
    "language": language,
    "unit_label": unit_label,
    "fail_rate": fail_rate,
    "code_data": dxCodes,
    "date_range": start_end,
    "water_type": None,
}

fdx = featuredata.Components(**fd_kwargs)

# call the reports and languages
fdx.adjustForLanguage()
fdx.makeFeatureData()
fdx.locationSampleTotals()
fdx.makeDailyTotalSummary()
fdx.materialSummary()
fdx.mostCommon()
# !this is the feature data!
fd = fdx.feature_data

# the period data is all the data that was collected
# during the same period from all the other locations
# not included in the feature data. For a survey area
# or river bassin the parent and feature level are the same.
period_kwargs = {
    "period_data": fdx.period_data,
    "these_features": this_feature['slug'],
    "feature_level": this_feature['level'],
    "feature_parent": this_feature['slug'],
    "parent_level": this_feature['level'],
    "period_name": top,
    "unit_label": unit_label,
    "most_common": fdx.most_common.index,
}
period_data = featuredata.PeriodResults(**period_kwargs)

# the rivers are considered separately: select only the results from rivers.
# dict.update() returns None and mutates its receiver, so the river keyword
# arguments are built as a separate dict and fd_kwargs is left untouched.
fd_rivers = {**fd_kwargs, "water_type": "r"}
fdr = featuredata.Components(**fd_rivers)
fdr.makeFeatureData()

# collects the summarized values for the feature data
# use this to generate the summary data for the survey area
# and the section for the rivers
admin_kwargs = {
    "dims_data": dfDims,
    "label": this_feature["name"],
    "feature_component": this_level,
    "date_range": start_end,
    "dfBeaches": dfBeaches,
}
admin_details = featuredata.AdministrativeSummary(data=fd, **admin_kwargs)
admin_r_details = featuredata.AdministrativeSummary(data=fdr.feature_data, **admin_kwargs)
admin_summary = admin_details.summaryObject()
admin_r_summary = admin_r_details.summaryObject()

Codes init called
this is german
making material and description map
making feature data
There are columns
renaming columns
sample totals
making daily total summary
making the code summary first
making material summary
getting the fail rate
making most common codes table
Codes init called
making feature data
There are columns
renaming columns


In [2]:
# the rivers and lakes that have samples in the feature data
rivers = admin_details.riversOfInterest()
lakes = admin_details.lakesOfInterest()

# string objects for display; every count goes through the same
# language-aware separator so that the numbers are formatted consistently
obj_string = featuredata.thousandsSeparator(admin_summary["quantity"], language)
surv_string = featuredata.thousandsSeparator(admin_summary["loc_date"], language)
pop_string = featuredata.thousandsSeparator(int(admin_summary["population"]), language)

# first and last month of the reporting period, formatted for the report language
first_month = featuredata.dateToYearAndMonth(datetime.strptime(start_date, date_format), lang=date_lang)
last_month = featuredata.dateToYearAndMonth(datetime.strptime(end_date, date_format), lang=date_lang)

# make strings
date_quantity_context = f"Im Zeitraum von {first_month}  bis {last_month} wurden im Rahmen von {surv_string} Datenerhebungen insgesamt {obj_string} Objekte entfernt und identifiziert."
geo_context = f"Die Ergebnisse des {this_feature['name']} umfassen {admin_summary['location']} Orte, {admin_summary['city']} Gemeinden und eine Gesamtbevölkerung von etwa {pop_string} Einwohnenden."

# lists of landmarks of interest, alphabetical
munis_joined = ", ".join(sorted(admin_details.populationKeys()["city"]))
lakes_joined = ", ".join(sorted(lakes))
rivers_joined = ", ".join(sorted(rivers))

# put that all together; the last expression renders the markdown in the notebook
lake_string = f"""
{date_quantity_context} {geo_context}

*Seen:*\n\n>{lakes_joined}

*Fliessgewässer:*\n\n>{rivers_joined}

*Gemeinden:*\n\n>{munis_joined}
"""
md(lake_string)


Im Zeitraum von März 2020  bis Mai 2021 wurden im Rahmen von 386 Datenerhebungen insgesamt 54 744 Objekte entfernt und identifiziert. Die Ergebnisse des Alle Erhebungsgebiete umfassen 143 Orte, 77 Gemeinden und eine Gesamtbevölkerung von etwa 1 735 991 Einwohnenden.

*Seen:*

>Bielersee, Brienzersee, Lac Léman, Lago Maggiore, Lago di Lugano, Neuenburgersee, Quatre Cantons, Thunersee, Walensee, Zugersee, Zurichsee

*Fliessgewässer:*

>Aare, Aare|Nidau-Büren-Kanal, Cassarate, Dorfbach, Emme, Escherkanal, Jona, La Thièle, Limmat, Linthkanal, Maggia, Reuss, Rhône, Schüss, Seez, Sihl, Ticino

*Gemeinden:*

>Aarau, Allaman, Ascona, Beatenberg, Bellinzona, Bern, Biel/Bienne, Boudry, Bourg-en-Lavaux, Brienz (BE), Brissago, Brugg, Brügg, Burgdorf, Bönigen, Cheyres-Châbles, Cudrefin, Dietikon, Erlach, Estavayer, Freienbach, Gals, Gambarogno, Gebenstorf, Genève, Gland, Glarus Nord, Grandson, Hauterive (NE), Hünenberg, Kallnach, Köniz, Küsnacht (ZH), La Tour-de-Peilz, Lausanne, Lavey-Morcles, Le Landeron, Leuk, Ligerz, Locarno, Lugano, Luterbach, Lüscherz, Merenschwand, Minusio, Montreux, Neuchâtel, Nidau, Port, Préverenges, Quarten, Rapperswil-Jona, Richterswil, Riddes, Rubigen, Saint-Gingolph, Saint-Sulpice (VD), Salgesch, Schmerikon, Sion, Solothurn, Spiez, Stäfa, Thun, Tolochenaz, Unterengstringen, Unterseen, Versoix, Vevey, Vinelz, Walenstadt, Walperswil, Weesen, Weggis, Yverdon-les-Bains, Zug, Zürich


In [3]:
# imported in this cell, as before, so the name stays available to later cells
from matplotlib.ticker import MultipleLocator

# land use profile of every sample collected during the period
# NOTE(review): the recorded run of this cell stopped with
# KeyError "['all'] not in index" — the 'all' slug may not be supported
# by LandUseProfile; confirm against the featuredata module.
land_use_kwargs = {
    "data": period_data.period_data,
    "index_column": "loc_date",
    "these_features": this_feature['slug'],
    "feature_level": this_feature['level'],
}
project_profile = featuredata.LandUseProfile(**land_use_kwargs).byIndexColumn()

# the same profile restricted to the feature data
land_use_kwargs.update({"data": fdx.feature_data})
feature_profile = featuredata.LandUseProfile(**land_use_kwargs).featureOfInterest()

fig, axs = plt.subplots(2, 3, figsize=(9, 8), sharey="row")

# the x axis labels do not change inside the loop, build the list once
x_axis_labels = list(featuredata.luse_ge.values())

for i, n in enumerate(featuredata.default_land_use_columns):
    # NOTE(review): i % 2 / i % 3 visits each of the six axes exactly once
    # but not in reading order; divmod(i, 3) would fill the grid row by row.
    # The layout is kept as it was so the published figure does not change.
    r = i % 2
    c = i % 3
    ax = axs[r, c]

    # the value of land use feature n for the survey area:
    data = feature_profile[n].values
    xs, ys = featuredata.empiricalCDF(data)
    sns.lineplot(x=xs, y=ys, ax=ax, label=admin_details.label)

    # the value of land use feature n for all the data
    testx, testy = featuredata.empiricalCDF(project_profile[n].values)
    sns.lineplot(x=testx, y=testy, ax=ax, label=top, color="magenta")

    # get the median from the data
    the_median = np.median(data)

    # plot the median and drop horizontal and vertical lines to the axes
    ax.scatter([the_median], 0.5, color="red", s=50, linewidth=2, zorder=100, label="Median")
    ax.vlines(x=the_median, ymin=0, ymax=0.5, color="red", linewidth=2)
    ax.hlines(xmax=the_median, xmin=0, y=0.5, color="red", linewidth=2)

    if i <= 3:
        if c == 0:
            ax.set_ylabel("Ratio of samples", **ck.xlab_k)
            ax.yaxis.set_major_locator(MultipleLocator(.1))
        ax.xaxis.set_major_formatter(ticker.PercentFormatter(1.0, 0, "%"))

    # keep the handles for the figure legend and drop the per-axes legend
    handles, labels = ax.get_legend_handles_labels()
    ax.get_legend().remove()
    ax.set_xlabel(x_axis_labels[i], **ck.xlab_k)

plt.tight_layout()
plt.subplots_adjust(top=.9, hspace=.3)
plt.suptitle("Landnutzung im Umkreis von 1 500 m um den Erhebungsort", ha="center", y=1, fontsize=16)
fig.legend(handles, labels, bbox_to_anchor=(.5, .94), loc="center", ncol=3)

# glue("aare_survey_area_landuse", fig, display=False)

plt.show()

KeyError: "['all'] not in index"

In [None]:
# dimensional summaries from the rivers-only summary object and from
# the summary object built on all the feature data
dims_data_r = admin_r_details.dimensionalSummary()
dims_data = admin_details.dimensionalSummary()

# stack both summaries
# NOTE(review): combined_dims is not used again in this cell
combined_dims = pd.concat([dims_data, dims_data_r])
# combined_dims.reset_index(drop=False, inplace=True)
# combined_dims.groupby('water_name_slug', as_index=False)
# only the summary of all the feature data is displayed
dims_data

In [None]:
# display the material summary of the feature data
# (presumably populated by fdx.materialSummary() in the first cell — TODO confirm)
fdx.material_summary

In [None]:
# display the sample summary of the feature data
# (presumably populated by fdx.makeDailyTotalSummary() — TODO confirm)
fdx.sample_summary

In [None]:
# display the most common codes table of the feature data
# (presumably populated by fdx.mostCommon() in the first cell — TODO confirm)
fdx.most_common

In [None]:
# configure bokeh so that show() renders the plots inline in the notebook
output_notebook()

In [None]:
# sample totals of all the period data (black) and of the feature data (magenta)
dx = period_data.parentSampleTotals(parent=False)
d = fdx.sample_totals

p = figure(x_axis_type="datetime")

# the value column is addressed through unit_label so the chart keeps
# working when the unit label is changed in the notebook variables
p.circle(dx["date"], dx[unit_label], fill_color="black", size=10, line_width=0)
p.circle(d["date"], d[unit_label], fill_color="magenta", size=10, line_width=0)
show(p)

In [None]:
def _code_descriptions(codes):
    """Map every code in *codes* to its entry in fdx.dMap."""
    return {code: fdx.dMap.loc[code] for code in codes}


# the most common objects for each component of the feature
components = fdx.componentMostCommonPcsM()

# reshape: one row per item, one column per river bassin
pivot_columns = ["item", unit_label, "river_bassin"]
mc_comp = components[pivot_columns].pivot(index="item", columns="river_bassin")

# keep only the second level of the hierarchical column index
mc_comp.columns = mc_comp.columns.get_level_values(1)

# the aggregated totals for the survey area, relabelled from code to description
mc_feature = fdx.most_common[unit_label]
mc_feature = featuredata.changeSeriesIndexLabels(mc_feature, _code_descriptions(mc_feature.index))

# the aggregated totals of all the data, relabelled the same way
mc_period = period_data.parentMostCommon(parent=False)
mc_period = featuredata.changeSeriesIndexLabels(mc_period, _code_descriptions(mc_period.index))

# add both aggregates as extra columns and display the table
mc_comp[this_feature["name"]] = mc_feature
mc_comp[top] = mc_period
mc_comp

In [None]:
# the code group results for each component of the feature
components = fdx.componentCodeGroupResults()

# reshape: one row per code group, one column per river bassin, values from 'pt'
pivot_columns = ["river_bassin", "groupname", 'pt']
pt_comp = components[pivot_columns].pivot(index="groupname", columns="river_bassin")

# keep only the second level of the hierarchical column index
pt_comp.columns = pt_comp.columns.get_level_values(1)

# the parent-level totals (parentGroupTotals(parent=True, percent=True))
# are not added as a column in this report

# the aggregated totals for the period, added as the last column
pt_period = period_data.parentGroupTotals(parent=False, percent=True)
pt_comp[top] = pt_period
pt_comp

In [None]:
# keyword arguments for the rivers-only report. dict.update() returns None
# and mutates its receiver, so a separate dict is built and fd_kwargs is
# left as it was.
fd_rivers = {**fd_kwargs, "water_type": "r"}

# build the same reports as for the feature data, restricted to rivers
fdr = featuredata.Components(**fd_rivers)
fdr.adjustForLanguage()
fdr.makeFeatureData()
fdr.locationSampleTotals()
fdr.makeDailyTotalSummary()
fdr.materialSummary()
fdr.mostCommon()

In [None]:
# sample totals of the rivers-only data over time
dr = fdr.sample_totals
p = figure(x_axis_type="datetime")

# the value column is addressed through unit_label so the chart keeps
# working when the unit label is changed in the notebook variables
p.circle(dr["date"], dr[unit_label], fill_color="magenta", size=10, line_width=0)
show(p)

In [None]:
# display the sample summary of the rivers-only data
# (presumably populated by fdr.makeDailyTotalSummary() — TODO confirm)
fdr.sample_summary

In [None]:
# display the most common codes table of the rivers-only data
# (presumably populated by fdr.mostCommon() — TODO confirm)
fdr.most_common

In [None]:
# survey results before aggregation, with the unit column renamed to the
# label used in this notebook
before_agg = pd.read_csv("resources/checked_before_agg_sdata_eos_2020_21.csv")
before_agg.rename(columns={"p/100m": unit_label}, inplace=True)

# the codes of interest: a fixed list of foam codes plus every code
# in the "plastic pieces" group
some_foams = ["G81", "G82", "G83", "G74"]
is_plastic_piece = before_agg.groupname == "plastic pieces"
some_frag_plas = list(before_agg.loc[is_plastic_piece, "code"].unique())

# the aggregations: sum within a sample first, then median across samples
agg_pcs_quantity = {unit_label: "sum", "quantity": "sum"}
agg_pcs_median = {unit_label: "median", "quantity": "sum"}

# keep the codes of interest at the locations of the feature
code_selected = before_agg.code.isin([*some_frag_plas, *some_foams])
location_selected = before_agg.location.isin(admin_summary["locations_of_interest"])
mask = code_selected & location_selected

per_sample = before_agg[mask].groupby(["loc_date", "code"], as_index=False).agg(agg_pcs_quantity)
fd_frags_foams = per_sample.groupby("code").agg(agg_pcs_median)

# add the description of each code and its share of all the feature data
fd_frags_foams["item"] = [fdx.dMap.loc[code] for code in fd_frags_foams.index]
share_of_total = fd_frags_foams.quantity / fd.quantity.sum() * 100
fd_frags_foams["% of total"] = share_of_total.round(2)
fd_frags_foams

In [None]:
# NOTE(review): this cell plots the periodic table from bokeh's bundled
# sample data. It does not read any of the survey data and looks like a
# bokeh demo kept for reference — confirm whether it belongs in the report.
# It also rebinds the notebook-level names p, r, df, x and cmap.
from bokeh.plotting import figure, show
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap

# axis categories: periods as roman numerals (y), groups "1".."18" (x)
periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = [str(x) for x in range(1, 19)]

# work on a copy of the sample data; the categorical axes need strings
df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str)
df["group"] = df["group"].astype(str)
df["period"] = [periods[x-1] for x in df.period]
# drop the rows without a group and the Lr and Lu elements
df = df[df.group != "-"]
df = df[df.symbol != "Lr"]
df = df[df.symbol != "Lu"]

# fill color for each value of the "metal" column
cmap = {
    "alkali metal"         : "#a6cee3",
    "alkaline earth metal" : "#1f78b4",
    "metal"                : "#d93b43",
    "halogen"              : "#999d9a",
    "metalloid"            : "#e08d49",
    "noble gas"            : "#eaeaea",
    "nonmetal"             : "#f1d4Af",
    "transition metal"     : "#599d7A",
}

# hover tooltip rows: (label, column reference)
TOOLTIPS = [
    ("Name", "@name"),
    ("Atomic number", "@{atomic number}"),
    ("Atomic mass", "@{atomic mass}"),
    ("Type", "@metal"),
    ("CPK color", "$color[hex, swatch]:CPK"),
    ("Electronic configuration", "@{electronic configuration}"),
]

# categorical figure; the y range is reversed so period I is at the top
p = figure(title="Periodic Table (omitting LA and AC Series)", width=1000, height=450,
           x_range=groups, y_range=list(reversed(periods)),
           tools="hover", toolbar_location=None, tooltips=TOOLTIPS)

# one rectangle per element, colored by its "metal" value
r = p.rect("group", "period", 0.95, 0.95, source=df, fill_alpha=0.6, legend_field="metal",
           color=factor_cmap('metal', palette=list(cmap.values()), factors=list(cmap.keys())))

# shared properties for the text glyphs drawn inside each rectangle
text_props = dict(source=df, text_align="left", text_baseline="middle")

# shift the text towards the left edge of each cell
x = dodge("group", -0.4, range=p.x_range)

p.text(x=x, y="period", text="symbol", text_font_style="bold", **text_props)

p.text(x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number",
       text_font_size="11px", **text_props)

p.text(x=x, y=dodge("period", -0.35, range=p.y_range), text="name",
       text_font_size="7px", **text_props)

p.text(x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass",
       text_font_size="7px", **text_props)

# "LA" and "AC" labels placed in group 3 of periods VI and VII
p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")

# remove the outline, grid, axis lines and ticks; put the legend at the top
p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "horizontal"
p.legend.location ="top_center"
p.hover.renderers = [r] # only hover element boxes

show(p)

In [None]:
def parentSampleTotals(self, parent: bool = True):
    """Return the total of ``self.unit_label`` for each sample of the period data.

    A sample is identified by its ``loc_date`` and ``date`` values.

    :param parent: passed to ``self.makeMask``. When the mask that comes back
        is a ``pd.Series`` it is applied to ``self.period_data`` before
        grouping, otherwise all the period data is used.
    :return: a data frame with the columns ``loc_date``, ``date`` and
        ``self.unit_label``.
    """
    print("making sample totals from period data")
    mask = self.makeMask(parent=parent)

    data = self.period_data

    # parentMostCommon and parentGroupTotals test for a pd.Series mask; the
    # previous test for a tuple could never select rows, so the mask was
    # silently ignored.
    if isinstance(mask, pd.Series):
        print("applying mask")
        data = data[mask]

    return data.groupby(["loc_date", "date"], as_index=False)[self.unit_label].sum()
    
    
def parentMostCommon(self, parent: bool = True, percent: bool = False):
    """Summarise the most common codes from the period data.

    Only the records whose ``code`` is in ``self.most_common`` are used.

    :param parent: passed to ``self.makeMask``. When the mask that comes back
        is a ``pd.Series`` it is applied to the period data and the result is
        named ``self.feature_parent``; otherwise all the period data is used
        and the result is named ``self.period_name``.
    :param percent: when True return each code's share of the summed
        quantity (a ratio rounded to two decimals). When False return the
        median of the per-sample totals of ``self.unit_label``.
    :return: a ``pd.Series`` indexed by code.
    """
    print("getting the most common results from period data")
    mask = self.makeMask(parent=parent)

    if isinstance(mask, pd.Series):
        print("applying mask")
        data = self.period_data[mask].copy()
        use_name = self.feature_parent
    else:
        data = self.period_data.copy()
        use_name = self.period_name

    data = data[data.code.isin(self.most_common)]

    if percent:
        print("getting most common % of total from period data")

        data = data.groupby('code', as_index=False).quantity.sum()
        data.set_index('code', inplace=True)
        data[use_name] = (data.quantity / data.quantity.sum()).round(2)

        return data[use_name]

    # Without this branch the function returned None when percent was False.
    # It mirrors the non-percent branch of parentGroupTotals: total per
    # sample and code first, then the median of those totals per code.
    print("getting the most common pcs/m from period data")

    data = data.groupby(["loc_date", "code"], as_index=False)[self.unit_label].sum()
    data = data.groupby("code", as_index=False)[self.unit_label].median()
    data.set_index("code", inplace=True)
    data[use_name] = data[self.unit_label]

    return data[use_name]
    
    
def parentGroupTotals(self, parent: bool = True, percent: bool = False):
    """Summarise the period data by code group.

    :param parent: passed to ``self.makeMask``. A ``pd.Series`` mask is
        applied to the period data and the result is named
        ``self.feature_parent``; otherwise all the period data is used and
        the result is named ``self.period_name``.
    :param percent: when True the result is each group's share of the summed
        quantity, rounded to two decimals and multiplied by 100. When False
        it is the median of the per-sample totals of ``self.unit_label``.
    :return: a ``pd.Series`` indexed by groupname.
    """
    print("getting the codegroup results from period data")
    mask = self.makeMask(parent=parent)

    if isinstance(mask, pd.Series):
        print("applying mask")
        frame = self.period_data[mask].copy()
        column_name = self.feature_parent
    else:
        frame = self.period_data.copy()
        column_name = self.period_name

    if not percent:
        print("getting the codegroup pcs/m from period data")

        # total per sample and group first, then the median of those totals
        per_sample = frame.groupby(["loc_date", 'groupname'], as_index=False)[self.unit_label].sum()
        medians = per_sample.groupby('groupname', as_index=False)[self.unit_label].median()
        medians.set_index('groupname', inplace=True)
        medians[column_name] = medians[self.unit_label]

        return medians[column_name]

    print("getting the codegroup % of total from period data")

    totals = frame.groupby('groupname', as_index=False).quantity.sum()
    totals.set_index('groupname', inplace=True)
    totals[column_name] = (totals.quantity / totals.quantity.sum()).round(2) * 100

    return totals[column_name]

In [None]:
def codeGroupTotals(data: pd.DataFrame = None, unit_label: str = None, column_operation: dict = None, columns: list = None):
    """Aggregate survey results by code group.

    The data is first summed (``unit_label`` and ``quantity``) for each
    combination of ``columns``, then aggregated per ``groupname`` with
    ``column_operation``.

    :param data: survey results with the columns named in ``columns`` plus
        ``unit_label`` and ``quantity``.
    :param unit_label: name of the column that holds the survey unit.
    :param column_operation: mapping of column name to aggregation for the
        second pass. Defaults to the median of ``unit_label`` and the sum of
        ``quantity`` when missing or empty.
    :param columns: grouping columns of the first pass. Defaults to
        ``["loc_date", "groupname"]``.
    :return: a data frame indexed by ``groupname`` with the aggregated
        columns and ``% of total``.
    """
    # A dict literal as default is shared between calls, so the defaults are
    # built inside the function; they match columnsAndOperations.
    if not column_operation:
        column_operation = {unit_label: "median", "quantity": "sum"}
    if columns is None:
        columns = ["loc_date", "groupname"]

    codegroup_totals = data.groupby(columns, as_index=False).agg({unit_label: 'sum', 'quantity': 'sum'})
    codegroup_totals = codegroup_totals.groupby('groupname', as_index=False).agg(column_operation)

    # percent of total; the label matches the one used by codeGroupSummary
    codegroup_totals["% of total"] = ((codegroup_totals.quantity / codegroup_totals.quantity.sum()) * 100).round(2)

    codegroup_totals.set_index("groupname", inplace=True)

    return codegroup_totals

def columnsAndOperations(column_operations: list = None, columns: list = None, unit_label: str = None):
    """Resolve the grouping columns and the aggregation mapping.

    :param column_operations: sequence of (column, operation) pairs. When
        None the mapping is the median of ``unit_label`` and the sum of
        ``quantity``.
    :param columns: grouping columns. When None they are
        ``["loc_date", "groupname"]``.
    :param unit_label: name of the survey unit column, used for the default
        mapping only.
    :return: the tuple ``(columns, column_operation)``.
    """
    if column_operations is not None:
        column_operation = {pair[0]: pair[1] for pair in column_operations}
    else:
        column_operation = {unit_label: "median", "quantity": "sum"}

    grouping = ["loc_date", "groupname"] if columns is None else columns

    return grouping, column_operation
    


def parentGroupTotals(self, data: pd.DataFrame = None, parent: bool = True, percent: bool = False, columns: list = None, column_operations: list = None):
    """Aggregate the period data by code group with codeGroupTotals.

    :param data: survey results to aggregate. When None the period data is
        used, filtered by the mask from ``self.makeMask(parent=parent)`` if
        that mask is a ``pd.Series``. A frame passed in is used as it is.
    :param parent: passed to ``self.makeMask`` when ``data`` is None.
    :param percent: not used here; kept so existing keyword calls still
        work. The frame returned by codeGroupTotals holds both the
        aggregated values and the percent of total.
    :param columns: grouping columns, resolved by columnsAndOperations.
    :param column_operations: (column, operation) pairs, resolved by
        columnsAndOperations.
    :return: the data frame produced by codeGroupTotals.
    """
    print("getting the codegroup results from period data")

    cols_ops_kwargs = {
        "column_operations": column_operations,
        "columns": columns,
        "unit_label": self.unit_label
    }

    columns, column_operation = columnsAndOperations(**cols_ops_kwargs)

    if data is None:
        # the mask was never computed before, which left `mask` undefined
        mask = self.makeMask(parent=parent)

        if isinstance(mask, pd.Series):
            print("applying mask")
            data = self.period_data[mask].copy()
        else:
            data = self.period_data.copy()

    code_group_kwargs = {
        "data": data,
        # the unit label lives on the instance, there is no local of that name
        "unit_label": self.unit_label,
        "column_operation": column_operation,
        "columns": columns
    }

    # the keyword arguments have to be unpacked, passing the dict itself
    # would bind it to the `data` parameter
    return codeGroupTotals(**code_group_kwargs)



def codeGroupSummary(self, columns: list = None, column_operations=None):
    """Build, store and return the code group summary of the feature data.

    The feature data is summed (``self.unit_label`` and ``quantity``) for
    each combination of ``columns`` and then aggregated per ``groupname``.
    The result is kept in ``self.codegroup_summary``; once that attribute
    holds a data frame it is returned without recomputing.

    :param columns: grouping columns of the first pass. Defaults to
        ``["loc_date", "groupname"]``.
    :param column_operations: passed to ``self.columnOperation`` to build
        the aggregation mapping. When None the mapping is the median of
        ``self.unit_label`` and the sum of ``quantity``.
    :return: a data frame indexed by ``groupname`` with the aggregated
        columns and ``% of total``.
    """
    if isinstance(self.codegroup_summary, pd.DataFrame):
        print("codegroup summary has already been generated, it can be accessed through FeatureData.code_summary")
        return self.codegroup_summary

    print("making feature codegroup summary")

    if column_operations is None:
        column_operation = {self.unit_label: "median", "quantity": "sum"}
    else:
        column_operation = self.columnOperation(column_operations)
    if columns is None:
        columns = ["loc_date", "groupname"]

    # apply the column operations
    codegroup_totals = self.feature_data.groupby(columns, as_index=False).agg({self.unit_label: 'sum', 'quantity': 'sum'})
    codegroup_totals = codegroup_totals.groupby('groupname', as_index=False).agg(column_operation)

    # percent of total
    codegroup_totals["% of total"] = ((codegroup_totals.quantity / codegroup_totals.quantity.sum()) * 100).round(2)

    codegroup_totals.set_index("groupname", inplace=True)

    self.codegroup_summary = codegroup_totals

    # return the summary on the first call as well, so that the first and
    # the later calls give the caller the same object
    return self.codegroup_summary
    
    
def componentCodeGroupResults(self, columns: list = None, column_operations: list = None):
    """Aggregate the feature data by code group for each feature component.

    :param columns: grouping columns of the first pass, used together with
        ``self.feature_component``. Defaults to ``["loc_date", "groupname"]``.
    :param column_operations: passed to ``self.columnOperation`` to build
        the aggregation mapping of the first pass. When None the mapping is
        the median of ``self.unit_label`` and the sum of ``quantity``.
    :return: one data frame with a row per component and groupname. It holds
        the median of ``self.unit_label``, the summed ``quantity``, the
        component total ``f_total`` and ``pt``, the group's share of the
        component total (rounded to two decimals, times 100).
    :raises ValueError: re-raised when the type mask comparison fails.
    """
    if column_operations is None:
        column_operation = {self.unit_label: "median", "quantity": "sum"}
    else:
        column_operation = self.columnOperation(column_operations)

    # unpacking the None default below raised a TypeError; the default
    # grouping is the one codeGroupSummary uses
    if columns is None:
        columns = ["loc_date", "groupname"]

    data = self.feature_data.copy()

    if isinstance(self.component_type, str):
        try:
            print("attempting type mask")
            type_mask = self.feature_data[self.type_column] == self.component_type
        except ValueError:
            print("Type mask could not be executed using the type_column and component_type variables")
            raise
        print("type mask successful")
        data = data[type_mask]

    # NOTE(review): with the default mapping this first pass takes the median
    # of the unit column within a sample and group, while codeGroupSummary
    # sums at this step — confirm which one is intended.
    results = data.groupby([self.feature_component, *columns], as_index=False).agg(column_operation)

    # the total amount per component, used for the % of total
    cg_tq = results.groupby(self.feature_component).quantity.sum()

    # the median per survey per group and the total quantity
    agg_this = {self.unit_label: "median", "quantity": "sum"}
    results = results.groupby([self.feature_component, "groupname"], as_index=False).agg(agg_this)
    results["f_total"] = results[self.feature_component].map(lambda x: cg_tq.loc[x])
    results["pt"] = (results.quantity / results.f_total).round(2) * 100

    return results