In [1]:
# -*- coding: utf-8 -*-
# This is a report using the data from IQAASL.
# IQAASL was a project funded by the Swiss Confederation
# It produces a summary of litter survey results for a defined region.
# These charts serve as the models for the development of plagespropres.ch
# The data is gathered by volunteers.
# Please remember all copyrights apply, please give credit when applicable
# The repo is maintained by the community effective January 01, 2022
# There is ample opportunity to contribute, learn and teach
# contact dev@hammerdirt.ch

# Dies ist ein Bericht, der die Daten von IQAASL verwendet.
# IQAASL war ein von der Schweizerischen Eidgenossenschaft finanziertes Projekt.
# Es erstellt eine Zusammenfassung der Ergebnisse der Littering-Umfrage für eine bestimmte Region.
# Diese Grafiken dienten als Vorlage für die Entwicklung von plagespropres.ch.
# Die Daten werden von Freiwilligen gesammelt.
# Bitte denken Sie daran, dass alle Copyrights gelten, bitte geben Sie den Namen an, wenn zutreffend.
# Das Repo wird ab dem 01. Januar 2022 von der Community gepflegt.
# Es gibt reichlich Gelegenheit, etwas beizutragen, zu lernen und zu lehren.
# Kontakt dev@hammerdirt.ch

# Il s'agit d'un rapport utilisant les données de IQAASL.
# IQAASL était un projet financé par la Confédération suisse.
# Il produit un résumé des résultats de l'enquête sur les déchets sauvages pour une région définie.
# Ces tableaux ont servi de modèles pour le développement de plagespropres.ch
# Les données sont recueillies par des bénévoles.
# N'oubliez pas que tous les droits d'auteur s'appliquent, veuillez indiquer le crédit lorsque cela est possible.
# Le dépôt est maintenu par la communauté à partir du 1er janvier 2022.
# Il y a de nombreuses possibilités de contribuer, d'apprendre et d'enseigner.
# contact dev@hammerdirt.ch

# sys, file and nav packages:
import datetime as dt
from datetime import date, datetime, time
from babel.dates import format_date, format_datetime, format_time, get_month_names
import locale

# math packages:
import pandas as pd
import numpy as np
from scipy import stats
from math import pi

# charting:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import ticker
from matplotlib.ticker import MultipleLocator
import seaborn as sns

# the module that has all the methods for handling the data
import resources.featuredata as featuredata

# home brew utilities
import resources.chart_kwargs as ck
import resources.sr_ut as sut

# images and display
from IPython.display import Markdown as md
from myst_nb import glue

# chart style
sns.set_style("whitegrid")

# colors for gradients
cmap2 = ck.cmap2
colors_palette = ck.colors_palette
bassin_pallette = featuredata.bassin_pallette

# border and row shading for tables
a_color = "saddlebrown"
table_row = "saddlebrown"

## !! Begin Note book variables !!
# There are two language variants: German and English.
# Change both: date_lang and language.
# locale.setlocale raises locale.Error if the requested locale
# is not available on the host.
date_lang = 'de_DE.utf8'
locale.setlocale(locale.LC_ALL, date_lang)

# the date format of the survey data is defined in the module
date_format = featuredata.date_format

# the language setting uses lower case: en or de
# changing the language may require changing the unit label
language = "de"
unit_label = "p/100 m"

# the standard date format is "%Y-%m-%d"; if your date column is
# not in this format it will not work.
# these dates cover the duration of the IQAASL project
start_date = "2020-03-01"
end_date = "2021-05-31"
start_end = [start_date, end_date]

# the fail rate used to calculate the most common codes is
# 50%; it can be changed:
fail_rate = 50

# Changing these variables produces different reports
# Call the map image for the area of interest
# NOTE(review): the path names the Aare map while the feature selected
# below is 'all' -- confirm this is the intended image.
bassin_map = "resources/maps/survey_areas/aare_scaled.jpeg"

# the label for the aggregation of all data in the region
top = "Alle Erhebungsgebiete"

# define the feature level and components
# the feature of interest is the aggregate of all survey areas:
# slug and level are both 'all'.
# the label for charting is called 'name'; it reuses the label `top`
# so the two cannot drift apart.
this_feature = {'slug': 'all', 'name': top, 'level': 'all'}

# these are the smallest aggregated components
# choices are water_name_slug=lake or river, city or location at the scale of a river bassin
# water body or lake maybe the most appropriate
this_level = 'river_bassin'

# identify the lakes of interest for the survey area
# lakes_of_interest = ["neuenburgersee", "thunersee", "bielersee", "brienzersee"]

# !! End note book variables !!

## data
# Survey location details (GPS, city, land use), indexed by location slug
dfBeaches = pd.read_csv("resources/beaches_with_land_use_rates.csv").set_index("slug")

# Survey dimensions and weights
dfDims = pd.read_csv("resources/corrected_dims.csv")

# code definitions, indexed by code
# NOTE(review): this path has no file extension, unlike the other
# resources -- confirm the file name.
dxCodes = pd.read_csv("resources/codes_with_group_names").set_index("code")

# columns that need to be renamed. Setting the language will automatically
# change column names, code descriptions and chart annotations
columns = {
    "% to agg": "% agg",
    "% to recreation": "% recreation",
    "% to woods": "% woods",
    "% to buildings": "% buildings",
    "p/100m": "p/100 m",
}

# key word arguments to construct feature data
# !Note the water type allows the selection of river or lakes
# if None then the data is aggregated together. This selection
# is only valid for survey-area reports or other aggregated data
# that may have survey results from both lakes and rivers.
fd_kwargs = {
    "filename": "resources/checked_sdata_eos_2020_21.csv",
    "feature_name": this_feature['slug'],
    "feature_level": this_feature['level'],
    "these_features": this_feature['slug'],
    "component": this_level,
    "columns": columns,
    # use the notebook variable so that changing `language` in the
    # configuration section actually takes effect
    "language": language,
    "unit_label": unit_label,
    "fail_rate": fail_rate,
    "code_data": dxCodes,
    "date_range": start_end,
    "water_type": None,
}

fdx = featuredata.Components(**fd_kwargs)

# call the reports and languages
fdx.adjustForLanguage()
fdx.makeFeatureData()
fdx.locationSampleTotals()
fdx.makeDailyTotalSummary()
fdx.materialSummary()
fdx.mostCommon()

# !this is the feature data!
fd = fdx.feature_data

# The period data is everything collected during the same period,
# as exposed by fdx.period_data. For this report the feature and its
# parent are the same: both the slug and the level of the feature are
# passed for the parent as well.
period_kwargs = dict(
    period_data=fdx.period_data,
    these_features=this_feature['slug'],
    feature_level=this_feature['level'],
    feature_parent=this_feature['slug'],
    parent_level=this_feature['level'],
    period_name=top,
    unit_label=unit_label,
    most_common=fdx.most_common.index,
)
period_data = featuredata.PeriodResults(**period_kwargs)

# the rivers are considered separately:
# select only the results from rivers by switching the water type
fd_kwargs.update({"water_type": "r"})
fdr = featuredata.Components(**fd_kwargs)

# same call sequence as for fdx: the language adjustment comes first and
# the feature data is built once (the original built it twice, once
# before and once after adjustForLanguage)
fdr.adjustForLanguage()
fdr.makeFeatureData()
fdr.locationSampleTotals()
fdr.makeDailyTotalSummary()
fdr.materialSummary()
fdr.mostCommon()

# collects the summarized values for the feature data
# use this to generate the summary data for the survey area
# and the section for the rivers
admin_kwargs = {
    "data": fd,
    "dims_data": dfDims,
    "label": this_feature["name"],
    "feature_component": this_level,
    "date_range": start_end,
    "dfBeaches": dfBeaches,
}
admin_details = featuredata.AdministrativeSummary(**admin_kwargs)
admin_summary = admin_details.summaryObject()

# the same summary restricted to the river results: only the data changes.
# The summary object is constructed once (it was previously built twice
# with identical arguments, the first instance being discarded).
admin_kwargs.update({"data": fdr.feature_data})
admin_r_details = featuredata.AdministrativeSummary(**admin_kwargs)
admin_r_summary = admin_r_details.summaryObject()

(gisoutput)=
# GIS output

In [2]:
# keyword arguments for the land-use profile, starting with
# all the data of the project (the period data)
land_use_kwargs = dict(
    data=period_data.period_data,
    index_column="loc_date",
    these_features=this_feature['slug'],
    feature_level=this_level,
)

# land-use profile of the project, by index column
project_profile = featuredata.LandUseProfile(**land_use_kwargs).byIndexColumn()

# swap in the feature data and build the land-use profile of the feature
land_use_kwargs["data"] = fdx.feature_data
feature_profile = featuredata.LandUseProfile(**land_use_kwargs)

# the component features of the report
feature_landuse = feature_profile.featureOfInterest()

### Gesamtergebnisse nach Erhebungsgebiet

In [3]:
# maps the place names from slug to proper name
name_map = featuredata.river_basin_de

# the order in which they are charted
name_order = list(name_map.keys())

# the dimensional data: sorted by quantity (largest first) before the
# column names are translated
dims_table = (
    admin_details.dimensionalSummary()
    .sort_values(by=["quantity"], ascending=False)
    .rename(columns=featuredata.dims_table_columns_de)
)

# the values in these columns need formatting to swiss spec
thousands_separated = ["Fläche (m2)", "Länge (m)", "Erhebungen", "Objekte (St.)"]
replace_decimal = ["Plastik (Kg)", "Gesamtgewicht (Kg)"]

# scale the plastic weight by 1/1000
# (presumably grams to kilograms -- confirm against the source data)
dims_table["Plastik (Kg)"] = dims_table["Plastik (Kg)"] / 1000

# move the index to a column and replace the slugs with display names
dims_table = dims_table.reset_index()
dims_table["river_bassin"] = dims_table["river_bassin"].map(
    lambda x: featuredata.updatePlaceNames(x=x, a_map=name_map)
)

dims_table

Unnamed: 0,river_bassin,Gesamtgewicht (Kg),Plastik (Kg),Fläche (m2),Länge (m),Erhebungen,Objekte (St.)
0,Alle Erhebungsgebiete,305.507,94.213346,96616.35,19722.0,386.0,54744.0
1,Rhone,151.309,45.967375,25986.25,4911.0,106.0,28454.0
2,Aare,71.976,31.44817,37017.8,7971.0,140.0,13847.0
3,Linth,35.961,12.772621,25637.8,5323.0,112.0,9412.0
4,Ticino,46.261,4.02518,7974.5,1517.0,28.0,3031.0


## Erhebungsergebnisse für alle Objekte

Verteilung der Erhebungsergebnisse. Die Werte werden als Anzahl der identifizierten Abfallobjekte pro 100 Meter (p/100 m) dargestellt. 

### The sample location layer

The median sample total in p/100 m is calculated for each location and the latitude and longitude are attached. The following identifying information is included:

1. city
2. survey area
3. quantity
4. number of samples

In [4]:
# the sample totals of the parent feature
dx = period_data.parentSampleTotals(parent=False)

# sample totals of the feature data
sample_totals = fdx.sample_totals
these_beaches = admin_details.df_beaches.loc[sample_totals.location.unique()]

# for each location or level sum the quantity and get the median survey value
agg_columns = {"quantity": "sum", "p/100 m": "median", "loc_date": "nunique"}

# group-by keys for the location layer. Deliberately NOT called `columns`:
# that name holds the column rename map that is passed to
# featuredata.Components
group_columns = ["river_bassin", "location"]

location_layer = sample_totals.groupby(group_columns, as_index=False).agg(agg_columns)

lat_lon = these_beaches[["latitude", "longitude"]]
cities = these_beaches["city"]

# label-based lookup for every location in one step; a location that is
# missing from these_beaches raises KeyError, as the per-row lookup did
location_keys = location_layer["location"]
location_layer["lat"] = lat_lon.loc[location_keys, "latitude"].to_numpy()
location_layer["lon"] = lat_lon.loc[location_keys, "longitude"].to_numpy()
location_layer["city"] = cities.loc[location_keys].to_numpy()

# the number of unique loc_date values is the number of samples; pop removes
# loc_date and the assignment appends it as the last column
location_layer["samples"] = location_layer.pop("loc_date")

location_layer.to_csv("resources/output/gis/location_layer.csv", index=False)

location_layer.head()

Unnamed: 0,river_bassin,location,quantity,p/100 m,lat,lon,city,samples
0,aare,aare-limmatspitz,70,60.0,47.50106,8.237371,Gebenstorf,1
1,aare,aare-port,99,253.0,47.11617,7.26955,Port,1
2,aare,aare-solothurn-lido-strand,27,244.0,47.196949,7.521643,Solothurn,1
3,aare,aare_bern_gerberm,134,363.0,46.989363,7.452098,Bern,1
4,aare,aare_bern_scheurerk,4,12.0,46.970967,7.452586,Bern,1


### The survey area layer

The survey area layer is the aggregate of the sample totals grouped by survey area. The latitude and longitude of the point are 
determined by the first record with a matching survey area. The attributes are assigned to a polygon that includes all the municipalities
in the survey area. The median sample total for the survey area is reported, as well as:

1. quantity
2. number of samples
3. survey area label

In [5]:
# aggregate the sample totals to the survey area (river bassin)
surveyarea_layer = sample_totals.groupby("river_bassin", as_index=False).agg(agg_columns)

# one coordinate pair per survey area: the first beach record of each
# river bassin, indexed by river bassin
rbassins = (
    these_beaches[["river_bassin", "latitude", "longitude"]]
    .drop_duplicates("river_bassin")
    .set_index("river_bassin", drop=True)
)

# look up the coordinates of every survey area by label
bassin_keys = surveyarea_layer["river_bassin"]
surveyarea_layer["lat"] = rbassins.loc[bassin_keys, "latitude"].to_numpy()
surveyarea_layer["lon"] = rbassins.loc[bassin_keys, "longitude"].to_numpy()

# the count of unique loc_date values becomes the last column, `samples`
surveyarea_layer["samples"] = surveyarea_layer.pop("loc_date")

surveyarea_layer.to_csv("resources/output/gis/surveyarea_layer.csv", index=False)

surveyarea_layer.head()

Unnamed: 0,river_bassin,quantity,p/100 m,lat,lon,samples
0,aare,13847,143.5,47.50106,8.237371,140
1,linth,9412,131.5,47.220989,8.940365,112
2,rhone,28454,442.5,46.447216,6.859612,106
3,ticino,3031,160.5,46.153882,8.76848,28


### The municipal area layer

The municipal layer is the aggregate of the sample totals grouped by city. The latitude and longitude of the point are 
determined by the first record with a matching city. The attributes are assigned to a polygon of that city. The median sample total for the municipality is reported, as well as:

1. quantity
2. number of samples
3. survey area label

In [6]:
# calling the components class on updated keywords:
# aggregates to the city level.
# The keyword arguments are derived from fd_kwargs so that the column
# rename map and all other settings stay identical to the survey-area
# report. Only the component changes; water_type is reset to None because
# fd_kwargs was switched to rivers ("r") for the river report.
fd_kwargs_city = {
    **fd_kwargs,
    "component": "city",
    "water_type": None,
}

fdc = featuredata.Components(**fd_kwargs_city)

fdc.adjustForLanguage()
fdc.makeFeatureData()
fdc.locationSampleTotals()
fdc.makeDailyTotalSummary()
fdc.materialSummary()
fdc.mostCommon()

# aggregate the sample totals of the city-level data by city
municipal_layer = fdc.sample_totals.groupby("city", as_index=False).agg(agg_columns)

# one coordinate pair per city: the first beach record of each city
city_gps = (
    these_beaches[["city", "latitude", "longitude"]]
    .drop_duplicates("city")
    .set_index("city", drop=True)
)

# label-based lookup for every city in one step; a city that is missing
# from city_gps raises KeyError, as the per-row lookup did
city_keys = municipal_layer["city"]
municipal_layer["lat"] = city_gps.loc[city_keys, "latitude"].to_numpy()
municipal_layer["lon"] = city_gps.loc[city_keys, "longitude"].to_numpy()

# the count of unique loc_date values becomes the last column, `samples`
municipal_layer["samples"] = municipal_layer.pop("loc_date")

municipal_layer.to_csv("resources/output/gis/municipal_layer.csv", index=False)

municipal_layer.head()

Unnamed: 0,city,quantity,p/100 m,lat,lon,samples
0,Aarau,7,35.0,47.405669,8.066018,1
1,Allaman,631,716.0,46.463919,6.385732,3
2,Ascona,433,172.0,46.153882,8.76848,5
3,Beatenberg,104,242.5,46.684386,7.794768,2
4,Bellinzona,187,121.0,46.200625,9.015853,4
