In [1]:
# -*- coding: utf-8 -*-
# This is a report using the data from IQAASL.
# IQAASL was a project funded by the Swiss Confederation
# It produces a summary of litter survey results for a defined region.
# These charts serve as the models for the development of plagespropres.ch
# The data is gathered by volunteers.
# Please remember all copyrights apply, please give credit when applicable
# The repo is maintained by the community effective January 01, 2022
# There is ample opportunity to contribute, learn and teach
# contact dev@hammerdirt.ch

# Dies ist ein Bericht, der die Daten von IQAASL verwendet.
# IQAASL war ein von der Schweizerischen Eidgenossenschaft finanziertes Projekt.
# Es erstellt eine Zusammenfassung der Ergebnisse der Littering-Umfrage für eine bestimmte Region.
# Diese Grafiken dienten als Vorlage für die Entwicklung von plagespropres.ch.
# Die Daten werden von Freiwilligen gesammelt.
# Bitte denken Sie daran, dass alle Copyrights gelten, bitte geben Sie den Namen an, wenn zutreffend.
# Das Repo wird ab dem 01. Januar 2022 von der Community gepflegt.
# Es gibt reichlich Gelegenheit, etwas beizutragen, zu lernen und zu lehren.
# Kontakt dev@hammerdirt.ch

# Il s'agit d'un rapport utilisant les données de IQAASL.
# IQAASL était un projet financé par la Confédération suisse.
# Il produit un résumé des résultats de l'enquête sur les déchets sauvages pour une région définie.
# Ces tableaux ont servi de modèles pour le développement de plagespropres.ch
# Les données sont recueillies par des bénévoles.
# N'oubliez pas que tous les droits d'auteur s'appliquent, veuillez indiquer le crédit lorsque cela est possible.
# Le dépôt est maintenu par la communauté à partir du 1er janvier 2022.
# Il y a de nombreuses possibilités de contribuer, d'apprendre et d'enseigner.
# contact dev@hammerdirt.ch

# sys, file and nav packages:
import datetime as dt
from datetime import date, datetime, time
from babel.dates import format_date, format_datetime, format_time, get_month_names
import locale

# math packages:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.distributions.empirical_distribution import ECDF
from math import pi

# charting:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import ticker
from matplotlib import colors
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.gridspec import GridSpec
import seaborn as sns

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import BasicTicker, ColorBar, LinearColorMapper, PrintfTickFormatter
from bokeh.sampledata.unemployment1948 import data

# the module that has all the methods for handling the data
import resources.featuredata as featuredata

# home brew utitilties
import resources.chart_kwargs as ck
import resources.sr_ut as sut

# images and display
from IPython.display import Markdown as md

# chart style
sns.set_style("whitegrid")

# colors for gradients
cmap2 = ck.cmap2
colors_palette = ck.colors_palette

# border and row shading fro tables
a_color = "saddlebrown"
table_row = "saddlebrown"

## !! Begin Note book variables !!

# There are two language variants: german and english
# change both: date_lang and language
date_lang =  'de_DE.utf8'
locale.setlocale(locale.LC_ALL, date_lang)

# the date format of the survey data is defined in the module
date_format = featuredata.date_format

# the language setting use lower case: en or de
# changing the language may require changing the unit label
language = "de"
unit_label = "p/100 m"

# the standard date format is "%Y-%m-%d" if your date column is
# not in this format it will not work.
# these dates cover the duration of the IQAASL project
start_date = "2020-03-01"
end_date ="2021-05-31"
start_end = [start_date, end_date]

# the fail rate used to calculate the most common codes is
# 50% it can be changed:
fail_rate = 50

# Changing these variables produces different reports
# Call the map image for the area of interest
bassin_map = "resources/maps/bielersee_scaled.jpeg"

# the label for the aggregation of all data in the region
top = "Alle Erhebungsgebiete"

# define the feature level and components
# the feature of interest is the Aare (aare) at the river basin (river_bassin) level.
# the label for charting is called 'name'
this_feature = {'slug':'bielersee', 'name':"Bielersee", 'level':'water_name_slug'}

# the lake is in this survey area
this_bassin = "aare"
# label for survey area
bassin_label = "Aare-Erhebungsgebiet"

# these are the smallest aggregated components
# choices are water_name_slug=lake or river, city or location at the scale of a river bassin 
# water body or lake maybe the most appropriate
this_level = 'city'

# identify the lakes of interest for the survey area
lakes_of_interest = ["bielersee"]    


# !! End note book variables !!
## data
# Survey location details (GPS, city, land use)
dfBeaches = pd.read_csv("resources/beaches_with_land_use_rates.csv")
# set the index of the beach data to location slug
dfBeaches.set_index("slug", inplace=True)

# Survey dimensions and weights
dfDims = pd.read_csv("resources/corrected_dims.csv")

# code definitions
dxCodes = pd.read_csv("resources/codes_with_group_names")
dxCodes.set_index("code", inplace=True)

# columns that need to be renamed. Setting the language will automatically
# change column names, code descriptions and chart annotations
columns={"% to agg":"% agg", "% to recreation": "% recreation", "% to woods":"% woods", "% to buildings":"% buildings", "p/100m":"p/100 m"}

# !key word arguments to construct feature data
# !Note the water type allows the selection of river or lakes
# if None then the data is aggregated together. This selection
# is only valid for survey-area reports or other aggregated data
# that may have survey results from both lakes and rivers.
fd_kwargs ={
    "filename": "resources/checked_sdata_eos_2020_21.csv",
    "feature_name": this_feature['slug'], 
    "feature_level": this_feature['level'], 
    "these_features": this_feature['slug'], 
    "component": this_level, 
    "columns": columns, 
    "language": 'de', 
    "unit_label": unit_label, 
    "fail_rate": fail_rate,
    "code_data":dxCodes,
    "date_range": start_end,
    "water_type": None,    
}
fdx = featuredata.Components(**fd_kwargs)

# call the reports and languages
fdx.adjustForLanguage()
fdx.makeFeatureData()
fdx.locationSampleTotals()
fdx.makeDailyTotalSummary()
fdx.materialSummary()
fdx.mostCommon()
fdx.codeGroupSummary()
# !this is the feature data!
fd = fdx.feature_data

# !keyword args to build period data
# the period data is all the data that was collected
# during the same period from all the other locations
# not included in the feature data. For a survey area
# or river bassin these_features = feature_paren and 
# feature_level = parent_level
period_kwargs = {
    "period_data": fdx.period_data,
    "these_features": this_feature['slug'],
    "feature_level":this_feature['level'],
    "feature_parent":this_bassin,
    "parent_level": "river_bassin",
    "period_name": bassin_label,
    "unit_label": unit_label,
    "most_common": fdx.most_common.index
}
period_data = featuredata.PeriodResults(**period_kwargs)

# the rivers are considered separately
# select only the results from rivers
# this can be done by updating the fd_kwargs
fd_rivers = fd_kwargs.update({"water_type":"r"})
fdr = featuredata.Components(**fd_kwargs)
fdr.makeFeatureData()

# collects the summarized values for the feature data
# use this to generate the summary data for the survey area
# and the section for the rivers
admin_kwargs = {
    "data":fd,
    "dims_data":dfDims,
    "label": this_feature["name"],
    "feature_component": this_level,
    "date_range":start_end,
    **{"dfBeaches":dfBeaches}
}
admin_details = featuredata.AdministrativeSummary(**admin_kwargs)
admin_summary = admin_details.summaryObject()

# update the admin kwargs with river data to make the river summary
admin_kwargs.update({"data":fdr.feature_data})
admin_r_details = featuredata.AdministrativeSummary(**admin_kwargs)
admin_r_summary = admin_r_details.summaryObject()

Codes init called
this is german
making material and description map
making feature data
There are columns
renaming columns
sample totals
making daily total summary
making the code summary first
making material summary
getting the fail rate
making most common codes table
making feature codegroup summary
Codes init called
making feature data
There are columns
renaming columns


In [2]:
rivers = admin_details.riversOfInterest()
lakes = admin_details.lakesOfInterest()
        
# string objects for display
obj_string = featuredata.thousandsSeparator(admin_summary["quantity"], language)
surv_string = "{:,}".format(admin_summary["loc_date"])
pop_string = featuredata.thousandsSeparator(int(admin_summary["population"]), language)

# make strings
date_quantity_context = F"Im Zeitraum von {featuredata.dateToYearAndMonth(datetime.strptime(start_date, date_format), lang=date_lang)}  bis {featuredata.dateToYearAndMonth(datetime.strptime(end_date, date_format), lang= date_lang)} wurden im Rahmen von {surv_string} Datenerhebungen insgesamt {obj_string } Objekte entfernt und identifiziert."
geo_context = F"Die Ergebnisse des {this_feature['name']} umfassen {admin_summary['location']} Orte, {admin_summary['city']} Gemeinden und eine Gesamtbevölkerung von etwa {pop_string} Einwohnenden."

# lists of landmarks of interest
munis_joined = ", ".join(sorted(admin_details.populationKeys()["city"]))
lakes_joined = ", ".join(sorted(lakes))
rivers_joined = ", ".join(sorted(rivers))

# put that all together:
lake_string = F"""
{date_quantity_context} {geo_context}

*Gemeinden:*\n\n>{munis_joined}
"""
md(lake_string)


Im Zeitraum von März 2020  bis Mai 2021 wurden im Rahmen von 38 Datenerhebungen insgesamt 4 477 Objekte entfernt und identifiziert. Die Ergebnisse des Bielersee umfassen 10 Orte, 8 Gemeinden und eine Gesamtbevölkerung von etwa 70 810 Einwohnenden.

*Gemeinden:*

>Biel/Bienne, Erlach, Gals, Le Landeron, Ligerz, Lüscherz, Nidau, Vinelz


In [3]:
dims_data = admin_details.dimensionalSummary()
dims_data

Unnamed: 0_level_0,total_w,mac_plast_w,area,length,samples,quantity
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Biel/Bienne,7.338,3688.0,4584.0,580.0,14.0,2969.0
Erlach,0.05,48.0,374.0,57.0,1.0,101.0
Gals,0.43,405.0,64.0,38.0,2.0,48.0
Le Landeron,0.045,35.0,93.0,37.0,1.0,53.0
Ligerz,0.295,294.0,49.0,15.0,2.0,143.0
Lüscherz,1.718,332.0,538.0,257.0,5.0,202.0
Nidau,0.13,2.0,105.0,25.0,1.0,63.0
Vinelz,3.099,1864.0,536.0,238.0,12.0,898.0
Bielersee,13.105,6668.0,6343.0,1247.0,38.0,4477.0


In [4]:
dx = period_data.parentSampleTotals(parent=False)
d = fdx.sample_totals
output_notebook()

p = figure(x_axis_type="datetime")

p.circle(dx["date"], dx["p/100 m"], fill_color="black", size=10, line_width=0)
p.circle(d["date"], d["p/100 m"], fill_color="magenta", size=10, line_width=0)
show(p)

making sample totals from period data
make no mask


In [5]:
fdx.material_summary

Unnamed: 0,material,quantity,% of total
6,Plastik,3839,0.857494
1,Glas,213,0.047577
4,Metall,144,0.032164
5,Papier,109,0.024347
0,Chemikalien,65,0.014519
3,Holz,46,0.010275
7,Stoff,38,0.008488
2,Gummi,23,0.005137
8,Unbekannt,0,0.0


In [6]:
fdx.sample_summary

Erhebungen                38.00
Durchschnitt p/100 m     418.37
Standardabweichung       323.55
min p/100 m               40.00
25%                      142.75
50%                      340.00
75%                      599.75
max p/100 m             1480.00
Abfallobjekte           4477.00
dtype: float64

In [7]:
fdx.most_common

Unnamed: 0_level_0,p/100 m,quantity,% of total,fail,fail rate,item,material
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Gfrags,53.0,746,16.66,37,97,Fragmentierte Kunststoffe,Plastik
G27,9.0,744,16.62,30,78,Zigarettenfilter,Plastik
G30,21.0,325,7.26,33,86,Snack-Verpackungen,Plastik
G67,18.0,300,6.7,34,89,Industriefolie (Kunststoff),Plastik
G200,5.5,176,3.93,26,68,"Getränke Glasflasche, Stücke",Glas
Gfoam,5.5,173,3.86,24,63,Expandiertes Polystyrol,Plastik
G941,9.5,173,3.86,24,63,"Verpackungsfolien, nicht für Lebensmittel",Plastik
G25,5.5,92,2.05,28,73,"Tabak; Kunststoffverpackungen, Behälter",Plastik
G940,0.0,92,2.05,12,31,Schaumstoff EVA (flexibler Kunststoff),Plastik
G89,4.5,89,1.99,27,71,Kunststoff-Bauabfälle,Plastik


In [8]:
# calling componentsMostCommon gets the results for the most common codes
# at the component level
components = fdx.componentMostCommonPcsM()

# pivot that and quash the hierarchal column index that is created when the table is pivoted
mc_comp = components[["item", unit_label, "city"]].pivot(columns="city", index="item")
mc_comp.columns = mc_comp.columns.get_level_values(1)

# the aggregated total of the feature is taken from the most common objects table
mc_feature = fdx.most_common[unit_label]
mc_feature = featuredata.changeSeriesIndexLabels(mc_feature, {x:fdx.dMap.loc[x] for x in mc_feature.index})

# aggregated totals of the parent this is derived from the arguments in kwargs
mc_parent = period_data.parentMostCommon(parent=True)
mc_parent = featuredata.changeSeriesIndexLabels(mc_parent, {x:fdx.dMap.loc[x] for x in mc_parent.index})

# the aggregated totals of all the period data
mc_period = period_data.parentMostCommon(parent=False)
mc_period = featuredata.changeSeriesIndexLabels(mc_period, {x:fdx.dMap.loc[x] for x in mc_period.index})

# add the feature, bassin_label and period results to the components table
mc_comp[this_feature["name"]]= mc_feature
mc_comp[bassin_label] = mc_parent
mc_comp[top] = mc_period

mc_comp

getting the most common results from period data
mask is for parent level
applying mask
getting most common pcs/m from period data
getting the most common results from period data
make no mask
getting most common pcs/m from period data


city,Biel/Bienne,Erlach,Gals,Le Landeron,Ligerz,Lüscherz,Nidau,Vinelz,Bielersee,Aare-Erhebungsgebiet,Alle Erhebungsgebiete
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Expandiertes Polystyrol,19.0,2.0,0.0,8.0,6.0,0.0,4.0,0.0,5.5,4.0,5.0
Feuerwerkskörper; Raketenkappen,2.0,9.0,0.0,5.0,12.5,0.0,0.0,8.0,4.0,0.0,0.0
Fragmentierte Kunststoffe,50.5,49.0,28.5,22.0,193.5,14.0,64.0,128.0,53.0,18.5,18.0
"Getränke Glasflasche, Stücke",2.5,2.0,7.5,0.0,100.0,20.0,12.0,8.5,5.5,3.0,3.0
Industriefolie (Kunststoff),20.5,0.0,2.5,19.0,119.0,5.0,40.0,37.0,18.0,5.0,5.0
Industriepellets (Nurdles),4.5,0.0,0.0,0.0,0.0,0.0,8.0,10.5,2.5,0.0,0.0
Kunststoff-Bauabfälle,7.0,2.0,0.0,3.0,0.0,0.0,4.0,7.0,4.5,0.0,1.0
Schaumstoff EVA (flexibler Kunststoff),1.0,0.0,2.5,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0
Schaumstoffverpackungen/Isolierung,7.5,0.0,0.0,0.0,21.5,0.0,0.0,0.0,1.0,0.0,1.0
Snack-Verpackungen,41.5,5.0,12.5,27.0,78.5,2.0,60.0,18.5,21.0,8.0,9.0


In [9]:
components = fdx.componentCodeGroupResults()

# pivot that
grouppcs_comp = components[["city", "groupname", unit_label ]].pivot(columns="city", index="groupname")

# quash the hierarchal column index
grouppcs_comp.columns = grouppcs_comp.columns.get_level_values(1)

# the aggregated codegroup results from the feature
pt_feature = fdx.codegroup_summary[unit_label]
grouppcs_comp[this_feature["name"]] = pt_feature

# the aggregated totals for the parent level
pt_parent = period_data.parentGroupTotals(parent=True, percent=False)
grouppcs_comp[bassin_label] = pt_parent

# the aggregated totals for the period
pt_period = period_data.parentGroupTotals(parent=False, percent=False)
grouppcs_comp[top] = pt_period
grouppcs_comp

in codegroup results
Index(['city', 'groupname', 'p/100 m', 'quantity', 'f_total', '% of total'], dtype='object')
city
Biel/Bienne    2969
Erlach          101
Gals             48
Le Landeron      53
Ligerz          143
Lüscherz        202
Nidau            63
Vinelz          898
Name: quantity, dtype: int64
getting the codegroup results from period data
mask is for parent level
applying mask
getting the codegroup pcs/m from period data
getting the codegroup results from period data
make no mask
getting the codegroup pcs/m from period data


city,Biel/Bienne,Erlach,Gals,Le Landeron,Ligerz,Lüscherz,Nidau,Vinelz,Bielersee,Aare-Erhebungsgebiet,Alle Erhebungsgebiete
groupname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Abwasser,9.5,14.0,11.0,0.0,19.5,0.0,0.0,11.0,9.0,3.0,3.0
Essen und Trinken,93.0,29.0,36.5,30.0,236.0,27.0,80.0,62.0,58.0,27.0,37.0
Freizeit und Erholung,32.0,13.0,8.5,8.0,44.5,8.0,4.0,18.0,15.0,6.0,6.0
Infrastruktur,63.5,8.0,0.0,11.0,27.5,5.0,16.0,33.0,24.5,15.0,20.0
Landwirtschaft,26.0,0.0,2.5,19.0,125.0,5.0,40.0,44.5,20.0,6.0,7.0
Mikroplastik (< 5mm),20.5,2.0,16.5,0.0,0.0,0.0,16.0,21.5,12.0,1.0,1.0
Persönliche Gegenstände,10.5,12.0,5.0,3.0,38.5,5.0,0.0,9.0,9.0,4.0,6.0
Plastikfragmente,50.5,49.0,28.5,22.0,193.5,14.0,64.0,128.0,53.0,18.5,18.0
Tabakwaren,141.5,44.0,14.0,30.0,119.5,5.0,0.0,10.0,20.0,15.0,25.0
Verpackungen ohne Lebensmittel/Tabak,27.0,12.0,2.5,22.0,100.0,2.0,32.0,17.0,20.0,7.5,9.0


In [10]:
# components = fdx.componentCodeGroupResults()

# pivot that
pt_comp = components[["city", "groupname", '% of total' ]].pivot(columns="city", index="groupname")

# quash the hierarchal column index
pt_comp.columns = pt_comp.columns.get_level_values(1)

# the aggregated codegroup results from the feature
pt_feature = fdx.codegroup_summary["% of total"]
pt_comp[this_feature["name"]] = pt_feature

# the aggregated totals for the parent level
pt_parent = period_data.parentGroupTotals(parent=True, percent=True)
pt_comp[bassin_label] = pt_parent

# the aggregated totals for the period
pt_period = period_data.parentGroupTotals(parent=False, percent=True)
pt_comp[top] = pt_period
pt_comp

getting the codegroup results from period data
mask is for parent level
applying mask
getting the codegroup % of total from period data
getting the codegroup results from period data
make no mask
getting the codegroup % of total from period data


city,Biel/Bienne,Erlach,Gals,Le Landeron,Ligerz,Lüscherz,Nidau,Vinelz,Bielersee,Aare-Erhebungsgebiet,Alle Erhebungsgebiete
groupname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Abwasser,0.02,0.08,0.08,0.0,0.02,0.0,0.0,0.03,0.03,4.0,5.0
Essen und Trinken,0.18,0.16,0.29,0.21,0.26,0.34,0.32,0.15,0.19,19.0,19.0
Freizeit und Erholung,0.09,0.07,0.06,0.06,0.05,0.06,0.02,0.09,0.08,5.0,4.0
Infrastruktur,0.11,0.04,0.0,0.08,0.03,0.13,0.06,0.11,0.11,13.0,18.0
Landwirtschaft,0.06,0.0,0.02,0.13,0.14,0.06,0.16,0.13,0.08,7.0,6.0
Mikroplastik (< 5mm),0.05,0.01,0.12,0.0,0.0,0.02,0.06,0.05,0.05,5.0,8.0
Persönliche Gegenstände,0.02,0.06,0.04,0.02,0.04,0.09,0.0,0.04,0.03,3.0,3.0
Plastikfragmente,0.12,0.28,0.23,0.15,0.21,0.19,0.25,0.28,0.17,14.0,14.0
Tabakwaren,0.26,0.25,0.1,0.21,0.13,0.06,0.0,0.03,0.19,20.0,17.0
Verpackungen ohne Lebensmittel/Tabak,0.07,0.06,0.02,0.15,0.11,0.02,0.13,0.09,0.07,7.0,5.0


In [11]:
before_agg = pd.read_csv("resources/checked_before_agg_sdata_eos_2020_21.csv")
some_foams = ["G81", "G82", "G83", "G74"]
before_agg.rename(columns={"p/100m":unit_label}, inplace=True)
agg_pcs_median = {unit_label:"median", "quantity":"sum"}
agg_pcs_quantity = {unit_label:"sum", "quantity":"sum"}

# the codes for the fragmented plastics
some_frag_plas = list(before_agg[before_agg.groupname == "plastic pieces"].code.unique())
mask = ((before_agg.code.isin([*some_frag_plas, *some_foams]))&(before_agg.location.isin(admin_summary["locations_of_interest"])))

fd_frags_foams = before_agg[mask].groupby(["loc_date","code"], as_index=False).agg(agg_pcs_quantity)
fd_frags_foams = fd_frags_foams.groupby("code").agg(agg_pcs_median)
fd_frags_foams["item"] = fd_frags_foams.index.map(lambda x: fdx.dMap.loc[x])
fd_frags_foams["% of total"] = (fd_frags_foams.quantity/fd.quantity.sum()*100).round(2)
fd_frags_foams

Unnamed: 0_level_0,p/100 m,quantity,item,% of total
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
G74,0.0,9,Schaumstoffverpackungen/Isolierung,0.2
G75,0.0,8,"Objekte aus Kunststoff/Polystyrol 0,5 - 2,5 cm",0.18
G76,0.0,5,"Kunststoff/Polystyrolschaumstoff 2,5 > < 50",0.11
G78,24.0,391,"Objekte aus Kunststoff 0,5 - 2,5 cm",8.73
G79,27.0,342,"Objekte aus Kunststoff 2,5 - 50 cm",7.64
G80,0.0,0,Objekte aus Kunststoff > 50 cm,0.0
G81,0.0,116,"Objekte aus expandiertem Polystyrol 0,5 - 2,5 cm",2.59
G82,2.0,57,"Objekte aus expandiertem Polystyrol 2,5 - 50 cm",1.27
G83,0.0,0,Objekte aus expandiertem Polystyrol > 50 cm,0.0


In [12]:
from bokeh.plotting import figure, show
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap

periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = [str(x) for x in range(1, 19)]

df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str)
df["group"] = df["group"].astype(str)
df["period"] = [periods[x-1] for x in df.period]
df = df[df.group != "-"]
df = df[df.symbol != "Lr"]
df = df[df.symbol != "Lu"]

cmap = {
    "alkali metal"         : "#a6cee3",
    "alkaline earth metal" : "#1f78b4",
    "metal"                : "#d93b43",
    "halogen"              : "#999d9a",
    "metalloid"            : "#e08d49",
    "noble gas"            : "#eaeaea",
    "nonmetal"             : "#f1d4Af",
    "transition metal"     : "#599d7A",
}

TOOLTIPS = [
    ("Name", "@name"),
    ("Atomic number", "@{atomic number}"),
    ("Atomic mass", "@{atomic mass}"),
    ("Type", "@metal"),
    ("CPK color", "$color[hex, swatch]:CPK"),
    ("Electronic configuration", "@{electronic configuration}"),
]

p = figure(title="Periodic Table (omitting LA and AC Series)", width=1000, height=450,
           x_range=groups, y_range=list(reversed(periods)),
           tools="hover", toolbar_location=None, tooltips=TOOLTIPS)

r = p.rect("group", "period", 0.95, 0.95, source=df, fill_alpha=0.6, legend_field="metal",
           color=factor_cmap('metal', palette=list(cmap.values()), factors=list(cmap.keys())))

text_props = dict(source=df, text_align="left", text_baseline="middle")

x = dodge("group", -0.4, range=p.x_range)

p.text(x=x, y="period", text="symbol", text_font_style="bold", **text_props)

p.text(x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number",
       text_font_size="11px", **text_props)

p.text(x=x, y=dodge("period", -0.35, range=p.y_range), text="name",
       text_font_size="7px", **text_props)

p.text(x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass",
       text_font_size="7px", **text_props)

p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")

p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "horizontal"
p.legend.location ="top_center"
p.hover.renderers = [r] # only hover element boxes

show(p)