In [1]:
import pandas as pd
import numpy as np
from myst_nb import glue
import seaborn as sns
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

# this defines the css rules for the note-book table displays
header_row = {'selector': 'th:nth-child(1)', 'props': f'background-color: #FFF; text-align:right'}
even_rows = {"selector": 'tr:nth-child(even)', 'props': f'background-color: rgba(139, 69, 19, 0.08);'}
odd_rows = {'selector': 'tr:nth-child(odd)', 'props': 'background: #FFF;'}
table_font = {'selector': 'tr', 'props': 'font-size: 14px;'}
table_data = {'selector': 'td', 'props': 'padding: 14px;'}
table_caption = {'selector': 'caption', 'props': 'font-size: 14px; font-style: italic; caption-side: bottom; text-align: left; margin-top: 10px'}
table_css_styles = [even_rows, odd_rows, table_font, header_row, table_caption]


table_large_data = {'selector': 'tr', 'props': 'font-size: 14px; padding: 12px;'}
table_large_font = [even_rows, odd_rows, table_large_data, header_row, table_caption]

hl1 = pd.read_csv('resources/data/checks/hist_leman.csv')
hl2 = pd.read_csv('resources/data/u_all_data.csv')
hl2 = hl2[hl2.water_name_slug == 'lac-leman'].copy()

yu = [
    'canton',
    'orchards',
    'vineyards',
    'buildings',
    'forest',
    'undefined',
    'public_services',
    'streets'
   
]

t = hl1[hl1.location == 'maladaire'][yu][:1]
missing = hl2[hl2.loc_date == "('maladaire', '2021-06-02')"].copy()
new_column = {'loc_date':'sample_id', 'water_name_slug':'feature_name', 'water':'feature_type', 'river_bassin':'parent_boundary', 'pcs_m':'pcs/m'}
missing.rename(columns=new_column, inplace=True)
missing['project'] = 'IQAASL'
missing.loc[missing.location == 'maladaire', yu] = t.values[0]
p_plastock = pd.concat([hl1, missing])
work_data = p_plastock[p_plastock.code == "G70"].copy()
work_data = work_data.drop_duplicates(["sample_id", "code"])
work_data.loc[work_data.location == 'preverenges', 'region'] = 'Grand lac'
work_data.loc[work_data.location == 'vidy', 'region'] = 'Grand lac'
work_data.loc[work_data.location == 'tolochenaz', 'region'] = 'Grand lac'
work_data.loc[work_data.location == 'maladaire', 'region'] = 'Haut lac'
work_data.loc[work_data.location == 'versoix', 'region'] = 'Petit lac'
work_data['date'] = pd.to_datetime(work_data['date'], format='%Y-%m-%d')

# Plastic shotgun shell wadding

__A summary of observations on Lac Léman__

Plastic wadding is commonly used in modern shotgun shells due to its durability and consistency in performance. It helps to cushion the shot or slug as it moves down the barrel, ensuring it stays centered and providing a proper seal to maximize the energy transfer upon firing.

Plastic wadding can come in various forms, such as wads or cups, and they can have different designs depending on the specific requirements of the shotgun shell and the desired performance characteristics.

When plastic wadding is left behind after shooting activities or improperly disposed of, it can find its way into water bodies and eventually wash up on beaches. Once there, it can pose risks to wildlife through ingestion or entanglement.

## Essential statistics: 2015 - 2021

In [2]:
quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]

start_date, end_date = work_data["date"].min(), work_data['date'].max()
nsamples = len(work_data)
ncities = work_data.city.nunique()
nlocations = work_data.location.nunique()

qty = work_data.quantity.sum()
fail_rate = np.mean(work_data.quantity > 0)
found_qtle = np.quantile(work_data.quantity.values, quantiles)
density_qtle = np.quantile(work_data['pcs/m'].values, quantiles)
dqtle = {quantiles[i]: int(x * 100) for i, x in enumerate(density_qtle)}
found_median, found_mean = work_data.quantity.median(), work_data.quantity.mean()

median_density, average_density = work_data['pcs/m'].median(), work_data['pcs/m'].mean()




print(f'Effective dates from {start_date} to {end_date}\n')
print(f'Total found: {qty}\n')
print(f'Number of samples: {nsamples}\n')
print(f'Number of municipalities: {ncities}\n')
print(f'Number of unique sample locations: {nlocations}\n')
print(f'Shotgun wadding found in {int(fail_rate * 100)}% of samples\n')
print(f'The average number found per sample {int(found_mean)}, the median found {found_median}\n')
print(f'The average number found per meter of shoreline {round(average_density, 2)}, the median found per meter of shoreline {round(median_density, 2)}\n')
print(f'Historical average for every 100 meters : {int(average_density*100)}, historical median {int(median_density*100)}\n')
print(f'The historical 95% range per 100 meter {dqtle}')

Effective dates from 2015-11-23 00:00:00 to 2021-11-14 00:00:00

Total found: 949

Number of samples: 250

Number of municipalities: 15

Number of unique sample locations: 38

Shotgun wadding found in 54% of samples

The average number found per sample 3, the median found 1.0

The average number found per meter of shoreline 0.1, the median found per meter of shoreline 0.02

Historical average for every 100 meters : 10, historical median 2

The historical 95% range per 100 meter {0.05: 0, 0.25: 0, 0.5: 2, 0.75: 8, 0.95: 54}


### Characteristics and weight of sampling conditions

We assume there is a relationship between how the land is used and what it is we find on the ground. Archeaologists and Anthropologists make this basic assumption every time they undertake an excavation and interpret the results in the context of other findings. This interpretation of beach litter data does exactly the same. As discussed in [Near or far](https://hammerdirt-analyst.github.io/landuse/titlepage.html) and the federal report [IQAASL](https://hammerdirt-analyst.github.io/IQAASL-End-0f-Sampling-2021/) : at the national level there is strong evidence to support a correlation between the density of objects found and specific topographic features that can be isolated on a standard topographical map.

Here we consider the following features as a % of the dry-land surface area within 1 500 meters of the survey location:

1. Buildings: the surface area attributed to constructed spaces
2. Forest: the surface are attributed to woods and forest
3. Orchards: the surface area attributed to orchards
4. Public services: the surface area attributed to any area that is associated to public use, includes hospitals, sports fields, schools etc.
5. Undefined: the surface area has no defined use
6. Vineyards: the surface area attributed to growing grapes for wine making

### Categories

The magnitued of the land-use features are divided into categories: 1 - 5. Each category represents the relative magnitude of the land use conditions, as a % of the available dryland from a surface area that is equal to a circle with of radius 1'500 metres centered at the survey location: a buffer zone. The term dry-land refers to the available surface area of the buffer zone that is not covered with water.

Consider the tables 1 and 2. The % of samples that were conducted in locations with the least amount of surface area dedicated to orchards is 91% (table 1), the average pieces per meter for locations with that attribute was 0.11 (table 2).



For a detailed description of how these values are derived: [Extracting land-use values](https://hammerdirt-analyst.github.io/feb_2024/landuse_model_next_itteration.html)

In [3]:
normalized_columns = ['orchards', 'vineyards', 'buildings', 'forest', 'undefined', 'public_services']
ranked_cols = [f'{x}_rank' for x in normalized_columns]
agg = ['mean', 'count']
cols = {'mean': 'average_pcs/m', 'count': 'number_of_samples'}
val = 'pcs/m'

# divide into 5 bins from 0 to 1
for col in normalized_columns:
    work_data[col + '_rank'] = pd.cut(work_data[col], bins=5, labels=range(1, 6))

#average pcs/m and number of samples per category of buildings
buildings = work_data.groupby('buildings_rank')[val].agg(agg).reset_index().rename(columns=cols)
bdng = buildings.set_index('buildings_rank', drop=True).style.set_table_styles(table_css_styles).format(precision=2)

forests = work_data.groupby('forest_rank')[val].agg(agg).reset_index().rename(columns=cols)
frst = forests.set_index('forest_rank', drop=True).fillna('-').style.set_table_styles(table_css_styles).format(precision=2)

undefined = work_data.groupby('undefined_rank')[val].agg(agg).reset_index().rename(columns=cols)
undf = undefined.set_index('undefined_rank', drop=True).fillna('-').style.set_table_styles(table_css_styles).format(precision=2)

orchards = work_data.groupby('orchards_rank')[val].agg(agg).reset_index().rename(columns=cols)
orch = orchards.set_index('orchards_rank', drop=True).fillna('-').style.set_table_styles(table_css_styles).format(precision=2)

public_services = work_data.groupby('public_services_rank')[val].agg(agg).reset_index().rename(columns=cols)
pubs = public_services.set_index('public_services_rank', drop=True).fillna('-').style.set_table_styles(table_css_styles).format(precision=2)

vineyards = work_data.groupby('vineyards_rank')[val].agg(agg).reset_index().rename(columns=cols)
vines = vineyards.set_index('vineyards_rank', drop=True).fillna('-').style.set_table_styles(table_css_styles).format(precision=2)

ps = {}
for feature in ranked_cols:
    ps[feature] = work_data[feature].value_counts(normalize=True)

samples_feature = pd.DataFrame(ps)
samples_feature.columns = normalized_columns

caption = "<b>Table 1:</b> Percent of samples collected with the designated land-use feature and category."
sd = samples_feature.style.set_table_styles(table_css_styles).format('{:.0%}').set_caption(caption)
glue('percent_samps_feature', sd, display=False)

In [4]:
d = pd.melt(work_data, value_vars=ranked_cols, id_vars=['pcs/m', 'sample_id'])
di = d.groupby(['variable', 'value'], as_index=False)['pcs/m'].mean()
di = di.pivot(columns='variable', index='value', values='pcs/m')
di.columns = [x[:-5] for x in di.columns]
di.index.name = None
caption = "<b>Table 2:</b> The average number of shotgun shells per linear meter of shoreline by feature and category."
sd = di.style.set_table_styles(table_css_styles).format(precision=2, na_rep='-').set_caption(caption)
glue('average_per_feature', sd, display=False)

::::{grid} 1

:::{grid-item}

{glue}`percent_samps_feature`

:::

:::{grid-item}

{glue}`average_per_feature`

:::
::::

### Regional results

Lake Geneva is divided into three parts because of its different types of formation (sedimentation, tectonic folding, glacial erosion):

* Haut Lac ('Upper Lake'), the eastern part from the Rhône estuary to the line of Meillerie–Rivaz
* Grand Lac ('Large Lake'), the largest and deepest basin with the lake's largest width
* Petit Lac ('Small Lake'), the most south-west, narrower and less deep part from Yvoire–Promenthoux next Prangins to the exit in Geneva

For a brief description of the geography of Lac Léman see: [Wikepedia Lake Geneva](https://en.wikipedia.org/wiki/Lake_Geneva)

In [5]:
work_data['date'] = pd.to_datetime(work_data['date'], format='%Y-%m-%d')
fig, ax = plt.subplots()
sns.scatterplot(data=work_data, x='date', y='pcs/m', hue='region')
ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=(1, 7)))
ax.xaxis.set_major_locator(mdates.YearLocator())
plt.tight_layout()
glue('region_scatter', fig, display=False)
plt.close()

fig, ax = plt.subplots()
sns.ecdfplot(data=work_data, x="pcs/m", hue='region')
plt.tight_layout()
glue('region_ecdf', fig, display=False)
plt.close()

::::{grid} 1

:::{grid-item-card}

{glue}`region_scatter`

+++
<b>Figure 1:</b> Survey totals in number of shotgun shell waddings per linear meter of shoreline, November 2015 - December 2021.
:::

:::{grid-item-card}

{glue}`region_ecdf`

+++
<b>Figure 2:</b> The empirical cumulative distribution (ECDF) of the survey results.
:::
::::

In [6]:
# regional results
regions = work_data.groupby('region').quantity.describe()
regions.rename(columns={'count':'nsamples'}, inplace=True)
regions.index.name = None
caption = "The distribution of the total number of plastic shotgun wadding found per sample for each region"
rg = regions.style.set_table_styles(table_css_styles).format(precision=0).set_caption(caption)
glue('region_qty_dist', rg, display=False)

# regional results
regions = work_data.groupby('region')['pcs/m'].describe()
regions.rename(columns={'count':'nsamples'}, inplace=True)
regions.index.name = None
caption = "The distribution of the number of shotgun wadding found per meter of shoreline"
rg = regions.style.set_table_styles(table_css_styles).format(precision=2).set_caption(caption)
glue('region_med_dist', rg, display=False)

::::{grid} 1

:::{grid-item-card}
<b>Table 3:</b> The qauntile range of the number of plastic shotgun shells found per meter of shoreline by region.
^^
{glue}`region_med_dist`

:::
::::

### Municipal results

There are numerous municipalities on the lake, here we consider only the municipalities that were sampled.

In [7]:
# cities with more than two samples and none were found
two_or_more_samples = work_data.groupby('city').filter(lambda x: len(x) >= 2)
cumulative_sum = two_or_more_samples.groupby('city')['quantity'].sum()
total = len(cumulative_sum[cumulative_sum == 0])

# locations where none were found
by_city = work_data.groupby('city')['quantity'].sum()
cities_with_none = by_city[by_city == 0].index

print(f'Nummber of cities with two or more samples where none were found: {total}\n')
print(f'Cities where none were found, for all cities and number of samples: {cities_with_none.values}')

Nummber of cities with two or more samples where none were found: 0

Cities where none were found, for all cities and number of samples: ['Morges' 'Rolle']


__Municipal results in descending order__

In [8]:
# cities with the highest density

city_pcs_m = work_data.groupby('city', as_index=False).agg({'pcs/m':'mean', 'sample_id':'nunique'})
cpm = city_pcs_m.rename(columns={'sample_id':'nsamples'}, inplace=True)
cpm = city_pcs_m.sort_values(by='pcs/m', ascending=False).set_index('city', drop=True)
cpm.index.name = None
cp = cpm.style.set_table_styles(table_css_styles).format(precision=2)
glue('city_rankings', cp, display=True)

Unnamed: 0,pcs/m,nsamples
Saint-Gingolph,0.68,13
Allaman,0.14,3
Bourg-en-Lavaux,0.12,2
La Tour-de-Peilz,0.12,25
Montreux,0.11,53
Vevey,0.11,44
Saint-Sulpice (VD),0.06,15
Versoix,0.05,4
Genève,0.03,29
Préverenges,0.01,15


## Essential statistics: 2022

In [9]:
work_data2 = pd.read_csv('resources/data/macro_data_linearm.csv')
work_data2 = work_data2[work_data2.code == "G70"].copy()

beaches = pd.read_csv('resources/data/u_iq_ps_beaches.csv')
beaches.loc[beaches.location == 'Savonière', 'location'] = "Savonnière"
beaches.drop_duplicates('location', inplace=True)
city_map = beaches[['location', 'city']].copy()
city_map = city_map[city_map.city != 'Vidy']
# city_map.drop_duplicates(['location', 'city'], inplace=True)
city_map.set_index('location', drop=True, inplace=True)
work_data2['city'] = work_data2.Plage.apply(lambda x: city_map.loc[x]['city'])
new_columns = {"échantillon":"sample_id", 'quantité':'quantity', 'slug':'location'}
work_data2.rename(columns=new_columns, inplace=True)

In [10]:
quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]

start_date, end_date = work_data2["date"].min(), work_data2['date'].max()
nsamples = len(work_data2)
ncities = work_data2.city.nunique()
nlocations = work_data2.location.nunique()

qty = work_data2.quantity.sum()
fail_rate = np.mean(work_data2.quantity > 0)
found_qtle = np.quantile(work_data2.quantity.values, quantiles)
density_qtle = np.quantile(work_data2['pcs/m'].values, quantiles)
dqtle = {quantiles[i]: int(x * 100) for i, x in enumerate(density_qtle)}
found_median, found_mean = work_data2.quantity.median(), work_data2.quantity.mean()

median_density, average_density = work_data2['pcs/m'].median(), work_data2['pcs/m'].mean()




print(f'Effective dates from {start_date} to {end_date}\n')
print(f'Total found: {qty}\n')
print(f'Number of samples: {nsamples}\n')
print(f'Number of municipalities: {ncities}\n')
print(f'Number of unique sample locations: {nlocations}\n')
print(f'Shotgun wadding found in {int(fail_rate * 100)}% of samples\n')
print(f'The average number found per sample {int(found_mean)}, the median found {found_median}\n')
print(f'The average number found per meter of shoreline {round(average_density, 2)}, the median found per meter of shoreline {round(median_density, 2)}\n')
print(f'Historical average for every 100 meters : {int(average_density*100)}, historical median {int(median_density*100)}\n')
print(f'The historical 95% range per 100 meter {dqtle}')

Effective dates from 2022-01-10 to 2022-12-17

Total found: 199

Number of samples: 98

Number of municipalities: 24

Number of unique sample locations: 25

Shotgun wadding found in 38% of samples

The average number found per sample 2, the median found 0.0

The average number found per meter of shoreline 0.02, the median found per meter of shoreline 0.0

Historical average for every 100 meters : 2, historical median 0

The historical 95% range per 100 meter {0.05: 0, 0.25: 0, 0.5: 0, 0.75: 2, 0.95: 9}


### Regional results 2022

In [11]:
# regional results
regions = work_data2.groupby('region')['pcs/m'].describe()
regions.rename(columns={'count':'nsamples'}, inplace=True)
regions.index.name = None
caption = "The distribution of the number of shotgun wadding found per meter of shoreline"
rg = regions.style.set_table_styles(table_css_styles).format(precision=2).set_caption(caption)
glue('region_med_dist_2', rg, display=True)

Unnamed: 0,nsamples,mean,std,min,25%,50%,75%,max
Grand lac,47.0,0.03,0.06,0.0,0.0,0.0,0.03,0.36
Haut lac,24.0,0.03,0.06,0.0,0.0,0.0,0.03,0.29
Petit lac,27.0,0.01,0.02,0.0,0.0,0.0,0.0,0.07


### Municipal results 2022

In [12]:
# cities with more than two samples and none were found
two_or_more_samples = work_data2.groupby('city').filter(lambda x: len(x) >= 2)
cumulative_sum = two_or_more_samples.groupby('city')['quantity'].sum()
total = len(cumulative_sum[cumulative_sum == 0])

# locations where none were found
by_city = work_data2.groupby('city')['quantity'].sum()
cities_with_none = by_city[by_city == 0].index

print(f'Nummber of cities with two or more samples where none were found: {total}\n')
print(f'Cities where none were found, for all cities and number of samples: {cities_with_none.values}')

Nummber of cities with two or more samples where none were found: 10

Cities where none were found, for all cities and number of samples: ['Anthy' 'Aubonne' 'Crans' 'Cully' 'Gland' 'Hermance' 'Meillerie' 'Rolle'
 'Tolochenaz' 'Tougues']


__Municipal results in descending order__

In [13]:
# cities with the highest density

city_pcs_m = work_data2.groupby('city', as_index=False).agg({'pcs/m':'mean', 'quantity': 'sum','sample_id':'nunique'})
cpm = city_pcs_m.rename(columns={'sample_id':'nsamples'}, inplace=True)
cpm = city_pcs_m.sort_values(by='pcs/m', ascending=False).set_index('city', drop=True)
cpm.index.name = None
cp = cpm.style.set_table_styles(table_css_styles).format(precision=2)
glue('city_rankings_2022', cp, display=True)

Unnamed: 0,pcs/m,quantity,nsamples
Amphion,0.17,62,4
Bouveret,0.1,46,4
Saint-Disdille,0.07,27,4
Lutry,0.05,14,4
Lugrin,0.03,9,4
Grangettes,0.03,8,4
Clarens,0.03,6,4
Savonière,0.02,5,4
Lausanne,0.02,6,4
Versoix,0.01,7,8


In [14]:
import datetime as dt
from IPython.display import Markdown as md
today = dt.datetime.now().date().strftime("%d/%m/%Y")
where = "Biel, CH"

my_block = f"""

This script updated {today} in {where}

\u2764\ufe0f __what you do everyday:__ *analyst at hammerdirt*
"""

md(my_block)



This script updated 22/03/2024 in Biel, CH

❤️ __what you do everyday:__ *analyst at hammerdirt*
