In [2]:
import os
from pathlib import Path

import altair as alt
import pandas as pd

In [3]:
from ecostyles import EcoStyles
# Create styles instance
styles = EcoStyles()
# Register and enable the "article" theme
styles.register_and_enable_theme(theme_name="article")

In [4]:
# Import data set.
# The repository root defaults to the original author's checkout; set the
# RADATAHUB_DIR environment variable to point at a different checkout so the
# notebook can run on another machine without editing this cell.
RADATAHUB_DIR = Path(os.environ.get('RADATAHUB_DIR', '/Users/sambickel-barlow/Desktop/Github/RADataHub'))
CLASSIFICATIONS_CSV = RADATAHUB_DIR / 'ChartOfTheDay' / 'environment' / 'bathing water' / 'classifications.csv'
bath_sites = pd.read_csv(CLASSIFICATIONS_CSV)

In [5]:
# Keep rows that are actual (not projected) assessments under the 2006 directive
# (dropping the 1976 directive), and exclude closed bath sites (there are only 2)
bath_sites_sub = bath_sites[
    (bath_sites['regime'] == '2006 directive')
    & (bath_sites['classificationType'] == 'actual assessment')
    & (bath_sites['classificationLabel'] != 'Closed')
]

In [6]:
# Share of bathing waters at each classification level, per year:
# non-null EUBWID count per (classification, year) divided by the count per year
bath_sites_sub_gb = (
    bath_sites_sub
    .groupby(['classificationLabel', 'year'])['EUBWID']
    .count()
    .reset_index()
)
bath_sites_sub_y = (
    bath_sites_sub
    .groupby(['year'])['EUBWID']
    .count()
    .reset_index()
)
# Both frames carry an 'EUBWID' column, so the merge suffixes them:
# EUBWID_x is the per-classification count, EUBWID_y the per-year total
bath_sites_sub_gb = pd.merge(bath_sites_sub_gb, bath_sites_sub_y, on='year')
bath_sites_sub_gb['share'] = bath_sites_sub_gb['EUBWID_x'].div(bath_sites_sub_gb['EUBWID_y'])

In [7]:
# Keep only rows from 2019 onwards
bath_sites_sub_gb_limit = bath_sites_sub_gb.loc[bath_sites_sub_gb['year'].ge(2019)]

In [8]:
# Chart mappings

# Numeric rank for each classification, used to order the layers of the
# stacked area chart (lowest rank first).
classification_order = {
    'Poor': 0,
    'Sufficient': 1,
    'Good': 2,
    'Excellent': 3
}

# Add the rank as a column. `assign` returns a new DataFrame instead of writing
# into the filtered slice, which avoids pandas' SettingWithCopyWarning and keeps
# this cell safe to re-run.
bath_sites_sub_gb_limit = bath_sites_sub_gb_limit.assign(
    classification_order=bath_sites_sub_gb_limit['classificationLabel'].map(classification_order)
)

# Fill colour for each classification label
color_mapping = {
    'Excellent': '#179fdb',
    'Good': '#36b7b4',
    'Sufficient': '#f4c245',
    'Poor': '#e6224b'
}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bath_sites_sub_gb_limit['classification_order'] = bath_sites_sub_gb_limit['classificationLabel'].map(classification_order)


In [48]:
# Stacked area chart of classification shares by year, with a colour legend
bath_water_chart = (
    alt.Chart(bath_sites_sub_gb_limit)
    .mark_area()
    .encode(
        x=alt.X('year', axis=alt.Axis(format='0.0f', tickCount=6)),
        y=alt.Y(
            'share',
            title='UK Bathing Water Quality by DEFRA Classification Since 2019',
            axis=alt.Axis(format='0.0%', titleFontSize=14),
        ),
        color=alt.Color(
            'classificationLabel',
            sort=['Excellent', 'Good', 'Sufficient', 'Poor'],
            # Domain and range are both taken from color_mapping so each label keeps its colour
            scale=alt.Scale(domain=[*color_mapping], range=[*color_mapping.values()]),
            legend=alt.Legend(title='Classification'),
        ),
        # Stack the areas by their numeric classification rank
        order=alt.Order('classification_order:Q'),
    )
)

bath_water_chart

In [42]:
# Most recent year present in the charted data
latest_year = bath_sites_sub_gb_limit['year'].max()

# Label positions: take the latest year's rows, sort them by classification_order
# (bottom to top in stack), then derive each band's lower bound (y0), upper bound
# (y1) and midpoint (y_mid) from the running total of the shares
label_data = (
    bath_sites_sub_gb_limit
    .loc[bath_sites_sub_gb_limit['year'] == latest_year]
    .copy()
    .sort_values('classification_order')
    .assign(
        y0=lambda band: band['share'].cumsum() - band['share'],
        y1=lambda band: band['share'].cumsum(),
        y_mid=lambda band: (band['y0'] + band['y1']) / 2,  # midpoint for label
    )
)

# Stacked area layer (no legend; the text labels layer identifies each band)
area_chart = (
    alt.Chart(bath_sites_sub_gb_limit)
    .encode(
        x=alt.X('year:Q', axis=alt.Axis(format='0.0f', tickCount=6)),
        y=alt.Y(
            'share:Q',
            title='UK Bathing Water Quality by DEFRA Classification Since 2019',
            axis=alt.Axis(format='0.0%', titleFontSize=17),
        ),
        color=alt.Color(
            'classificationLabel:N',
            sort=['Excellent', 'Good', 'Sufficient', 'Poor'],
            scale=alt.Scale(domain=[*color_mapping], range=[*color_mapping.values()]),
            legend=None,
        ),
        # Stack the areas by their numeric classification rank
        order=alt.Order('classification_order:Q'),
    )
    .mark_area()
)

# Text layer: one label per classification, placed at the latest year and
# vertically centred on its band via the precomputed y_mid column
labels = (
    alt.Chart(label_data)
    .encode(
        x=alt.X('year:Q'),
        y=alt.Y('y_mid:Q'),
        text='classificationLabel:N',
        # Same colour scale as the areas so each label matches its band
        color=alt.Color(
            'classificationLabel:N',
            scale=alt.Scale(domain=[*color_mapping], range=[*color_mapping.values()]),
        ),
    )
    .mark_text(align='left', baseline='middle', dx=5, fontSize=15)
)

# Layer the labels over the areas, set the plot size, and drop the view's
# outer border box (optional)
final_chart = (
    alt.layer(area_chart, labels)
    .properties(width=450, height=350)
    .configure_view(stroke=None)
)

final_chart

In [43]:
# Use helper methods
# NOTE(review): the value returned by add_source is only displayed, not assigned.
# If add_source returns a new chart rather than modifying `final_chart` in place,
# anything saved from `final_chart` afterwards will not include the source line —
# confirm against the ecostyles implementation.
styles.add_source(final_chart, 'DEFRA Bathing water data')

In [47]:
# Save charts
# PNG is rasterised at 2x resolution for sharper output.
final_chart.save('bath_water_chart2.png', scale_factor=2)
# The JSON export is the chart specification itself; scale_factor only affects
# rendered image output, so it is not passed here.
final_chart.save('bath_water_chart2.json')