# Playgrounds Labels — Neighborhoods and Districts
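Self-contained notebook that computes playground-density labels and exports them as CSV files with the following columns:
- neighborhoods: [district, neighborhood, hashtags, source]
- districts: [district, hashtags, source]

It also updates the combined long and wide label tables and saves one HTML map per level.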

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import folium

# Config: locate the project root, i.e. the directory that contains 'data'.
# The working directory is tried first, then its parent, so the notebook
# works whether it is launched from the root or from one level below it.
_start_dir = Path.cwd()
for ROOT in (_start_dir, _start_dir.parent):
    if (ROOT / 'data').exists():
        break
else:
    raise FileNotFoundError(f"Couldn't locate 'data' directory from {Path.cwd()}")

# Input locations, all below ROOT/data.
_data_dir = ROOT / 'data'
RAW_DIR = _data_dir / 'raw'
NEI_PATH = _data_dir / 'neighborhoods.geojson'
PLAY_CSV = RAW_DIR / 'playgrounds.csv'

# NOTE: the output directory is relative to the current working directory,
# not to ROOT; it is created on the spot if it does not exist yet.
OUT_DIR = Path('outputs')
OUT_DIR.mkdir(parents=True, exist_ok=True)

def ensure_wgs84(gdf):
    """Return ``gdf`` in EPSG:4326.

    - No CRS set: the result of ``gdf.set_crs(4326)`` is returned.
    - CRS already reports EPSG code 4326: ``gdf`` itself is returned.
    - Anything else (including a CRS without an EPSG code): the result of
      ``gdf.to_crs(4326)`` is returned.
    """
    current_crs = gdf.crs
    if current_crs is None:
        return gdf.set_crs(4326)
    if current_crs.to_epsg() == 4326:
        return gdf
    return gdf.to_crs(4326)
def compute_area_km2(gdf, metric_epsg=25833, min_area_km2=0.20):
    """Add area columns (in km²) to ``gdf`` and return it.

    The geometries are normalised with ``ensure_wgs84`` and reprojected to
    ``metric_epsg`` before their area is measured. Two columns are written:

    - ``area_km2``: geometry area divided by 1e6.
    - ``area_eff_km2``: ``area_km2`` with a lower bound of ``min_area_km2``
      (presumably so that very small polygons do not produce inflated
      densities downstream; confirm with the data owner).

    Args:
        gdf: GeoDataFrame to measure. It is modified in place: both columns
            are added to (or overwritten on) the object passed in.
        metric_epsg: EPSG code of the CRS used for measuring. Its linear unit
            must be metres for the ``/ 1e6`` conversion to yield km².
            Defaults to 25833, the value that used to be hard-coded.
        min_area_km2: floor applied to obtain ``area_eff_km2``. Defaults to
            0.20, the value that used to be hard-coded.

    Returns:
        The same ``gdf`` object, with the two columns set.
    """
    projected = ensure_wgs84(gdf).to_crs(metric_epsg)
    # ``.values`` drops the index so the areas are assigned by position.
    gdf['area_km2'] = (projected.geometry.area / 1e6).values
    gdf['area_eff_km2'] = gdf['area_km2'].clip(lower=min_area_km2)
    return gdf
def playgrounds_tag(label):
    """Translate a density label into its playground hashtag.

    The label is converted to ``str``, stripped of surrounding whitespace and
    lower-cased before it is compared. 'below average' and 'above average'
    map to the low/high tags; every other value (including 'average' and
    anything unrecognised) maps to the average tag.
    """
    normalized = str(label).strip().lower()
    if normalized == 'below average':
        return '#low_playground_density'
    if normalized == 'above average':
        return '#high_playground_density'
    return '#average_playground_density'

# Neighborhood polygons in EPSG:4326, with area columns attached.
GDF = compute_area_km2(ensure_wgs84(gpd.read_file(NEI_PATH)))

# Count the CSV rows whose green_area_type contains 'spielplatz'
# (case-insensitive) per (district_id, neighborhood) pair.
df = pd.read_csv(PLAY_CSV)
_green_type = df['green_area_type'].astype(str).str.lower()
mask = _green_type.str.contains('spielplatz', na=False)
cnt = (
    df.loc[mask]
    .groupby(['district_id', 'neighborhood'], dropna=False)
    .size()
    .rename('n_playgrounds')
    .reset_index()
)

# Attach the counts to every neighborhood; neighborhoods without a match get 0.
nei = GDF[['district_id', 'district', 'neighborhood', 'area_eff_km2']].merge(
    cnt, on=['district_id', 'neighborhood'], how='left'
)
nei = nei.fillna({'n_playgrounds': 0})
_nei_density = nei['n_playgrounds'] / nei['area_eff_km2']
nei['playgrounds_per_km2'] = _nei_density.replace([np.inf, -np.inf], np.nan)

# Band of +/- 0.30 around the median density; values inside it are 'average'.
med = np.nanmedian(nei['playgrounds_per_km2'])
l, u = med - 0.30, med + 0.30


def _nei_density_label(value):
    """Classify one neighborhood density against the [l, u] band (NaN -> 'average')."""
    if np.isnan(value):
        return 'average'
    if value < l:
        return 'below average'
    if value > u:
        return 'above average'
    return 'average'


nei['playgrounds_density_label'] = nei['playgrounds_per_km2'].apply(_nei_density_label)
# Hashtag table for neighborhoods: one row per neighborhood, attributed to
# this notebook through the 'source' column.
_PLAYGROUNDS_SOURCE = 'playgrounds.ipynb:rule-based'
nei_out = pd.DataFrame({
    'district': nei['district'],
    'neighborhood': nei['neighborhood'],
    'hashtags': [playgrounds_tag(v) for v in nei['playgrounds_density_label']],
    'source': _PLAYGROUNDS_SOURCE
})
nei_out.to_csv(OUT_DIR/'neighborhood_labels_playgrounds.csv', index=False)

# Merge into the combined neighborhoods long table (idempotent).
# Rows written earlier under this notebook's source are removed first.
# De-duplicating alone is not enough: the key includes 'hashtags', so a
# neighborhood whose label changed since the last run would keep its stale
# tag next to the new one. Rows from other sources are left untouched.
nei_long_path = OUT_DIR/'berlin_neighborhoods_labels_long.csv'
if nei_long_path.exists():
    _old = pd.read_csv(nei_long_path)
    if 'source' in _old.columns:
        _old = _old.loc[_old['source'] != _PLAYGROUNDS_SOURCE]
    _new = pd.concat([_old, nei_out], ignore_index=True)
else:
    _new = nei_out.copy()
_new = _new.drop_duplicates(subset=['district','neighborhood','hashtags','source'])
_new.to_csv(nei_long_path, index=False)

# District totals: sum playground counts and effective areas over the
# neighborhoods of each district, then derive the density from the sums.
_dist_totals = nei.groupby(['district'], dropna=False)[['n_playgrounds', 'area_eff_km2']].sum()
dist = _dist_totals.reset_index()
_dist_density = dist['n_playgrounds'] / dist['area_eff_km2']
dist['playgrounds_per_km2'] = _dist_density.replace([np.inf, -np.inf], np.nan)

# Band of +/- 0.30 around the median district density.
medd = np.nanmedian(dist['playgrounds_per_km2'])
ld, ud = medd - 0.30, medd + 0.30


def _dist_density_label(value):
    """Classify one district density against the [ld, ud] band (NaN -> 'average')."""
    if np.isnan(value):
        return 'average'
    if value < ld:
        return 'below average'
    if value > ud:
        return 'above average'
    return 'average'


dist['playgrounds_density_label'] = dist['playgrounds_per_km2'].apply(_dist_density_label)
# Hashtag table for districts: one row per district, attributed to this
# notebook through the 'source' column.
_PLAYGROUNDS_SOURCE = 'playgrounds.ipynb:rule-based'
dist_out = pd.DataFrame({
    'district': dist['district'],
    'hashtags': [playgrounds_tag(v) for v in dist['playgrounds_density_label']],
    'source': _PLAYGROUNDS_SOURCE
})
dist_out.to_csv(OUT_DIR/'district_labels_playgrounds.csv', index=False)

# Merge into the combined districts long table (idempotent).
# Rows written earlier under this notebook's source are removed first.
# De-duplicating alone is not enough: the key includes 'hashtags', so a
# district whose label changed since the last run would keep its stale tag
# next to the new one. Rows from other sources are left untouched.
dist_long_path = OUT_DIR/'berlin_districts_labels_long.csv'
if dist_long_path.exists():
    _old = pd.read_csv(dist_long_path)
    if 'source' in _old.columns:
        _old = _old.loc[_old['source'] != _PLAYGROUNDS_SOURCE]
    _new = pd.concat([_old, dist_out], ignore_index=True)
else:
    _new = dist_out.copy()
_new = _new.drop_duplicates(subset=['district','hashtags','source'])
_new.to_csv(dist_long_path, index=False)

# Update/append to wide tables for Streamlit (idempotent)
# Neighborhoods wide: the existing file may carry columns written elsewhere,
# so it is merged with this run's values rather than overwritten.
nei_wide_cols = ['district','neighborhood','n_playgrounds','playgrounds_per_km2','playgrounds_density_label']
nei_wide = nei[nei_wide_cols].copy()
nei_wide_path = OUT_DIR/'berlin_neighborhoods_labels_wide.csv'
if nei_wide_path.exists():
    _w = pd.read_csv(nei_wide_path)
    keys = ['district','neighborhood']
    # Outer merge keeps rows present on only one side. Where a playground
    # column already exists in the file, this run's copy arrives as '<col>_new'.
    _w = _w.merge(nei_wide, on=keys, how='outer', suffixes=('', '_new'))
    for c in ['n_playgrounds','playgrounds_per_km2','playgrounds_density_label']:
        if c+'_new' in _w.columns:
            # Freshly computed values take precedence so a rerun really
            # updates the table; the stored value is only kept where this
            # run has no value for the row.
            _w[c] = _w[c+'_new'].combine_first(_w[c])
            _w = _w.drop(columns=[c+'_new'])
else:
    _w = nei_wide
_w.to_csv(nei_wide_path, index=False)

# Districts wide: the existing file may carry columns written elsewhere, so
# it is merged with this run's values rather than overwritten.
dist_wide_cols = ['district','n_playgrounds','playgrounds_per_km2','playgrounds_density_label']
dist_wide = dist[dist_wide_cols].copy()
dist_wide_path = OUT_DIR/'berlin_districts_labels_wide.csv'
if dist_wide_path.exists():
    _wd = pd.read_csv(dist_wide_path)
    # Outer merge keeps rows present on only one side. Where a playground
    # column already exists in the file, this run's copy arrives as '<col>_new'.
    _wd = _wd.merge(dist_wide, on=['district'], how='outer', suffixes=('', '_new'))
    for c in ['n_playgrounds','playgrounds_per_km2','playgrounds_density_label']:
        if c+'_new' in _wd.columns:
            # Freshly computed values take precedence so a rerun really
            # updates the table; the stored value is only kept where this
            # run has no value for the row.
            _wd[c] = _wd[c+'_new'].combine_first(_wd[c])
            _wd = _wd.drop(columns=[c+'_new'])
else:
    _wd = dist_wide
_wd.to_csv(dist_wide_path, index=False)

# Fill colour per density label; labels not listed here fall back to grey.
CAT = {
    'above average': '#1a9641',
    'average': '#a6d96a',
    'below average': '#fee08b',
}


def style_cat(f, col):
    """Build the style dict for one GeoJSON feature.

    ``f`` is a feature mapping with a 'properties' dict and ``col`` names the
    property holding the density label. String labels are lower-cased before
    the colour lookup; other values are looked up as they are. Labels without
    an entry in ``CAT`` (including a missing property) get '#cccccc'.
    """
    v = f['properties'].get(col)
    if isinstance(v, str):
        lookup_key = str(v).lower()
    else:
        lookup_key = v
    return {
        'fillColor': CAT.get(lookup_key, '#cccccc'),
        'color': '#555',
        'weight': 0.5,
        'fillOpacity': 0.75,
    }
# Neighborhood map: polygons coloured by density label, saved as HTML.
m_nei = folium.Map(location=[52.52,13.405], zoom_start=10, tiles='cartodbpositron')
_nei_labels = nei[['district','neighborhood','playgrounds_density_label']]
g_nei = GDF.merge(_nei_labels, on=['district','neighborhood'])
_nei_tooltip = folium.GeoJsonTooltip(fields=['neighborhood','district','playgrounds_density_label'])
_nei_layer = folium.GeoJson(
    g_nei,
    style_function=lambda f, c='playgrounds_density_label': style_cat(f,c),
    tooltip=_nei_tooltip,
)
_nei_layer.add_to(m_nei)
m_nei.save(str(OUT_DIR/'playgrounds_map_neighborhoods.html'))

# District map: neighborhood polygons are dissolved into one geometry per
# district before the district labels are attached.
m_dist = folium.Map(location=[52.52,13.405], zoom_start=10, tiles='cartodbpositron')
dist_polys = GDF.dissolve(by=['district'], as_index=False)
_dist_labels = dist[['district','playgrounds_density_label']]
g_dist = dist_polys.merge(_dist_labels, on='district')
_dist_tooltip = folium.GeoJsonTooltip(fields=['district','playgrounds_density_label'])
_dist_layer = folium.GeoJson(
    g_dist,
    style_function=lambda f, c='playgrounds_density_label': style_cat(f,c),
    tooltip=_dist_tooltip,
)
_dist_layer.add_to(m_dist)
m_dist.save(str(OUT_DIR/'playgrounds_map_districts.html'))
