# data_d25a.ipynb
1. Time series: for GMSL and RSL at gauges, save fusion, high-end, low-end, and central projections for 2020–2100.
2. Year-2100: for RSL at gauges / near cities, save gauge/city info, high-end, low-end, and central projections for 2100.

Author: Benjamin S. Grandey.

In [1]:
import d25a
import datetime
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# Record when the notebook run began (naive local time); read again by the final timing cell
start_dt = datetime.datetime.now(tz=None)

In [3]:
# Report the software environment used for this run
watermark_text = d25a.get_watermark()
print(watermark_text)

Python implementation: CPython
Python version       : 3.10.16
IPython version      : 8.31.0

matplotlib: 3.10.0
numpy     : 2.2.2
pandas    : 2.2.3
seaborn   : 0.13.2
xarray    : 2025.1.1

conda environment: d25a-rsl-fusion

Compiler    : Clang 18.1.8 
OS          : Darwin
Release     : 22.6.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit



In [4]:
# Make output directories if they don't exist.
# parents=True also creates any missing ancestors of DATA_DIR, and exist_ok=True makes the
# call idempotent, so no separate (race-prone) exists() check is needed. If a path exists
# but is not a directory, mkdir raises FileExistsError instead of silently continuing.
for data_dir in (d25a.DATA_DIR, d25a.DATA_DIR / 'time_series', d25a.DATA_DIR / 'year_2100'):
    data_dir.mkdir(parents=True, exist_ok=True)

## 1. Time series of GMSL and RSL at gauges

In [5]:
# All files written in section 1 go into the time_series sub-directory
out_dir = d25a.DATA_DIR.joinpath('time_series')

### 1a. Identify gauges with missing RSL data
These gauges will be dropped.

Note: gauges may now be dropped at an earlier stage, by get_coastal_loc_df().

In [6]:
# Use the fusion RSL projection under SSP5-8.5 as the reference for finding gaps
qfs_da = d25a.get_sl_qfs(workflow='fusion_1e+2e', slr_str='rsl', scenario='ssp585').copy()
# Keep only the locations that contain at least one null value; their IDs are reused by later cells
null_mask = qfs_da.isnull()
missing_gauges = qfs_da.where(null_mask, drop=True).locations.data
# Summarise the affected gauges (ID, name, country)
n_missing = len(missing_gauges)
print(f'{n_missing} gauges have missing RSL data:')
for gauge_id in missing_gauges:
    gauge_info = d25a.get_gauge_info(gauge=gauge_id)
    print(f"{gauge_id}, {gauge_info['gauge_name']}, {gauge_info['country']}")

0 gauges have missing RSL data:


### 1b. Save fusion, high-end, and low-end projections

In [7]:
# Loop over GMSL, RSL, and RSL without VLM component
for slr_str in ('gmsl', 'rsl', 'novlm'):
    # Loop over two scenarios
    for scenario in ['ssp585', 'ssp126']:
        # Derive fusion projection
        qfs_da = d25a.get_sl_qfs(workflow='fusion_1e+2e', slr_str=slr_str, scenario=scenario).copy()
        # Drop gauges with missing RSL data. The same gauges are removed from the novlm data,
        # so the rsl and novlm files cover an identical set of gauges. drop_sel removes them by
        # label, rather than writing NaN through the result of .sel(), which only works while
        # .sel() happens to return a view of the underlying array. Any remaining location that
        # still contains NaN is then dropped as well.
        if slr_str != 'gmsl':
            qfs_da = qfs_da.drop_sel(locations=missing_gauges).dropna(dim='locations')
        # Save fusion projection
        out_fn = out_dir / f'{slr_str}_fusion_{scenario}_d25a.nc'
        if slr_str == 'gmsl':
            print(f'Writing {out_fn.name}')
        else:
            print(f'Writing {out_fn.name} ({len(qfs_da.locations)} gauges)')
        qfs_da.to_netcdf(out_fn)
        # Derive and save high-end or low-end projection, depending on scenario:
        # high-end is the 95th percentile under ssp585, low-end the 5th percentile under ssp126
        if scenario == 'ssp585':
            high_da = qfs_da.sel(quantiles=0.95).squeeze()
            out_fn = out_dir / f'{slr_str}_high_d25a.nc'
            print(f'Writing {out_fn.name}')
            high_da.to_netcdf(out_fn)
        elif scenario == 'ssp126':
            low_da = qfs_da.sel(quantiles=0.05).squeeze()
            out_fn = out_dir / f'{slr_str}_low_d25a.nc'
            print(f'Writing {out_fn.name}')
            low_da.to_netcdf(out_fn)

Writing gmsl_fusion_ssp585_d25a.nc
Writing gmsl_high_d25a.nc
Writing gmsl_fusion_ssp126_d25a.nc
Writing gmsl_low_d25a.nc
Writing rsl_fusion_ssp585_d25a.nc (1843 gauges)
Writing rsl_high_d25a.nc
Writing rsl_fusion_ssp126_d25a.nc (1843 gauges)
Writing rsl_low_d25a.nc
Writing novlm_fusion_ssp585_d25a.nc (1843 gauges)
Writing novlm_high_d25a.nc
Writing novlm_fusion_ssp126_d25a.nc (1843 gauges)
Writing novlm_low_d25a.nc


### 1c. Save central projection
Defined as the median of the medium-confidence mean under SSP2-4.5.

In [8]:
# Loop over GMSL, RSL, and RSL without VLM component (single scenario: SSP2-4.5)
for slr_str in ('gmsl', 'rsl', 'novlm'):
    # Derive medium confidence mean under SSP2-4.5
    qfs_da = d25a.get_sl_qfs(workflow='mean_1e+2e', slr_str=slr_str, scenario='ssp245').copy()
    # Drop gauges with missing RSL data (also from the novlm data, so that rsl and novlm cover
    # the same gauges), then drop any remaining location that contains NaN. drop_sel removes
    # the gauges by label, rather than writing NaN through the result of .sel(), which only
    # works while .sel() happens to return a view of the underlying array.
    if slr_str != 'gmsl':
        qfs_da = qfs_da.drop_sel(locations=missing_gauges).dropna(dim='locations')
    # Derive and save central projection (median)
    central_da = qfs_da.sel(quantiles=0.5).squeeze()
    out_fn = out_dir / f'{slr_str}_central_d25a.nc'
    print(f'Writing {out_fn.name}')
    central_da.to_netcdf(out_fn)

Writing gmsl_central_d25a.nc
Writing rsl_central_d25a.nc
Writing novlm_central_d25a.nc


### 1d. Save gauge information

In [9]:
# Start from an empty table with one column per gauge attribute
gauge_info_df = pd.DataFrame(columns=['gauge_id', 'gauge_name', 'country', 'lat', 'lon'])
# Visit every location in the default projection, skipping gauges flagged as missing
qfs_da = d25a.get_sl_qfs().copy()
for location in qfs_da.locations.data:
    if location in missing_gauges:
        continue
    # Append this gauge's information as a new row
    gauge_info = d25a.get_gauge_info(location)
    gauge_info_df.loc[len(gauge_info_df)] = gauge_info
# Prefix the generic column names with "gauge_" and index the table by gauge_id
gauge_info_df = (
    gauge_info_df
    .rename(columns={'country': 'gauge_country', 'lat': 'gauge_lat', 'lon': 'gauge_lon'})
    .set_index('gauge_id')
)
# Write the table to CSV and preview the first rows
out_fn = out_dir / 'gauge_info_d25a.csv'
print(f'Writing {out_fn.name} ({len(gauge_info_df)} gauges)')
gauge_info_df.to_csv(out_fn)
gauge_info_df.head()

Writing gauge_info_d25a.csv (1843 gauges)


Unnamed: 0_level_0,gauge_name,gauge_country,gauge_lat,gauge_lon
gauge_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,BREST,FRANCE,48.38,-4.49
2,SWINOUJSCIE,POLAND,53.92,14.23
3,SHEERNESS,UNITED KINGDOM,51.45,0.74
5,HOLYHEAD,UNITED KINGDOM,53.31,-4.62
7,CUXHAVEN_2,GERMANY,53.87,8.72


## 2. Year-2100 projections at gauges / near cities, with corresponding gauge/city information

In [10]:
# All files written in section 2 go into the year_2100 sub-directory
out_dir = d25a.DATA_DIR.joinpath('year_2100')

### 2a. Gauges

In [11]:
# Copy gauge info
gauges_df = gauge_info_df.copy()
# Get low-end, central, and high-end rsl and novlm projections for 2100
for rsl_novlm in ('rsl', 'novlm'):  # loop over rsl and novlm
    for low_central_high in ('low', 'central', 'high'):  # loop over low-end, central, and high-end projections
        col = f'{rsl_novlm}_{low_central_high}'
        in_fn = d25a.DATA_DIR / 'time_series' / f'{rsl_novlm}_{low_central_high}_d25a.nc'
        # Open the file in a context manager so the handle is released straight away; a handle
        # left open would otherwise persist in the kernel and can prevent the time-series cells
        # from overwriting the same file on a re-run. load() reads the values before closing.
        with xr.open_dataset(in_fn) as proj_ds:
            proj_da = proj_ds['sea_level_change'].sel(years=2100)  # get year-2100 projections at gauges
            proj_da = proj_da.round(2).load()  # round to nearest cm
        # Look up each gauge by ID and assign the whole column at once
        gauges_df[col] = [float(proj_da.sel(locations=gauge_id)) for gauge_id in gauges_df.index]
# Save to CSV
out_fn = out_dir / 'gauges_2100_d25a.csv'
print(f'Writing {out_fn.name} ({len(gauges_df)} gauges)')
gauges_df.to_csv(out_fn)
gauges_df.head(30)

Writing gauges_2100_d25a.csv (1843 gauges)


Unnamed: 0_level_0,gauge_name,gauge_country,gauge_lat,gauge_lon,rsl_low,rsl_central,rsl_high,novlm_low,novlm_central,novlm_high
gauge_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,BREST,FRANCE,48.38,-4.49,0.15,0.54,1.82,0.17,0.55,1.84
2,SWINOUJSCIE,POLAND,53.92,14.23,0.15,0.58,1.83,0.18,0.61,1.86
3,SHEERNESS,UNITED KINGDOM,51.45,0.74,0.17,0.61,1.87,0.14,0.58,1.83
5,HOLYHEAD,UNITED KINGDOM,53.31,-4.62,0.13,0.54,1.77,0.13,0.54,1.76
7,CUXHAVEN_2,GERMANY,53.87,8.72,0.2,0.64,1.89,0.13,0.57,1.82
8,WISMAR_2,GERMANY,53.9,11.46,0.05,0.64,1.93,0.05,0.63,1.93
9,MAASSLUIS,"NETHERLANDS, KINGDOM OF THE",51.92,4.25,0.16,0.6,1.86,0.13,0.57,1.84
10,SAN_FRANCISCO,UNITED STATES,37.81,-122.47,0.23,0.53,2.02,0.21,0.5,1.99
11,WARNEMUNDE_2,GERMANY,54.17,12.1,0.17,0.59,1.83,0.18,0.6,1.85
12,NEW_YORK,UNITED STATES,40.7,-74.01,0.4,0.81,2.21,0.26,0.67,2.07


### 2b. All cities
The cities are urban agglomerations with a population of at least 300,000 inhabitants in 2018, according to the UN's World Urbanization Prospects (2018).

Low-end, central, and high-end RSL projections for 2100 are saved if the distance to the nearest available tide gauge is ≤ 100km.

In [12]:
# Read World Urbanization Prospects 2018 data
cities_df = pd.read_excel('data_in/wup18/WUP2018-F12-Cities_Over_300K.xls', header=16, usecols='A,C,E,G,H,X', index_col=None)
cities_df = cities_df.rename(columns={'Index': 'city_index', 'Country or area': 'city_country', 'Urban Agglomeration': 'city_name',
                                      'Latitude': 'city_lat', 'Longitude': 'city_lon', 2025: 'population_2025_1000s'})
# Reorder columns
cities_df = cities_df.set_index('city_index')
cities_df = cities_df[['city_name', 'city_country', 'city_lat', 'city_lon', 'population_2025_1000s']]
# Parameters of the nearest-gauge search
earth_radius_km = 6378  # sphere radius used for great-circle distances
max_distance_km = 100.5  # distances below this round to ≤ 100 km
proj_cols = [f'{rsl_novlm}_{low_central_high}'
             for rsl_novlm in ('rsl', 'novlm') for low_central_high in ('low', 'central', 'high')]
# Gauge-side trigonometric terms do not depend on the city, so compute them once
gauge_lat_rad = np.radians(gauges_df['gauge_lat'])
sin_gauge_lat = np.sin(gauge_lat_rad)
cos_gauge_lat = np.cos(gauge_lat_rad)
# Loop over these cities and get data for nearest tide gauge
for index, row_ser in cities_df.iterrows():
    lat0_rad = np.radians(row_ser['city_lat'])  # latitude of city (radians)
    lon0 = row_ser['city_lon']  # longitude of city (degrees)
    # Cosine of the central angle between the city and every available gauge (spherical law of cosines)
    cos_angle = (np.sin(lat0_rad) * sin_gauge_lat +
                 np.cos(lat0_rad) * cos_gauge_lat * np.cos(np.radians(gauges_df['gauge_lon'] - lon0)))
    # Rounding can push the cosine marginally outside [-1, 1] when a city (almost) coincides with
    # a gauge; arccos would then return NaN and the true nearest gauge would be missed, so clip first
    distance_km = earth_radius_km * np.arccos(cos_angle.clip(-1, 1))
    # Nearest gauge (idxmin returns the first gauge if several are exactly equidistant)
    nearest_id = distance_km.idxmin()
    nearest_ser = gauges_df.loc[nearest_id]
    nearest_km = distance_km.loc[nearest_id]
    # Save gauge info to cities_df
    cities_df.loc[index, 'gauge_id'] = nearest_id
    for col in ['gauge_name', 'gauge_lat', 'gauge_lon']:
        cities_df.loc[index, col] = nearest_ser[col]
    cities_df.loc[index, 'distance_km'] = nearest_km
    if nearest_km < max_distance_km:  # if distance to nearest gauge is ≤ 100km...
        for col in proj_cols:  # ...save projections for year-2100
            cities_df.loc[index, col] = nearest_ser[col]
# Rounding
for col in ['city_lat', 'city_lon']:  # round to 2 d.p.
    cities_df[col] = cities_df[col].round(2)
for col in ['population_2025_1000s', 'gauge_id', 'distance_km']:  # round to nearest integer
    cities_df[col] = cities_df[col].round(0).astype(int)
# Save to CSV
out_fn = out_dir / 'cities_2100_d25a.csv'
print(f'Writing {out_fn.name} ({len(cities_df)} cities; {len(cities_df.dropna())} within 100km of gauge)')
cities_df.to_csv(out_fn)
cities_df.head(30)

Writing cities_2100_d25a.csv (1860 cities; 1193 within 100km of gauge)


Unnamed: 0_level_0,city_name,city_country,city_lat,city_lon,population_2025_1000s,gauge_id,gauge_name,gauge_lat,gauge_lon,distance_km,rsl_low,rsl_central,rsl_high,novlm_low,novlm_central,novlm_high
city_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,Herat,Afghanistan,34.35,62.2,753,1005300540,grid_37.0_54.0,37.0,54.0,798,,,,,,
2,Kabul,Afghanistan,34.53,69.17,4877,1006200680,grid_28.0_68.0,28.0,68.0,735,,,,,,
3,Kandahar,Afghanistan,31.61,65.71,577,1006200680,grid_28.0_68.0,28.0,68.0,459,,,,,,
4,Mazar-e Sharif,Afghanistan,36.71,67.11,682,1006200680,grid_28.0_68.0,28.0,68.0,973,,,,,,
5,Tiranë (Tirana),Albania,41.33,19.82,536,1004900200,grid_41.0_20.0,41.0,20.0,39,0.09,0.52,1.83,0.17,0.53,1.81
6,Annaba,Algeria,36.9,7.77,379,1005300080,grid_37.0_8.0,37.0,8.0,24,0.12,0.56,1.9,0.2,0.56,1.88
7,Batna,Algeria,35.56,6.17,358,1005400060,grid_36.0_6.0,36.0,6.0,52,0.11,0.54,1.88,0.2,0.56,1.87
8,Blida,Algeria,36.48,2.83,536,1005400030,grid_36.0_3.0,36.0,3.0,56,0.13,0.55,1.89,0.22,0.56,1.87
9,El Djazaïr (Algiers),Algeria,36.75,3.04,3004,1005300030,grid_37.0_3.0,37.0,3.0,28,0.15,0.57,1.92,0.22,0.56,1.88
10,El Djelfa,Algeria,34.67,3.26,607,1005500030,grid_35.0_3.0,35.0,3.0,44,0.13,0.54,1.88,0.22,0.56,1.87


### 2c. Megacities
Select subset of cities with population ≥ 10 million in 2025.

In [13]:
def shorten_city_name(city_name):
    """Return a compact display name for a city.

    Rules are tried in order: for Mumbai, Kolkata, and Chennai the part before the
    parentheses is used; otherwise, if the name has parentheses, the text inside the
    last pair is used; otherwise the part before the first comma; otherwise the part
    before the first hyphen. Names matching none of these are returned unchanged.
    """
    before_paren = city_name.split(' (')[0]
    if before_paren in ['Mumbai', 'Kolkata', 'Chennai']:
        return before_paren
    if '(' in city_name:
        return city_name.split(' (')[-1].rstrip(')')
    if ',' in city_name:
        return city_name.split(',')[0]
    if '-' in city_name:
        return city_name.split('-')[0]
    return city_name


# Megacities: population of at least 10,000 thousand in 2025, largest first
is_mega = cities_df['population_2025_1000s'] >= 10000
mega_df = cities_df.loc[is_mega].sort_values(by='population_2025_1000s', ascending=False)
# Attach a shorter display name to each megacity
for index, city_name in mega_df['city_name'].items():
    mega_df.loc[index, 'city_short'] = shorten_city_name(city_name)
# Write the table to CSV and display it
out_fn = out_dir / 'megacities_2100_d25a.csv'
print(f'Writing {out_fn.name} ({len(mega_df)} megacities; {len(mega_df.dropna())} within 100km of gauge)')
mega_df.to_csv(out_fn)
mega_df

Writing megacities_2100_d25a.csv (37 megacities; 31 within 100km of gauge)


Unnamed: 0_level_0,city_name,city_country,city_lat,city_lon,population_2025_1000s,gauge_id,gauge_name,gauge_lat,gauge_lon,distance_km,rsl_low,rsl_central,rsl_high,novlm_low,novlm_central,novlm_high,city_short
city_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1088,Tokyo,Japan,35.69,139.69,37036,1545,TOKYO_III,35.65,139.77,8,0.06,0.45,2.07,0.25,0.62,2.25,Tokyo
808,Delhi,India,28.67,77.22,34666,1006700730,grid_23.0_73.0,23.0,73.0,759,,,,,,,Delhi
449,Shanghai,China,31.22,121.46,30482,1005901210,grid_31.0_121.0,31.0,121.0,50,-0.08,0.54,2.15,0.24,0.58,2.1,Shanghai
61,Dhaka,Bangladesh,23.71,90.41,24653,1006600900,grid_24.0_90.0,24.0,90.0,53,-0.13,0.48,1.98,0.17,0.5,1.92,Dhaka
685,Al-Qahirah (Cairo),Egypt,30.04,31.24,23074,1006000310,grid_30.0_31.0,30.0,31.0,23,0.1,0.54,1.91,0.19,0.56,1.91,Cairo
138,São Paulo,Brazil,-23.55,-46.64,22990,1011403130,grid_-24.0_313.0,-24.0,-47.0,63,0.3,0.66,2.09,0.24,0.56,1.99,São Paulo
1147,Ciudad de México (Mexico City),Mexico,19.43,-99.14,22752,1007102610,grid_19.0_261.0,19.0,-99.0,50,0.4,0.79,2.28,0.25,0.56,2.04,Mexico City
204,Beijing,China,39.91,116.4,22596,1005001160,grid_40.0_116.0,40.0,116.0,35,-0.14,0.49,2.04,0.17,0.52,1.97,Beijing
882,Mumbai (Bombay),India,19.07,72.88,22089,1007100730,grid_19.0_73.0,19.0,73.0,15,-0.13,0.49,2.05,0.19,0.51,1.98,Mumbai
1758,New York-Newark,United States of America,40.72,-74.0,19154,12,NEW_YORK,40.7,-74.01,2,0.4,0.81,2.21,0.26,0.67,2.07,New York


In [14]:
# Get end datetime
end_dt = datetime.datetime.now()
# Calculate run timedelta (start_dt is set near the top of the notebook)
run_td = end_dt - start_dt
# Print timing information
print(f"Start:     {start_dt.strftime('%Y-%m-%d %H:%M:%S')}")
print(f"End:       {end_dt.strftime('%Y-%m-%d %H:%M:%S')}")
# total_seconds() covers the whole duration; timedelta.seconds alone drops the days component
print(f"Wall time: {int(run_td.total_seconds())} s")

Start:     2025-04-25 17:40:34
End:       2025-04-25 17:42:05
Wall time: 90 s
