# data_d25a.ipynb
1. Time series: for GMSL and RSL at gauges, save fusion, high-end, low-end, and central projections for 2020–2100.
2. Year-2100: for RSL at gauges / near cities, save gauge/city info, high-end, low-end, and central projections for 2100.

Author: Benjamin S. Grandey.

In [1]:
import d25a
import datetime
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# Record the wall-clock time at the start of the run; the final cell subtracts
# this from the end time to report how long the notebook took to execute
start_dt = datetime.datetime.now()

In [3]:
# Report the software environment (package versions etc.) used for this run
watermark_text = d25a.get_watermark()
print(watermark_text)

Python implementation: CPython
Python version       : 3.10.16
IPython version      : 8.31.0

matplotlib: 3.10.0
numpy     : 2.2.2
pandas    : 2.2.3
seaborn   : 0.13.2
xarray    : 2025.1.1

conda environment: d25a-rsl-fusion

Compiler    : Clang 18.1.8 
OS          : Darwin
Release     : 22.6.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit



In [4]:
# Make output directories if they don't exist.
# exist_ok=True makes the call idempotent without a separate exists() check
# (which could race with another process creating the directory), and
# parents=True also creates any missing ancestors of DATA_DIR so that a fresh
# checkout does not fail with FileNotFoundError.
for data_dir in (d25a.DATA_DIR, d25a.DATA_DIR / 'time_series', d25a.DATA_DIR / 'year_2100'):
    data_dir.mkdir(parents=True, exist_ok=True)

## 1. Time series of GMSL and RSL at gauges

In [5]:
# Directory that receives the time-series files written in section 1
out_dir = d25a.DATA_DIR.joinpath('time_series')

### 1a. Identify gauges with missing RSL data
These gauges will be dropped.

In [6]:
# Load the fusion RSL projection for a single scenario (SSP5-8.5)
qfs_da = d25a.get_sl_qfs(workflow='fusion_1e+2e', gmsl_rsl_novlm='rsl', scenario='ssp585').copy()
# Mask everything except the NaN entries; drop=True trims coordinate labels that
# have no NaN at all, so the surviving location labels are the gauges with missing data
nan_only_da = qfs_da.where(qfs_da.isnull(), drop=True)
missing_gauges = nan_only_da.locations.data
# Summarise the affected gauges
print(f'{len(missing_gauges)} gauges have missing RSL data:')
for gauge_id in missing_gauges:
    gauge_info = d25a.get_gauge_info(gauge=gauge_id)
    name, country = gauge_info['gauge_name'], gauge_info['country']
    print(f"{gauge_id}, {name}, {country}")

14 gauges have missing RSL data:
126, TROIS-RIVIERES, CANADA
137, PORT-SAINT-FRANCOIS, CANADA
144, BATISCAN, CANADA
173, QUEBEC, CANADA
192, NEUVILLE, CANADA
201, DESCHAILLONS, CANADA
387, GRONDINES, CANADA
951, PORTNEUF, CANADA
999, ST-FRANCOIS, CANADA
1005, CHAMPLAIN, CANADA
1219, TADOUSSAC, CANADA
1244, ST-JOSEPH-DE-LA-RIVE, CANADA
1392, PORT-ALFRED, CANADA
1798, BECANCOUR, CANADA


### 1b. Save fusion, high-end, and low-end projections

In [7]:
# Loop over GMSL, RSL, and RSL without VLM component
for gmsl_rsl_novlm in ('gmsl', 'rsl', 'novlm'):
    # Loop over two scenarios
    for scenario in ['ssp585', 'ssp126']:
        # Derive fusion projection
        qfs_da = d25a.get_sl_qfs(workflow='fusion_1e+2e', gmsl_rsl_novlm=gmsl_rsl_novlm, scenario=scenario).copy()
        # Drop gauges with missing RSL data
        if gmsl_rsl_novlm != 'gmsl':
            # Remove the flagged gauges explicitly by label, so that the novlm data
            # lose the same gauges as the rsl data even where novlm itself is not NaN.
            # (Writing NaN through `.sel(...).data[:]` only works if `.sel` happens to
            # return a view of the underlying array, which is not guaranteed.)
            # Then drop any other location that still contains NaN.
            qfs_da = qfs_da.drop_sel(locations=missing_gauges).dropna(dim='locations')
        # Save fusion projection
        out_fn = out_dir / f'{gmsl_rsl_novlm}_fusion_{scenario}_d25a.nc'
        if gmsl_rsl_novlm == 'gmsl':
            print(f'Writing {out_fn.name}')
        else:
            print(f'Writing {out_fn.name} ({len(qfs_da.locations)} gauges)')
        qfs_da.to_netcdf(out_fn)
        # Derive and save high-end or low-end projection, depending on scenario:
        # high-end is the 95th percentile under SSP5-8.5, low-end the 5th percentile under SSP1-2.6
        if scenario == 'ssp585':
            high_da = qfs_da.sel(quantiles=0.95).squeeze()
            out_fn = out_dir / f'{gmsl_rsl_novlm}_high_d25a.nc'
            print(f'Writing {out_fn.name}')
            high_da.to_netcdf(out_fn)
        elif scenario == 'ssp126':
            low_da = qfs_da.sel(quantiles=0.05).squeeze()
            out_fn = out_dir / f'{gmsl_rsl_novlm}_low_d25a.nc'
            print(f'Writing {out_fn.name}')
            low_da.to_netcdf(out_fn)

Writing gmsl_fusion_ssp585_d25a.nc
Writing gmsl_high_d25a.nc
Writing gmsl_fusion_ssp126_d25a.nc
Writing gmsl_low_d25a.nc
Writing rsl_fusion_ssp585_d25a.nc (1016 gauges)
Writing rsl_high_d25a.nc
Writing rsl_fusion_ssp126_d25a.nc (1016 gauges)
Writing rsl_low_d25a.nc
Writing novlm_fusion_ssp585_d25a.nc (1016 gauges)
Writing novlm_high_d25a.nc
Writing novlm_fusion_ssp126_d25a.nc (1016 gauges)
Writing novlm_low_d25a.nc


### 1c. Save central projection
Defined as median of medium confidence mean under SSP2-4.5.

In [8]:
# Loop over GMSL, RSL, and RSL without VLM component (single scenario: SSP2-4.5)
for gmsl_rsl_novlm in ('gmsl', 'rsl', 'novlm'):
    # Derive medium confidence mean under SSP2-4.5
    qfs_da = d25a.get_sl_qfs(workflow='mean_1e+2e', gmsl_rsl_novlm=gmsl_rsl_novlm, scenario='ssp245').copy()
    # Drop gauges with missing RSL data
    if gmsl_rsl_novlm != 'gmsl':
        # Remove the flagged gauges explicitly by label, so that the novlm data lose
        # the same gauges as the rsl data, instead of writing NaN through
        # `.sel(...).data[:]` (which relies on `.sel` returning a view).
        # Then drop any other location that still contains NaN.
        qfs_da = qfs_da.drop_sel(locations=missing_gauges).dropna(dim='locations')
    # Derive and save central projection (median of the distribution)
    central_da = qfs_da.sel(quantiles=0.5).squeeze()
    out_fn = out_dir / f'{gmsl_rsl_novlm}_central_d25a.nc'
    print(f'Writing {out_fn.name}')
    central_da.to_netcdf(out_fn)

Writing gmsl_central_d25a.nc
Writing rsl_central_d25a.nc
Writing novlm_central_d25a.nc


### 1d. Save gauge information

In [9]:
# Columns to keep from the gauge metadata
# (assumes d25a.get_gauge_info returns a dict/Series keyed by these names - TODO confirm)
gauge_info_cols = ['gauge_id', 'gauge_name', 'lat', 'lon', 'country']
# Collect one record per location for which projections are available, skipping
# the gauges with missing RSL data
qfs_da = d25a.get_sl_qfs().copy()
gauge_records = [d25a.get_gauge_info(location)
                 for location in qfs_da.locations.data
                 if location not in missing_gauges]
# Build the DataFrame once from all records, rather than appending row by row
# (each row-wise enlargement copies the whole frame)
gauge_info_df = pd.DataFrame(gauge_records, columns=gauge_info_cols)
# Rename columns
gauge_info_df = gauge_info_df.rename(columns={'lat': 'gauge_lat', 'lon': 'gauge_lon', 'country': 'gauge_country'})
# Index by gauge_id
gauge_info_df = gauge_info_df.set_index('gauge_id')
# Save to CSV
out_fn = out_dir / 'gauge_info_d25a.csv'
print(f'Writing {out_fn.name} ({len(gauge_info_df)} gauges)')
gauge_info_df.to_csv(out_fn)
gauge_info_df.head()

Writing gauge_info_d25a.csv (1016 gauges)


Unnamed: 0_level_0,gauge_name,gauge_lat,gauge_lon,gauge_country
gauge_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,BREST,48.38,-4.49,FRANCE
2,SWINOUJSCIE,53.92,14.23,POLAND
3,SHEERNESS,51.45,0.74,UNITED KINGDOM
5,HOLYHEAD,53.31,-4.62,UNITED KINGDOM
7,CUXHAVEN_2,53.87,8.72,GERMANY


## 2. Year-2100 projections at gauges / near cities, with corresponding gauge/city information

In [10]:
# Directory that receives the year-2100 CSV files written in section 2
out_dir = d25a.DATA_DIR.joinpath('year_2100')

### 2a. Gauges

In [11]:
# Copy gauge info
gauges_df = gauge_info_df.copy()
# Get low-end, central, and high-end rsl and novlm projections for 2100
for rsl_novlm in ('rsl', 'novlm'):  # loop over rsl and novlm
    for low_central_high in ('low', 'central', 'high'):  # loop over low-end, central, and high-end projections
        col = f'{rsl_novlm}_{low_central_high}'
        in_fn = d25a.DATA_DIR / 'time_series' / f'{rsl_novlm}_{low_central_high}_d25a.nc'
        # Open the file in a context manager so the netCDF handle is closed again;
        # .load() pulls the selected values into memory before the file is closed
        with xr.open_dataset(in_fn) as proj_ds:
            proj_da = proj_ds['sea_level_change'].sel(years=2100)  # get year-2100 projections at gauges
            proj_da = proj_da.round(2).load()  # round to nearest cm
        # Select all gauges at once, in the row order of gauges_df (raises KeyError if a
        # gauge is absent from the file); cast to Python-float precision (float64) as before
        gauge_ids = gauges_df.index.to_numpy()
        gauges_df[col] = np.asarray(proj_da.sel(locations=gauge_ids), dtype=float)
# Save to CSV
out_fn = out_dir / 'gauges_2100_d25a.csv'
print(f'Writing {out_fn.name} ({len(gauges_df)} gauges)')
gauges_df.to_csv(out_fn)
gauges_df.head(30)

Writing gauges_2100_d25a.csv (1016 gauges)


Unnamed: 0_level_0,gauge_name,gauge_lat,gauge_lon,gauge_country,rsl_low,rsl_central,rsl_high,novlm_low,novlm_central,novlm_high
gauge_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,BREST,48.38,-4.49,FRANCE,0.15,0.54,1.82,0.17,0.55,1.84
2,SWINOUJSCIE,53.92,14.23,POLAND,0.15,0.58,1.83,0.18,0.61,1.86
3,SHEERNESS,51.45,0.74,UNITED KINGDOM,0.17,0.61,1.87,0.14,0.58,1.83
5,HOLYHEAD,53.31,-4.62,UNITED KINGDOM,0.13,0.54,1.77,0.13,0.54,1.76
7,CUXHAVEN_2,53.87,8.72,GERMANY,0.2,0.64,1.89,0.13,0.57,1.82
8,WISMAR_2,53.9,11.46,GERMANY,0.05,0.64,1.93,0.05,0.63,1.93
9,MAASSLUIS,51.92,4.25,"NETHERLANDS, KINGDOM OF THE",0.16,0.6,1.86,0.13,0.57,1.84
10,SAN_FRANCISCO,37.81,-122.47,UNITED STATES,0.23,0.53,2.02,0.21,0.5,1.99
11,WARNEMUNDE_2,54.17,12.1,GERMANY,0.17,0.59,1.83,0.18,0.6,1.85
12,NEW_YORK,40.7,-74.01,UNITED STATES,0.4,0.81,2.21,0.26,0.67,2.07


### 2b. All cities
The cities are urban agglomerations with a population of at least 300,000 inhabitants in 2018, according to the UN's World Urbanization Prospects (2018).

Low-end, central, and high-end RSL projections for 2100 (with and without the VLM component) are saved if the distance to the nearest available tide gauge, rounded to the nearest km, is ≤ 100 km.

In [12]:
# Read World Urbanisation Prospects 2018 data
cities_df = pd.read_excel('data_in/wup18/WUP2018-F12-Cities_Over_300K.xls', header=16, usecols='A,C,E,G,H,X', index_col=None)
cities_df = cities_df.rename(columns={'Index': 'city_index', 'Country or area': 'city_country', 'Urban Agglomeration': 'city_name',
                                      'Latitude': 'city_lat', 'Longitude': 'city_lon', 2025: 'population_2025_1000s'})
cities_df = cities_df.set_index('city_index')
# Loop over these cities and get data for nearest tide gauge
for index, row_ser in cities_df.iterrows():
    lat0 = row_ser['city_lat']  # latitude of city
    lon0 = row_ser['city_lon']  # longitude of city
    temp_df = gauges_df.copy()  # copy tide gauge data (from above)
    # Cosine of the central angle between the city and every available gauge
    # (spherical law of cosines)
    cos_angle = (
        np.sin(np.radians(lat0)) * np.sin(np.radians(temp_df['gauge_lat'])) +
        np.cos(np.radians(lat0)) * np.cos(np.radians(temp_df['gauge_lat'])) * np.cos(np.radians(temp_df['gauge_lon'] - lon0)))
    # Floating-point rounding can push cos_angle marginally outside [-1, 1] when a city
    # (almost) coincides with a gauge; arccos would then return NaN, which sorts last and
    # would cause the true nearest gauge to be skipped. Clip before taking arccos.
    # 6378 km is the Earth radius assumed here for the great-circle distance.
    temp_df['distance_km'] = 6378 * np.arccos(cos_angle.clip(lower=-1.0, upper=1.0))
    temp_df = temp_df.sort_values(by=['distance_km']).reset_index()  # sort by distance
    for col in ['gauge_id', 'gauge_name', 'gauge_lat', 'gauge_lon', 'distance_km']:
        cities_df.loc[index, col] = temp_df.loc[0, col]  # save gauge info to cities_df
    # distance_km is rounded to the nearest integer below, so "< 100.5" selects cities
    # whose rounded distance to the nearest gauge is ≤ 100km
    if temp_df.loc[0, 'distance_km'] < 100.5:
        for rsl_novlm in ('rsl', 'novlm'):  # ...save projections for year-2100
            for low_central_high in ('low', 'central', 'high'):
                col = f'{rsl_novlm}_{low_central_high}'
                cities_df.loc[index, col] = temp_df.loc[0, col]
# Rounding
for col in ['city_lat', 'city_lon']:  # round to 2 d.p.
    cities_df[col] = cities_df[col].round(2)
for col in ['population_2025_1000s', 'gauge_id', 'distance_km']:  # round to nearest integer
    cities_df[col] = cities_df[col].round(0).astype(int)
# Save to CSV; rows without any NaN are the cities that received projections
out_fn = out_dir / 'cities_2100_d25a.csv'
print(f'Writing {out_fn.name} ({len(cities_df)} cities; {len(cities_df.dropna())} within 100km of gauge)')
cities_df.to_csv(out_fn)
cities_df.head(30)

Writing cities_2100_d25a.csv (1860 cities; 430 within 100km of gauge)


Unnamed: 0_level_0,city_country,city_name,city_lat,city_lon,population_2025_1000s,gauge_id,gauge_name,gauge_lat,gauge_lon,distance_km,rsl_low,rsl_central,rsl_high,novlm_low,novlm_central,novlm_high
city_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,Afghanistan,Herat,34.35,62.2,753,204,KARACHI,24.81,66.97,1157,,,,,,
2,Afghanistan,Kabul,34.53,69.17,4877,204,KARACHI,24.81,66.97,1103,,,,,,
3,Afghanistan,Kandahar,31.61,65.71,577,204,KARACHI,24.81,66.97,767,,,,,,
4,Afghanistan,Mazar-e Sharif,36.71,67.11,682,204,KARACHI,24.81,66.97,1325,,,,,,
5,Albania,Tiranë (Tirana),41.33,19.82,536,1075,BAR,42.08,19.08,104,,,,,,
6,Algeria,Annaba,36.9,7.77,379,104,CAGLIARI,39.2,9.17,284,,,,,,
7,Algeria,Batna,35.56,6.17,358,104,CAGLIARI,39.2,9.17,484,,,,,,
8,Algeria,Blida,36.48,2.83,536,1892,PALMA_DE_MALLORCA,39.55,2.64,342,,,,,,
9,Algeria,El Djazaïr (Algiers),36.75,3.04,3004,1892,PALMA_DE_MALLORCA,39.55,2.64,313,,,,,,
10,Algeria,El Djelfa,34.67,3.26,607,960,ALICANTE,38.34,-0.48,528,,,,,,


### 2c. Megacities
Select subset of cities with population ≥ 10 million in 2025.

In [13]:
# Megacities: population of at least 10,000 thousand (10 million) in 2025, largest first
is_mega = cities_df['population_2025_1000s'] >= 10000
mega_df = cities_df.loc[is_mega].sort_values(by='population_2025_1000s', ascending=False)
# Write the subset to CSV; rows without any NaN are the megacities that received projections
out_fn = out_dir / 'megacities_2100_d25a.csv'
n_mega = len(mega_df)
n_near_gauge = len(mega_df.dropna())
print(f'Writing {out_fn.name} ({n_mega} megacities; {n_near_gauge} within 100km of gauge)')
mega_df.to_csv(out_fn)
mega_df

Writing megacities_2100_d25a.csv (37 megacities; 16 within 100km of gauge)


Unnamed: 0_level_0,city_country,city_name,city_lat,city_lon,population_2025_1000s,gauge_id,gauge_name,gauge_lat,gauge_lon,distance_km,rsl_low,rsl_central,rsl_high,novlm_low,novlm_central,novlm_high
city_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1088,Japan,Tokyo,35.69,139.69,37036,1545,TOKYO_III,35.65,139.77,8,0.06,0.45,2.07,0.25,0.62,2.25
808,India,Delhi,28.67,77.22,34666,420,BHAUNAGAR_I,21.8,72.3,910,,,,,,
449,China,Shanghai,31.22,121.46,30482,979,LUSI,32.13,121.62,102,,,,,,
61,Bangladesh,Dhaka,23.71,90.41,24653,1496,CHARCHANGA,22.22,91.05,178,,,,,,
685,Egypt,Al-Qahirah (Cairo),30.04,31.24,23074,253,PORT_SAID,31.25,32.3,169,,,,,,
138,Brazil,São Paulo,-23.55,-46.64,22990,726,CANANEIA,-25.02,-47.93,210,,,,,,
1147,Mexico,Ciudad de México (Mexico City),19.43,-99.14,22752,918,TUXPAN,21.0,-97.33,258,,,,,,
204,China,Beijing,39.91,116.4,22596,1403,TANGGU,39.0,117.72,152,,,,,,
882,India,Mumbai (Bombay),19.07,72.88,22089,43,MUMBAI,18.92,72.83,18,0.14,0.46,1.94,0.2,0.51,1.99
1758,United States of America,New York-Newark,40.72,-74.0,19154,12,NEW_YORK,40.7,-74.01,2,0.4,0.81,2.21,0.26,0.67,2.07


In [14]:
# Get end datetime
end_dt = datetime.datetime.now()
# Calculate run timedelta (relative to start_dt recorded at the top of the notebook)
run_td = end_dt - start_dt
# Print timing information
print(f"Start:     {start_dt.strftime('%Y-%m-%d %H:%M:%S')}")
print(f"End:       {end_dt.strftime('%Y-%m-%d %H:%M:%S')}")
# total_seconds() includes the days component of the timedelta, whereas .seconds
# only holds the 0-86399 s remainder and would under-report runs longer than a day;
# int() truncates to whole seconds, matching the previous output format
print(f"Wall time: {int(run_td.total_seconds())} s")

Start:     2025-02-18 16:19:25
End:       2025-02-18 16:20:00
Wall time: 34 s
