In [1]:
import rasterio
import rioxarray
import dask
import xarray as xr
xr.set_options(use_flox=True)
import json
import geopandas as gpd

from tqdm import tqdm

In [2]:
with open("solar_assets.geojson", "r") as f:
    solar_assets = json.load(f)
gdf = gpd.read_file("solar_assets.geojson")
gdf = gdf[gdf['status'] == 'operating']
gdf

Unnamed: 0,name,capacity_mw,status,commission_year,comission_end_year,capacity_update_date,source,url,geometry
0,Karavasta solar farm,140.0,operating,2023.0,,2024-02-22 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Karavasta_solar_farm,POINT (19.4535 40.8491)
1,Project Blue 1 solar farm,58.0,operating,2024.0,,2024-02-23 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Project_Blue_1_solar_farm,POINT (19.3912 40.7617)
3,Adrar Solar Plant,20.0,operating,2015.0,,2022-08-16 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Adrar_Solar_Plant,POINT (-0.3174 27.9077)
5,Ain El Ibel Solar,20.0,operating,2016.0,,2022-08-16 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Ain_El_Ibel_Solar,POINT (3.1636 34.3447)
6,Ain El Ibel Solar,33.0,operating,2017.0,,2022-08-16 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Ain_El_Ibel_Solar,POINT (3.1696 34.3433)
...,...,...,...,...,...,...,...,...,...
11313,Bangweulu solar farm,54.0,operating,2019.0,,2024-11-13 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Bangweulu_solar_farm,POINT (28.4329 -15.5138)
11317,Itimpi solar farm,64.0,operating,2024.0,,2024-11-05 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Itimpi_solar_farm,POINT (28.182 -12.6946)
11320,Ngonye solar farm,34.0,operating,2019.0,,2024-11-13 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Ngonye_solar_farm,POINT (28.4286 -15.5169)
11321,Riverside Solar PV Plant,33.0,operating,2018.0,,2024-09-23 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Riverside_Solar_PV_Plant,POINT (28.2364 -12.8072)


In [6]:
lons = gdf.geometry.x
lats = gdf.geometry.y

ds_points = xr.Dataset(
    data_vars={
        'capacity_mw': ('station', gdf['capacity_mw'].values),
        'station': ('station', range(len(gdf['capacity_mw'].values))),
    },
    coords={
        'lon': ('station', lons.values),
        'lat': ('station', lats.values),
        'name': ('station',  gdf['name'].values),
        # Add the capacity_mw as a coordinate for each station

    },
)

In [7]:
ds_points

### Map to weather grid points
Now we want to do perform a convolution to get it to a desired resolution. In our case, we want to convolve it such that the result aligns with a given weather grid.
We would start with NOAA data (i.e. 0.25 degrees but we need to make sure they align).

-> Better: group the points by the closest point in the weather dataset and sum over them. 

In [98]:
# weather_dataset = xr.open_dataset('HRRR_reprojected.grib2', engine='rasterio')
# weather_dataset

In [99]:
weather_dataset = xr.open_dataset('NOAA_2024_01_01_0_000.grib2', engine='rasterio')
weather_dataset

In [100]:
# Get the weather grid coordinates
weather_x = weather_dataset.x
weather_y = weather_dataset.y

# Find the index of the nearest weather grid point for each station
x_indices = weather_x.to_index()[weather_x.to_index().get_indexer(ds_points.lon, method='nearest')]
y_indices = weather_y.to_index()[weather_y.to_index().get_indexer(ds_points.lat, method='nearest')]

# Add these indices as coordinates to your station dataset
capacity_array = ds_points.assign_coords({
    'weather_x_idx': ('station', x_indices),
    'weather_y_idx': ('station', y_indices)
})["capacity_mw"]
capacity_array = capacity_array.fillna(0)

In [101]:
grouped = capacity_array.groupby(['weather_y_idx', 'weather_x_idx']).sum(min_count=1)

# Unstack to get a 2D grid
capacity_grid = grouped.unstack().fillna(0)

### Population coverage with subset of weather points

In [102]:
zone_bounding_box = [
    [-124.909721, 32.0341570000001],
    [-113.631212, 41.965851]
]

In [103]:
# Convert bounding box to min/max for longitude (x) and latitude (y)
min_lon, min_lat = zone_bounding_box[0]
max_lon, max_lat = zone_bounding_box[1]

# Find weather_x_idx and weather_y_idx values within the bounding box
weather_x_vals = capacity_grid.weather_x_idx.values
weather_y_vals = capacity_grid.weather_y_idx.values

# Select indices within bounding box
selected_x = weather_x_vals[(weather_x_vals >= min_lon) & (weather_x_vals <= max_lon)]
selected_y = weather_y_vals[(weather_y_vals >= min_lat) & (weather_y_vals <= max_lat)]

# Select the slice from summed
summed_subset = capacity_grid.sel(weather_x_idx=selected_x, weather_y_idx=selected_y)
summed_subset

For a given bounding box, we want to select the weather grid points that make up X% of the population.
We'll use california (us-cal-ciso) as an example.

#### Compute cummulative sums over grids, check what share of grid points is needed to cover what share of population

In [104]:
import numpy as np

In [105]:
summed_subset

In [106]:
flat_sorted = summed_subset.stack(grid_cell=('weather_y_idx', 'weather_x_idx')).sortby(summed_subset.stack(grid_cell=('weather_y_idx', 'weather_x_idx')), ascending=False)

In [107]:
flat_sorted.sum()

In [108]:
total_pop = summed_subset.sum().values
flat_sorted = summed_subset.stack(grid_cell=('weather_y_idx', 'weather_x_idx')).sortby(summed_subset.stack(grid_cell=('weather_y_idx', 'weather_x_idx')), ascending=False)
cumsum_1d = flat_sorted.cumsum(dim='grid_cell')


for threshold in np.arange(0.95, 1, 0.01):
    cutoff_idx = (cumsum_1d <= total_pop*threshold).sum().values
    selected_coords = flat_sorted.grid_cell[:cutoff_idx]

    print(f"{cutoff_idx/len(flat_sorted.grid_cell)*100}% of the grid points are needed to cover {threshold*100}% of the capacity")
    print(f"{cutoff_idx} grid points are needed to cover {threshold*100}% of the capacity")

y_coords = [coord[0] for coord in selected_coords.values]
x_coords = [coord[1] for coord in selected_coords.values]

4.52991452991453% of the grid points are needed to cover 95.0% of the capacity
53 grid points are needed to cover 95.0% of the capacity
4.786324786324787% of the grid points are needed to cover 96.0% of the capacity
56 grid points are needed to cover 96.0% of the capacity
5.213675213675214% of the grid points are needed to cover 97.0% of the capacity
61 grid points are needed to cover 97.0% of the capacity
5.726495726495727% of the grid points are needed to cover 98.0% of the capacity
67 grid points are needed to cover 98.0% of the capacity
6.41025641025641% of the grid points are needed to cover 99.0% of the capacity
75 grid points are needed to cover 99.0% of the capacity
100.0% of the grid points are needed to cover 100.0% of the capacity
1170 grid points are needed to cover 100.0% of the capacity


In [109]:
threshold = 0.999
cutoff_idx = (cumsum_1d <= total_pop*threshold).sum().values
selected_coords = flat_sorted.grid_cell[:cutoff_idx]

print(f"{cutoff_idx/len(flat_sorted.grid_cell)*100}% of the grid points are needed to cover {threshold*100}% of the capacity")
print(f"{cutoff_idx} grid points are needed to cover {threshold*100}% of the capacity")

y_coords = [coord[0] for coord in selected_coords.values]
x_coords = [coord[1] for coord in selected_coords.values]

7.350427350427351% of the grid points are needed to cover 99.9% of the capacity
86 grid points are needed to cover 99.9% of the capacity


### Plotting

In [110]:
summed_subset_values = summed_subset.sel(
    weather_y_idx=xr.DataArray(y_coords, dims='points'),
    weather_x_idx=xr.DataArray(x_coords, dims='points')
)

In [111]:
import plotly.express as px

In [112]:
values = summed_subset_values.values

fig = px.scatter_mapbox(
    lat=y_coords,  # Assuming y_coords are latitudes
    lon=x_coords,  # Assuming x_coords are longitudes
    color=values,
    color_continuous_scale='viridis',
    title=f'Top {threshold*100}% Capacity Grid Cells',
    mapbox_style='carto-positron',  # Light map style
    zoom=2,
    width=1000,
    height=700
)

fig.update_traces(
    marker=dict(size=6, opacity=0.7),
    hovertemplate=(
        "<b>Latitude</b>: %{lat:.8f}<br>"
        "<b>Longitude</b>: %{lon:.8f}<br>"
        "<b>Capacity</b>: %{marker.color:,.0f}"
        "<extra></extra>"
    )
)

fig.show()


*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/

