In [8]:
import rasterio
import rioxarray
import dask
import xarray as xr
xr.set_options(use_flox=True)
import json
import geopandas as gpd

from tqdm import tqdm

In [4]:
with open("solar_assets.geojson", "r") as f:
    solar_assets = json.load(f)

In [9]:
gdf = gpd.read_file("solar_assets.geojson")

In [14]:
gdf = gdf[gdf['status'] == 'operating']
gdf

Unnamed: 0,name,capacity_mw,status,commission_year,comission_end_year,capacity_update_date,source,url,geometry
0,Karavasta solar farm,140.0,operating,2023.0,,2024-02-22 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Karavasta_solar_farm,POINT (19.4535 40.8491)
1,Project Blue 1 solar farm,58.0,operating,2024.0,,2024-02-23 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Project_Blue_1_solar_farm,POINT (19.3912 40.7617)
3,Adrar Solar Plant,20.0,operating,2015.0,,2022-08-16 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Adrar_Solar_Plant,POINT (-0.3174 27.9077)
5,Ain El Ibel Solar,20.0,operating,2016.0,,2022-08-16 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Ain_El_Ibel_Solar,POINT (3.1636 34.3447)
6,Ain El Ibel Solar,33.0,operating,2017.0,,2022-08-16 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Ain_El_Ibel_Solar,POINT (3.1696 34.3433)
...,...,...,...,...,...,...,...,...,...
11313,Bangweulu solar farm,54.0,operating,2019.0,,2024-11-13 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Bangweulu_solar_farm,POINT (28.4329 -15.5138)
11317,Itimpi solar farm,64.0,operating,2024.0,,2024-11-05 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Itimpi_solar_farm,POINT (28.182 -12.6946)
11320,Ngonye solar farm,34.0,operating,2019.0,,2024-11-13 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Ngonye_solar_farm,POINT (28.4286 -15.5169)
11321,Riverside Solar PV Plant,33.0,operating,2018.0,,2024-09-23 00:00:00+00:00,Global Energy Monitor,https://www.gem.wiki/Riverside_Solar_PV_Plant,POINT (28.2364 -12.8072)


In [61]:
import pandas as pd

In [69]:
# You can inspect it:
# print("--- GeoDataFrame loaded from GeoJSON ---")
# print(gdf.head())

# 2. Extract coordinates and other properties from the GeoDataFrame
# The 'geometry' column contains point objects. We can get their x and y coordinates.
lons = gdf.geometry.x
lats = gdf.geometry.y

# Example: get data from a property field named 'value' in the GeoJSON
# If a property doesn't exist, it will default to None.
point_values = gdf['capacity_mw'].values if 'capacity_mw' in gdf else None

# 3. Create an xarray.Dataset
# We'll create a new dimension, e.g., 'station', to index the points.
# The coordinates and properties of the points become variables in the Dataset.
ds_points = xr.Dataset(
    data_vars={
        'station': ('station', range(len(point_values)))
    },
    coords={
        # The coordinates of the points are also variables, assigned to 'coords'.
        'lon': ('station', lons.values),
        'lat': ('station', lats.values),
        # The primary dimension coordinate for indexing the points
        'name': ('station',  gdf['name'].values),
    },
    vars={
        'capacity':
    }
)

In [68]:
ds_points

In [57]:
ds_points

### Map to weather grid points
Now we want to do perform a convolution to get it to a desired resolution. In our case, we want to convolve it such that the result aligns with a given weather grid.
We would start with NOAA data (i.e. 0.25 degrees but we need to make sure they align).

-> Better: group the points by the closest point in the weather dataset and sum over them. 

In [50]:
ds = xr.open_dataset('NOAA_2024_01_01_0_000.grib2', engine='rasterio')
ds

In [55]:
x_indexer = ds.x.to_index().get_indexer(ds_points.lon, method='nearest')
y_indexer = ds.y.to_index().get_indexer(ds_points.lat, method='nearest')

# Add these indices as new coordinates to ds_points.
# This doesn't move any data, just adds metadata.
ds_points.coords['weather_x_idx'] = ('x', x_indexer)
ds_points.coords['weather_y_idx'] = ('y', y_indexer)

print("Added 'weather_x_idx' and 'weather_y_idx' to ds_points.coords")
# You can inspect ds_points now to see the new coordinates.
ds_points

CoordinateValidationError: coordinate weather_x_idx has dimensions ('x',), but these are not a subset of the DataArray dimensions ('station',)

In [21]:
if 'band' in ds_points.dims and ds_points.sizes['band'] == 1:
    df1_no_band = ds_points.squeeze('band', drop=True)
else:
    df1_no_band = ds_points

In [22]:
df1_no_band

In [26]:
print(len(ds_points.lat))
print(len(ds_points.lon))

9820
9820


In [None]:
import dask.array as da
import numpy as np
import xarray as xr

# --- This part is the same as before ---
# Squeeze out the 'band' dimension if it exists
if 'band' in ds_points.dims and ds_points.sizes['band'] == 1:
    asset_data = ds_points.squeeze('band', drop=True)
else:
    asset_data = ds_points

y_indexer = ds.y.to_index()[ds.y.to_index().get_indexer(asset_data.y, method='nearest')]
x_indexer = ds.x.to_index()[ds.x.to_index().get_indexer(asset_data.x, method='nearest')]

asset_data.['weather_y_idx'] = ('y', y_indexer)
asset_data.coords['weather_x_idx'] = ('x', x_indexer)

asset_data

CoordinateValidationError: coordinate weather_y_idx has dimensions ('y',), but these are not a subset of the DataArray dimensions ('station',)

In [30]:
summed = asset_data.groupby(['weather_y_idx']).sum().groupby(['weather_x_idx']).sum()
summed

In [34]:
import plotly.express as px
import numpy as np

# Let's assume the final aggregated data is in the 'aggregated_da' variable
# from the previous step. We need to call .compute() to get the actual values
# for plotting.
print("Computing the aggregated data for plotting...")
aggregated_computed = summed.compute().to_array()
print("Computation finished.")

Computing the aggregated data for plotting...
Computation finished.


In [39]:
aggregated_computed

In [36]:
import plotly.express as px
import numpy as np


# For the color scale, a log scale is better to see variations.
# np.log1p calculates log(1 + x) to handle cells with 0 population.
aggregated_logged = aggregated_computed
aggregated_logged.name = 'Log of Population' # Label for the color bar


# --- Create the interactive plot ---
# `px.imshow` creates a fully interactive plot with pan and zoom.
fig = px.imshow(
    aggregated_logged,
    labels=dict(x="Longitude", y="Latitude", color="Log(Population)"),
    title='Interactive World Population Map',
    origin='lower', # This ensures the map is oriented correctly.
    color_continuous_scale='viridis'
)


# --- Customize the Hover Tooltip ---
# We can provide a rich hover tooltip showing the original population count.
# The customdata is an array of the same shape as the plot data.
# The hovertemplate string tells plotly how to format this data.
fig.update_traces(
    customdata=aggregated_logged.values,
    hovertemplate=(
        "<b>Longitude</b>: %{x}<br>"
        "<b>Latitude</b>: %{y}<br>"
        "<b>Log(Population)</b>: %{z:.2f}<br>"
        "<b>Total Population</b>: %{customdata:,.0f}"
        "<extra></extra>"  # This removes the trace name from the hover
    )
)


# --- Final Touches & Saving ---
fig.update_layout(title_x=0.5)

# Save the interactive plot to a self-contained HTML file.
output_filename = "interactive_solar_assets_map.html"
fig.write_html(output_filename)

print(f"Interactive map has been saved to '{output_filename}'")
print("Displaying interactive map below:")


# Display the figure in the notebook. You can now zoom and pan!
fig.show()



ValueError: px.imshow only accepts 2D single-channel, RGB or RGBA images. An image of shape (1, 820, 346, 9820) was provided. Alternatively, 3- or 4-D single or multichannel datasets can be visualized using the `facet_col` or/and `animation_frame` arguments.