## Load information 
### Gather solar radiation data from multiple files

In [13]:
import pandas as pd
import glob
import os

from shapely.geometry import Point

path = 'chicago/data/radiation/'
files = glob.glob(os.path.join(path, '*.csv'))

radiation_data = []
for f in files:
    # Read CSV skipping first two lines and using the third line as header
    df = pd.read_csv(f, skiprows=2)
    # Extract lat/lon from filename (e.g., xxxxxxx_41.88_-87.63_yyyy.csv)
    lat, lon = map(float, os.path.basename(f).split('_')[1:3])
    df['lat'] = lat
    df['lon'] = lon
    df['geometry'] = Point(lon, lat)
    radiation_data.append(df)

radiation_df = pd.concat(radiation_data, ignore_index=True)

Convert stadard dataframe into geopandas dataframe. This will still have multiple timestamps (year, month, day, hour) per (DHI, DNI, GHI) triplet.

In [14]:
import geopandas as gpd

radiation_gdf = gpd.GeoDataFrame(radiation_df, geometry='geometry', crs='EPSG:4326')
radiation_gdf

Unnamed: 0,Year,Month,Day,Hour,Minute,DHI,GHI,DNI,Solar Zenith Angle,lat,lon,geometry
0,2023,1,1,0,0,0,0,0,106.22,41.98,-87.73,POINT (-87.73 41.98)
1,2023,1,1,1,0,0,0,0,117.11,41.98,-87.73,POINT (-87.73 41.98)
2,2023,1,1,2,0,0,0,0,128.23,41.98,-87.73,POINT (-87.73 41.98)
3,2023,1,1,3,0,0,0,0,139.25,41.98,-87.73,POINT (-87.73 41.98)
4,2023,1,1,4,0,0,0,0,149.61,41.98,-87.73,POINT (-87.73 41.98)
...,...,...,...,...,...,...,...,...,...,...,...,...
6806515,2023,12,31,19,0,106,106,0,66.51,41.68,-87.71,POINT (-87.71 41.68)
6806516,2023,12,31,20,0,42,42,0,70.99,41.68,-87.71,POINT (-87.71 41.68)
6806517,2023,12,31,21,0,50,50,0,77.68,41.68,-87.71,POINT (-87.71 41.68)
6806518,2023,12,31,22,0,10,10,0,85.96,41.68,-87.71,POINT (-87.71 41.68)


In [15]:
print(radiation_gdf.crs)

EPSG:4326


To have a single geopandas data frame with a single triplet per coordinate which will be the year average.

In [16]:
# not removing zeros because those were measurements, not nan
summary = radiation_gdf.groupby(['lat', 'lon']).agg({
    "GHI": ["mean", "sum"],
    "DNI": ["mean", "sum"],
    "DHI": ["mean", "sum"]
}) 

# Flatten the MultiIndex columns
summary.columns = ['_'.join(col).strip() for col in summary.columns.values]

# Reset index to make it easier to join back later
summary = summary.reset_index()

summary['geometry'] = summary.apply(lambda row: Point(row['lon'], row['lat']), axis=1)
summary = gpd.GeoDataFrame(summary, geometry='geometry', crs='EPSG:4326') 
summary = summary.to_crs('EPSG:3435')  # Chicago's local CRS
summary

Unnamed: 0,lat,lon,GHI_mean,GHI_sum,DNI_mean,DNI_sum,DHI_mean,DHI_sum,geometry
0,41.64,-87.97,165.969064,1453889,182.585274,1599447,62.619178,548544,POINT (1083562.578 1811618.167)
1,41.64,-87.95,165.943265,1453663,182.646918,1599987,62.639384,548721,POINT (1089029.391 1811641.853)
2,41.64,-87.93,165.955936,1453774,182.663584,1600133,62.632192,548658,POINT (1094496.205 1811666.806)
3,41.64,-87.91,166.399315,1457658,185.094749,1621430,61.825799,541594,POINT (1099963.021 1811693.028)
4,41.64,-87.89,166.332648,1457074,185.556164,1625472,61.631849,539895,POINT (1105429.838 1811720.517)
...,...,...,...,...,...,...,...,...,...
772,42.04,-87.33,158.380251,1387411,160.960616,1410015,64.334817,563573,POINT (1256798.315 1958764.788)
773,42.04,-87.31,158.618721,1389500,160.726256,1407962,64.344977,563662,POINT (1262231.265 1958829.138)
774,42.04,-87.29,158.655137,1389819,160.997489,1410338,64.251484,562843,POINT (1267664.219 1958894.757)
775,42.04,-87.27,158.976712,1392636,161.735845,1416806,64.047603,561057,POINT (1273097.176 1958961.648)


### Load building footprints

In [17]:
buildings_gdf = gpd.read_file('chicago/data/footprints/Buildings.shp')

In [18]:
print(buildings_gdf.crs)

EPSG:3435


### Spatially join radiation points to building footprints

In [19]:
joined_gdf = buildings_gdf.sjoin_nearest(summary, how='left', distance_col='distance_meters')

In [20]:
#check if the join worked properly and see how far off are the interolations
(joined_gdf.sort_values(by='distance_meters', ascending=True)).head(25)

Unnamed: 0,BLDG_ID,CDB_CITY_I,BLDG_STATU,F_ADD1,T_ADD1,PRE_DIR1,ST_NAME1,ST_TYPE1,UNIT_NAME,NON_STANDA,...,index_right,lat,lon,GHI_mean,GHI_sum,DNI_mean,DNI_sum,DHI_mean,DHI_sum,distance_meters
207146,656981,,ACTIVE,0,0,,,,,,...,198,41.74,-87.71,165.111872,1446380,178.850457,1566730,63.994749,560594,0.0
705662,404629,,ACTIVE,2826,2826,S,HOMAN,AVE,,,...,383,41.84,-87.71,165.174543,1446929,179.468721,1572146,63.544521,556650,0.0
42488,376069,,ACTIVE,1529,1529,S,HOMAN,AVE,,,...,420,41.86,-87.71,164.577626,1441700,175.6629,1538807,64.173288,562158,0.0
199613,656689,,ACTIVE,2701,2701,W,84TH,PL,,,...,199,41.74,-87.69,165.025571,1445624,178.816895,1566436,63.942466,560136,0.0
164846,329522,,ACTIVE,1700,1700,W,AUGUSTA,BLVD,,,...,496,41.9,-87.67,163.485502,1432133,174.111416,1525216,63.988584,560540,0.0
115428,133388,,ACTIVE,0,0,,,,,,...,605,41.96,-87.71,164.298402,1439254,178.944064,1567550,63.023402,552085,0.0
583129,531755,,ACTIVE,6230,6230,S,TALMAN,AVE,,,...,273,41.78,-87.69,165.339384,1448373,180.045434,1577198,63.111416,552856,0.0
82416,329585,,ACTIVE,3300,3322,W,GRAND,AVE,,,...,494,41.9,-87.71,165.57226,1450413,180.749886,1583369,63.041895,552247,0.0
552787,532113,,ACTIVE,3450,3450,W,62ND,PL,,,...,272,41.78,-87.71,165.339498,1448374,180.04395,1577185,63.558219,556770,0.0
296116,790732,,ACTIVE,11755,11755,S,VINCENNES,AVE,,,...,89,41.68,-87.67,164.106393,1437572,176.418037,1545422,63.343721,554891,0.0


## Visualize solar potential

Basic plot geopandas style or adding tiles with contextily.
This is only a visualisation test.

In [23]:
#simplify geodataframe to avoid slowness
vis_gdf = joined_gdf.copy()
vis_gdf = vis_gdf[['distance_meters', 'GHI_sum', 'DNI_sum', 'DHI_sum', 'GHI_mean', 'DNI_mean', 'DHI_mean', 'lon', 'lat', 'BLDG_ID', 'SHAPE_AREA', 'geometry']]
vis_gdf.head()

Unnamed: 0,distance_meters,GHI_sum,DNI_sum,DHI_sum,GHI_mean,DNI_mean,DHI_mean,lon,lat,BLDG_ID,SHAPE_AREA,geometry
0,3202.249694,1415272,1499454,554489,161.560731,171.170548,63.297831,-87.57,41.74,634694,1089.25,"POLYGON ((1195174.643 1850643.039, 1195174.143..."
1,1458.230092,1448374,1577185,556770,165.339498,180.04395,63.558219,-87.71,41.78,520117,752.75,"POLYGON ((1153540.143 1864316.539, 1153539.143..."
2,2602.645542,1415468,1516007,547183,161.583105,173.06016,62.463813,-87.57,41.76,580168,6857.875,"POLYGON ((1190036.643 1857036.539, 1190036.143..."
3,2809.896053,1425211,1520248,557098,162.69532,173.544292,63.595662,-87.69,41.96,156735,1164.5,"POLYGON ((1160053.643 1925961.539, 1160049.643..."
4,3072.06899,1431263,1527216,557259,163.386187,174.339726,63.614041,-87.69,41.92,261847,1296.375,"POLYGON ((1157022.143 1916159.539, 1157018.643..."


In [24]:
#save geodataframe
vis_gdf.to_file('chicago/data/solar_summary.geojson', driver='GeoJSON')

In [11]:
# import matplotlib.pyplot as plt
# import contextily as ctx

# # Convert to Web Mercator for web tile compatibility
# vis_gdf.to_crs(epsg=3857)
# fig, ax = plt.subplots(figsize=(12, 12))

# vis_gdf.plot(
#      column='GHI',
#      cmap='YlOrRd',
#      linewidth=0.1,
#      edgecolor='grey',
#      legend=True,
#      ax=ax
#  )

# ax.set_title("Rooftop Solar Potential (GHI) by Building", fontsize=16)
# ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron, alpha=0.6)
# ax.set_axis_off()
# plt.tight_layout()
# plt.show()