# Joining the results of the consumption simulation, production prediction and price forecasting

#### Notebook Purpose:
The purpose of this notebook is to combine the research effort in estimating production, consumption and day-ahead prices. The merging of the components allows us to estimate the actual energy consumption that can be covered by the solar production and the resulting financial savings per postal code.

These estimations are then visualized via Looker Studio. Lastly, the impact of these insights and realistic future scenarios like increased electricity prices are analyzed.



# Finalizing Production Prediction

In [1]:
# authenticate user

from google.colab import auth

auth.authenticate_user()

In [2]:
# set parameters to use BigQuery Functionality

PROJECT_ID = 'solarinsight-383513' #@param {type: "string"}
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


In [3]:
import google.cloud.bigquery as bq

client = bq.Client(project=PROJECT_ID)

In [None]:
!pip install geopandas

## Map Postal Code Polygons to Sunroof Data and Weather Stations

#### Load Polygon Data

In [None]:
%%bigquery plz5_polygons --project solarinsight-383513
SELECT
plz5, polygon
FROM
  `geo_data.plz5_polygone`


Query is running:   0%|          |

Downloading:   0%|          |

#### Load Sunroof Data

In [None]:
%%bigquery sunroof_agg --project solarinsight-383513
SELECT
zip_code, pv_capacity_total_kwp, installed_pv_total_kwp
FROM
  `solar_data.sunroof_aggr`


Query is running:   0%|          |

Downloading:   0%|          |

#### Merge Sunroof and Polygon Data

In [None]:
import pandas as pd

sunroof_polygon = pd.merge(sunroof_agg, plz5_polygons, left_on='zip_code', right_on='plz5')
sunroof_polygon = sunroof_polygon[['plz5', 'polygon', 'pv_capacity_total_kwp', 'installed_pv_total_kwp']]
sunroof_polygon

Unnamed: 0,plz5,polygon,pv_capacity_total_kwp,installed_pv_total_kwp
0,60315,"POLYGON((8.6725366 50.1126399, 8.6736336 50.11...",0.000000,0.000000
1,82475,"POLYGON((10.984616 47.4209797, 10.9804486 47.4...",0.000000,0.000000
2,33333,"POLYGON((8.4349539 51.8831374, 8.4364239 51.88...",4.482612,0.976111
3,60310,"POLYGON((8.6720136 50.1107568, 8.6726428 50.11...",0.000000,0.000000
4,60306,"POLYGON((8.6695498 50.1158427, 8.6707175 50.11...",0.000000,0.000000
...,...,...,...,...
8165,27432,"POLYGON((8.895216 53.5198453, 8.8986475 53.519...",70664.009074,27799.645358
8166,29664,"POLYGON((9.453149 52.9171371, 9.450937 52.9167...",64904.743636,14756.129912
8167,39288,"POLYGON((11.7249361 52.2504985, 11.7263513 52....",132924.199263,51028.811353
8168,33102,"POLYGON((8.7196088 51.7111361, 8.7225706 51.71...",49531.806738,18979.043901


#### Load Weather Station Data

In [None]:
%%bigquery station_ids --project solarinsight-383513
SELECT
DISTINCT STATIONS_ID, Latitude, Longitude
FROM
  `electricity_production_data.predicted_power_output_minimal`

Query is running:   0%|          |

Downloading:   0%|          |

#### Map to each Postal Code Polygon the closest weather station

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely import wkt

sunroof_polygon['polygon'] = sunroof_polygon['polygon'].apply(wkt.loads)

# Convert the regular DataFrames to GeoDataFrames
postal_codes_geo = gpd.GeoDataFrame(sunroof_polygon, geometry='polygon')
weather_stations_geo = gpd.GeoDataFrame(station_ids, geometry=gpd.points_from_xy(station_ids['Longitude'], station_ids['Latitude']))

# Perform spatial join to map weather stations to postal code polygons
sunroof_weather_stations = gpd.sjoin_nearest(postal_codes_geo, weather_stations_geo)

sunroof_weather_stations = sunroof_weather_stations[['plz5', 'STATIONS_ID', 'pv_capacity_total_kwp', 'installed_pv_total_kwp']]


#### Calculate number of potential cells

In [None]:
# 1 kwp = 3 solar cells as common rule of thumb

import numpy as np

sunroof_weather_stations['potential_capacity'] = sunroof_weather_stations['pv_capacity_total_kwp'] - sunroof_weather_stations['installed_pv_total_kwp']
sunroof_weather_stations['potential_cells'] = np.floor(sunroof_weather_stations['potential_capacity']*3)
sunroof_weather_stations['existing_cells'] = np.floor(sunroof_weather_stations['installed_pv_total_kwp']*3)
sunroof_weather_stations = sunroof_weather_stations[['plz5', 'STATIONS_ID', 'potential_cells', 'existing_cells']]

In [None]:
sunroof_weather_stations

Unnamed: 0,plz5,STATIONS_ID,potential_cells,existing_cells
0,60315,7341,0.0,0.0
3,60310,7341,0.0,0.0
4,60306,7341,0.0,0.0
6,60312,7341,0.0,0.0
7,60308,7341,0.0,0.0
...,...,...,...,...
4436,15320,3158,25641.0,5907.0
4978,15324,3158,11184.0,8016.0
5455,15326,3158,34119.0,6030.0
6899,15306,3158,57766.0,26532.0


### Summarize the production for each Postal Code area

#### Load Production Prediction Data

In [None]:
%%bigquery power_prediction --project solarinsight-383513
SELECT
MESS_DATUM, STATIONS_ID, Power_Predicted
FROM
  `electricity_production_data.predicted_power_output_minimal`

Query is running:   0%|          |

Downloading:   0%|          |

#### Transform the unit to kwh

In [None]:
power_prediction['Power_Predicted'] = power_prediction['Power_Predicted']/1000
power_prediction

Unnamed: 0,MESS_DATUM,STATIONS_ID,Power_Predicted
0,2024-03-01 15:00:00+00:00,2564,0.015044
1,2024-03-02 15:00:00+00:00,2564,0.015044
2,2024-03-03 15:00:00+00:00,2564,0.015044
3,2024-03-04 15:00:00+00:00,2564,0.015044
4,2024-03-05 15:00:00+00:00,2564,0.015044
...,...,...,...
475365,2024-03-27 14:00:00+00:00,2559,0.016873
475366,2024-03-28 14:00:00+00:00,2559,0.016873
475367,2024-03-29 14:00:00+00:00,2559,0.016873
475368,2024-03-30 14:00:00+00:00,2559,0.016873


#### Merge Production due to weather with the Postal Code areas

In [None]:
postal_code_production = pd.merge(sunroof_weather_stations, power_prediction, on='STATIONS_ID')

In [None]:
postal_code_production

Unnamed: 0,plz5,STATIONS_ID,potential_cells,existing_cells,MESS_DATUM,Power_Predicted
0,60315,7341,0.0,0.0,2024-03-01 15:00:00+00:00,0.015044
1,60315,7341,0.0,0.0,2024-03-02 15:00:00+00:00,0.015044
2,60315,7341,0.0,0.0,2024-03-03 15:00:00+00:00,0.015044
3,60315,7341,0.0,0.0,2024-03-04 15:00:00+00:00,0.015044
4,60315,7341,0.0,0.0,2024-03-05 15:00:00+00:00,0.015044
...,...,...,...,...,...,...
20440864,15232,3158,33937.0,7028.0,2024-03-27 14:00:00+00:00,0.016873
20440865,15232,3158,33937.0,7028.0,2024-03-28 14:00:00+00:00,0.016873
20440866,15232,3158,33937.0,7028.0,2024-03-29 14:00:00+00:00,0.016873
20440867,15232,3158,33937.0,7028.0,2024-03-30 14:00:00+00:00,0.016873


#### Calculate the total Production based on number of cells and predicted power output

In [None]:
postal_code_production['potential_production'] = postal_code_production['potential_cells']*postal_code_production['Power_Predicted']
postal_code_production['existing_production'] = postal_code_production['existing_cells']*postal_code_production['Power_Predicted']
postal_code_production= postal_code_production[['plz5', 'MESS_DATUM', 'potential_production', 'existing_production']]
postal_code_production

Unnamed: 0,plz5,MESS_DATUM,potential_production,existing_production
0,60315,2024-03-01 15:00:00+00:00,0.000000,0.000000
1,60315,2024-03-02 15:00:00+00:00,0.000000,0.000000
2,60315,2024-03-03 15:00:00+00:00,0.000000,0.000000
3,60315,2024-03-04 15:00:00+00:00,0.000000,0.000000
4,60315,2024-03-05 15:00:00+00:00,0.000000,0.000000
...,...,...,...,...
20440864,15232,2024-03-27 14:00:00+00:00,572.611309,118.581851
20440865,15232,2024-03-28 14:00:00+00:00,572.611309,118.581851
20440866,15232,2024-03-29 14:00:00+00:00,572.611309,118.581851
20440867,15232,2024-03-30 14:00:00+00:00,572.611309,118.581851


#### export Final Production per Postal Code to BigQuery

In [None]:
  import pandas_gbq

  table_name = 'geo_data.postal_code_production'

  pandas_gbq.to_gbq(postal_code_production, table_name, project_id=PROJECT_ID, if_exists='replace')

100%|██████████| 1/1 [00:00<00:00, 4854.52it/s]


# Combining Production Consumption and Prices to estimate savings per Postal Code per hour

#### Load Production Data

In [None]:
%%bigquery production --project solarinsight-383513
SELECT
plz5, MESS_DATUM, potential_production, existing_production
FROM
  `geo_data.postal_code_production`
WHERE MESS_DATUM >= TIMESTAMP('2023-07-01 00:00:00+00:00')
  AND MESS_DATUM <= TIMESTAMP('2024-06-30 23:59:59+00:00')

Query is running:   0%|          |

Downloading:   0%|          |

#### Load Consumption Data

In [None]:
%%bigquery consumption --project solarinsight-383513
SELECT
  plz5, datetime, total_consumption
FROM
  `geo_data.postal_code_consumption`
WHERE
  datetime >= TIMESTAMP('2023-07-01 00:00:00+00:00')
  AND datetime <= TIMESTAMP('2024-06-30 23:59:59+00:00')
  AND EXTRACT(HOUR FROM datetime) >= 10
  AND EXTRACT(HOUR FROM datetime) <= 15

Query is running:   0%|          |

Downloading:   0%|          |

#### Load Price Data

In [None]:
%%bigquery prices --project solarinsight-383513
SELECT
DatumMEZ, DayAheadAuktion
FROM
  `electricity_data.day_ahead_auktion_forecast_data`
WHERE DatumMEZ >= TIMESTAMP('2023-07-01 00:00:00+00:00')
  AND DatumMEZ <= TIMESTAMP('2024-06-30 23:59:59+00:00')

Query is running:   0%|          |

Downloading:   0%|          |

#### Preprocessing of loaded data

In [None]:
feed_in_price = 0.083 # € / kwh - mean of past years
consumption['plz5'] = consumption['plz5'].astype(int)
consumption.loc[consumption['total_consumption'] <= 0, 'total_consumption'] = 0
production['potential_production'] = production['potential_production'] * 4 # due to the prediction being a 15-minute interval, interpolating to one hour
production['existing_production'] = production['existing_production'] * 4

#### Join Data Sets on Datetime and Postal Code

In [None]:
import pandas as pd

merged_df = pd.merge(production, consumption, left_on=['MESS_DATUM', 'plz5'], right_on=['datetime', 'plz5'], how = 'inner')
merged_df = pd.merge(merged_df, prices, left_on='MESS_DATUM', right_on='DatumMEZ', how = 'left')
merged_df

Unnamed: 0,plz5,MESS_DATUM,potential_production,existing_production,datetime,total_consumption,DatumMEZ,DayAheadAuktion
0,69221,2024-03-19 10:00:00+00:00,1353.018396,1000.342090,2024-03-19 10:00:00+00:00,1756.495027,2024-03-19 10:00:00+00:00,34.797525
1,69221,2023-12-12 14:00:00+00:00,1196.654720,884.735999,2023-12-12 14:00:00+00:00,2028.704186,2023-12-12 14:00:00+00:00,144.146830
2,69221,2023-12-30 12:00:00+00:00,1248.853826,923.328943,2023-12-30 12:00:00+00:00,2306.905011,2023-12-30 12:00:00+00:00,-73.715854
3,69221,2023-08-26 12:00:00+00:00,1640.460491,1212.859840,2023-08-26 12:00:00+00:00,1336.458939,2023-08-26 12:00:00+00:00,96.217455
4,68753,2024-02-06 13:00:00+00:00,5164.549662,1571.303073,2024-02-06 13:00:00+00:00,3053.023471,2024-02-06 13:00:00+00:00,2.591432
...,...,...,...,...,...,...,...,...
17505991,67061,2024-06-16 11:00:00+00:00,6434.748430,2654.978747,2024-06-16 11:00:00+00:00,2344.208283,2024-06-16 11:00:00+00:00,-69.863244
17505992,67061,2024-05-15 10:00:00+00:00,6485.300769,2675.836654,2024-05-15 10:00:00+00:00,2691.700358,2024-05-15 10:00:00+00:00,34.247383
17505993,67061,2024-03-23 12:00:00+00:00,7156.377780,2952.723190,2024-03-23 12:00:00+00:00,3346.309350,2024-03-23 12:00:00+00:00,-32.334101
17505994,67061,2024-01-12 10:00:00+00:00,4287.368930,1768.969450,2024-01-12 10:00:00+00:00,3684.572751,2024-01-12 10:00:00+00:00,38.938377


#### Calculate Financial and Energy Savings

In [None]:
merged_df = merged_df[['plz5', 'datetime', 'potential_production', 'existing_production', 'total_consumption', 'DayAheadAuktion']]

# subtract the already installed pv production to estimate only the potential that can be covered via additional pv installation
merged_df['total_consumption'] = merged_df['total_consumption'] - merged_df['existing_production']
merged_df.loc[merged_df['total_consumption'] <= 0, 'total_consumption'] = 0 # in case of negative values

# usage of own produced energy is maximum of either the total consumption or the produced energy
merged_df['saved_energy'] = merged_df[['potential_production', 'total_consumption']].apply(min, axis=1)

# Price in €/Mwh to €/Kwh
merged_df['DayAheadAuktion'] = merged_df['DayAheadAuktion']/1000

# Calculate Energy that was overproduced and fed into the grid
merged_df['overproduction'] = merged_df['potential_production'] - merged_df['total_consumption']
merged_df.loc[merged_df['overproduction'] <= 0, 'overproduction'] = 0  # in case of negative values

# Calculate the resulting savings based on the prices
merged_df['savings_production'] = merged_df['saved_energy'] * merged_df['DayAheadAuktion']
merged_df['savings_feed_in'] = merged_df['overproduction'] * feed_in_price


In [None]:
merged_df

Unnamed: 0,plz5,datetime,potential_production,existing_production,total_consumption,DayAheadAuktion,saved_energy,overproduction,savings_production,savings_feed_in
0,69221,2024-03-19 10:00:00+00:00,1353.018396,1000.342090,16564.608184,0.034798,1353.018396,0.0,47.081691,0.0
1,69221,2023-12-12 14:00:00+00:00,1196.654720,884.735999,19402.305862,0.144147,1196.654720,0.0,172.493984,0.0
2,69221,2023-12-30 12:00:00+00:00,1248.853826,923.328943,22145.721170,-0.073716,1248.853826,0.0,-92.060326,0.0
3,69221,2023-08-26 12:00:00+00:00,1640.460491,1212.859840,12151.729545,0.096217,1640.460491,0.0,157.840933,0.0
4,68753,2024-02-06 13:00:00+00:00,5164.549662,1571.303073,28958.931640,0.002591,5164.549662,0.0,13.383580,0.0
...,...,...,...,...,...,...,...,...,...,...
17505991,67061,2024-06-16 11:00:00+00:00,6434.748430,2654.978747,20787.104079,-0.069863,6434.748430,0.0,-449.552400,0.0
17505992,67061,2024-05-15 10:00:00+00:00,6485.300769,2675.836654,24241.166921,0.034247,6485.300769,0.0,222.104580,0.0
17505993,67061,2024-03-23 12:00:00+00:00,7156.377780,2952.723190,30510.370308,-0.032334,7156.377780,0.0,-231.395042,0.0
17505994,67061,2024-01-12 10:00:00+00:00,4287.368930,1768.969450,35076.758064,0.038938,4287.368930,0.0,166.943187,0.0


#### Export Hourly Savings per Postal Code to BigQuery

In [None]:
  import pandas_gbq

  table_name = 'results.hourly_savings'

  pandas_gbq.to_gbq(merged_df, table_name, project_id=PROJECT_ID, if_exists='replace')

100%|██████████| 1/1 [00:00<00:00, 5599.87it/s]


# Postal Code Aggregation for Map Visualization

#### Load Hourly Savings

In [None]:
%%bigquery hourly_savings --project solarinsight-383513
SELECT
*
FROM
  `results.hourly_savings`

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
hourly_savings = hourly_savings[['plz5', 'potential_production', 'total_consumption', 'overproduction', 'savings_production', 'savings_feed_in']]
hourly_savings

Unnamed: 0,plz5,potential_production,total_consumption,overproduction,savings_production,savings_feed_in
0,65599,2265.357724,8000.520009,0.0,153.386747,0.0
1,2979,2600.291707,5518.398747,0.0,-80.311888,0.0
2,91361,270.397491,2597.869390,0.0,3.174630,0.0
3,14727,2463.386056,14574.844646,0.0,28.921637,0.0
4,65207,4304.129548,32408.964655,0.0,21.791001,0.0
...,...,...,...,...,...,...
17505991,38372,1011.601748,2541.764596,0.0,86.399338,0.0
17505992,77889,445.111178,587.509477,0.0,42.637662,0.0
17505993,84574,322.834081,597.986481,0.0,13.062199,0.0
17505994,50668,3044.337930,18527.282280,0.0,282.346494,0.0


#### Aggregate on Postal Code Area

In [None]:
# summarize total savings from feed-in and production

aggregation = hourly_savings.groupby(by = ['plz5']).sum().reset_index()
aggregation['savings_total'] = aggregation['savings_production'] + aggregation['savings_feed_in']
aggregation

Unnamed: 0,plz5,potential_production,total_consumption,overproduction,savings_production,savings_feed_in,savings_total
0,1067,7.331353e+06,3.492069e+07,0.0,288359.685030,0.0,288359.685030
1,1069,9.062622e+06,8.195197e+07,0.0,356454.659759,0.0,356454.659759
2,1097,5.672088e+06,5.140970e+07,0.0,223096.832808,0.0,223096.832808
3,1099,1.376724e+07,8.577160e+07,0.0,541498.722460,0.0,541498.722460
4,1108,3.524689e+06,1.526659e+07,0.0,138634.463957,0.0,138634.463957
...,...,...,...,...,...,...,...
8155,99988,3.729797e+06,1.246834e+07,0.0,146701.878616,0.0,146701.878616
8156,99991,5.780490e+06,1.429907e+07,0.0,227360.520889,0.0,227360.520889
8157,99994,6.109110e+06,1.857757e+07,0.0,240285.955907,0.0,240285.955907
8158,99996,8.557800e+05,4.783000e+06,0.0,33659.879357,0.0,33659.879357


#### Load Polygon Data

In [None]:
%%bigquery plz5_polygons --project solarinsight-383513
SELECT
plz5, polygon
FROM
  `geo_data.plz5_polygone`

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
%%bigquery plz2_polygons --project solarinsight-383513
SELECT
plz2, polygon
FROM
  `geo_data.plz2_polygone`

Query is running:   0%|          |

Downloading:   0%|          |

#### Merge Data and group on PLZ2 and PLZ5 Polygons



In [None]:
import pandas as pd

# retrieves PLZ2 out of the PLZ2 Postal Code

def extract_postal_code(postal_code):
    if len(postal_code) == 5:
        return int(postal_code[:2])
    elif len(postal_code) == 4:
        return int(postal_code[0])
    else:
        return None

aggregation_plz5 = pd.merge(aggregation, plz5_polygons, on = 'plz5')
aggregation_plz5['plz2'] = aggregation_plz5['plz5'].apply(lambda x: extract_postal_code(str(x)))
aggregation_plz5 = pd.merge(aggregation_plz5, plz2_polygons, on = 'plz2')
aggregation_plz5

Unnamed: 0,plz5,potential_production,total_consumption,overproduction,savings_production,savings_feed_in,savings_total,polygon_x,plz2,polygon_y
0,1067,7.331353e+06,3.492069e+07,0.0,288359.685030,0.0,288359.685030,"POLYGON((13.6868862 51.063946, 13.6869811 51.0...",1,"POLYGON((13.6311502 51.5073113, 13.6304967 51...."
1,1069,9.062622e+06,8.195197e+07,0.0,356454.659759,0.0,356454.659759,"MULTIPOLYGON(((13.730314 51.0217019, 13.726345...",1,"POLYGON((13.6311502 51.5073113, 13.6304967 51...."
2,1097,5.672088e+06,5.140970e+07,0.0,223096.832808,0.0,223096.832808,"POLYGON((13.7254824 51.0686032, 13.7300049 51....",1,"POLYGON((13.6311502 51.5073113, 13.6304967 51...."
3,1099,1.376724e+07,8.577160e+07,0.0,541498.722460,0.0,541498.722460,"POLYGON((13.7421761 51.0897907, 13.7419937 51....",1,"POLYGON((13.6311502 51.5073113, 13.6304967 51...."
4,1108,3.524689e+06,1.526659e+07,0.0,138634.463957,0.0,138634.463957,"POLYGON((13.7654335 51.1749103, 13.7628235 51....",1,"POLYGON((13.6311502 51.5073113, 13.6304967 51...."
...,...,...,...,...,...,...,...,...,...,...
8155,99988,3.729797e+06,1.246834e+07,0.0,146701.878616,0.0,146701.878616,"POLYGON((10.2360109 51.1838452, 10.2333046 51....",99,"POLYGON((10.0320274 51.0103861, 10.0305206 51...."
8156,99991,5.780490e+06,1.429907e+07,0.0,227360.520889,0.0,227360.520889,"POLYGON((10.384644 51.0829657, 10.3836686 51.0...",99,"POLYGON((10.0320274 51.0103861, 10.0305206 51...."
8157,99994,6.109110e+06,1.857757e+07,0.0,240285.955907,0.0,240285.955907,"POLYGON((10.5513951 51.2906513, 10.5462779 51....",99,"POLYGON((10.0320274 51.0103861, 10.0305206 51...."
8158,99996,8.557800e+05,4.783000e+06,0.0,33659.879357,0.0,33659.879357,"POLYGON((10.4956798 51.3034737, 10.5006552 51....",99,"POLYGON((10.0320274 51.0103861, 10.0305206 51...."


In [None]:
aggregation_plz2 = aggregation_plz5[['plz2', 'polygon_y', 'potential_production', 'savings_total']].groupby(by = ['plz2', 'polygon_y']).sum().reset_index()
aggregation_plz2

Unnamed: 0,plz2,polygon_y,potential_production,savings_total
0,1,"POLYGON((13.6311502 51.5073113, 13.6304967 51....",8.955213e+08,3.527712e+07
1,2,"POLYGON((14.1349373 51.541292, 14.1314896 51.5...",3.425821e+08,1.351857e+07
2,3,"POLYGON((13.5485672 51.7469385, 13.5468103 51....",2.239359e+08,8.819862e+06
3,4,"MULTIPOLYGON(((12.1898778 51.3622181, 12.19067...",7.690589e+08,3.028562e+07
4,6,"MULTIPOLYGON(((12.1842981 51.3872599, 12.18444...",1.019384e+09,4.009282e+07
...,...,...,...,...
90,95,"POLYGON((11.2931944 49.8866155, 11.2938084 49....",3.739539e+08,1.475072e+07
91,96,"POLYGON((10.4356524 49.7502091, 10.4371049 49....",4.002028e+08,1.581309e+07
92,97,"POLYGON((9.5013397 50.2431399, 9.498539 50.240...",7.799976e+08,3.069235e+07
93,98,"POLYGON((10.1508105 50.5522779, 10.1509346 50....",2.234136e+08,8.790535e+06


#### Export Savings grouped on PLZ2 and PLZ5 level to BigQuery

In [None]:
import pandas_gbq

table_name = 'results.savings_plz5'

pandas_gbq.to_gbq(aggregation_plz5[['plz5', 'polygon_x', 'savings_total']], table_name, project_id=PROJECT_ID, if_exists='replace')

100%|██████████| 1/1 [00:00<00:00, 1874.13it/s]


In [None]:
import pandas_gbq

table_name = 'results.savings_plz2'

pandas_gbq.to_gbq(aggregation_plz2[['plz2', 'polygon_y', 'savings_total', 'potential_production']], table_name, project_id=PROJECT_ID, if_exists='replace')

100%|██████████| 1/1 [00:00<00:00, 2770.35it/s]


#### Transform BigQuery Column of Polygons to Geography for Visualization in Looker Studio

In [None]:
from google.cloud import bigquery

table_name = f'{PROJECT_ID}.results.savings_plz5'

# Create a BigQuery client
client = bigquery.Client(project=PROJECT_ID)

# Define your query
query = '''
SELECT
  plz5, ST_GEOGFROMTEXT(polygon_x, make_valid => TRUE) AS polygon, savings_total
FROM
  `results.savings_plz5`
'''

# Define the configuration for the query job
job_config = bigquery.QueryJobConfig(destination=table_name,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE)

# Start the query job
job = client.query(query, job_config=job_config)

job.result()

<google.cloud.bigquery.table.RowIterator at 0x7f3c49638e50>

In [None]:
from google.cloud import bigquery

table_name = f'{PROJECT_ID}.results.savings_plz2'

# Create a BigQuery client
client = bigquery.Client(project=PROJECT_ID)

# Define your query
query = '''
SELECT
  plz2, ST_GEOGFROMTEXT(polygon_y, make_valid => TRUE) AS polygon, savings_total, potential_production
FROM
  `results.savings_plz2`
'''

# Define the configuration for the query job
job_config = bigquery.QueryJobConfig(destination=table_name,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE)

# Start the query job
job = client.query(query, job_config=job_config)

job.result()

<google.cloud.bigquery.table.RowIterator at 0x7f3c4530c430>

# Insights generated from the Data

## Impact of selecting high potential regions over average regions in financial comparison

#### Load PLZ2 Savings

In [None]:
%%bigquery plz2_savings --project solarinsight-383513
SELECT
plz2, savings_total, potential_production
FROM
  `results.savings_plz2`


Query is running:   0%|          |

Downloading:   0%|          |

#### calculate difference in savings of third quartile and mean Postal Codes



In [None]:
# savings of top 25% regions compared to the mean of all regions

third_quartile = plz2_savings['savings_total'].quantile(0.75)
mean = plz2_savings['savings_total'].mean()

average_improvement_perc = (third_quartile - mean) / mean
average_improvement_perc

0.26558643438535223

#### calculate difference in energy production of third quartile and mean Postal Codes



In [None]:
third_quartile = plz2_savings['potential_production'].quantile(0.75)
mean = plz2_savings['potential_production'].mean()

average_improvement = (third_quartile - mean)
average_improvement


158700325.86391222

#### Resulting increased CO2 savings

In [None]:
average_improvement_co2 = average_improvement * 0.627
average_improvement_co2

# Emissionsbilanz Photovoltaik: 627 g [CO2-äq] pro Kilowattstunde Solarstrom
# Quelle: Umweltbundesamt, November 2019 www.umweltbundesamt.de/publikationen/emissionsbilanz-erneuerbarer-energietraeger

99505104.31667297

## Future Scenario: Increase in Electricity Price by 20% - impact on break-even point of solar panels

#### Load Sunroof Data

In [None]:
%%bigquery sunroof_agg --project solarinsight-383513
SELECT
zip_code, pv_capacity_total_kwp, installed_pv_total_kwp
FROM
  `solar_data.sunroof_aggr`


Query is running:   0%|          |

Downloading:   0%|          |

#### Calculate number of potential cells

In [None]:
sunroof_agg['potential_cells'] = (sunroof_agg['pv_capacity_total_kwp'] - sunroof_agg['installed_pv_total_kwp']) * 3
potential_cells = sunroof_agg[['zip_code', 'potential_cells']].copy()
potential_cells

Unnamed: 0,zip_code,potential_cells
0,60315,0.000000
1,82475,0.000000
2,33333,10.519503
3,60310,0.000000
4,60306,0.000000
...,...,...
8165,27432,128593.091149
8166,29664,150445.841172
8167,39288,245686.163731
8168,33102,91658.288510


#### Load Hourly Savings

In [None]:
%%bigquery hourly_savings --project solarinsight-383513
SELECT
*
FROM
  `results.hourly_savings`

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
hourly_savings

Unnamed: 0,plz5,datetime,potential_production,existing_production,total_consumption,DayAheadAuktion,saved_energy,overproduction,savings_production,savings_feed_in
0,22941,2024-06-02 10:00:00+00:00,5218.399536,1338.967758,23188.227302,-0.030886,5218.399536,0.0,-161.174039,0.0
1,97631,2024-06-02 10:00:00+00:00,1972.819393,1566.324460,5187.548895,-0.030886,1972.819393,0.0,-60.931952,0.0
2,77773,2024-02-03 10:00:00+00:00,508.130719,77.177759,1960.178267,0.005063,508.130719,0.0,2.572571,0.0
3,88481,2024-02-03 10:00:00+00:00,1392.688192,293.944517,2136.669523,0.005063,1392.688192,0.0,7.050919,0.0
4,53913,2023-07-10 10:00:00+00:00,5388.253319,1507.843596,19127.316631,0.042590,5388.253319,0.0,229.485188,0.0
...,...,...,...,...,...,...,...,...,...,...
17505991,88486,2023-10-12 15:00:00+00:00,426.448511,376.720561,1839.009645,0.107318,426.448511,0.0,45.765693,0.0
17505992,97259,2023-10-12 15:00:00+00:00,426.814158,133.199866,1746.498208,0.107318,426.814158,0.0,45.804933,0.0
17505993,14548,2024-01-08 15:00:00+00:00,2585.650963,1271.126179,14509.812806,0.074856,2585.650963,0.0,193.550703,0.0
17505994,7743,2024-04-05 15:00:00+00:00,9633.122737,1622.886986,46644.457795,0.053628,9633.122737,0.0,516.602493,0.0


#### Modify Price and calculate new Savings

In [None]:
hourly_savings['price_adjustment'] = 0.2 * hourly_savings['DayAheadAuktion'].abs()
hourly_savings['new_price'] = hourly_savings['DayAheadAuktion'] + hourly_savings['price_adjustment']
hourly_savings['new_savings_production'] = hourly_savings['saved_energy'] * hourly_savings['new_price']
hourly_savings['old_savings_total'] = hourly_savings['savings_production'] + hourly_savings['savings_feed_in']
hourly_savings['new_savings_total'] = hourly_savings['new_savings_production'] + hourly_savings['savings_feed_in']

#### Calculate Average savings per Cell

In [None]:
import pandas as pd

savings_per_cell = hourly_savings[['plz5', 'old_savings_total', 'new_savings_total']].groupby(by = ['plz5']).sum().reset_index()
savings_per_cell = pd.merge(savings_per_cell, potential_cells, left_on='plz5', right_on='zip_code')
savings_per_cell['old_savings_per_cell'] = savings_per_cell['old_savings_total'] / savings_per_cell['potential_cells']
savings_per_cell['new_savings_per_cell'] = savings_per_cell['new_savings_total'] / savings_per_cell['potential_cells']

In [None]:
savings_per_cell

Unnamed: 0,plz5,old_savings_total,new_savings_total,zip_code,potential_cells,old_savings_per_cell,new_savings_per_cell
0,1067,288359.685030,364946.955317,1067,55796.357980,51.680736,65.406949
1,1069,356454.659759,451127.704533,1069,68972.496398,51.680696,65.406898
2,1097,223096.832808,282350.529914,1097,43168.253447,51.680764,65.406985
3,1099,541498.722460,685318.788752,1099,104777.747328,51.680699,65.406902
4,1108,138634.463957,175455.266979,1108,26825.481386,51.680140,65.406195
...,...,...,...,...,...,...,...
8155,99988,146701.878616,185665.357259,99988,28386.576129,51.680019,65.406041
8156,99991,227360.520889,287746.637846,99991,43993.471203,51.680514,65.406668
8157,99994,240285.955907,304105.020799,99994,46494.897015,51.680071,65.406107
8158,99996,33659.879357,42599.819341,99996,6513.608207,51.676242,65.401261


#### Calculate difference in break-even points with increased electricity prices

In [None]:
old_mean = savings_per_cell['old_savings_per_cell'].mean()
new_mean = savings_per_cell['new_savings_per_cell'].mean()
reduced_break_even_years = 7000/(18*old_mean) - 7000/(18*new_mean) # 6 kwp cost 7.000€ -> 18 cells
print(reduced_break_even_years)

# https://gruenes.haus/preise-solarmodule-preisentwicklung/ 7000€ for 6 kwp

1.5967095859411167


In [None]:
# break even from before

7000/(18*old_mean)

7.662644578780129

### Export all Results to BigQuery

In [None]:
results_table = pd.DataFrame()
results_table['impact_savings_perc'] = [average_improvement_perc.round(3)]
results_table['impact_energy_abs'] = [average_improvement]
results_table['impact_co2'] = [average_improvement_co2]
results_table['reduced_break_even_years'] = reduced_break_even_years.round(2)
results_table

Unnamed: 0,impact_savings_perc,impact_energy_abs,impact_co2,reduced_break_even_years
0,0.266,158700300.0,99505100.0,1.6


In [None]:
import pandas_gbq

table_name = 'results.experiment_kpis'

pandas_gbq.to_gbq(results_table[['impact_savings_perc', 'impact_energy_abs', 'impact_co2', 'reduced_break_even_years']], table_name, project_id=PROJECT_ID, if_exists='replace')



100%|██████████| 1/1 [00:00<00:00, 6335.81it/s]


## Varying Feed-in price Impact on break-even point and state expenses

#### Load Hourly Savings

In [4]:
%%bigquery hourly_savings --project solarinsight-383513
SELECT
*
FROM
  `results.hourly_savings`

Query is running:   0%|          |

Downloading:   0%|          |

In [7]:
hourly_savings

Unnamed: 0,plz5,datetime,potential_production,existing_production,total_consumption,DayAheadAuktion,saved_energy,overproduction,savings_production,savings_feed_in
0,65599,2023-08-11 10:00:00+00:00,2265.357724,781.588565,8000.520009,0.067710,2265.357724,0.0,153.386747,0.0
1,2979,2024-06-02 10:00:00+00:00,2600.291707,327.299398,5518.398747,-0.030886,2600.291707,0.0,-80.311888,0.0
2,91361,2023-11-07 10:00:00+00:00,270.397491,149.926422,2597.869390,0.011741,270.397491,0.0,3.174630,0.0
3,14727,2023-11-07 10:00:00+00:00,2463.386056,833.493751,14574.844646,0.011741,2463.386056,0.0,28.921637,0.0
4,65207,2024-02-03 10:00:00+00:00,4304.129548,1183.615316,32408.964655,0.005063,4304.129548,0.0,21.791001,0.0
...,...,...,...,...,...,...,...,...,...,...
17505991,26203,2023-07-16 15:00:00+00:00,6839.788583,902.970639,14694.841776,0.069425,6839.788583,0.0,474.850827,0.0
17505992,95703,2023-10-12 15:00:00+00:00,808.549303,217.820957,3202.116352,0.107318,808.549303,0.0,86.772067,0.0
17505993,18059,2024-01-08 15:00:00+00:00,5474.361554,3677.240372,44164.098663,0.074856,5474.361554,0.0,409.787146,0.0
17505994,85072,2024-04-05 15:00:00+00:00,6191.578569,1711.554205,15801.604718,0.053628,6191.578569,0.0,332.040296,0.0


#### Load Sunroof Data

In [13]:
%%bigquery sunroof_agg --project solarinsight-383513
SELECT
zip_code, pv_capacity_total_kwp, installed_pv_total_kwp
FROM
  `solar_data.sunroof_aggr`


Query is running:   0%|          |

Downloading:   0%|          |

#### Calculate number of Potential Cells

In [14]:
sunroof_agg['potential_cells'] = (sunroof_agg['pv_capacity_total_kwp'] - sunroof_agg['installed_pv_total_kwp']) * 3
potential_cells = sunroof_agg[['zip_code', 'potential_cells']].copy()
potential_cells

Unnamed: 0,zip_code,potential_cells
0,60315,0.000000
1,82475,0.000000
2,33333,10.519503
3,60310,0.000000
4,60306,0.000000
...,...,...
8165,27432,128593.091149
8166,29664,150445.841172
8167,39288,245686.163731
8168,33102,91658.288510


#### Calculate new savings total with different feed-in price

In [20]:
# instead of 8.3 cent/kwh experiment with 10, 15, 20 cent

import pandas as pd

varying_feed_in = hourly_savings[['plz5', 'savings_production', 'overproduction', 'savings_feed_in']].groupby(by=['plz5']).sum().reset_index()

varying_feed_in['feed_in_10'] = varying_feed_in['overproduction'] * 0.1
varying_feed_in['feed_in_15'] = varying_feed_in['overproduction'] * 0.15
varying_feed_in['feed_in_20'] = varying_feed_in['overproduction'] * 0.2

varying_feed_in['savings_total_base'] = varying_feed_in['savings_production'] + varying_feed_in['savings_feed_in']

varying_feed_in['savings_total_10'] = varying_feed_in['savings_production'] + varying_feed_in['feed_in_10']
varying_feed_in['savings_total_15'] = varying_feed_in['savings_production'] + varying_feed_in['feed_in_15']
varying_feed_in['savings_total_20'] = varying_feed_in['savings_production'] + varying_feed_in['feed_in_20']

varying_feed_in = pd.merge(varying_feed_in, potential_cells, left_on='plz5', right_on='zip_code')

# calculate per cell

varying_feed_in['savings_per_cell_base'] = varying_feed_in['savings_total_base'] / varying_feed_in['potential_cells']
varying_feed_in['savings_per_cell_10'] = varying_feed_in['savings_total_10'] / varying_feed_in['potential_cells']
varying_feed_in['savings_per_cell_15'] = varying_feed_in['savings_total_15'] / varying_feed_in['potential_cells']
varying_feed_in['savings_per_cell_20'] = varying_feed_in['savings_total_20'] / varying_feed_in['potential_cells']

#### Calculate with new savings the break-even point

In [24]:
def get_reduced_break_even(column):
  old_mean = varying_feed_in['savings_per_cell_base'].mean()
  new_mean = varying_feed_in[column].mean()
  reduced_break_even_years = 7000/(18*old_mean) - 7000/(18*new_mean) #6 kwp 7.000€ -> 18 cells
  return(reduced_break_even_years.round(2))

print(get_reduced_break_even('savings_per_cell_10'))
print(get_reduced_break_even('savings_per_cell_15'))
print(get_reduced_break_even('savings_per_cell_20'))


0.05
0.18
0.3


#### Calculate the additional expeses for the state subsidy

In [28]:
feed_in_price = 0.083 # cent / kwh

additional_expenses_10ct = varying_feed_in['overproduction'].sum() * (0.1-feed_in_price)
additional_expenses_15ct = varying_feed_in['overproduction'].sum() * (0.15-feed_in_price)
additional_expenses_20ct = varying_feed_in['overproduction'].sum() * (0.2-feed_in_price)

print(additional_expenses_10ct)
print(additional_expenses_15ct)
print(additional_expenses_20ct)

9475513.531502256
37344670.977097124
65213828.422692


In [30]:
results_table = pd.DataFrame()
results_table['reduced_break_even_10ct'] = [get_reduced_break_even('savings_per_cell_10')]
results_table['reduced_break_even_15ct'] = [get_reduced_break_even('savings_per_cell_15')]
results_table['reduced_break_even_20ct'] = [get_reduced_break_even('savings_per_cell_20')]
results_table['additional_expenses_10ct'] = [additional_expenses_10ct.round(0)]
results_table['additional_expenses_15ct'] = [additional_expenses_15ct.round(0)]
results_table['additional_expenses_20ct'] = [additional_expenses_20ct.round(0)]
results_table

Unnamed: 0,reduced_break_even_10ct,reduced_break_even_15ct,reduced_break_even_20ct,additional_expenses_10ct,additional_expenses_15ct,additional_expenses_20ct
0,0.05,0.18,0.3,9475514.0,37344671.0,65213828.0


#### Export results of varying feed-in prices to BigQuery

In [31]:
import pandas_gbq

table_name = 'results.varying_feed_in_tariff'

pandas_gbq.to_gbq(results_table[['reduced_break_even_10ct', 'reduced_break_even_15ct', 'reduced_break_even_20ct', 'additional_expenses_10ct', 'additional_expenses_15ct', 'additional_expenses_20ct']], table_name, project_id=PROJECT_ID, if_exists='replace')



100%|██████████| 1/1 [00:00<00:00, 5497.12it/s]
