# Notebook for the development of module "weather -> photovoltaic output"
### If you develop some new function useful also in other situations, please put it in a dedicated .py file, then import it here

In [1]:
%pip install openmeteo_requests
%pip install requests-cache retry-requests
%pip install pandas
%pip install pvlib
%pip install pgmpy
%pip install sklearn

Collecting openmeteo_requests
  Using cached openmeteo_requests-1.4.0-py3-none-any.whl.metadata (9.7 kB)
Collecting openmeteo-sdk>=1.4.0 (from openmeteo_requests)
  Using cached openmeteo_sdk-1.20.0-py3-none-any.whl.metadata (935 bytes)
Collecting requests (from openmeteo_requests)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting flatbuffers==25.2.10 (from openmeteo-sdk>=1.4.0->openmeteo_requests)
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting charset-normalizer<4,>=2 (from requests->openmeteo_requests)
  Downloading charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests->openmeteo_requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests->openmeteo_requests)
  Downloading urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests->openmeteo_reque

svm
lstm long short term memory

In [2]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

#### Set system parameters here

In [3]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 45.9,
	"longitude": 11.9,
	"start_date": "2024-01-01",
	"end_date": "2025-02-28",
	"hourly": ["temperature_2m", "precipitation", "wind_speed_10m", "snowfall", "rain", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "shortwave_radiation", "direct_radiation", "diffuse_radiation", "direct_normal_irradiance", "global_tilted_irradiance", "terrestrial_radiation"],
	"timezone": "Europe/Berlin"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

Coordinates 45.86994552612305°N 11.96202564239502°E
Elevation 308.0 m asl
Timezone b'Europe/Berlin'b'GMT+2'
Timezone difference to GMT+0 7200 s


#### Weather request start and end time

In [4]:
# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(1).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(2).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(3).ValuesAsNumpy()
hourly_rain = hourly.Variables(4).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(5).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(6).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(7).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(8).ValuesAsNumpy()
hourly_shortwave_radiation = hourly.Variables(9).ValuesAsNumpy()
hourly_direct_radiation = hourly.Variables(10).ValuesAsNumpy()
hourly_diffuse_radiation = hourly.Variables(11).ValuesAsNumpy()
hourly_direct_normal_irradiance = hourly.Variables(12).ValuesAsNumpy()
hourly_global_tilted_irradiance = hourly.Variables(13).ValuesAsNumpy()
hourly_terrestrial_radiation = hourly.Variables(14).ValuesAsNumpy()

#### Call to open-weather

In [5]:
hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["precipitation"] = hourly_precipitation
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["snowfall"] = hourly_snowfall
hourly_data["rain"] = hourly_rain
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
hourly_data["shortwave_radiation"] = hourly_shortwave_radiation
hourly_data["direct_radiation"] = hourly_direct_radiation
hourly_data["diffuse_radiation"] = hourly_diffuse_radiation
hourly_data["direct_normal_irradiance"] = hourly_direct_normal_irradiance
hourly_data["global_tilted_irradiance"] = hourly_global_tilted_irradiance
hourly_data["terrestrial_radiation"] = hourly_terrestrial_radiation

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

                           date  temperature_2m  precipitation  \
0     2023-12-31 22:00:00+00:00          5.6415            1.1   
1     2023-12-31 23:00:00+00:00          5.3915            0.2   
2     2024-01-01 00:00:00+00:00          5.2415            0.1   
3     2024-01-01 01:00:00+00:00          6.0915            0.1   
4     2024-01-01 02:00:00+00:00          5.6415            0.1   
...                         ...             ...            ...   
10195 2025-02-28 17:00:00+00:00          7.1915            0.0   
10196 2025-02-28 18:00:00+00:00          5.9915            0.0   
10197 2025-02-28 19:00:00+00:00          5.7415            0.0   
10198 2025-02-28 20:00:00+00:00          5.1915            0.0   
10199 2025-02-28 21:00:00+00:00          5.6415            0.0   

       wind_speed_10m  snowfall  rain  cloud_cover  cloud_cover_low  \
0            6.287130       0.0   1.1        100.0             83.0   
1            6.519877       0.0   0.2        100.0             40

In [6]:
# --- Your Open-Meteo data retrieval code ---
# ... (previous code to get data into hourly_data dictionary) ...

hourly_dataframe = pd.DataFrame(data = hourly_data)

# --- ENSURE THIS LINE IS PRESENT AND EXECUTED ---
hourly_dataframe = hourly_dataframe.set_index("date")
# ---------------------------------------------

print("Hourly DataFrame head AFTER setting index:")
print(hourly_dataframe.head()) # Add this to verify the index is set
print("Hourly DataFrame index type:", type(hourly_dataframe.index)) # Verify index type

# --- End of your data retrieval code ---

# --- Start PVLib Calculation ---
# ... (the rest of the pvlib code should now work correctly) ...

Hourly DataFrame head AFTER setting index:
                           temperature_2m  precipitation  wind_speed_10m  \
date                                                                       
2023-12-31 22:00:00+00:00          5.6415            1.1        6.287130   
2023-12-31 23:00:00+00:00          5.3915            0.2        6.519877   
2024-01-01 00:00:00+00:00          5.2415            0.1        4.104631   
2024-01-01 01:00:00+00:00          6.0915            0.1        4.198285   
2024-01-01 02:00:00+00:00          5.6415            0.1        4.024922   

                           snowfall  rain  cloud_cover  cloud_cover_low  \
date                                                                      
2023-12-31 22:00:00+00:00       0.0   1.1        100.0             83.0   
2023-12-31 23:00:00+00:00       0.0   0.2        100.0             40.0   
2024-01-01 00:00:00+00:00       0.0   0.1         98.0             92.0   
2024-01-01 01:00:00+00:00       0.0   0.1        

In [None]:
# --- Start PVLib Calculation ---
import pvlib
from pvlib.location import Location
from pvlib.pvsystem import PVSystem, retrieve_sam
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
from pvlib.modelchain import ModelChain
import numpy as np # Add numpy import if not already there

print("\n--- Starting PVLib Calculation ---")

# 1. Prepare Weather Data DataFrame for pvlib
weather_df = hourly_dataframe.copy()
weather_df.rename(columns={
    'shortwave_radiation': 'ghi',         # Global Horizontal Irradiance
    'diffuse_radiation': 'dhi',           # Diffuse Horizontal Irradiance
    'direct_normal_irradiance': 'dni',    # Direct Normal Irradiance
    'temperature_2m': 'temp_air',       # Ambient Air Temperature
    'wind_speed_10m': 'wind_speed',       # Wind Speed
}, inplace=True)

# Ensure required columns exist
required_cols = ['ghi', 'dhi', 'dni', 'temp_air', 'wind_speed']
missing_cols = [col for col in required_cols if col not in weather_df.columns]
if missing_cols:
    raise ValueError(f"Missing required weather columns for PVLib: {missing_cols}")

# Optional: Handle potential NaN values more robustly
if weather_df[required_cols].isnull().values.any():
     print("Warning: NaN values detected in required weather data columns. Imputing with ffill and 0.")
     # Example: Forward fill first, then fill remaining (usually at start) with 0
     weather_df.ffill(inplace=True)
     weather_df.fillna(0, inplace=True)

# --- DEBUG: Inspect weather data just before model run ---
print("\nWeather Data Input to ModelChain (first 5 rows):")
print(weather_df[required_cols].head())
print("\nWeather Data Input Summary:")
print(weather_df[required_cols].describe())
# Check for any negative irradiance values after potential NaN filling
print(f"Any negative GHI/DHI/DNI? GHI: {(weather_df['ghi'] < 0).any()}, DHI: {(weather_df['dhi'] < 0).any()}, DNI: {(weather_df['dni'] < 0).any()}")
# --- END DEBUG ---


# 2. Define Location
latitude = response.Latitude()
longitude = response.Longitude()
altitude = response.Elevation()
tz_bytes = response.Timezone()
tz = tz_bytes.decode('utf-8') if isinstance(tz_bytes, bytes) else tz_bytes

# ... (previous code) ...

location = Location(latitude=latitude, longitude=longitude, altitude=altitude, tz=tz) # Use the decoded string





# 3. Define System Parameters (CRITICAL: Choose appropriate components!)
# (Rest of the system definition remains the same as before)
# ... rest of the script ...

# 3. Define System Parameters
sandia_modules = retrieve_sam('SandiaMod')
cec_inverters = retrieve_sam('cecinverter')

module_name = 'Canadian_Solar_Inc__CS6K_275M'
inverter_name = 'SMA_America__SB7000TL_US__240V_'

if module_name not in sandia_modules:
     print(f"Warning: Module '{module_name}' not found in Sandia database. Trying CEC...")
     cec_modules = retrieve_sam('CECMod')
     module_name_cec = 'Canadian_Solar_Inc__CS6X_300M'
     if module_name_cec in cec_modules:
          print(f"Using CEC module: {module_name_cec}")
          module_parameters = cec_modules[module_name_cec]
          module_db = cec_modules
     else:
          raise KeyError(f"Example modules not found in Sandia or CEC databases. Please select an available module.")
else:
     module_parameters = sandia_modules[module_name]
     module_db = sandia_modules

if inverter_name not in cec_inverters:
     print(f"Warning: Inverter '{inverter_name}' not found in CEC database. Trying alternative...")
     inverter_name = 'SMA_America__SB5000TL_US_22__240V_'
     if inverter_name not in cec_inverters:
          raise KeyError(f"Example inverters not found in CEC database. Please select an available inverter.")
     else:
          inverter_parameters = cec_inverters[inverter_name]
else:
     inverter_parameters = cec_inverters[inverter_name]

surface_tilt = 30
surface_azimuth = 180
temp_params = TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']

modules_per_string = 1
strings_per_inverter = 1

system = PVSystem(surface_tilt=surface_tilt,
                  surface_azimuth=surface_azimuth,
                  module_parameters=module_parameters,
                  inverter_parameters=inverter_parameters,
                  temperature_model_parameters=temp_params,
                  modules_per_string=modules_per_string,
                  strings_per_inverter=strings_per_inverter)

# 5. Create ModelChain object
mc = ModelChain(system, location,
                aoi_model="physical",
                spectral_model="no_loss")

print(f"\nPV System Configuration:")
print(f"- Module: {module_parameters.Name if hasattr(module_parameters, 'Name') else module_name}") # Use .Name attribute
print(f"- Inverter: {inverter_parameters.Name if hasattr(inverter_parameters, 'Name') else inverter_name}")
print(f"- Modules per String: {modules_per_string}")
print(f"- Strings per Inverter: {strings_per_inverter}")
print(f"- Tilt: {surface_tilt} deg, Azimuth: {surface_azimuth} deg")


# 6. Run the Simulation
print("\nRunning model...")
mc.run_model(weather=weather_df)
print("Model run complete.")

# --- DEBUG: Inspect intermediate results ---
# Removed the incorrect mc.results.poa_global line
# print("\nPlane of Array Irradiance (first 5 rows):") # This line was removed
# print(mc.results.poa_global.head())                 # This line was removed
print("\nEffective Irradiance (first 5 rows):")
print(mc.results.effective_irradiance.head())
print("\nCell Temperature (first 5 rows):")
print(mc.results.cell_temperature.head())
print("\nDC Power Output (first 5 rows):")
print(mc.results.dc.head())
# --- END DEBUG ---


# 7. Analyze Results
print("\n--- Simulation Results ---")
print("Calculated AC Power Output (first 5 rows):")
print(mc.results.ac.head())

hourly_dataframe['ac_power_calculated_watts'] = mc.results.ac

print("\nAC Power Output DataFrame :")
print(hourly_dataframe[['ac_power_calculated_watts']].tail(24))


# Check for positive AC power before calculating sum
positive_ac = mc.results.ac[mc.results.ac > 0]
if not positive_ac.empty:
    # Calculate interval in hours
    if weather_df.index.freq:
        interval_hours = weather_df.index.freq.total_seconds() / 3600
    else:
        # Estimate interval if freq is not set (less reliable)
        interval_hours = (weather_df.index[1] - weather_df.index[0]).total_seconds() / 3600
        print(f"Warning: Inferring interval as {interval_hours} hours.")

    total_energy_kwh = mc.results.ac.sum() * interval_hours / 1000 # Wh to kWh
    print(f"\nTotal Estimated AC Energy Production for the period: {total_energy_kwh:.2f} kWh")
else:
    # If the sum is negative or zero, it means only losses or no production
    total_energy_kwh = mc.results.ac.sum() * (hourly.Interval() / 3600) / 1000 # Calculate anyway for consistency
    print(f"\nNo significant positive AC power generated. Calculated sum (likely losses): {total_energy_kwh:.2f} kWh")


--- Starting PVLib Calculation ---

Weather Data Input to ModelChain (first 5 rows):
                           ghi  dhi  dni  temp_air  wind_speed
date                                                          
2023-12-31 22:00:00+00:00  0.0  0.0  0.0    5.6415    6.287130
2023-12-31 23:00:00+00:00  0.0  0.0  0.0    5.3915    6.519877
2024-01-01 00:00:00+00:00  0.0  0.0  0.0    5.2415    4.104631
2024-01-01 01:00:00+00:00  0.0  0.0  0.0    6.0915    4.198285
2024-01-01 02:00:00+00:00  0.0  0.0  0.0    5.6415    4.024922

Weather Data Input Summary:
                ghi           dhi           dni      temp_air    wind_speed
count  10200.000000  10200.000000  10200.000000  10200.000000  10200.000000
mean     141.382935     53.533138    173.919418     11.679922      4.431945
std      213.496750     75.555809    259.354462      8.241317      2.403467
min        0.000000      0.000000      0.000000     -5.908500      0.000000
25%        0.000000      0.000000      0.000000      5.191500   

Training - test

In [8]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from sklearn import svm

In [9]:
X = hourly_dataframe.drop(columns=['ac_power_calculated_watts'], axis=1)
Y = hourly_dataframe['ac_power_calculated_watts']

print('X shape: ', X.shape)
print('Y shape: ', Y.shape)

X shape:  (10200, 15)
Y shape:  (10200,)


In [10]:
X_test_len = int(X.shape[0] * 0.2)

print("Amount of data for training and deciding parameters:", X.shape[0] - X_test_len)
print("Amount of data for test:", X_test_len)

Amount of data for training and deciding parameters: 8160
Amount of data for test: 2040


In [11]:

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = X_test_len, random_state = 42)


print('X_train shape: ', X_train.shape)
print('Y_train shape: ', Y_train.shape)
print('X_test shape: ', X_test.shape)
print('Y_test shape: ', Y_test.shape)


X_train shape:  (8160, 15)
Y_train shape:  (8160,)
X_test shape:  (2040, 15)
Y_test shape:  (2040,)


(0-date  1-temperature_2m  2-precipitation  3-wind_speed_10m 4-snowfall  5-rain  6-cloud_cover  7-cloud_cover_low  8-cloud_cover_mid  9-cloud_cover_high 10-shortwave_radiation  11-direct_radiation 12-diffuse_radiation  13-direct_normal_irradiance  14-global_tilted_irradiance 15-terrestrial_radiation)

In [12]:

#X = hourly_dataframe.drop(columns=['ac_power_calculated_watts'], axis=1)

scaler = StandardScaler()

X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()


X_train_scaled[X_train.columns.difference(['precipitation'])] = scaler.fit_transform(X_train[X_train.columns.difference(['precipitation'])])
X_test_scaled[X_test.columns.difference(['precipitation'])] = scaler.transform(X_test[X_test.columns.difference(['precipitation'])])

print('X_train shape: ', X_train_scaled.shape)
print('X_test shape: ', X_test_scaled.shape)

print('x_test', X_test_scaled)

X_train shape:  (8160, 15)
X_test shape:  (2040, 15)
x_test                            temperature_2m  precipitation  wind_speed_10m  \
date                                                                       
2025-02-23 20:00:00+00:00       -0.668726            0.0       -1.339605   
2024-06-04 20:00:00+00:00        0.392623            0.0       -0.136630   
2025-02-23 10:00:00+00:00       -0.168550            0.0       -0.169727   
2024-08-26 09:00:00+00:00        1.783356            0.0        0.153351   
2025-01-09 09:00:00+00:00       -0.699224            0.5        0.401241   
...                                   ...            ...             ...   
2024-08-28 06:00:00+00:00        1.100189            0.0        1.378948   
2024-07-23 08:00:00+00:00        1.643063            0.0        0.401241   
2024-10-08 00:00:00+00:00        0.240130            0.1       -0.096141   
2024-04-27 10:00:00+00:00        0.185233            0.1        0.853835   
2024-07-03 20:00:00+00:00   

In [15]:
param_grid = {
            'C':[0.01,0.1,1,10,100,1000],
            'gamma': [0.001, 0.01, 0.1, 1],
            'kernel':['poly','rbf'],
            'degree':[2,3,4],
        }
kfold_num = 5

model = svm.SVR(max_iter=1000000)

grid = GridSearchCV(model, n_jobs=-3, refit=True, cv=kfold_num, verbose=2, param_grid=param_grid)

grid.fit(X_train_scaled,Y_train)

best_params = grid.best_params_
best_estimator = grid.best_estimator_

Fitting 5 folds for each of 144 candidates, totalling 720 fits




In [53]:
print("Best parameters: ", best_params)
print("Best estimator test score: ", best_estimator.score(X_test_scaled, Y_test))
print("Best estimator train score: ", best_estimator.score(X_train_scaled, Y_train))

Best parameters:  {'C': 1000, 'degree': 2, 'gamma': 0.1, 'kernel': 'rbf'}
Best estimator test score:  0.9859423063885407
Best estimator train score:  0.9925730133083225


In [54]:
import joblib
import os
model_output_path = 'output/model'
model_filename = 'best_estimator_model.pkl'
if not os.path.exists(model_output_path):
    os.makedirs(model_output_path)
joblib.dump(best_estimator, os.path.join(model_output_path, model_filename))

['output/model/best_estimator_model.pkl']

In [None]:
scaler_filename = 'scaler.pkl'
scaler_save_path = os.path.join(model_output_path, scaler_filename)
joblib.dump(scaler, scaler_save_path) 
print(f"saved in: {os.path.abspath(scaler_save_path)}")

saved in: /home/gian/Desktop/AI_for_social_good/weather_pv_conversion/output/model/scaler.pkl


## TEST BEST MODEL ON NEW DATA (NEITHER PREVIOUSLY TRAIN OR TEST DATA)

In [55]:
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
new_params = {
	"latitude": 45.9,
	"longitude": 11.9,
	"start_date": "2025-03-01",
	"end_date": "2025-04-20",
	"hourly": ["temperature_2m", "precipitation", "wind_speed_10m", "snowfall", "rain", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "shortwave_radiation", "direct_radiation", "diffuse_radiation", "direct_normal_irradiance", "global_tilted_irradiance", "terrestrial_radiation"],
	"timezone": "Europe/Berlin"
}
new_responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
new_response = new_responses[0]
print(f"Coordinates {new_response.Latitude()}°N {new_response.Longitude()}°E")
print(f"Elevation {new_response.Elevation()} m asl")
print(f"Timezone {new_response.Timezone()}{new_response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {new_response.UtcOffsetSeconds()} s")

Coordinates 45.86994552612305°N 11.96202564239502°E
Elevation 308.0 m asl
Timezone b'Europe/Berlin'b'GMT+2'
Timezone difference to GMT+0 7200 s


In [56]:
# Process hourly data. The order of variables needs to be the same as requested.
hourly = new_response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(1).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(2).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(3).ValuesAsNumpy()
hourly_rain = hourly.Variables(4).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(5).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(6).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(7).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(8).ValuesAsNumpy()
hourly_shortwave_radiation = hourly.Variables(9).ValuesAsNumpy()
hourly_direct_radiation = hourly.Variables(10).ValuesAsNumpy()
hourly_diffuse_radiation = hourly.Variables(11).ValuesAsNumpy()
hourly_direct_normal_irradiance = hourly.Variables(12).ValuesAsNumpy()
hourly_global_tilted_irradiance = hourly.Variables(13).ValuesAsNumpy()
hourly_terrestrial_radiation = hourly.Variables(14).ValuesAsNumpy()

In [57]:
new_hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

new_hourly_data["temperature_2m"] = hourly_temperature_2m
new_hourly_data["precipitation"] = hourly_precipitation
new_hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
new_hourly_data["snowfall"] = hourly_snowfall
new_hourly_data["rain"] = hourly_rain
new_hourly_data["cloud_cover"] = hourly_cloud_cover
new_hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
new_hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
new_hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
new_hourly_data["shortwave_radiation"] = hourly_shortwave_radiation
new_hourly_data["direct_radiation"] = hourly_direct_radiation
new_hourly_data["diffuse_radiation"] = hourly_diffuse_radiation
new_hourly_data["direct_normal_irradiance"] = hourly_direct_normal_irradiance
new_hourly_data["global_tilted_irradiance"] = hourly_global_tilted_irradiance
new_hourly_data["terrestrial_radiation"] = hourly_terrestrial_radiation

new_hourly_dataframe = pd.DataFrame(data = new_hourly_data)
print(new_hourly_dataframe)

                           date  temperature_2m  precipitation  \
0     2023-12-31 22:00:00+00:00          5.6415            1.1   
1     2023-12-31 23:00:00+00:00          5.3915            0.2   
2     2024-01-01 00:00:00+00:00          5.2415            0.1   
3     2024-01-01 01:00:00+00:00          6.0915            0.1   
4     2024-01-01 02:00:00+00:00          5.6415            0.1   
...                         ...             ...            ...   
10195 2025-02-28 17:00:00+00:00          7.1915            0.0   
10196 2025-02-28 18:00:00+00:00          5.9915            0.0   
10197 2025-02-28 19:00:00+00:00          5.7415            0.0   
10198 2025-02-28 20:00:00+00:00          5.1915            0.0   
10199 2025-02-28 21:00:00+00:00          5.6415            0.0   

       wind_speed_10m  snowfall  rain  cloud_cover  cloud_cover_low  \
0            6.287130       0.0   1.1        100.0             83.0   
1            6.519877       0.0   0.2        100.0             40

In [58]:
# --- Your Open-Meteo data retrieval code ---
# ... (previous code to get data into new_hourly_data dictionary) ...

new_hourly_dataframe = pd.DataFrame(data = new_hourly_data)

# --- ENSURE THIS LINE IS PRESENT AND EXECUTED ---
new_hourly_dataframe = new_hourly_dataframe.set_index("date")
# ---------------------------------------------

print("Hourly DataFrame head AFTER setting index:")
print(new_hourly_dataframe.head()) # Add this to verify the index is set
print("Hourly DataFrame index type:", type(new_hourly_dataframe.index)) # Verify index type

# --- End of your data retrieval code ---

# --- Start PVLib Calculation ---
# ... (the rest of the pvlib code should now work correctly) ...

Hourly DataFrame head AFTER setting index:
                           temperature_2m  precipitation  wind_speed_10m  \
date                                                                       
2023-12-31 22:00:00+00:00          5.6415            1.1        6.287130   
2023-12-31 23:00:00+00:00          5.3915            0.2        6.519877   
2024-01-01 00:00:00+00:00          5.2415            0.1        4.104631   
2024-01-01 01:00:00+00:00          6.0915            0.1        4.198285   
2024-01-01 02:00:00+00:00          5.6415            0.1        4.024922   

                           snowfall  rain  cloud_cover  cloud_cover_low  \
date                                                                      
2023-12-31 22:00:00+00:00       0.0   1.1        100.0             83.0   
2023-12-31 23:00:00+00:00       0.0   0.2        100.0             40.0   
2024-01-01 00:00:00+00:00       0.0   0.1         98.0             92.0   
2024-01-01 01:00:00+00:00       0.0   0.1        

In [59]:
# --- Start PVLib Calculation ---
import pvlib
from pvlib.location import Location
from pvlib.pvsystem import PVSystem, retrieve_sam
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
from pvlib.modelchain import ModelChain
import numpy as np # Add numpy import if not already there

print("\n--- Starting PVLib Calculation ---")

# 1. Prepare Weather Data DataFrame for pvlib
new_weather_df = new_hourly_dataframe.copy()
new_weather_df.rename(columns={
    'shortwave_radiation': 'ghi',         # Global Horizontal Irradiance
    'diffuse_radiation': 'dhi',           # Diffuse Horizontal Irradiance
    'direct_normal_irradiance': 'dni',    # Direct Normal Irradiance
    'temperature_2m': 'temp_air',       # Ambient Air Temperature
    'wind_speed_10m': 'wind_speed',       # Wind Speed
}, inplace=True)

# Ensure required columns exist
required_cols = ['ghi', 'dhi', 'dni', 'temp_air', 'wind_speed']
missing_cols = [col for col in required_cols if col not in new_weather_df.columns]
if missing_cols:
    raise ValueError(f"Missing required weather columns for PVLib: {missing_cols}")

# Optional: Handle potential NaN values more robustly
if new_weather_df[required_cols].isnull().values.any():
     print("Warning: NaN values detected in required weather data columns. Imputing with ffill and 0.")
     # Example: Forward fill first, then fill remaining (usually at start) with 0
     new_weather_df.ffill(inplace=True)
     new_weather_df.fillna(0, inplace=True)

# --- DEBUG: Inspect weather data just before model run ---
print("\nWeather Data Input to ModelChain (first 5 rows):")
print(new_weather_df[required_cols].head())
print("\nWeather Data Input Summary:")
print(new_weather_df[required_cols].describe())
# Check for any negative irradiance values after potential NaN filling
print(f"Any negative GHI/DHI/DNI? GHI: {(new_weather_df['ghi'] < 0).any()}, DHI: {(new_weather_df['dhi'] < 0).any()}, DNI: {(new_weather_df['dni'] < 0).any()}")
# --- END DEBUG ---


# 2. Define Location
latitude = new_response.Latitude()
longitude = new_response.Longitude()
altitude = new_response.Elevation()
tz_bytes = new_response.Timezone()
tz = tz_bytes.decode('utf-8') if isinstance(tz_bytes, bytes) else tz_bytes

# ... (previous code) ...

location = Location(latitude=latitude, longitude=longitude, altitude=altitude, tz=tz) # Use the decoded string





# 3. Define System Parameters (CRITICAL: Choose appropriate components!)
# (Rest of the system definition remains the same as before)
# ... rest of the script ...

# 3. Define System Parameters
sandia_modules = retrieve_sam('SandiaMod')
cec_inverters = retrieve_sam('cecinverter')

module_name = 'Canadian_Solar_Inc__CS6K_275M'
inverter_name = 'SMA_America__SB7000TL_US__240V_'

if module_name not in sandia_modules:
     print(f"Warning: Module '{module_name}' not found in Sandia database. Trying CEC...")
     cec_modules = retrieve_sam('CECMod')
     module_name_cec = 'Canadian_Solar_Inc__CS6X_300M'
     if module_name_cec in cec_modules:
          print(f"Using CEC module: {module_name_cec}")
          module_parameters = cec_modules[module_name_cec]
          module_db = cec_modules
     else:
          raise KeyError(f"Example modules not found in Sandia or CEC databases. Please select an available module.")
else:
     module_parameters = sandia_modules[module_name]
     module_db = sandia_modules

if inverter_name not in cec_inverters:
     print(f"Warning: Inverter '{inverter_name}' not found in CEC database. Trying alternative...")
     inverter_name = 'SMA_America__SB5000TL_US_22__240V_'
     if inverter_name not in cec_inverters:
          raise KeyError(f"Example inverters not found in CEC database. Please select an available inverter.")
     else:
          inverter_parameters = cec_inverters[inverter_name]
else:
     inverter_parameters = cec_inverters[inverter_name]

surface_tilt = 30
surface_azimuth = 180
temp_params = TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']

modules_per_string = 1
strings_per_inverter = 1

system = PVSystem(surface_tilt=surface_tilt,
                  surface_azimuth=surface_azimuth,
                  module_parameters=module_parameters,
                  inverter_parameters=inverter_parameters,
                  temperature_model_parameters=temp_params,
                  modules_per_string=modules_per_string,
                  strings_per_inverter=strings_per_inverter)

# 5. Create ModelChain object
mc = ModelChain(system, location,
                aoi_model="physical",
                spectral_model="no_loss")

print(f"\nPV System Configuration:")
print(f"- Module: {module_parameters.Name if hasattr(module_parameters, 'Name') else module_name}") # Use .Name attribute
print(f"- Inverter: {inverter_parameters.Name if hasattr(inverter_parameters, 'Name') else inverter_name}")
print(f"- Modules per String: {modules_per_string}")
print(f"- Strings per Inverter: {strings_per_inverter}")
print(f"- Tilt: {surface_tilt} deg, Azimuth: {surface_azimuth} deg")


# 6. Run the Simulation
print("\nRunning model...")
mc.run_model(weather=new_weather_df)
"""            - ``'dni'``
            - ``'ghi'``
            - ``'dhi'``

            Optional columns are:

            - ``'temp_air'``
            - ``'cell_temperature'``
            - ``'module_temperature'``
            - ``'wind_speed'``
            - ``'albedo'``"""
print("Model run complete.")

# --- DEBUG: Inspect intermediate results ---
# Removed the incorrect mc.results.poa_global line
# print("\nPlane of Array Irradiance (first 5 rows):") # This line was removed
# print(mc.results.poa_global.head())                 # This line was removed
print("\nEffective Irradiance (first 5 rows):")
print(mc.results.effective_irradiance.head())
print("\nCell Temperature (first 5 rows):")
print(mc.results.cell_temperature.head())
print("\nDC Power Output (first 5 rows):")
print(mc.results.dc.head())
# --- END DEBUG ---


# 7. Analyze Results
print("\n--- Simulation Results ---")
print("Calculated AC Power Output (first 5 rows):")
print(mc.results.ac.head())

new_hourly_dataframe['ac_power_calculated_watts'] = mc.results.ac

print("\nAC Power Output DataFrame :")
print(new_hourly_dataframe[['ac_power_calculated_watts']].tail(24))


# Check for positive AC power before calculating sum
positive_ac = mc.results.ac[mc.results.ac > 0]
if not positive_ac.empty:
    # Calculate interval in hours
    if new_weather_df.index.freq:
        interval_hours = new_weather_df.index.freq.total_seconds() / 3600
    else:
        # Estimate interval if freq is not set (less reliable)
        interval_hours = (new_weather_df.index[1] - new_weather_df.index[0]).total_seconds() / 3600
        print(f"Warning: Inferring interval as {interval_hours} hours.")

    total_energy_kwh = mc.results.ac.sum() * interval_hours / 1000 # Wh to kWh
    print(f"\nTotal Estimated AC Energy Production for the period: {total_energy_kwh:.2f} kWh")
else:
    # If the sum is negative or zero, it means only losses or no production
    total_energy_kwh = mc.results.ac.sum() * (hourly.Interval() / 3600) / 1000 # Calculate anyway for consistency
    print(f"\nNo significant positive AC power generated. Calculated sum (likely losses): {total_energy_kwh:.2f} kWh")


--- Starting PVLib Calculation ---

Weather Data Input to ModelChain (first 5 rows):
                           ghi  dhi  dni  temp_air  wind_speed
date                                                          
2023-12-31 22:00:00+00:00  0.0  0.0  0.0    5.6415    6.287130
2023-12-31 23:00:00+00:00  0.0  0.0  0.0    5.3915    6.519877
2024-01-01 00:00:00+00:00  0.0  0.0  0.0    5.2415    4.104631
2024-01-01 01:00:00+00:00  0.0  0.0  0.0    6.0915    4.198285
2024-01-01 02:00:00+00:00  0.0  0.0  0.0    5.6415    4.024922

Weather Data Input Summary:
                ghi           dhi           dni      temp_air    wind_speed
count  10200.000000  10200.000000  10200.000000  10200.000000  10200.000000
mean     141.382935     53.533138    173.919418     11.679922      4.431945
std      213.496750     75.555809    259.354462      8.241317      2.403467
min        0.000000      0.000000      0.000000     -5.908500      0.000000
25%        0.000000      0.000000      0.000000      5.191500   

In [60]:
X = new_hourly_dataframe.drop(columns=['ac_power_calculated_watts'], axis=1)
Y = new_hourly_dataframe['ac_power_calculated_watts']

print('X shape: ', X.shape)
print('Y shape: ', Y.shape)

X shape:  (10200, 15)
Y shape:  (10200,)


In [61]:
X_scaled = X.copy()
X_scaled[X_test.columns.difference(['precipitation'])] = scaler.transform(X[X.columns.difference(['precipitation'])])

print('X_test shape: ', X_scaled.shape)

print('x_test', X_scaled)

X_test shape:  (10200, 15)
x_test                            temperature_2m  precipitation  wind_speed_10m  \
date                                                                       
2023-12-31 22:00:00+00:00       -0.741922            1.1        0.769601   
2023-12-31 23:00:00+00:00       -0.772421            0.2        0.866244   
2024-01-01 00:00:00+00:00       -0.790720            0.1       -0.136630   
2024-01-01 01:00:00+00:00       -0.687025            0.1       -0.097743   
2024-01-01 02:00:00+00:00       -0.741922            0.1       -0.169727   
...                                   ...            ...             ...   
2025-02-28 17:00:00+00:00       -0.552831            0.0       -1.302018   
2025-02-28 18:00:00+00:00       -0.699224            0.0       -1.368279   
2025-02-28 19:00:00+00:00       -0.729723            0.0       -0.697668   
2025-02-28 20:00:00+00:00       -0.796820            0.0       -0.189904   
2025-02-28 21:00:00+00:00       -0.741922            0

In [62]:
print("Best estimator test (new data) score: ", best_estimator.score(X_scaled, Y))

Best estimator test (new data) score:  0.9912450642213819
