# Comparison CUTonalá and PVlib forecast using GFS

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import pvlib
from pvlib.pvsystem import PVSystem
from pvlib.location import Location
from pvlib.modelchain import ModelChain
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS

In [2]:
temperature_model_parameters = TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']

In [3]:
cec_modules = pvlib.pvsystem.retrieve_sam('CECMod')
cec_inverters = pvlib.pvsystem.retrieve_sam('cecinverter')

cec_module = cec_modules['Canadian_Solar_Inc__CS6X_320P']
cec_inverter = cec_inverters['INGETEAM_POWER_TECHNOLOGY_S_A___Ingecon_Sun_40TL_U_M__480V_']

In [4]:
#cec_module

In [5]:
#cec_inverter

In [6]:
latitude= 20.56
longitude= -103.22
altitude=1544
tz='America/Mexico_City'

In [7]:
location = Location(latitude=latitude, longitude=longitude, altitude=altitude, tz=tz)

In [8]:
surface_tilt = 25
surface_azimuth = 180 # pvlib uses 0=North, 90=East, 180=South, 270=West convention
albedo = 0.2

In [9]:
losses = pvlib.pvsystem.pvwatts_losses(soiling=7, age=2)
losses

20.090364440085963

In [10]:
system = PVSystem(surface_tilt=surface_tilt, surface_azimuth=surface_azimuth,
                 module_parameters=cec_module,
                 inverter_parameters=cec_inverter,
                 temperature_model_parameters=temperature_model_parameters,
                 albedo=albedo, surface_type=None, module=None, 
                 module_type='glass_polymer',
                 modules_per_string=19.5, 
                 strings_per_inverter=8, inverter=None, 
                 racking_model='open_rack', 
                 losses_parameters=losses, 
                 name='Name of PV system: Huerto de Cutonalá')

In [11]:
system

PVSystem:
  name: Name of PV system: Huerto de Cutonalá
  surface_tilt: 25
  surface_azimuth: 180
  module: None
  inverter: None
  albedo: 0.2
  racking_model: open_rack
  module_type: glass_polymer
  temperature_model_parameters: {'a': -3.47, 'b': -0.0594, 'deltaT': 3}

In [12]:
mc = ModelChain(system, location, name='Huerto de Cutonalá', 
                clearsky_model='ineichen', transposition_model='haydavies', 
                solar_position_method='nrel_numpy', airmass_model='kastenyoung1989',
                dc_model=None, #from module specifications
                ac_model='sandia', #from inverter specifications
                aoi_model='no_loss', 
                spectral_model='no_loss', #spectral model 'no loss' otherwise error
                temperature_model='sapm', losses_model='no_loss')

In [13]:
mc

ModelChain: 
  name: Huerto de Cutonalá
  orientation_strategy: None
  clearsky_model: ineichen
  transposition_model: haydavies
  solar_position_method: nrel_numpy
  airmass_model: kastenyoung1989
  dc_model: cec
  ac_model: snlinverter
  aoi_model: sapm_aoi_loss
  spectral_model: sapm_spectral_loss
  temperature_model: sapm_temp
  losses_model: no_extra_losses

In [14]:
import datetime
#Variables de tiempo

start = pd.Timestamp(datetime.date.today(), tz='America/Mexico_City') # today's date
end = start + pd.Timedelta(days=7) # 7 days from today

#start_variable stand for a configurable variable for one given date
#end_variable stand for a configurable variable for one given end date
#days_before_today variable stands for an especific date before today

days_before_today = 10

start_variable = start - pd.Timedelta(days=days_before_today)
end_variable = start_variable + pd.Timedelta(days=1)

In [15]:
start_variable

Timestamp('2021-01-23 00:00:00-0600', tz='America/Mexico_City')

In [16]:
end_variable

Timestamp('2021-01-24 00:00:00-0600', tz='America/Mexico_City')

## Variables for files

In [17]:
study_day = '23'
month = 'Enero'

cutonala_real_data_file = './Real Data Cutonalá/Enero 18-31/2021-' + study_day + '-Enero.csv'
comparision_data_to_csv_date = './comparision data/' + study_day + '-' + month + '-2021-GFS'
metrics_data = './comparision data/' + study_day + '-' + month + '-2021-GFS-Metrics'

In [18]:

from pvlib.forecast import GFS, NAM, NDFD, RAP, HRRR



In [19]:
# Se define el modelo de pronostico
fm = GFS()
#fm = NAM()
#fm = NDFD()
#fm = RAP()
#fm = HRRR()

In [20]:
# Se obtiene la data y se procesa del modelo de pronostico
forecast_data = fm.get_processed_data(latitude, longitude, start_variable, end_variable)

In [21]:
#pd.set_option('display.max_columns', None)
#forecast_data.head()

In [22]:
forecast_data

Unnamed: 0,temp_air,wind_speed,ghi,dni,dhi,total_clouds,low_clouds,mid_clouds,high_clouds
2021-01-23 06:00:00-06:00,14.205017,2.059263,0.0,0.0,0.0,46.0,0.0,0.0,46.0
2021-01-23 09:00:00-06:00,12.098541,1.317391,247.124766,523.130086,85.141559,0.0,0.0,0.0,0.0
2021-01-23 12:00:00-06:00,11.43988,0.874417,703.278373,651.722609,225.652197,9.0,0.0,0.0,9.0
2021-01-23 15:00:00-06:00,19.055023,0.604828,380.143394,101.493401,313.248176,65.0,0.0,0.0,65.0
2021-01-23 18:00:00-06:00,37.239899,0.92368,38.940917,0.0,38.940917,58.0,0.0,0.0,58.0
2021-01-23 21:00:00-06:00,34.439911,2.184795,0.0,0.0,0.0,100.0,0.0,0.0,100.0
2021-01-24 00:00:00-06:00,22.738281,2.129871,0.0,0.0,0.0,71.0,0.0,0.0,71.0


In [23]:
ghi = forecast_data['ghi']
dni = forecast_data['dni']
dhi = forecast_data['dhi']
temp_air = forecast_data['temp_air']
wind_speed = forecast_data['wind_speed']
indexDF = forecast_data.index

In [24]:
weather = pd.DataFrame([dni, ghi, dhi, temp_air, wind_speed]).T

In [25]:
mc.run_model(weather)

KeyError: 'B5'

In [None]:
mc.dc

In [None]:
forecast_dc = mc.dc.reset_index()
forecast_dc

In [None]:
droped_forecas_data = forecast_dc.drop([0,5,6])
time = droped_forecas_data['index'].reset_index()
time = time['index']
time

In [None]:
power_forecasted = droped_forecas_data['p_mp']
power_forecasted = power_forecasted.reset_index()
power_forecasted = power_forecasted['p_mp']
power_forecasted

## Retrieving real data

In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
real_data_cut = pd.read_csv(cutonala_real_data_file)

In [None]:
real_data_cut

In [None]:
droped_data_cut = real_data_cut.drop([0,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17,18,20,21,22,23,24,25,26,27,28,29,30,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47])
droped_data_cut

In [None]:
power_real = droped_data_cut['Pac(W)']
power_real = power_real.reset_index()
power_real = power_real['Pac(W)']
power_real

In [None]:
data_dict = {'time': time,'real_data': power_real, 'forecasted_data': power_forecasted}
dframe = pd.DataFrame(data_dict)
dframe

In [None]:
comparision = dframe.set_index('time')
comparision

In [None]:
comparision_to_csv = comparision.to_csv(comparision_data_to_csv_date, index=False)

In [None]:
comparision.plot()
plt.ylabel('Power (W)')
plt.title('Comparision Between Pvlib GFS and PV Cutonala Pharm');

### RMSE

$  RMSE = \sqrt{\frac{1}{n}\Sigma_{i=1}^{n}{\Big(\frac{d_i -f_i}{\sigma_i}\Big)^2}} $

In [None]:
from sklearn.metrics import mean_squared_error, mean_squared_log_error, mean_absolute_error
import numpy as np

In [None]:
real = comparision['real_data']
real

In [None]:
forecasted = comparision['forecasted_data']
forecasted

In [None]:
print("MSE:"+str(mean_squared_error(forecasted, real)))
print("RMSE:"+str(np.sqrt(mean_squared_error(forecasted, real))))
print("MSLE:"+str(mean_squared_log_error(forecasted, real)))
print("RMSLE:"+str(np.sqrt(mean_squared_log_error(forecasted, real))))
print("MAE:"+str(mean_absolute_error(forecasted, real)))

In [None]:
MSE = mean_squared_error(forecasted, real)
RMSE = np.sqrt(mean_squared_error(forecasted, real))
MSLE = mean_squared_log_error(forecasted, real)
RMSLE = np.sqrt(mean_squared_log_error(forecasted, real))
MAE = mean_absolute_error(forecasted, real)

In [None]:
metrics_obj = {'MSE': [MSE], 'RMSE':[RMSE], 'MSLE':[MSLE], 'RMSLE':[RMSLE], 'MAE':[MAE]}
metrics_df = pd.DataFrame(metrics_obj)
metrics_df

In [None]:
metrics_df.to_csv(metrics_data, index=False)