                                                                            Luis Ramirez Camargo, June 2020

# PV output from ERA5-land and MERRA2 data using PV_lib
This notebook calculates hourly PV power output for the locations of the Chilean PV installations using pvlib. The output is calculated for installations oriented north with an inclination equal to the latitude, as well as for single-axis tracker systems. The calculation is made in all cases using ERA5-land or MERRA2 data. This notebook requires the time series generated using the notebook 2_get_time_series_from_era5_land_and_merra2_for_pv_calculation. Furthermore, the output of this notebook is part of the input for the validation and intercomparison procedure in 4_pv_validation_ERA5_land_MERRA2. 


## 1) import the libraries and data

In [1]:
import os
import itertools
import xarray as xr
import pandas as pd
from pandas.plotting import register_matplotlib_converters
import numpy as np
import pvlib
from pvlib import clearsky, atmosphere, solarposition
from pvlib.iotools import read_tmy3
from pvlib.pvsystem import PVSystem
from pvlib.location import Location
from pvlib.modelchain import ModelChain
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import geopandas
import glob
from tkinter import Tcl
import gc
import unidecode
import scipy as sp
import scipy.stats
import seaborn as sns
from pathlib import Path
from pvlib.tools import cosd, sind

## Import locations

In [2]:
# Shapefile with the locations of the PV installations.
installations_chile_pre = Path(
    "input_data/solares_fd0779de_0870_4194_b962_83a842d8c316.shp"
)
def clean_installations_locations(installations_chile_pre):
    '''Build a table with the location and basic characteristics of the
    large PV installations in Chile.

    The shapefile at ``installations_chile_pre`` is read with geopandas and
    reprojected to EPSG:4326. The returned data frame is indexed by the
    ``NOMBRE`` column and holds latitude, longitude, size (``POTENCIA``),
    the official operation start (``F_OPERACIO`` parsed as datetime) and a
    fixed end time of 2018-12-30.'''
    plants_wgs84 = geopandas.read_file(installations_chile_pre).to_crs(epsg=4326)
    points = plants_wgs84.geometry
    characteristics = {
        'latitude': points.y.values,
        'longitude': points.x.values,
        'size': plants_wgs84['POTENCIA'].values,
        'official_operation_start': pd.to_datetime(plants_wgs84['F_OPERACIO'].values),
        # the same end date is broadcast to every installation
        'end_time': pd.to_datetime('2018-12-30'),
    }
    return pd.DataFrame(characteristics, index=plants_wgs84["NOMBRE"])

# Build the installations table once; the functions in later cells read it
# as a module-level variable.
installations_chile = clean_installations_locations(installations_chile_pre)

## Import the weather parameters

In [3]:
# CSV files with the per-installation time series of each reanalysis variable.
era5l_radiation_file = Path(
    'time_series_from_reanalysis_data_sets/eral5_radiation_all_installations.csv')
era5l_temperature_file = Path(
    'time_series_from_reanalysis_data_sets/eral5_temperature_all_installations.csv')
era5l_wind_speed_file = Path(
    'time_series_from_reanalysis_data_sets/eral5_wind_speed_all_installations.csv')
merra2_radiation_file = Path(
    'time_series_from_reanalysis_data_sets/merra2_radiation_all_installations.csv')
merra2_temperature_file = Path(
    'time_series_from_reanalysis_data_sets/merra2_temperature_all_installations.csv')
merra2_wind_speed_file = Path(
    'time_series_from_reanalysis_data_sets/merra2_wind_speed_all_installations.csv')

# Each file is read with its first column as a parsed datetime index, and the
# index is then localized to UTC.
era5l_radiation = pd.read_csv(era5l_radiation_file, parse_dates=True, index_col=0)
era5l_radiation = era5l_radiation.tz_localize(tz='UTC')
era5l_temperature = pd.read_csv(era5l_temperature_file, parse_dates=True, index_col=0)
era5l_temperature = era5l_temperature.tz_localize(tz='UTC')
era5l_wind_speed = pd.read_csv(era5l_wind_speed_file, parse_dates=True, index_col=0)
era5l_wind_speed = era5l_wind_speed.tz_localize(tz='UTC')
merra2_radiation = pd.read_csv(merra2_radiation_file, parse_dates=True, index_col=0)
merra2_radiation = merra2_radiation.tz_localize(tz='UTC')
merra2_temperature = pd.read_csv(merra2_temperature_file, parse_dates=True, index_col=0)
merra2_temperature = merra2_temperature.tz_localize(tz='UTC')
merra2_wind_speed = pd.read_csv(merra2_wind_speed_file, parse_dates=True, index_col=0)
merra2_wind_speed = merra2_wind_speed.tz_localize(tz='UTC')

In [4]:
def shift_and_clean(resample_time_step, shifting, reanalysis_variable):
    '''Move the time stamps of a data frame by a number of finer time steps.

    The frame is resampled to ``resample_time_step`` without filling (the
    stored values appear only once, the new rows are NaN), shifted by
    ``shifting`` rows and finally every row containing NaN is dropped.
    Values that are shifted beyond the end of the resampled index are lost,
    so e.g. hourly data shifted by one 30 min step loses its last record.
    e.g. resample_time_step='30min', shifting=1,
    reanalysis_variable=era5l_radiation'''
    upsampled = reanalysis_variable.resample(resample_time_step).asfreq()
    moved = upsampled.shift(periods=shifting)
    return moved.dropna()

In [5]:
# Shift every variable of both reanalysis data sets by one 30 min step.
merra2_radiation_30m = shift_and_clean(
    resample_time_step='30min', shifting=1, reanalysis_variable=merra2_radiation)
merra2_temperature_30m = shift_and_clean(
    resample_time_step='30min', shifting=1, reanalysis_variable=merra2_temperature)
merra2_wind_speed_30m = shift_and_clean(
    resample_time_step='30min', shifting=1, reanalysis_variable=merra2_wind_speed)
era5l_radiation_30m = shift_and_clean(
    resample_time_step='30min', shifting=1, reanalysis_variable=era5l_radiation)
era5l_temperature_30m = shift_and_clean(
    resample_time_step='30min', shifting=1, reanalysis_variable=era5l_temperature)
era5l_wind_speed_30m = shift_and_clean(
    resample_time_step='30min', shifting=1, reanalysis_variable=era5l_wind_speed)

## Calculate clear sky values for each location

In [6]:
def get_irrad_clear_sky(lats, lons, time_horizon):
    '''Clear-sky irradiance for one location over ``time_horizon``.

    Returns a tuple with
    - the clear-sky irradiance frame from ``Location.get_clearsky``
      (its 'ghi', 'dni' and 'dhi' columns are used here),
    - the extraterrestrial radiation (method 'asce'),
    - the global in-plane irradiance on a single-axis tracker surface
      (axis_tilt=0, axis_azimuth=0, max_angle=90, backtracking, gcr=2/7),
      built as beam + Hay-Davies sky diffuse + ground diffuse with an
      albedo of 0.25.'''
    site = Location(lats, lons, tz='UTC')
    sun = pvlib.solarposition.get_solarposition(time_horizon, site.latitude, site.longitude)
    apparent_zenith = sun['apparent_zenith']
    sun_azimuth = sun['azimuth']
    tracker = pvlib.tracking.singleaxis(apparent_zenith,
                                        sun_azimuth,
                                        axis_tilt=0,
                                        axis_azimuth=0,
                                        max_angle=90,
                                        backtrack=True,
                                        gcr=2.0/7.0)
    irrad_data = site.get_clearsky(time_horizon)
    dni_et = pvlib.irradiance.get_extra_radiation(irrad_data.index, method='asce')
    sky_diffuse = pvlib.irradiance.haydavies(tracker['surface_tilt'], tracker['surface_azimuth'],
                                             irrad_data['dhi'], irrad_data['dni'], dni_et,
                                             apparent_zenith, sun_azimuth)
    ground_diffuse = pvlib.irradiance.get_ground_diffuse(tracker['surface_tilt'],
                                                         irrad_data['ghi'],
                                                         albedo=.25)
    # beam component projected on the tracker surface
    beam = cosd(tracker['aoi']) * irrad_data['dni']
    global_in_plane = beam + sky_diffuse + ground_diffuse
    return irrad_data, dni_et, global_in_plane

In [7]:
def get_clear_sky_ghi_all(time_horizon):
    '''runs the function get_irrad_clear_sky for all PV plants in
    installations_chile and creates a data frame (one column per plant,
    indexed by time_horizon) with the clear-sky GHI time series'''
    ghi_by_plant = pd.DataFrame(index=pd.to_datetime(time_horizon),
                                columns=installations_chile.index)
    for plant in installations_chile.index:
        plant_lat = installations_chile.latitude[plant]
        plant_lon = installations_chile.longitude[plant]
        # first element of the result is the clear-sky irradiance frame
        clear_sky_irradiance = get_irrad_clear_sky(plant_lat, plant_lon, time_horizon)[0]
        ghi_by_plant[plant] = clear_sky_irradiance['ghi'].values
    return ghi_by_plant

In [8]:
def get_clear_sky_global_in_plane_all(time_horizon):
    '''runs the function get_irrad_clear_sky for all PV plants in
    installations_chile and creates a data frame (one column per plant,
    indexed by time_horizon) with the clear-sky global in-plane irradiance
    time series'''
    in_plane_by_plant = pd.DataFrame(index=pd.to_datetime(time_horizon),
                                     columns=installations_chile.index)
    for plant in installations_chile.index:
        plant_lat = installations_chile.latitude[plant]
        plant_lon = installations_chile.longitude[plant]
        # third element of the result is the global in-plane irradiance
        global_in_plane = get_irrad_clear_sky(plant_lat, plant_lon, time_horizon)[2]
        in_plane_by_plant[plant] = global_in_plane.values
    return in_plane_by_plant

In [9]:
# Clear-sky GHI of every installation on the shifted ERA5-land time index.
clear_sky_ghi_all = get_clear_sky_ghi_all(era5l_radiation_30m.index) 

In [10]:
# Work on a copy: a plain assignment only creates a second name for the same
# data frame, so re-labelling the index below would also replace the shifted
# UTC index of clear_sky_ghi_all, which is still needed as radiation input
# for the clear-sky PV output runs.
clear_sky_ghi_all_00 = clear_sky_ghi_all.copy()
# Store the series with time-zone-naive full-hour labels.
clear_sky_ghi_all_00.index = pd.date_range(start='2014-01-01 00:00:00',
                                           end='2018-12-31 22:00:00',
                                           freq='H')
clear_sky_ghi_all_00.to_csv(Path('intermediate_results/clear_sky_ghi_all_00.csv'))

In [11]:
# Clear-sky global in-plane irradiance (tracker surface) of every
# installation on the shifted ERA5-land time index.
clear_sky_global_in_plane_all = get_clear_sky_global_in_plane_all(era5l_radiation_30m.index)

In [12]:
# Work on a copy: a plain assignment only creates a second name for the same
# data frame, so re-labelling the index below would silently change the index
# of clear_sky_global_in_plane_all as well.
clear_sky_global_in_plane_all_00 = clear_sky_global_in_plane_all.copy()
# Store the series with time-zone-naive full-hour labels.
clear_sky_global_in_plane_all_00.index = pd.date_range(start='2014-01-01 00:00:00',
                                                       end='2018-12-31 22:00:00',
                                                       freq='H')
clear_sky_global_in_plane_all_00.to_csv(Path('intermediate_results/clear_sky_global_in_plane_all_00.csv'))

## Calculate the PV output assuming optimal configuration and tracking

In [13]:
def pv_output_system(radiation_pre, temperature_pre, wind_speed_pre,lons,lats,tracking,installed_capacity_kWp):
    '''Calculate the AC output time series of a PV installation at one
    location.

    Parameters
    ----------
    radiation_pre : time series of global horizontal irradiance; its index
        (set to hourly frequency) defines the simulated time steps
    temperature_pre : time series of air temperature (in °C at 2 m)
    wind_speed_pre : time series of wind speed (in m/s)
    lons, lats : longitude and latitude of the location in degrees
        (note the order: longitude first)
    tracking : 0 for a fixed surface facing the equator with a tilt equal to
        the absolute latitude, 1 for a single-axis tracking system
    installed_capacity_kWp : installed capacity used to scale the output

    Returns
    -------
    Series with the AC output of the ModelChain run for one module and one
    inverter, scaled by the number of modules per kWp (1000/PTC) and by
    installed_capacity_kWp. Missing values are replaced by 0.

    Raises
    ------
    ValueError if tracking is neither 0 nor 1.'''
    if tracking not in (0, 1):
        raise ValueError("tracking must be 0 (fixed tilt) or 1 (single-axis tracking)")
    location = Location(latitude=lats, longitude=lons, tz='UTC')
    temperature = temperature_pre
    wind_speed = wind_speed_pre
    ghi_input = radiation_pre
    timeindex = ghi_input.asfreq(freq='1H').index
    dayofyear = timeindex.dayofyear
    ephem_pv_installation = pvlib.solarposition.pyephem(timeindex, 
                                                        location.latitude, 
                                                        location.longitude,
                                                        temperature=np.mean(temperature))
    Zenith = ephem_pv_installation['zenith']
    if tracking == 0:
        # pvlib measures surface_azimuth clockwise from north (north=0,
        # south=180): the surface has to face south in the northern
        # hemisphere and north in the southern hemisphere. The tilt is the
        # absolute latitude so that it is never negative.
        slope = abs(lats)
        aspect = 180 if lats >= 0 else 0
    else:
        tracker_data = pvlib.tracking.singleaxis(ephem_pv_installation['apparent_zenith'],
                                                 ephem_pv_installation['azimuth'],
                                                 axis_tilt=0,
                                                 axis_azimuth=0,
                                                 max_angle=90,
                                                 backtrack=True,
                                                 gcr=2.0/7.0)
        slope = tracker_data['surface_tilt']
        aspect = tracker_data['surface_azimuth']
    # split the global horizontal irradiance into direct (DISC model) and
    # diffuse components
    dni_pre = pvlib.irradiance.disc(ghi_input,Zenith,dayofyear)['dni']
    dhi_pre = ghi_input - dni_pre *cosd(Zenith)
    weather = pd.DataFrame({'ghi': ghi_input, 
                        'dni': dni_pre, 
                        'dhi': dhi_pre, 
                        'temp_air': temperature, 
                        'wind_speed': wind_speed},
                       index=timeindex)
    sandia_modules = pvlib.pvsystem.retrieve_sam('SandiaMod')
    cec_inverters = pvlib.pvsystem.retrieve_sam('cecinverter')
    #the following selection requires some sort of automatization
    sandia_module = sandia_modules['Silevo_Triex_U300_Black__2014_']
    #Trials with the STC rating were underestimating the yearly sum, so the
    #PTC rating is used to size the system
    PTC = 280.5
    cec_inverter = cec_inverters['ABB__MICRO_0_3_I_OUTD_US_240_240V__CEC_2014_']
    #TODO: check that the Paco is at least equal to the STC
    number_of_panels_1kWp = 1000/PTC
    system = PVSystem(surface_tilt=slope, surface_azimuth=aspect,
                  module_parameters=sandia_module,
                  inverter_parameters=cec_inverter)
    mc = ModelChain(system, location)
    mc.run_model(times=weather.index, weather=weather)
    pv_output = (mc.ac * number_of_panels_1kWp * installed_capacity_kWp).fillna(0)
    return pv_output

In [14]:
def get_pv_output_all(radiation_df, temperature_df, wind_speed_df, tracking, installed_capacity_kWp):
    '''runs the function pv_output_system for every PV plant listed in
    installations_chile and collects the results in a data frame with one
    column per plant. The result is labelled with a fixed hourly index from
    2014-01-01 00:00 to 2018-12-31 22:00, so the inputs must produce exactly
    that many time steps.'''
    hourly_index = pd.date_range(start='2014-01-01 00:00:00', end='2018-12-31 22:00:00', freq='H')
    pv_output_optimal = pd.DataFrame(index=pd.to_datetime(hourly_index),
                                     columns=installations_chile.index)
    for plant in installations_chile.index:
        plant_output = pv_output_system(radiation_df[plant],
                                        temperature_df[plant],
                                        wind_speed_df[plant],
                                        installations_chile.longitude[plant],
                                        installations_chile.latitude[plant],
                                        tracking,
                                        installed_capacity_kWp)
        # only the values are stored; the time labels come from hourly_index
        pv_output_optimal[plant] = plant_output.values
    return pv_output_optimal

In [15]:
# PV output of 1 kWp for the non-tracking configuration (tracking=0).
timeseries_pv_optimal_era5l = get_pv_output_all(
    radiation_df=era5l_radiation_30m,
    temperature_df=era5l_temperature_30m,
    wind_speed_df=era5l_wind_speed_30m,
    tracking=0,
    installed_capacity_kWp=1)
timeseries_pv_optimal_merra2 = get_pv_output_all(
    radiation_df=merra2_radiation_30m,
    temperature_df=merra2_temperature_30m,
    wind_speed_df=merra2_wind_speed_30m,
    tracking=0,
    installed_capacity_kWp=1)
# The clear-sky run uses a constant temperature of 20 and a wind speed of 0,
# built from the ERA5-land frames so that index and columns match.
timeseries_pv_optimal_clear_sky = get_pv_output_all(
    radiation_df=clear_sky_ghi_all,
    temperature_df=era5l_temperature_30m * 0 + 20,
    wind_speed_df=era5l_wind_speed_30m * 0,
    tracking=0,
    installed_capacity_kWp=1)

In [16]:
# PV output of 1 kWp for the single-axis tracking configuration (tracking=1).
timeseries_pv_tracking_era5l = get_pv_output_all(
    radiation_df=era5l_radiation_30m,
    temperature_df=era5l_temperature_30m,
    wind_speed_df=era5l_wind_speed_30m,
    tracking=1,
    installed_capacity_kWp=1)
timeseries_pv_tracking_merra2 = get_pv_output_all(
    radiation_df=merra2_radiation_30m,
    temperature_df=merra2_temperature_30m,
    wind_speed_df=merra2_wind_speed_30m,
    tracking=1,
    installed_capacity_kWp=1)
# The clear-sky run uses a constant temperature of 20 and a wind speed of 0,
# built from the ERA5-land frames so that index and columns match.
timeseries_pv_tracking_clear_sky = get_pv_output_all(
    radiation_df=clear_sky_ghi_all,
    temperature_df=era5l_temperature_30m * 0 + 20,
    wind_speed_df=era5l_wind_speed_30m * 0,
    tracking=1,
    installed_capacity_kWp=1)

In [17]:
# Write every PV output data frame to its csv file.
for _frame, _csv_name in (
        (timeseries_pv_optimal_era5l, 'intermediate_results/timeseries_pv_optimal_era5l.csv'),
        (timeseries_pv_optimal_merra2, 'intermediate_results/timeseries_pv_optimal_merra2.csv'),
        (timeseries_pv_tracking_era5l, 'intermediate_results/timeseries_pv_tracking_era5l.csv'),
        (timeseries_pv_tracking_merra2, 'intermediate_results/timeseries_pv_tracking_merra2.csv'),
        (timeseries_pv_optimal_clear_sky, 'intermediate_results/timeseries_pv_optimal_clear_sky.csv'),
        (timeseries_pv_tracking_clear_sky, 'intermediate_results/timeseries_pv_tracking_clear_sky.csv'),
):
    _frame.to_csv(Path(_csv_name))

## Transform PV output data into capacity factors 

In [18]:
def capacity_factors(file_with_df_pv_output):
    '''Read a csv file of time series (first column is the datetime index)
    and normalise every column by its own maximum. Every entry whose
    original value is not greater than 0 (including missing values) is set
    to 0 in the result.'''
    pv_output = pd.read_csv(file_with_df_pv_output, index_col=0, parse_dates=True)
    column_maxima = pv_output.max()
    normalised = pv_output.div(column_maxima)
    is_positive = pv_output > 0
    return normalised.where(is_positive, 0).copy()

In [19]:
# Capacity factors of the stored PV output data sets.
timeseries_capacity_factors_pv_optimal_era5l = capacity_factors(
    Path('intermediate_results/timeseries_pv_optimal_era5l.csv'))
timeseries_capacity_factors_pv_optimal_merra2 = capacity_factors(
    Path('intermediate_results/timeseries_pv_optimal_merra2.csv'))
timeseries_capacity_factors_pv_tracking_era5l = capacity_factors(
    Path('intermediate_results/timeseries_pv_tracking_era5l.csv'))
timeseries_capacity_factors_pv_tracking_merra2 = capacity_factors(
    Path('intermediate_results/timeseries_pv_tracking_merra2.csv'))

In [20]:
# The clear-sky references are normalised irradiance series: clear-sky GHI
# for the non-tracking case and clear-sky global in-plane irradiance for
# the tracking case.
timeseries_capacity_factors_pv_optimal_clear_sky_rad = capacity_factors(
    Path('intermediate_results/clear_sky_ghi_all_00.csv'))
timeseries_capacity_factors_pv_tracking_clear_sky_rad = capacity_factors(
    Path('intermediate_results/clear_sky_global_in_plane_all_00.csv'))

In [21]:
# Write every capacity factor data frame to its csv file.
for _frame, _csv_name in (
        (timeseries_capacity_factors_pv_optimal_era5l,
         'intermediate_results/timeseries_capacity_factors_pv_optimal_era5l.csv'),
        (timeseries_capacity_factors_pv_optimal_merra2,
         'intermediate_results/timeseries_capacity_factors_pv_optimal_merra2.csv'),
        (timeseries_capacity_factors_pv_tracking_era5l,
         'intermediate_results/timeseries_capacity_factors_pv_tracking_era5l.csv'),
        (timeseries_capacity_factors_pv_tracking_merra2,
         'intermediate_results/timeseries_capacity_factors_pv_tracking_merra2.csv'),
        (timeseries_capacity_factors_pv_tracking_clear_sky_rad,
         'intermediate_results/timeseries_capacity_factors_pv_tracking_clear_sky_rad.csv'),
        (timeseries_capacity_factors_pv_optimal_clear_sky_rad,
         'intermediate_results/timeseries_capacity_factors_pv_optimal_clear_sky_rad.csv'),
):
    _frame.to_csv(Path(_csv_name))

## Deseasonalize the calculated data

In [22]:
def deseason_pv_output(clear_sky,reanalysis):
    '''Remove the clear-sky signal from a capacity factor data set.

    Both inputs are data frames with the same dimensions and temporal
    index; the result is reanalysis minus clear_sky, element by element.'''
    return reanalysis - clear_sky

In [24]:
# Deseasonalise the ERA5-land and MERRA2 capacity factors with the matching
# clear-sky reference (optimal or tracking).
des_rad_optimal_era5l = deseason_pv_output(
    clear_sky=timeseries_capacity_factors_pv_optimal_clear_sky_rad,
    reanalysis=timeseries_capacity_factors_pv_optimal_era5l)
des_rad_tracking_era5l = deseason_pv_output(
    clear_sky=timeseries_capacity_factors_pv_tracking_clear_sky_rad,
    reanalysis=timeseries_capacity_factors_pv_tracking_era5l)
des_rad_optimal_merra2 = deseason_pv_output(
    clear_sky=timeseries_capacity_factors_pv_optimal_clear_sky_rad,
    reanalysis=timeseries_capacity_factors_pv_optimal_merra2)
des_rad_tracking_merra2 = deseason_pv_output(
    clear_sky=timeseries_capacity_factors_pv_tracking_clear_sky_rad,
    reanalysis=timeseries_capacity_factors_pv_tracking_merra2)

In [25]:
# Write the deseasonalised data frames to csv files.
for _frame, _csv_name in (
        (des_rad_optimal_era5l, 'intermediate_results/deseason_rad_optimal_era5l.csv'),
        (des_rad_tracking_era5l, 'intermediate_results/deseason_rad_tracking_era5l.csv'),
        (des_rad_optimal_merra2, 'intermediate_results/deseason_rad_optimal_merra2.csv'),
        (des_rad_tracking_merra2, 'intermediate_results/deseason_rad_tracking_merra2.csv'),
):
    _frame.to_csv(Path(_csv_name))

## Deseasonalize the measured data

In [26]:
# Capacity factors of the measured data.
reference = pd.read_csv(
    Path('intermediate_results/time_series_PV_sen_chile_capacity_factors.csv'),
    parse_dates=True,
    index_col=0)
# Blank out the records of two installations up to and including the given
# month.
for _plant, _last_discarded_month in (('SOLAR HORMIGA', '2017-09'),
                                      ('SOLAR EL AGUILA I', '2017-11')):
    reference.loc[:_last_discarded_month, _plant] = np.nan

In [27]:
#modify the calculated data sets time series to match chilean summer time
#this cannot be performed automatically since the daylight summer time in chile changes from year to year
def summer_time_chile(cf_file):
    '''Read a csv file of capacity factors (first column is the datetime
    index) and move the values one time step earlier inside the hard-coded
    date windows of 2014, 2016, 2017 and 2018 (no window is defined for
    2015). Values outside the windows are returned unchanged.

    The shift is computed on the complete series before a window is copied,
    so the last time step of a window receives the value of the following
    time step instead of being left empty.'''
    cf_utc = pd.read_csv(cf_file, index_col=0, parse_dates=True)
    shifted = cf_utc.shift(periods=-1)
    windows = (("2014-04-27", "2014-09-07"),
               ("2016-05-15", "2016-08-14"),
               ("2017-05-14", "2017-08-14"),
               ("2018-05-13", "2018-08-12"))
    cf_utc_st = cf_utc.copy()
    for first_day, last_day in windows:
        shifted_window = shifted.loc[first_day:last_day]
        # nothing to replace when the data does not cover this window
        if shifted_window.empty:
            continue
        cf_utc_st.loc[first_day:last_day] = shifted_window
    return cf_utc_st

In [28]:
# Clear-sky references adjusted to Chilean summer time and labelled as UTC.
st_clearsky_rad_capacity_factors_optimal = summer_time_chile(
    Path('intermediate_results/timeseries_capacity_factors_pv_optimal_clear_sky_rad.csv')
).tz_localize('UTC')
st_clearsky_rad_capacity_factors_tracking = summer_time_chile(
    Path('intermediate_results/timeseries_capacity_factors_pv_tracking_clear_sky_rad.csv')
).tz_localize('UTC')

In [29]:
# Deseasonalise the measured capacity factors; the first three rows of each
# result are discarded.
des_rad_optimal_reference = deseason_pv_output(
    clear_sky=st_clearsky_rad_capacity_factors_optimal,
    reanalysis=reference)[3:]
des_rad_tracking_reference = deseason_pv_output(
    clear_sky=st_clearsky_rad_capacity_factors_tracking,
    reanalysis=reference)[3:]

In [30]:
# Write the deseasonalised measured data to csv files.
for _frame, _csv_name in (
        (des_rad_optimal_reference, 'intermediate_results/deseason_rad_optimal_reference.csv'),
        (des_rad_tracking_reference, 'intermediate_results/deseason_rad_tracking_reference.csv'),
):
    _frame.to_csv(Path(_csv_name))

## Aggregate Capacity factors of installations with and without tracking  

In [31]:
# Classification of the installations from the automatic procedure; the
# second column of the file becomes the index.
installations_selection = pd.read_csv(
    Path('intermediate_results/installation_classification.csv'),
    index_col=1)
# Column for the start of operation, initialised with a placeholder value.
installations_selection = installations_selection.assign(
    operation_start='2018-12-31 22:00:00+00:00')

In [32]:
#installations_selection

In [33]:
def get_operation_start(installations_selection, reference):
    '''Store, for every installation whose Type is not 'erroneous', the
    index label of its first non-NaN value in reference in the
    'operation_start' column of installations_selection.

    installations_selection is modified in place; nothing is returned.'''
    is_classified = installations_selection.Type != 'erroneous'
    for plant in installations_selection.index[is_classified]:
        first_record = reference[plant].first_valid_index()
        installations_selection.loc[plant, 'operation_start'] = first_record
# Fill the 'operation_start' column in place for every installation that is
# not classified as 'erroneous'.
get_operation_start(installations_selection, reference)

In [34]:
def get_cumulated_time_series(df_all_time_series, installations_to_agregate, installations_selection):
    '''Aggregate the time series of a set of installations into one series.

    For every installation in installations_to_agregate (a subset of
    installations_selection) the values up to and including its
    'operation_start' label are set to NaN. The remaining values are summed
    per time step and divided by the total number of aggregated
    installations; time steps where all installations are NaN stay NaN.
    Returns a data frame with the single column "CUMULATED".'''
    in_operation = df_all_time_series[installations_to_agregate].copy()
    for plant in in_operation.columns:
        operation_start = installations_selection.loc[plant, 'operation_start']
        in_operation.loc[:operation_start, plant] = np.nan
    number_of_plants = in_operation.columns.size
    # min_count=1 keeps NaN where no installation has a value
    per_step = in_operation.sum(axis=1, min_count=1) / number_of_plants
    return per_step.to_frame(name="CUMULATED")

In [35]:
# Aggregated measured capacity factors per installation type.
cumulated_non_tracking_reference = get_cumulated_time_series(
    df_all_time_series=reference,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'non-tracking'],
    installations_selection=installations_selection)
cumulated_tracking_reference = get_cumulated_time_series(
    df_all_time_series=reference,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'tracking'],
    installations_selection=installations_selection)

In [36]:
# Aggregated ERA5-land capacity factors per installation type.
cumulated_non_tracking_era5l = get_cumulated_time_series(
    df_all_time_series=timeseries_capacity_factors_pv_optimal_era5l,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'non-tracking'],
    installations_selection=installations_selection)
cumulated_tracking_era5l = get_cumulated_time_series(
    df_all_time_series=timeseries_capacity_factors_pv_tracking_era5l,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'tracking'],
    installations_selection=installations_selection)

In [37]:
# Aggregated MERRA2 capacity factors per installation type.
cumulated_non_tracking_merra2 = get_cumulated_time_series(
    df_all_time_series=timeseries_capacity_factors_pv_optimal_merra2,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'non-tracking'],
    installations_selection=installations_selection)
cumulated_tracking_merra2 = get_cumulated_time_series(
    df_all_time_series=timeseries_capacity_factors_pv_tracking_merra2,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'tracking'],
    installations_selection=installations_selection)

In [38]:
# Write the aggregated capacity factors to csv files.
for _frame, _csv_name in (
        (cumulated_non_tracking_reference, 'intermediate_results/cumulated_non_tracking_reference.csv'),
        (cumulated_tracking_reference, 'intermediate_results/cumulated_tracking_reference.csv'),
        (cumulated_non_tracking_era5l, 'intermediate_results/cumulated_non_tracking_era5l.csv'),
        (cumulated_tracking_era5l, 'intermediate_results/cumulated_tracking_era5l.csv'),
        (cumulated_non_tracking_merra2, 'intermediate_results/cumulated_non_tracking_merra2.csv'),
        (cumulated_tracking_merra2, 'intermediate_results/cumulated_tracking_merra2.csv'),
):
    _frame.to_csv(Path(_csv_name))

## Aggregate Capacity factors of deseasonalised data of installations with and without tracking  

In [39]:
# Aggregated deseasonalised measured data per installation type.
cumulated_deseason_non_tracking_reference = get_cumulated_time_series(
    df_all_time_series=des_rad_optimal_reference,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'non-tracking'],
    installations_selection=installations_selection)
cumulated_deseason_tracking_reference = get_cumulated_time_series(
    df_all_time_series=des_rad_tracking_reference,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'tracking'],
    installations_selection=installations_selection)

In [40]:
# Aggregated deseasonalised ERA5-land data per installation type.
cumulated_deseason_non_tracking_era5l = get_cumulated_time_series(
    df_all_time_series=des_rad_optimal_era5l,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'non-tracking'],
    installations_selection=installations_selection)
cumulated_deseason_tracking_era5l = get_cumulated_time_series(
    df_all_time_series=des_rad_tracking_era5l,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'tracking'],
    installations_selection=installations_selection)

In [41]:
# Aggregated deseasonalised MERRA2 data per installation type.
cumulated_deseason_non_tracking_merra2 = get_cumulated_time_series(
    df_all_time_series=des_rad_optimal_merra2,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'non-tracking'],
    installations_selection=installations_selection)
cumulated_deseason_tracking_merra2 = get_cumulated_time_series(
    df_all_time_series=des_rad_tracking_merra2,
    installations_to_agregate=installations_selection.index[installations_selection.Type == 'tracking'],
    installations_selection=installations_selection)

In [42]:
# Write the aggregated deseasonalised data to csv files.
for _frame, _csv_name in (
        (cumulated_deseason_non_tracking_reference,
         'intermediate_results/cumulated_deseason_non_tracking_reference.csv'),
        (cumulated_deseason_tracking_reference,
         'intermediate_results/cumulated_deseason_tracking_reference.csv'),
        (cumulated_deseason_non_tracking_era5l,
         'intermediate_results/cumulated_deseason_non_tracking_era5l.csv'),
        (cumulated_deseason_tracking_era5l,
         'intermediate_results/cumulated_deseason_tracking_era5l.csv'),
        (cumulated_deseason_non_tracking_merra2,
         'intermediate_results/cumulated_deseason_non_tracking_merra2.csv'),
        (cumulated_deseason_tracking_merra2,
         'intermediate_results/cumulated_deseason_tracking_merra2.csv'),
):
    _frame.to_csv(Path(_csv_name))