## Create PV load profile from: ninja_pv_52.5170_13.3889_corrected_2019_MERRA_Berlin.csv
 Renewables.ninja Solar PV (Point API) - 52.5170, 13.3889 - Version: 1.1 (using GSEE v0.3.1) - License: https://creativecommons.org/licenses/by-nc/4.0/ - Reference: https://doi.org/10.1016/j.energy.2016.08.060
### Units: time in UTC, local_time in Europe/Berlin, electricity in kW
### {"units": {"time": "UTC", "local_time": "Europe/Berlin", "electricity": "kW"}, "params": {"local_time": true, "header": true, "lat": "51.0834196", "lon": "10.4234469", "date_from": "2019-01-01", "date_to": "2019-12-31", "dataset": "merra2", "capacity": "1", "system_loss": "0.1", "tracking": "2", "tilt": "35", "azim": "180", "raw": false}}

#Tesla Solar panel 400 W [1 unit 1.88m x 1m]
## Solar panel capacity in the dataset = 1 kW --> Infrastructure for the simulation (40 panels, 60 m2) = 16 kW

In [1]:
%%capture
!pip install plotly==5.9.0

In [2]:
# Fetch the thesis repository, switch into its training-data folder and list its contents.
# NOTE(review): every path here is relative, so re-running this cell in the same session
# attempts a second clone from inside the data folder — run it once per fresh runtime.
!git clone https://github.com/francescomaldonato/RL_VPP_Thesis.git
%cd RL_VPP_Thesis/data/data_training/
%ls

In [4]:
# Folder layout, expressed relative to the current working directory
# (an empty prefix means "right here").
current_folder = ''

# Raw source files are read from input_folder; generated profiles go to output_folder.
input_folder = f'{current_folder}raw_datasets/'
output_folder = f'{current_folder}scenario_datasets/'

In [5]:
import numpy as np
import pandas as pd
import plotly.express as px

Data pre-process

In [6]:
# Read the hourly Renewables.ninja export, discard the local-time column and
# parse the remaining 'time' column into datetimes.
source_csv = input_folder + 'ninja_pv_52.5170_13.3889_corrected_2019_MERRA_Berlin.csv'
hourly = pd.read_csv(source_csv).drop(columns=['local_time'])
hourly = hourly.assign(time=pd.to_datetime(hourly['time']))

# Close the year: append one extra row stamped 2020-01-01 00:00:00 that repeats
# the last reading of the file. Its index label continues the existing numbering.
row_label = len(hourly)
closing_row = pd.DataFrame(
    [[pd.to_datetime("2020-01-01 00:00:00", format="%Y-%m-%d %H:%M:%S"), hourly['electricity'].iloc[-1]]],
    columns=["time", 'electricity'],
    index=[row_label],
)

pv_data = (
    pd.concat([hourly, closing_row], ignore_index=False)
    .rename(columns={'electricity': 'solar_power'})
    # Scale the PV capacity: multiply the dataset's output by 16.
    .assign(solar_power=lambda frame: frame['solar_power'] * 16)
)
pv_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8761 entries, 0 to 8760
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   time         8761 non-null   datetime64[ns]
 1   solar_power  8761 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 205.3 KB


Resampling dataset

In [7]:
# Upsample the profile onto a 15-minute grid keyed by the 'time' column.
# Only 'solar_power' is aggregated: the timestamps already live in the resulting
# DatetimeIndex, so no separate 'time' column has to be built and dropped again.
pv_data_15 = (
    pv_data
    .resample('15min', on='time')[['solar_power']]
    .min()    # bins that contain no source reading come out as NaN
    .ffill()  # hold the previous reading across those empty bins
)
# The forward fill is applied to the frame itself and its result is assigned;
# an in-place fillna on a selected column does not reliably write back to the frame.

# Keep exactly the span 2019-01-01 00:00 .. 2020-01-01 00:00 (both ends inclusive).
pv_data_ = pv_data_15.loc['2019-01-01 00:00:00':'2020-01-01 00:00:00']
pv_data_.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35041 entries, 2019-01-01 00:00:00 to 2020-01-01 00:00:00
Freq: 15T
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   solar_power  35041 non-null  float64
dtypes: float64(1)
memory usage: 547.5 KB


Saving and plotting data

In [8]:
# Write the 15-minute profile to the scenario folder, keeping the datetime index as the first column.
# to_csv returns None when given a path, so its result is not bound to a name.
pv_data_.to_csv(output_folder + 'PV_load_2019_profile.csv', index=True)

# Line chart of the profile, initially zoomed on June 2019, with a range slider
# for navigating the rest of the year.
fig = px.line(pv_data_["solar_power"], title='PV Solar power', color_discrete_sequence=["orange"])
fig.update_layout(
    yaxis_title="kW",
    xaxis_rangeslider_visible=True,
    xaxis_range=["2019-06-01 00:00:00", "2019-07-01 00:00:00"],
)
fig