In [21]:
# https://joint-research-centre.ec.europa.eu/pvgis-online-tool/getting-started-pvgis/api-non-interactive-service_en

import os, csv, json, requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
from zipfile import ZipFile
from IPython.display import clear_output

from pathlib import Path
# Render floats with two decimal places whenever pandas displays them.
pd.options.display.float_format = '{:.2f}'.format

In [23]:
# Notebook metadata: author contact, version and release date.
__author__  = "Fernando Fahl <fernando.fahl@ext.ec.europa.eu>"
__version__ = "1.0"
__date__    = "March 2023"

# 1. Parameters

## 1.1 PVGIS

In [13]:
# Query settings that are copied into every PVGIS request URL.
startyear = endyear = 2020
peakpower = 1
loss = 14

# "0" outputs only solar radiation calculations,
# "1" outputs the estimation of hourly PV production as well.
pvcalculation = 1

# Value of 1 for "yes". All other values (or no value) mean "no".
optimalangles = 1

# Excel workbook holding the input locations.
excel_filename = 'SLE_results1.13.xlsx'

## 1.2 Time zone transformation

This transformation applies to the entire dataset. \
If different time zones are required, split the input dataset into separate files, one for each time zone.\
It shifts the data according to the time zone, so the shifted data may fall in a different time period (e.g. the first or last column may belong to a different year).

In [14]:
# Offset in hours added to every PVGIS timestamp to obtain the local time (0 leaves the timestamps unshifted).
time_zone     = 0

# 2. Data load

In [24]:
# Everything is resolved against the current working directory; output
# names carry today's ISO date.
home        = Path.cwd()
today       = date.today().isoformat()
xls_file    = home / excel_filename

# Result file and error log, both named after the input workbook.
csv_outfile = home / f'{xls_file.stem}_PVGIS_year{startyear}to{endyear}_timezone{time_zone}_{today}.csv'
csv_errors  = home / f'{xls_file.stem}_ERRORs_{today}.csv'

# print (csv_outfile)
# print (csv_errors)

In [16]:
# Read the workbook and keep only the location identifier and coordinates.
columns = ['idSet', 'latitude', 'longitude']
df_xls = pd.read_excel(xls_file)
df = df_xls.loc[:, columns].copy()
# df = df_xls[columns].head(105).copy() # test only

print(df.head())

   idSet  latitude  longitude
0      1      7.82     -10.92
1      2      8.28     -10.38
2      3      8.28     -10.37
3      4      8.21     -10.35
4      5      8.17     -10.36


# 3. PVGIS

## 3.1 Create URL

In [17]:
def get_url(lat, lon):
    """Build the PVGIS ``seriescalc`` request URL for one location.

    Parameters
    ----------
    lat, lon : str or float
        Latitude and longitude, used as the ``lat`` / ``lon`` query values.

    Returns
    -------
    str
        The v5.2 ``seriescalc`` endpoint followed by the encoded query string,
        with ``json`` requested as output format.

    Notes
    -----
    The remaining query values (``peakpower``, ``loss``, ``startyear``,
    ``endyear``, ``pvcalculation``, ``optimalangles``) are read from the
    notebook-level parameters of the same names.
    """
    # Local import keeps the function self-contained. urlencode percent-escapes
    # every value instead of pasting it raw into the URL.
    from urllib.parse import urlencode

    url_base = "https://re.jrc.ec.europa.eu/api/v5_2/seriescalc?"

    pvgis_params = dict(
        peakpower=peakpower,
        loss=loss,
        startyear=startyear,
        endyear=endyear,
        pvcalculation=pvcalculation,  # "0" outputs only solar radiation calculations, "1" outputs the estimation of hourly PV production as well
        optimalangles=optimalangles,
        lat=lat,
        lon=lon,
        outputformat='json',
    )

    return url_base + urlencode(pvgis_params)


## 3.2 Parse PVGIS

In [18]:
def parse_json(id, data):
    """Turn one PVGIS hourly response into a single-row, wide DataFrame.

    Parameters
    ----------
    id : int
        Identifier stored in the ``id`` column of the result.
    data : dict
        Decoded PVGIS JSON. It must provide ``inputs`` (location and fixed
        mounting angles) and ``outputs.hourly`` (records with ``time`` and ``P``).

    Returns
    -------
    pandas.DataFrame
        One row (index ``'P'``) with the columns ``id``, ``slope``, ``azimuth``
        and ``azimuth_corrected``, followed by one column per hourly reading.
        Reading columns are labelled ``YYYY-MM-DD:HH`` after shifting the
        timestamps by the notebook-level ``time_zone`` (hours).

    Raises
    ------
    ValueError
        If ``data`` has no ``inputs`` or ``outputs.hourly`` section, e.g. when
        PVGIS answered with an error message instead of a time series.
    """
    # ____________ validate the payload up front so failures are explicit
    inputs = data.get('inputs') if isinstance(data, dict) else None
    outputs = data.get('outputs') if isinstance(data, dict) else None
    hourly = outputs.get('hourly') if isinstance(outputs, dict) else None
    if not inputs or not hourly:
        raise ValueError("PVGIS response has no 'inputs' / 'outputs.hourly' section")

    # ____________ parse data to df
    df_input = pd.json_normalize(inputs)
    df_output = pd.json_normalize(hourly)

    # json_normalize yields one row, so take scalars rather than 1-element arrays
    latitude = float(df_input['location.latitude'].iloc[0])

    # ____________ get optimal angles
    slope_optimized = df_input['mounting_system.fixed.slope.value'].iloc[0]
    azimuth_optimized = df_input['mounting_system.fixed.azimuth.value'].iloc[0]
    azimuth_corrected = 0 if latitude < 0 else 180

    # ____________ round the timestamps to the nearest hour (do not truncate:
    # that would shift some readings). Series.round() is the numeric rounding
    # method, so the datetime accessor .dt.round() has to be used here.
    timestamps = pd.to_datetime(df_output['time'], format='%Y%m%d:%H%M').dt.round('h')
    local_time = timestamps + pd.Timedelta(hours=time_zone)

    # ____________ label each reading with its local hour (minutes removed)
    local_labels = local_time.dt.strftime('%Y-%m-%d:%H').rename('local_time')
    df_power = df_output[['P']].set_index(local_labels)

    # ____________ transpose df: one row, one column per hour
    df_tranpose = df_power.transpose()

    # ____________ add columns (each inserted at position 0, so the final order
    # is id, slope, azimuth, azimuth_corrected)
    df_tranpose.insert(0, 'azimuth_corrected', azimuth_corrected)
    df_tranpose.insert(0, 'azimuth', azimuth_optimized)
    df_tranpose.insert(0, 'slope', slope_optimized)
    df_tranpose.insert(0, 'id', id)

    return df_tranpose
    

## 3.3 Query PVGIS

In [27]:
def save_csv(dfs, dfs_error):
    """Write the collected results and errors as gzip-compressed CSV files.

    Parameters
    ----------
    dfs : dict
        Maps an id to its result DataFrame. The values are concatenated and
        written to ``<csv_outfile>.gz``.
    dfs_error : dict
        Maps an id to its error DataFrame. The values are concatenated and
        written to ``<csv_errors>.gz``.

    Notes
    -----
    A file is skipped when its dictionary is empty: ``pd.concat`` raises
    ``ValueError`` when given nothing to concatenate, which would otherwise
    abort the run (and drop the error log) if no location has succeeded yet.
    """
    if dfs:
        df_merged = pd.concat(list(dfs.values()), ignore_index=True)
        df_merged.to_csv(f"{csv_outfile}.gz", index=False, compression='gzip')

    if dfs_error:
        df_merged_error = pd.concat(list(dfs_error.values()), ignore_index=True)
        df_merged_error.to_csv(f"{csv_errors}.gz", index=False, compression='gzip')


In [28]:
# Query PVGIS once per input row. Successful responses are collected in `dfs`,
# failures in `dfs_error`, and both are flushed to disk periodically so a long
# run can be interrupted without losing everything.
i = 0
dfs = dict()
dfs_error = dict()

# Seconds to wait for PVGIS before giving up on a single location.
request_timeout = 60

# One session for the whole run so the HTTPS connection is reused between requests.
with requests.Session() as session:

    for idx, row in df.iterrows():

        # Convert the row values to plain Python types explicitly
        # (`set_id` instead of `id` avoids shadowing the builtin).
        set_id = int(row['idSet'])
        lat = str(row['latitude'])
        lon = str(row['longitude'])
        url = get_url(lat, lon)

        # _____________ get data from pvgis
        # A timeout, connection error or non-JSON body is recorded as an error
        # for this location instead of aborting the whole run.
        try:
            response = session.get(url, timeout=request_timeout)
            row_json = response.json()
        except (requests.RequestException, ValueError) as err:
            row_json = {'message': f'request failed: {err}'}

        # _____________ parse data from pvgis
        try:
            dfs[set_id] = parse_json(set_id, row_json)
            print(f'done {i}: id={set_id} url={url}')
        except Exception as err:
            # Prefer the message sent by PVGIS; fall back to the exception text
            # when the payload carries none.
            message = row_json.get('message') if isinstance(row_json, dict) else None
            if message is None:
                message = f'{type(err).__name__}: {err}'
            dfs_error[set_id] = pd.DataFrame(dict(id=set_id, lat=lat, lon=lon, message=message, url=url), index=[0])
            print(f'ERROR {i} --> id: {set_id}, message: {message} url: {url}')

        # _____________ save partial results
        if i > 100:
            save_csv(dfs, dfs_error)
            i = 0
            clear_output(wait=False)

        i += 1

# _____________ save final results
save_csv(dfs, dfs_error)


ERROR 1 --> id: 8385, message: Location over the sea. Please, select another location url: https://re.jrc.ec.europa.eu/api/v5_2/seriescalc?peakpower=1&loss=14&startyear=2020&endyear=2020&pvcalculation=1&optimalangles=1&lat=8.447099&lon=-13.26918&outputformat=json
ERROR 2 --> id: 8386, message: Location over the sea. Please, select another location url: https://re.jrc.ec.europa.eu/api/v5_2/seriescalc?peakpower=1&loss=14&startyear=2020&endyear=2020&pvcalculation=1&optimalangles=1&lat=8.490007&lon=-13.26516&outputformat=json
ERROR 3 --> id: 8387, message: Location over the sea. Please, select another location url: https://re.jrc.ec.europa.eu/api/v5_2/seriescalc?peakpower=1&loss=14&startyear=2020&endyear=2020&pvcalculation=1&optimalangles=1&lat=8.450403&lon=-13.27778&outputformat=json
ERROR 4 --> id: 8388, message: Location over the sea. Please, select another location url: https://re.jrc.ec.europa.eu/api/v5_2/seriescalc?peakpower=1&loss=14&startyear=2020&endyear=2020&pvcalculation=1&optim