                                                                            Luis Ramirez Camargo, June 2020

# Retrieve solar radiation, temperature and wind speed data from ERA5-land and MERRA2 data for the locations of the PV installations in Chile

This notebook generates CSV files with time series of solar radiation, temperature and wind speed for the locations of all large PV installations in Chile from both the ERA5-land data and the MERRA2 global reanalysis. These files are necessary to speed up the calculations of PV power generation.
See related notebooks: clean_measured_pv_data_installations_chile, pv_output_from_ERA5_land_and_merra2 and pv_validation_ERA5_land_MERRA2_rn

In [1]:
import xarray as xr
import pandas as pd
from pandas.plotting import register_matplotlib_converters
import numpy as np
import geopandas as gpd
import glob
from tkinter import Tcl
import gc
import unidecode
import datetime
import scipy as sp

## Import the installations and the reanalysis data sets

In [2]:
# Shapefile holding the locations of the PV installations in Chile
installations_chile = "/home/lramirez/my-data/pv_generation_per_country/chile/solares_fd0779de_0870_4194_b962_83a842d8c316.shp"

# Read the plant locations and reproject them to EPSG:4326
pvs_chile = gpd.read_file(filename=installations_chile)
pvs_chile_4326 = pvs_chile.to_crs(epsg=4326)

# The "NOMBRE" column is used as the identifier of each installation
installations_list = pvs_chile_4326["NOMBRE"]

In [3]:
# Coordinate table indexed by installation name.
# Longitudes are shifted by +360 (the format the ERA5-land look-ups below are
# fed with); the MERRA2 radiation function subtracts the 360 again.
plant_points = pvs_chile_4326.geometry
installations = pd.DataFrame(
    {
        'latitude': plant_points.y.values,
        'longitude': plant_points.x.values + 360,
    },
    index=installations_list,
)

In [None]:
# Open every derived ERA5-land file matching the pattern as one dataset,
# concatenated along 'time' and chunked 200 x 200 over lon/lat
input_era5l_data = "/home/lramirez/my-data/ERA5_land_data/derived/era5l_*"
era5 = xr.open_mfdataset(
    input_era5l_data,
    combine='nested',
    concat_dim='time',
    chunks={'lon': 200, 'lat': 200},
)

In [29]:
# Open the MERRA2 files, one dataset per variable sub-directory.
# Every dataset is concatenated along 'time' and chunked 200 x 200 over lon/lat.
basis_dir_merra = "/data/scratch/merra2-chile/"
merra2_open_options = dict(combine='nested', concat_dim='time', chunks={'lon': 200, 'lat': 200})

merra2_radiation = xr.open_mfdataset(basis_dir_merra + 'SWGDN/MERRA*', **merra2_open_options)
merra2_temperature = xr.open_mfdataset(basis_dir_merra + 'T2M/MERRA*', **merra2_open_options)
merra2_wind_u = xr.open_mfdataset(basis_dir_merra + 'U2M/MERRA*', **merra2_open_options)
merra2_wind_v = xr.open_mfdataset(basis_dir_merra + 'V2M/MERRA*', **merra2_open_options)

In [None]:
# Hourly time stamps used as the row index of every extracted time series,
# from 2014-01-01 00:00 to 2018-12-31 23:00 (both included).
# The frequency is given as an explicit offset object instead of the 'H'
# string alias, which recent pandas releases deprecate.
date_range = pd.date_range(
    start='2014-01-01 00:00:00',
    end='2018-12-31 23:00:00',
    freq=pd.offsets.Hour(1),
)

## Get solar radiation data from ERA5 land for each location

In [8]:
# Empty table for the ERA5-land radiation: one row per time stamp, one column per plant
timeseries_era5land_radiation = pd.DataFrame(
    index=pd.to_datetime(date_range),
    columns=installations_list,
)

In [9]:
def get_era5l_radiation_in_w(lons, lats):
    '''Return the ERA5-land radiation series of the grid cell nearest to a coordinate.

    The variable var169 of the global ``era5`` dataset is selected at the
    nearest lon/lat, differenced along "time" (each difference is labelled
    with the later time stamp), differences that are not positive are set
    to 0, and the result is divided by 3600 (described by the author as the
    conversion to Wh/m2; presumably the input is accumulated J/m2 - confirm).

    lons -- longitude of the location, in the longitude convention of ``era5``
    lats -- latitude of the location
    '''
    accumulated = era5.var169.sel(lon=lons, lat=lats, method='nearest')
    step_difference = accumulated.diff("time", 1, label="upper")
    # anything that is not strictly positive (including missing values) becomes 0
    non_negative = step_difference.where(step_difference > 0, 0)
    return non_negative / 3600

In [60]:
# Run the extraction for every plant and store the series in the plant's column.
# The series is written to every row except the last one. The rows are
# addressed by their labels (index[:-1]) because an integer slice such as
# .loc[:-1] on a DatetimeIndex is only accepted (as a positional slice) by
# old pandas releases and raises a TypeError in newer ones.
for plant in installations.index:
    timeseries_era5land_radiation.loc[timeseries_era5land_radiation.index[:-1], plant] = get_era5l_radiation_in_w(
        installations.loc[plant, 'longitude'],
        installations.loc[plant, 'latitude'],
    )

In [11]:
# Write the ERA5-land radiation table to a CSV file (path relative to the working directory)
timeseries_era5land_radiation.to_csv(
    path_or_buf='time_series_from_reanalysis_data_sets/eral5_radiation_all_installations.csv'
)

## Get temperature data from ERA5 land for each location

In [None]:
# Empty table for the ERA5-land temperature: one row per time stamp, one column per plant
timeseries_era5land_temperature = pd.DataFrame(
    index=pd.to_datetime(date_range),
    columns=installations_list,
)

In [None]:
def get_era5l_temperature_in_c(lons, lats):
    '''Return the ERA5-land temperature values of the grid cell nearest to a coordinate.

    The variable var167 of the global ``era5`` dataset is selected at the
    nearest lon/lat and 273.15 is subtracted (Kelvin to degrees Celsius,
    according to the author). The first element is dropped and the remaining
    values are returned as a plain array.

    lons -- longitude of the location, in the longitude convention of ``era5``
    lats -- latitude of the location
    '''
    nearest_cell = era5.var167.sel(lon=lons, lat=lats, method='nearest')
    in_celsius = nearest_cell - 273.15
    # skip the first element so the series starts at the second time step
    return in_celsius[1:].values

In [None]:
# Run the extraction for every plant and store the series in the plant's column.
# The series is written to every row except the last one. The rows are
# addressed by their labels (index[:-1]) because an integer slice such as
# .loc[:-1] on a DatetimeIndex is only accepted (as a positional slice) by
# old pandas releases and raises a TypeError in newer ones.
for plant in installations.index:
    timeseries_era5land_temperature.loc[timeseries_era5land_temperature.index[:-1], plant] = get_era5l_temperature_in_c(
        installations.loc[plant, 'longitude'],
        installations.loc[plant, 'latitude'],
    )

In [None]:
# Write the ERA5-land temperature table to a CSV file (path relative to the working directory)
timeseries_era5land_temperature.to_csv(
    path_or_buf='time_series_from_reanalysis_data_sets/eral5_temperature_all_installations.csv'
)

## Get wind speed data from ERA5 land for each location

In [14]:
# Empty table for the ERA5-land wind speed: one row per time stamp, one column per plant
timeseries_era5land_wind_speed = pd.DataFrame(
    index=pd.to_datetime(date_range),
    columns=installations_list,
)

In [13]:
def get_era5l_wind_speed_in_m(lons, lats):
    '''Return the ERA5-land wind speed values of the grid cell nearest to a coordinate.

    The wind speed is the magnitude of the two components var165 and var166
    of the global ``era5`` dataset, multiplied by ln(1/0.25) / ln(10/0.25)
    (presumably a logarithmic wind profile scaling from 10 m to 1 m height
    with a roughness length of 0.25 m - confirm). The first element is
    dropped and the remaining values are returned as a plain array.

    lons -- longitude of the location, in the longitude convention of ``era5``
    lats -- latitude of the location
    '''
    component_a = era5.var165.sel(lon=lons, lat=lats, method='nearest')
    component_b = era5.var166.sel(lon=lons, lat=lats, method='nearest')
    magnitude = np.sqrt((component_a**2) + (component_b**2))
    height_factor = (np.log(1/0.25)) / (np.log(10/0.25))
    scaled = magnitude * height_factor
    # skip the first element so the series starts at the second time step
    return scaled[1:].values

In [61]:
# Run the extraction for every plant and store the series in the plant's column.
# The series is written to every row except the last one. The rows are
# addressed by their labels (index[:-1]) because an integer slice such as
# .loc[:-1] on a DatetimeIndex is only accepted (as a positional slice) by
# old pandas releases and raises a TypeError in newer ones.
for plant in installations.index:
    timeseries_era5land_wind_speed.loc[timeseries_era5land_wind_speed.index[:-1], plant] = get_era5l_wind_speed_in_m(
        installations.loc[plant, 'longitude'],
        installations.loc[plant, 'latitude'],
    )

In [18]:
# Write the ERA5-land wind speed table to a CSV file (path relative to the working directory)
timeseries_era5land_wind_speed.to_csv(
    path_or_buf='time_series_from_reanalysis_data_sets/eral5_wind_speed_all_installations.csv'
)

## Get solar radiation data from MERRA2 for each location

In [22]:
# Empty table for the MERRA2 radiation: one row per time stamp, one column per plant
timeseries_merra2_radiation = pd.DataFrame(
    index=pd.to_datetime(date_range),
    columns=installations_list,
)

In [21]:
def get_merra2_radiation_in_w(lons, lats):
    '''Return the MERRA2 radiation series of the grid cell nearest to a coordinate.

    The SWGDN variable of the global ``merra2_radiation`` dataset is selected
    at the nearest lon/lat and returned unchanged (values in Wh/m2 according
    to the author).

    lons -- longitude of the location carrying the +360 shift; the shift is
            removed here before the MERRA2 look-up
    lats -- latitude of the location
    '''
    return merra2_radiation.SWGDN.sel(lon=lons - 360, lat=lats, method='nearest')

In [62]:
# Extract the MERRA2 radiation series of every plant and store it in the plant's column
for plant in installations.index:
    plant_lon = installations.loc[plant, 'longitude']
    plant_lat = installations.loc[plant, 'latitude']
    timeseries_merra2_radiation.loc[:, plant] = get_merra2_radiation_in_w(plant_lon, plant_lat)

In [26]:
# Write the MERRA2 radiation table to a CSV file (path relative to the working directory)
timeseries_merra2_radiation.to_csv(
    path_or_buf='time_series_from_reanalysis_data_sets/merra2_radiation_all_installations.csv'
)

## Get temperature data from MERRA2 for each location

In [35]:
# Empty table for the MERRA2 temperature: one row per time stamp, one column per plant
timeseries_merra2_temperature = pd.DataFrame(
    index=pd.to_datetime(date_range),
    columns=installations_list,
)

In [40]:
def get_merra2_temperature_in_c(lons, lats):
    '''Return the MERRA2 temperature series of the grid cell nearest to a coordinate.

    The T2M variable of the global ``merra2_temperature`` dataset is selected
    at the nearest lon/lat and 273.15 is subtracted (Kelvin to degrees
    Celsius, according to the author).

    lons -- longitude of the location carrying the +360 shift; the shift is
            removed here before the MERRA2 look-up, as in
            get_merra2_radiation_in_w
    lats -- latitude of the location
    '''
    lons_wgs84 = lons - 360
    # The shifted longitude has to be used for the selection: with the
    # unshifted value the nearest-neighbour search would not pick the grid
    # cell of the plant.
    temperature = merra2_temperature.T2M.sel(lon=lons_wgs84, lat=lats, method='nearest') - 273.15
    return temperature

In [63]:
# Extract the MERRA2 temperature series of every plant and store it in the plant's column
for plant in installations.index:
    plant_lon = installations.loc[plant, 'longitude']
    plant_lat = installations.loc[plant, 'latitude']
    timeseries_merra2_temperature.loc[:, plant] = get_merra2_temperature_in_c(plant_lon, plant_lat)

In [59]:
# Write the MERRA2 temperature table to a CSV file (path relative to the working directory)
timeseries_merra2_temperature.to_csv(
    path_or_buf='time_series_from_reanalysis_data_sets/merra2_temperature_all_installations.csv'
)

## Get wind speed data from MERRA2 for each location

In [49]:
# Empty table for the MERRA2 wind speed: one row per time stamp, one column per plant
timeseries_merra2_wind_speed = pd.DataFrame(
    index=pd.to_datetime(date_range),
    columns=installations_list,
)

In [51]:
def get_merra2_wind_speed_in_m(lons, lats):
    '''Return the MERRA2 wind speed series of the grid cell nearest to a coordinate.

    The wind speed is the magnitude of the U2M and V2M components (global
    ``merra2_wind_u`` and ``merra2_wind_v`` datasets), multiplied by
    ln(1/0.25) / ln(2/0.25) (presumably a logarithmic wind profile scaling
    from 2 m to 1 m height with a roughness length of 0.25 m - confirm).

    lons -- longitude of the location carrying the +360 shift; the shift is
            removed here before the MERRA2 look-up, as in
            get_merra2_radiation_in_w
    lats -- latitude of the location
    '''
    # Both components must be read at the shifted longitude: with the
    # unshifted value the nearest-neighbour search would not pick the grid
    # cell of the plant.
    lons_wgs84 = lons - 360
    wind_u = merra2_wind_u.U2M.sel(lon=lons_wgs84, lat=lats, method='nearest')
    wind_v = merra2_wind_v.V2M.sel(lon=lons_wgs84, lat=lats, method='nearest')
    wind_speed_pre = np.sqrt((wind_u**2) + (wind_v**2))
    wind_speed = wind_speed_pre * ((np.log(1/0.25))/(np.log(2/0.25)))
    return wind_speed

In [52]:
# Extract the MERRA2 wind speed series of every plant and store it in the plant's column
for plant in installations.index:
    plant_lon = installations.loc[plant, 'longitude']
    plant_lat = installations.loc[plant, 'latitude']
    timeseries_merra2_wind_speed.loc[:, plant] = get_merra2_wind_speed_in_m(plant_lon, plant_lat)

In [58]:
# Write the MERRA2 wind speed table to a CSV file (path relative to the working directory)
timeseries_merra2_wind_speed.to_csv(
    path_or_buf='time_series_from_reanalysis_data_sets/merra2_wind_speed_all_installations.csv'
)