# **Enrichment Data**

## Add:
* Solar position variables
* Theoretical auxiliary functions for other models

In [1]:
# Warnings
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
# For data science
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## **Get True Local-Time**

In [2]:
# Read the raw solar dataset
df = pd.read_csv('../data/solar_data.csv')

# Keep only the feature columns; everything listed here is removed
dropped_columns = ['G(h)', 'Gb(n)', 'Gd(h)', 'IR(h)',
                   'Month', 'Day', 'NumDay', 'Hour']
df = df.drop(columns=dropped_columns)

# Total count of missing cells across the whole frame
missing_total = df.isna().sum().sum()
print(f'Number of missing data: {missing_total}\n')
df.head(1)

Number of missing data: 0



Unnamed: 0,Date,Lat,Lon,Alt,SP,RH,WS10m,WD10m,T2m
0,2009-12-31 18:00:00,18.0,-100.0,787.0,92229.0,56.84,1.98,197.0,24.24


### Create the time zone and DatetimeIndex

In [3]:
# Look up the time-zone name from latitude and longitude
from tzwhere import tzwhere
tz = tzwhere.tzwhere()

# Resolve each distinct (lat, lon) pair only once and reuse the result for
# every row that shares it. Rows are walked positionally, so this does not
# depend on df having a default 0..n-1 index.
zone_by_location = {}
time_zones = []
for lat, lon in zip(df['Lat'].to_numpy(), df['Lon'].to_numpy()):
    location = (lat, lon)
    if location not in zone_by_location:
        zone_by_location[location] = tz.tzNameAt(lat, lon)
    time_zones.append(zone_by_location[location])

# One time-zone name per row, in row order
df['Time_zone'] = time_zones

In [4]:
# Get the correct time (from: minig_solar_info/manage_objects.py)

from math import trunc

# One whole-hour offset per row: the negated longitude divided by 15,
# truncated toward zero, wrapped as a numpy timedelta in hours.
correction_time = [
    np.timedelta64(trunc(-df.Lon[row] / 15), 'h')
    for row in range(len(df.Date))
]

### Get the real local time according to time zone

In [6]:
# Convert back to UTC: parse each date at hour resolution and add the
# per-row offset held in `correction_time`
time_utc = [
    np.array(date, dtype='datetime64[h]') + offset
    for date, offset in zip(df['Date'], correction_time)
]
df['Time_utc'] = np.array(time_utc)

# Tag the naive timestamps as UTC
date_utc = df['Time_utc'].dt.tz_localize('UTC')

# Choose the zone for the local-time column explicitly.
# The previous `df.Time_zone.values.all()` only produced a zone name through
# the legacy object-array reduction (Python `and` semantics); NumPy >= 2.0
# returns a plain bool there, which `tz_convert` cannot use.
known_zones = df['Time_zone'].dropna()
if known_zones.empty:
    raise ValueError('No time zone was resolved for any row of df.')

# Same zone the legacy reduction yielded when every row had a zone name
local_zone = known_zones.iloc[-1]

# A tz-aware datetime column holds a single zone, so make it visible when
# the data spans more than one.
distinct_zones = known_zones.unique()
if len(distinct_zones) > 1:
    zone_list = ', '.join(distinct_zones)
    print(f'NOTE: {len(distinct_zones)} time zones found ({zone_list}); '
          f'Local_time uses {local_zone} for every row.')

# Convert to local time
df['Local_time'] = date_utc.dt.tz_convert(local_zone)

In [7]:
# Preview the first row, now including the Time_zone, Time_utc and Local_time columns
df.head(1)

Unnamed: 0,Date,Lat,Lon,Alt,SP,RH,WS10m,WD10m,T2m,Time_zone,Time_utc,Local_time
0,2009-12-31 18:00:00,18.0,-100.0,787.0,92229.0,56.84,1.98,197.0,24.24,America/Mexico_City,2010-01-01,2009-12-31 18:00:00-06:00


## **Solar Position and Meteorological Data**

In [8]:
# Solar position functions
import pvlib as pv

### Get solar position Variables

In [9]:
# DatetimeIndex of the local timestamps, one entry per row of df.
# Use the public `pd.DatetimeIndex` rather than `pd.pandas.DatetimeIndex`,
# which only resolves through an incidental attribute of the pandas module.
time = pd.DatetimeIndex(df['Local_time'])

# Solar position for every row. All per-row inputs are passed as plain
# NumPy arrays so they are handled uniformly.
# NOTE(review): array-valued latitude/longitude worked with method='nrel_numpy'
# when this notebook was run - confirm this still holds after a pvlib upgrade.
solar_position = pv.solarposition.get_solarposition(
    time,
    df.Lat.values,
    df.Lon.values,
    altitude=df.Alt.values,
    pressure=df.SP.values,
    method='nrel_numpy',
    temperature=df.T2m.values,
)

In [10]:
# Preview the first row of the computed solar-position table
solar_position.head(1)

Unnamed: 0_level_0,apparent_zenith,zenith,apparent_elevation,elevation,azimuth,equation_of_time
Local_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31 18:00:00-06:00,87.288419,87.508232,2.711581,2.491768,244.795929,-3.299708


### Get meteorological data

In [11]:
# Precipitable water from temperature and relative humidity (Gueymard equation)
df['Precipitable_water'] = pv.atmosphere.gueymard94_pw(df.T2m, df.RH)

# Share of rows whose estimate lies in the closed interval [1, 3]
mask = df['Precipitable_water'].between(1, 3)
accuracy_values = df.loc[mask, 'Precipitable_water'].value_counts().sum()
total_values = df['Precipitable_water'].size
print(f'% Accuracy range: {accuracy_values/total_values*100}')

% Accuracy range: 49.378870168914695


In [12]:
# Linke turbidity

# Distinct (lat, lon) pairs in order of first appearance. Using the unique
# pairs (instead of detecting changes between consecutive rows) avoids
# duplicate entries when a site reappears later in the frame.
locations = list(
    df[['Lat', 'Lon']].drop_duplicates().itertuples(index=False, name=None)
)

# Look up the turbidity site by site and write each result back through the
# site's own row mask, so values land on the right rows whatever the row
# order is and however many rows each site has.
linke_turbidity = np.full(len(df), np.nan)
for lat, lon in locations:
    mask = ((df.Lat == lat) & (df.Lon == lon)).values
    site_times = pd.DatetimeIndex(df.loc[mask, 'Local_time'])
    site_turbidity = pv.clearsky.lookup_linke_turbidity(site_times, lat, lon)
    linke_turbidity[mask] = np.asarray(site_turbidity)

# Load with all data
df['Linke_turbidity'] = linke_turbidity

### Get irradiance variables

In [13]:
# Extraterrestrial radiation, computed from the local day of the year
dayof_year = df.Local_time.dt.dayofyear
extra_radiation = pv.irradiance.get_extra_radiation(dayof_year)
df['Extra_radiation'] = extra_radiation

### **Re-form**

In [14]:
# Preview the first row of the enriched frame, now including the
# Precipitable_water, Linke_turbidity and Extra_radiation columns
df.head(1)

Unnamed: 0,Date,Lat,Lon,Alt,SP,RH,WS10m,WD10m,T2m,Time_zone,Time_utc,Local_time,Precipitable_water,Linke_turbidity,Extra_radiation
0,2009-12-31 18:00:00,18.0,-100.0,787.0,92229.0,56.84,1.98,197.0,24.24,America/Mexico_City,2010-01-01,2009-12-31 18:00:00-06:00,2.726047,3.675,1413.940576


In [16]:
# Get Date data
solar = pd.DataFrame()
solar['TimeUtc'] = df['Time_utc']
solar['TimeZone'] = df['Time_zone']
solar['LocalTime'] = df['Local_time']

# Calendar components taken from the local time
local_time = solar['LocalTime'].dt
solar['Year'] = local_time.year
solar['MonthOfYear'] = local_time.month
# `Series.dt.week` was removed in pandas 2.0. Use isocalendar() where it
# exists (cast back to int64, the dtype `.dt.week` produced) and fall back to
# `.dt.week` on older pandas.
if hasattr(local_time, 'isocalendar'):
    solar['WeekOfYear'] = local_time.isocalendar().week.astype('int64')
else:
    solar['WeekOfYear'] = local_time.week
solar['DayOfYear'] = local_time.dayofyear
solar['HourOfDay'] = local_time.hour

# Geographical variables
solar['Latitude'] = df.Lat.values
solar['Longitude'] = df.Lon.values
solar['Altitude'] = df.Alt.values

# Solar position: columns are selected by name rather than by position, so a
# change in column order cannot silently mislabel them. `.values` copies by
# position because solar_position is indexed by time, not by row number.
solar['ApparentZenith'] = solar_position['apparent_zenith'].values
solar['Zenith'] = solar_position['zenith'].values
solar['ApparentElevation'] = solar_position['apparent_elevation'].values
solar['Elevation'] = solar_position['elevation'].values
solar['Azimuth'] = solar_position['azimuth'].values

# Climatological data
solar['Temperature'] = df.T2m.values
solar['Pressure'] = df.SP.values
solar['Humidity'] = df.RH.values
solar['LinkeTurbidity'] = df.Linke_turbidity.values
solar['PrecipitableWater'] = df.Precipitable_water.values
solar['WindSpeed'] = df.WS10m.values
solar['WindDirection'] = df.WD10m.values

# Radiation data
solar['ExtraRadiation'] = df.Extra_radiation.values

In [17]:
# Complete and Save

# Read only the three irradiance columns into their own frame. Keeping the
# enriched `df` untouched means the earlier cells can still be re-run after
# this one.
irradiance = pd.read_csv('../data/solar_data.csv',
                         usecols=['G(h)', 'Gb(n)', 'Gd(h)'])

# Copied by position: `solar` was built from the same file in the same row
# order. A length mismatch raises on assignment.
solar['GHI'] = irradiance['G(h)'].values
solar['DNI'] = irradiance['Gb(n)'].values
solar['DHI'] = irradiance['Gd(h)'].values

In [19]:
# Write the assembled frame to CSV as UTF-8, without the row index
solar.to_csv('../data/solar.csv', index=False, encoding='utf-8')