In [1]:
import pandas as pd
import numpy as np
import feather
import pvlib
import sys
sys.path.append('..')
from src.utils.download_data import timer

In [2]:
def clear_sky(df, info=None, model='ineichen', tz='Pacific/Honolulu'):
    ''' Compute clear-sky irradiance for a single station with pvlib.

    Meant to be used through ``groupby('Location').apply``: the station is
    read from ``df.name`` (the group key set by pandas) and its coordinates
    are looked up in ``info``.

    Parameters
    ----------
    df : group of observations; must expose ``name`` (station identifier)
        and an index with a 'Datetime' level.
    info : table indexed by station identifier with 'Latitude' and
        'Longitude' columns. Required, despite the ``None`` default that is
        kept for backward compatibility of the signature.
    model : clear-sky model name forwarded to ``Location.get_clearsky``.
    tz : timezone given to the pvlib ``Location``. Defaults to
        'Pacific/Honolulu', the value that used to be hard-coded.

    Returns
    -------
    The result of ``Location.get_clearsky`` for the 'Datetime' values of
    ``df`` (the notebook reads its 'ghi' column).

    Raises
    ------
    ValueError
        If ``info`` is not provided.
    '''
    if info is None:
        # fail early with an explicit message instead of an AttributeError
        # on ``None.loc`` further down
        raise ValueError("clear_sky() needs `info` with the station coordinates")

    lat, lon = info.loc[df.name, ['Latitude', 'Longitude']]
    location = pvlib.location.Location(lat, lon, tz=tz)
    times = df.index.get_level_values('Datetime')
    return location.get_clearsky(times, model=model)

In [3]:
# read minute data and location info
# NOTE(review): read_pickle can execute arbitrary code while unpickling; this
# is only acceptable for files the project produced itself.
df = (pd.read_pickle('/home/SHARED/SOLAR/data/oahu_min_norm.pkl')
        .set_index(['Datetime', 'Location']))

info = pd.read_csv('/home/SHARED/SOLAR/data/info.csv')

# normalize location names by removing every "HL_" (or bare "_") occurrence.
# The pattern is a regular expression, so regex=True is passed explicitly:
# pandas >= 2.0 treats the pattern as a literal string by default, which
# would leave the names untouched and break the lookup by station name.
info['Location'] = info['Location'].str.replace('(HL)?_', '', regex=True)
# reassign instead of inplace=True so the step reads as a plain transformation
info = info.set_index('Location')

In [4]:
# preview the first rows of the minute-level frame indexed by (Datetime, Location)
df.head()

Unnamed: 0_level_0,Radiation,GH,GT,ClearSky
Datetime,Location,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-03-19 14:15:00-10:00,AP1,476.328,,973.62924
2010-03-19 14:15:00-10:00,AP3,382.777,,973.625962
2010-03-19 14:15:00-10:00,AP4,351.61,,973.624679
2010-03-19 14:15:00-10:00,AP5,390.092,,973.630994
2010-03-19 14:15:00-10:00,AP6,353.928,343.313,973.628714


In [5]:
# Ineichen clear-sky model, evaluated station by station; timer() reports
# how long the whole pass takes
with timer():
    CS = (
        df.groupby('Location')
          .apply(clear_sky, info=info, model='ineichen')
    )

Elapsed time (s): 161.101768


In [6]:
# Haurwitz clear-sky model, evaluated station by station; timer() reports
# how long the whole pass takes
with timer():
    HCS = (
        df.groupby('Location')
          .apply(clear_sky, info=info, model='haurwitz')
    )

Elapsed time (s): 156.424363


In [7]:
# Simplified Solis clear-sky model, evaluated station by station; timer()
# reports how long the whole pass takes
with timer():
    SCS = (
        df.groupby('Location')
          .apply(clear_sky, info=info, model='simplified_solis')
    )

Elapsed time (s): 157.428710


In [8]:
# Keep only the 'ghi' column of each clear-sky result. The index levels are
# put in (Datetime, Location) order - the order df uses - before the
# index-aligned assignment (presumably apply() returned them the other way round).
df['Ineichen'] =  CS['ghi'].reorder_levels(['Datetime', 'Location'])
df['Haurwitz'] = HCS['ghi'].reorder_levels(['Datetime', 'Location'])
df['Solis']    = SCS['ghi'].reorder_levels(['Datetime', 'Location'])

In [9]:
# preview again: the Ineichen, Haurwitz and Solis columns are now part of the frame
df.head()

Unnamed: 0_level_0,Radiation,GH,GT,ClearSky,Ineichen,Haurwitz,Solis
Datetime,Location,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-03-19 14:15:00-10:00,AP1,476.328,,973.62924,840.960548,871.749955,907.496424
2010-03-19 14:15:00-10:00,AP3,382.777,,973.625962,841.60095,871.733426,907.47759
2010-03-19 14:15:00-10:00,AP4,351.61,,973.624679,841.594,871.726958,907.47022
2010-03-19 14:15:00-10:00,AP5,390.092,,973.630994,841.628209,871.758798,907.506499
2010-03-19 14:15:00-10:00,AP6,353.928,343.313,973.628714,841.615852,871.747297,907.493395


In [10]:
# persist the frame, including the added clear-sky columns, as a pickle
# (relative path: written to the current working directory)
df.to_pickle('pvlib.pkl')

The `detect_clearsky` function has to be applied separately for each station and each day, because it requires evenly spaced timestamps (it does not support an index with unequal time differences).

In [41]:
with timer():
    # slice the station/day once instead of repeating the same .loc lookup
    # three times
    ap1_day = df.loc[('2010-03-20', 'AP1'), ['GH', 'Ineichen']]
    # window_length is passed by keyword: newer pvlib releases inserted
    # `infer_limits` before it in the signature, so a positional 10 would be
    # interpreted as that flag instead of the window length
    cs = pvlib.clearsky.detect_clearsky(ap1_day['GH'],
                                        ap1_day['Ineichen'],
                                        ap1_day.index.get_level_values('Datetime'),
                                        window_length=10)

	To accept the future behavior, pass 'dtype=object'.
	To keep the old behavior, pass 'dtype="datetime64[ns]"'.
  a = asanyarray(a)
  meas_slope_nstd = np.std(meas_slope, axis=0, ddof=1) / meas_mean
  c4 = meas_slope_nstd < var_diff


Elapsed time (s): 122.697683


In [49]:
# measured GH next to the Ineichen estimate, restricted to the rows selected
# by the boolean mask that detect_clearsky returned for AP1 on 2010-03-20
(
    df.loc[('2010-03-20', 'AP1'), ['GH', 'Ineichen']]
      .loc[cs.to_numpy()]
)

Unnamed: 0_level_0,Radiation,GH,Ineichen
Datetime,Location,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-03-20 06:58:00-10:00,AP1,44.0718,14.337000
2010-03-20 06:59:00-10:00,AP1,47.3772,16.203300
2010-03-20 07:00:00-10:00,AP1,50.6826,18.179851
2010-03-20 07:01:00-10:00,AP1,54.3553,20.262922
2010-03-20 07:02:00-10:00,AP1,57.6608,22.448634
2010-03-20 07:03:00-10:00,AP1,60.9663,24.733024
2010-03-20 07:04:00-10:00,AP1,64.2717,27.112099
2010-03-20 07:05:00-10:00,AP1,68.3117,29.581878
2010-03-20 07:06:00-10:00,AP1,71.9845,32.138427
2010-03-20 07:07:00-10:00,AP1,75.6573,34.777885
