# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Load-data" data-toc-modified-id="Load-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load data</a></div><div class="lev1 toc-item"><a href="#Set-up-data" data-toc-modified-id="Set-up-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Set up data</a></div><div class="lev1 toc-item"><a href="#Set-up-data---statistical-clearsky" data-toc-modified-id="Set-up-data---statistical-clearsky-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Set up data - statistical clearsky</a></div><div class="lev1 toc-item"><a href="#Dump-to-file" data-toc-modified-id="Dump-to-file-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Dump to file</a></div><div class="lev2 toc-item"><a href="#Test-read" data-toc-modified-id="Test-read-41"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Test read</a></div>

In [1]:
import pandas as pd
import numpy as np
import os
import datetime
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

import pytz
import itertools

import pvlib
import utils
import model_free

import concurrent.futures

# print numpy arrays with 4 digits of precision
np.set_printoptions(precision=4)
# interactive matplotlib backend for the notebook front end
%matplotlib notebook

# autoreload mode 2 re-imports modules before each cell runs
# (presumably so edits to the local utils / model_free modules are picked up)
%load_ext autoreload
%autoreload 2

# Load data

In [2]:
# Data directories, resolved relative to the current user's home directory.
# The previous absolute '/Users/<name>/...' literals made expanduser() a no-op
# and tied the notebook to a single machine; '~' expands to the same location
# for the original user and stays valid for anyone with the same layout.
NSRDB_PATH = os.path.expanduser('~/data_sets/nsrdb/srrl_area/')
GROUND_PATH = os.path.expanduser('~/data_sets/srrl/srrl_psp/')

In [3]:
# Read both data sets through the project helpers in utils.
# NOTE(review): the string arguments look like timezone names - presumably the
# NSRDB frame is localized to US/Mountain and the ground data is converted from
# Etc/GMT+7 to US/Mountain; confirm against utils.
nsrdb_df = utils.read_dir_nsrdb(NSRDB_PATH, 'US/Mountain')
ground_df = utils.read_srrl_dir(GROUND_PATH, 'Etc/GMT+7', 'US/Mountain')

# Set up data

In [4]:
# rename so GHI is named same as NSRDB
# ground_df['GHI'] = ground_df['Global 40-South LI-200 [W/m^2]']
# Take the PSP reading and fall back to the corrected PSP column wherever the
# primary reading is missing.  The filled Series is assigned back explicitly:
# fillna(..., inplace=True) on ground_df['GHI'] acts on an intermediate Series,
# which pandas warns about and which does not update the frame under
# Copy-on-Write.
ground_df['GHI'] = ground_df['Global PSP [W/m^2]'].fillna(ground_df['Global PSP (cor) [W/m^2]'])

In [5]:
# there are some collection issues before 2009 - those will be ignored
# .copy() detaches the filtered frame from the full one so the column
# assignments made in later cells do not trigger SettingWithCopyWarning.
# The cutoff is written in ISO form (same instant as '01-01-2009') to avoid any
# day/month ambiguity.
# (an upper bound, `& (ground_df.index < '01-01-2012')`, was tried and is disabled)
ground_df = ground_df[ground_df.index > '2009-01-01'].copy()

In [6]:
# filter off below zero irradiance
# Only the GHI column is reset.  Assigning through a bare boolean row mask
# (df[mask] = 0) overwrites every column of the matching rows, which would also
# zero the date fields, clear-sky columns and the raw sensor readings.
nsrdb_df.loc[nsrdb_df['GHI'] < 0, 'GHI'] = 0
ground_df.loc[ground_df['GHI'] < 0, 'GHI'] = 0

In [7]:
# sanity check: list the distinct gaps between consecutive timestamps
# (a single non-NaT value means the index is evenly spaced)
pd.unique(ground_df.index.to_series().diff())

array([      'NaT', 60000000000], dtype='timedelta64[ns]')

In [8]:
# generate clearsky model for ground-based measurements
snl_params = {'tilt': 40, 'elevation': 1829, 'azimuth': 180, 'lat': 39.74, 'lon': -105.18}
rtc = utils.make_pvlib_sys(**snl_params)

# keep only the modeled GHI and re-label it with the measurement index so the
# column assignment below lines up row for row
clear_skies = pd.Series(rtc.get_clearsky(ground_df.index)['ghi'])
clear_skies.index = ground_df.index
ground_df['Clearsky GHI'] = clear_skies

# times and window_length are passed by keyword on purpose: newer pvlib
# releases place an `infer_limits` parameter ahead of `window_length`, so a
# positional 10 would be read as that flag instead of the window length.
# With return_components=True the call returns (clear mask, components, alpha);
# the components are not used here.
ground_df['sky_status'], _, alpha = pvlib.clearsky.detect_clearsky(
    ground_df['GHI'],
    ground_df['Clearsky GHI'],
    times=ground_df.index,
    window_length=10,
    return_components=True,
)
# clear-sky curve rescaled by the alpha factor returned from the detection
ground_df['Clearsky GHI scale'] = alpha * ground_df['Clearsky GHI']

  meas_slope_nstd = np.std(meas_slope, axis=0, ddof=1) / meas_mean
  c4 = meas_slope_nstd < var_diff


In [9]:
# Flag NSRDB rows as clear when the cloud type code is 0 and GHI is positive
# (the GHI test presumably keeps night-time rows from counting as clear).
nsrdb_df['sky_status'] = nsrdb_df['Cloud Type'].eq(0) & nsrdb_df['GHI'].gt(0)

In [10]:
# compare measured ground GHI against the modeled clear-sky GHI
# NOTE(review): exact formulas live in utils.calc_ratio / calc_abs_ratio_diff - confirm there
ground_df['ratio'] = utils.calc_ratio(ground_df['GHI'], ground_df['Clearsky GHI'])
ground_df['abs_diff_ratio'] = utils.calc_abs_ratio_diff(ground_df['GHI'], ground_df['Clearsky GHI'])

In [11]:
# same two metrics, this time against the alpha-scaled clear-sky column
ground_df['ratio scale'] = utils.calc_ratio(ground_df['GHI'], ground_df['Clearsky GHI scale'])
ground_df['abs_diff_ratio scale'] = utils.calc_abs_ratio_diff(ground_df['GHI'], ground_df['Clearsky GHI scale'])

In [12]:
# same two metrics for NSRDB, using the 'Clearsky GHI' column that ships with the data
nsrdb_df['ratio'] = utils.calc_ratio(nsrdb_df['GHI'], nsrdb_df['Clearsky GHI'])
nsrdb_df['abs_diff_ratio'] = utils.calc_abs_ratio_diff(nsrdb_df['GHI'], nsrdb_df['Clearsky GHI'])

# Set up data - statistical clearsky

In [13]:
# detector built on the NSRDB GHI series; note that `mf` is rebound to the
# ground data in a later cell, so cell order matters
mf = model_free.ModelFreeDetect(nsrdb_df['GHI'])

In [14]:
# statistical clear-sky curve from the 90th percentile with num_days=30, no smoothing
# NOTE(review): how num_days defines the window is decided inside model_free - confirm there
stat_cs = mf.generate_stat_cs(model_fxn=np.nanpercentile, percentile=90, num_days=30) # , smooth_window=60, smooth_fxn=np.nanmean)

  r = func(a, **kwargs)


In [15]:
# store the NSRDB-derived statistical clear-sky curve before `stat_cs` is reused for the ground data
nsrdb_df['Clearsky GHI stat'] = stat_cs

In [16]:
# rebind `mf` to a detector built on the ground GHI series (replaces the NSRDB one)
mf = model_free.ModelFreeDetect(ground_df['GHI'])

In [17]:
# same percentile settings as the NSRDB run, plus smoothing with np.nanmean and smooth_window=90
# NOTE(review): units of smooth_window are defined in model_free - confirm there
stat_cs = mf.generate_stat_cs(model_fxn=np.nanpercentile, percentile=90, num_days=30, smooth_window=90, smooth_fxn=np.nanmean)

  r = func(a, **kwargs)


In [18]:
# store the ground-derived statistical clear-sky curve
ground_df['Clearsky GHI stat'] = stat_cs

In [19]:
# ratio metrics of measured ground GHI against the statistical clear-sky curve
ground_df['ratio stat'] = utils.calc_ratio(ground_df['GHI'], ground_df['Clearsky GHI stat'])
ground_df['abs_diff_ratio stat'] = utils.calc_abs_ratio_diff(ground_df['GHI'], ground_df['Clearsky GHI stat'])

In [20]:
# ratio metrics of NSRDB GHI against its statistical clear-sky curve
nsrdb_df['ratio stat'] = utils.calc_ratio(nsrdb_df['GHI'], nsrdb_df['Clearsky GHI stat'])
nsrdb_df['abs_diff_ratio stat'] = utils.calc_abs_ratio_diff(nsrdb_df['GHI'], nsrdb_df['Clearsky GHI stat'])

# Dump to file

In [21]:
# quick look at the first rows, including the derived columns, before writing to disk
nsrdb_df.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,DHI,DNI,GHI,Clearsky DHI,Clearsky DNI,...,Precipitable Water,Wind Direction,Wind Speed,Fill Flag,sky_status,ratio,abs_diff_ratio,Clearsky GHI stat,ratio stat,abs_diff_ratio stat
1997-12-31 17:00:00-07:00,1998.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.74181,268.450653,4.049125,0.0,False,1.0,0.0,25.0,0.0,1.0
1997-12-31 17:30:00-07:00,1998.0,1.0,1.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,...,3.803859,268.450653,4.200805,0.0,False,1.0,0.0,0.0,1.0,0.0
1997-12-31 18:00:00-07:00,1998.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.865908,268.394196,4.352484,0.0,False,1.0,0.0,0.0,1.0,0.0
1997-12-31 18:30:00-07:00,1998.0,1.0,1.0,1.0,30.0,0.0,0.0,0.0,0.0,0.0,...,3.922091,268.394196,4.455772,0.0,False,1.0,0.0,0.0,1.0,0.0
1997-12-31 19:00:00-07:00,1998.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.978275,268.050659,4.559062,0.0,False,1.0,0.0,0.0,1.0,0.0


In [22]:
# write the prepared NSRDB frame to the working directory as a gzip-compressed pickle
nsrdb_df.to_pickle('./srrl_nsrdb_df.pkl.gzip', compression='gzip')

In [23]:
# write the prepared ground frame to the working directory as a gzip-compressed pickle
ground_df.to_pickle('./srrl_ground_df.pkl.gzip', compression='gzip')

## Test read

In [24]:
# read back the file written above (pickle should only be loaded from trusted sources)
nsrdb_df_read = pd.read_pickle('./srrl_nsrdb_df.pkl.gzip', compression='gzip')

In [25]:
# round-trip check: the frame read from disk should equal the in-memory one
nsrdb_df_read.equals(nsrdb_df)

True

In [26]:
# read back the file written above (pickle should only be loaded from trusted sources)
ground_df_read = pd.read_pickle('./srrl_ground_df.pkl.gzip', compression='gzip')

In [27]:
# round-trip check: the frame read from disk should equal the in-memory one
ground_df_read.equals(ground_df)

True