## Radiation map

We would like a model to have as inputs the irradiance of the sensors and as outputs the irradiance on a rectangular grid. Let us split the sensors in two groups: inner sensors and outer sensors. Given an initial set, would it be possible to decide the location of new outer sensors to maximize the predicibility of the inner sensors (based on irradiance data)?

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

%matplotlib inline

In [4]:
DATA_PATH = '/home/SHARED/SOLAR/data/'

In [5]:
# read minute data and location info
df   = pd.read_pickle(DATA_PATH + 'oahu_min_final.pkl')
info = pd.read_pickle(DATA_PATH + 'info.pkl')

In [11]:
df_long = (df#.iloc[0:1]
             .stack()
             .reset_index('Datetime')
             .join(info[['Longitude', 'Latitude']])
             .rename(columns={0: 'GHI'})
             .dropna())

In [12]:
df_long

Unnamed: 0_level_0,Datetime,GHI,Longitude,Latitude
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AP1,2010-03-19 14:15:00-10:00,0.489229,-158.08389,21.31276
AP1,2010-03-19 14:16:00-10:00,0.345753,-158.08389,21.31276
AP1,2010-03-19 14:17:00-10:00,0.344373,-158.08389,21.31276
AP1,2010-03-19 14:18:00-10:00,1.193121,-158.08389,21.31276
AP1,2010-03-19 14:19:00-10:00,1.175083,-158.08389,21.31276
AP1,2010-03-19 14:20:00-10:00,1.162316,-158.08389,21.31276
AP1,2010-03-19 14:21:00-10:00,1.137088,-158.08389,21.31276
AP1,2010-03-19 14:22:00-10:00,0.657202,-158.08389,21.31276
AP1,2010-03-19 14:23:00-10:00,0.326653,-158.08389,21.31276
AP1,2010-03-19 14:24:00-10:00,0.343429,-158.08389,21.31276


In [5]:
def get_grid(lonlat, decimals=3):
    
    prec = 10**decimals
    max_min = lonlat.agg([np.max, np.min])

    lat = np.arange(int(math.floor(max_min.loc['amin',  'Latitude']*prec)), 
                    int(math.ceil( max_min.loc['amax',  'Latitude']*prec))+1)/prec

    lon = np.arange(int(math.floor(max_min.loc['amin', 'Longitude']*prec)), 
                    int(math.ceil( max_min.loc['amax', 'Longitude']*prec))+1)/prec

    xx, yy = np.meshgrid(lon, lat)
    return pd.DataFrame({'lon': xx.ravel(), 'lat': yy.ravel()})

In [7]:
df_dict = {}
for datetime, df in df_long.groupby('Datetime'):
    print(datetime, df)
    X_train = df[['Longitude', 'Latitude']]
    y_train = df['GHI']
    
    X_test = get_grid(df[['Longitude', 'Latitude']])
    
    gpr = GaussianProcessRegressor(kernel=RBF(10, (1e-3, 1e-3)), n_restarts_optimizer=10)
    gpr.fit(X_train, y_train)
    #X_test['mean'], X_test['std'] = gpr.predict(X_test, return_std=True)
    X_test['GHI'] = gpr.predict(X_test)
    df_dict[datetime] = X_test.set_index(['lon', 'lat'])
    break

2010-03-19 14:15:00-10:00                           Datetime       GHI  Longitude  Latitude
Location                                                         
AP1      2010-03-19 14:15:00-10:00  0.489229 -158.08389  21.31276
AP4      2010-03-19 14:15:00-10:00  0.361135 -158.07947  21.31141
AP5      2010-03-19 14:15:00-10:00  0.400657 -158.08249  21.30983
AP6      2010-03-19 14:15:00-10:00  0.363514 -158.07935  21.30812
AP7      2010-03-19 14:15:00-10:00  0.363748 -158.07785  21.31478
DH1      2010-03-19 14:15:00-10:00  0.392540 -158.08700  21.31533
DH10     2010-03-19 14:15:00-10:00  0.991153 -158.08554  21.31183
DH11     2010-03-19 14:15:00-10:00  0.562059 -158.08530  21.31042
DH2      2010-03-19 14:15:00-10:00  0.418994 -158.08534  21.31451
DH3      2010-03-19 14:15:00-10:00  0.609348 -158.08463  21.31236
DH4      2010-03-19 14:15:00-10:00  0.793878 -158.08505  21.31303
DH5      2010-03-19 14:15:00-10:00  0.507729 -158.08424  21.31357
DH6      2010-03-19 14:15:00-10:00  1.137295 -158.

In [None]:
df_final = pd.concat(df_dict).unstack(level=['lon', 'lat']).sort_index(axis=1)

In [None]:
df_final.columns = df_final.columns.droplevel(0)

In [None]:
df_final.to_pickle('{}/oahu_GP.pkl'.format(DATA_PATH))

In [None]:
df_final.shape

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
df1 = X_test.pivot_table(values='mean', columns='lon', index='lat')
sns.heatmap(data=df1,
            xticklabels=df1.columns.values.round(3),
            yticklabels=df1.index.values.round(3),
            ax=ax[0])

df2 = X_test.pivot_table(values='std', columns='lon', index='lat')
sns.heatmap(data=df2,
            xticklabels=df2.columns.values.round(3),
            yticklabels=df2.index.values.round(3),
            ax=ax[1]);

In [None]:
fig, ax = plt.subplots()
ax.scatter(X_test['lon'], X_test['lat'], c=X_test['std'], s=300)
ax.scatter(X_train['Longitude'], X_train['Latitude'], c='black')
ax.set_xlim((X_test['lon'].min()-1e-3, X_test['lon'].max()+1e-3))
ax.set_ylim((X_test['lat'].min()-1e-3, X_test['lat'].max()+1e-3));