# Explore Gaussian Processes

In [7]:
import pandas as pd
import numpy as np
import feather

df = (feather.read_dataframe('/home/SHARED/SOLAR/data/oahu_min.feather')
             .set_index('Datetime'))

info = pd.read_csv('/home/SHARED/SOLAR/data/info.csv')
info['Location'] = info['Location'].str.replace('(HL)?_', '')
info.set_index('Location', inplace=True)

We just work with GH radiation for the moment. We also normalize sensor names.

In [38]:
df.drop(list(df.filter(regex = 'GT_')), axis = 1, inplace = True)
df.columns = df.columns.str.replace('GH_', '')

In [51]:
"""
Returns the training set for the GP, that is the value and location 
of all sensors at a given time
"""
def get_training_set(time='2010-03-19 14:15:00'):
    im = info[['Latitude','Longitude']]
    im['Radiation'] = df.loc[tt]
    return im

## Train GP

In [74]:
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

time = '2010-03-19 14:15:00'
im = get_training_set(time)
X = im[['Latitude','Longitude']].values
y = im['Radiation'].values

#kernel = DotProduct() + WhiteKernel()
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))

gpr = GaussianProcessRegressor(kernel=kernel,n_restarts_optimizer=9)
gpr.fit(X, y)

gpr.predict(X[:2,:], return_std=True) 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(array([593.29190135, 772.94208002]), array([0.        , 0.00586141]))

## Interpolate in a given grid

In [93]:
latmin =  np.min(im.Latitude) - 0.001
latmax =  np.max(im.Latitude) + 0.001
lonmin = np.min(im.Longitude) - 0.001
lonmax = np.max(im.Longitude) + 0.001

stepsize = 0.0001
long_range = np.arange(lonmin, lonmax, stepsize)
lat_range = np.arange(latmin, latmax, stepsize)
lats = np.zeros( len(long_range) * len(lat_range) )
longs = np.zeros_like(lats)
latlon =  np.transpose([np.tile(lat_range, len(long_range)), np.repeat(long_range, len(lat_range))])

pd.DataFrame(latlon, columns=["Latitude", "Longitude"])

Unnamed: 0,Latitude,Longitude
0,21.30712,-158.0880
1,21.30722,-158.0880
2,21.30732,-158.0880
3,21.30742,-158.0880
4,21.30752,-158.0880
5,21.30762,-158.0880
6,21.30772,-158.0880
7,21.30782,-158.0880
8,21.30792,-158.0880
9,21.30802,-158.0880


In [92]:
latmin

21.30712

In [78]:

for x in range(lonmin, lonmax, stepsize):
    for y in range(latmin, latmax, stepsize):
        yield (x, y)

SyntaxError: 'yield' outside function (<ipython-input-78-d4aa4c461c2b>, line 4)

In [None]:
lats = np.zeros()

In [72]:
im.Latitude

Location
DH3     21.31236
DH4     21.31303
DH5     21.31357
DH10    21.31183
DH11    21.31042
DH9     21.31268
DH2     21.31451
DH1     21.31533
AP6     21.30812
AP1     21.31276
AP3     21.31281
AP5     21.30983
AP4     21.31141
AP7     21.31478
DH6     21.31179
DH7     21.31418
DH8     21.31034
Name: Latitude, dtype: float64