In [1]:
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

In [2]:
from sklearn.decomposition import PCA
from sklearn import metrics

In [3]:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional

In [5]:
from sklearn.utils import resample
from matplotlib import pyplot
from numpy import mean
from numpy import std
from numpy import array

In [6]:
from numpy import tensordot
from numpy.linalg import norm

In [7]:
from sklearn.metrics import max_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import explained_variance_score

In [8]:
from itertools import product

In [9]:
# joblib is a stand-alone package. The `sklearn.externals.joblib` alias was
# deprecated in scikit-learn 0.21 and removed in 0.23, so import joblib
# directly and only fall back to the bundled copy on very old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
from dask.distributed import Client, LocalCluster

# Start a Dask cluster on this machine and connect a client to it; the client
# is what the 'dask' joblib backend used later in the notebook talks to.
cluster = LocalCluster()
client = Client(cluster)




In [10]:
# Load the weather dataset from the working directory. Later cells use its
# 'Solar Irradiance' column as the regression target and the remaining
# columns as input features.
df= pd.read_csv("weather3.csv")

In [11]:
def fit_model(trainX, trainY):
    """Build, compile and train one stacked-LSTM regressor.

    Parameters
    ----------
    trainX : array-like
        3-D training inputs laid out as (samples, timesteps, features).
    trainY : array-like
        Training targets, one value per sample.

    Returns
    -------
    The fitted Keras ``Sequential`` model (two 64-unit LSTM layers followed by
    a single-unit dense output, trained with MSE loss and the Adam optimizer).
    """
    # Take the per-sample shape from the data instead of hard-coding (4, 1):
    # the number of timesteps is whatever the upstream feature reduction kept,
    # and a mismatch would make the first layer reject the input.
    input_shape = tuple(np.shape(trainX)[1:])

    model = Sequential()
    model.add(LSTM(64, activation='relu', return_sequences=True, input_shape=input_shape))
    model.add(LSTM(64, activation='relu'))
    model.add(Dense(1))

    model.compile(loss='mse',
                  optimizer='adam',
                  metrics=['mae', 'mse'])

    # Hold out the last 20% of the training data for validation and stop once
    # val_loss has not improved for 10 consecutive epochs (1000 is only a cap).
    # NOTE(review): restore_best_weights is left at its default, so the model
    # returned carries the final epoch's weights rather than the best epoch's
    # -- consider enabling it.
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    model.fit(trainX, trainY, epochs=1000, validation_split=0.2, verbose=0,
              callbacks=[early_stop, tfdocs.modeling.EpochDots()])

    return model

In [12]:
def ensemble_predictions(members, weights, testX):
    """Return the weighted combination of every member's prediction on testX.

    Each member is asked for ``predict(testX)``; the results are stacked along
    a new leading "member" axis and that axis is contracted against `weights`,
    so the output has the shape of a single member's prediction.
    """
    per_member = []
    for member in members:
        per_member.append(member.predict(testX))

    # Sum over the member axis (axis 0 of the stack) weighted by `weights`.
    return tensordot(array(per_member), weights, axes=((0), (0)))

In [13]:
def evaluate_r2(members, weights, testX, testY):
    """Score the weighted ensemble on (testX, testY) with the R^2 metric.

    The ensemble prediction comes from ``ensemble_predictions`` and is compared
    against `testY` (passed as the ground truth) by ``r2_score``.
    """
    return r2_score(testY, ensemble_predictions(members, weights, testX))

In [14]:
def normalize(weights):
    """Scale `weights` so that their absolute values sum to 1 (unit L1 norm).

    When the L1 norm is zero there is nothing to scale by, and the input is
    handed back untouched.
    """
    l1 = norm(weights, 1)
    return weights / l1 if l1 != 0.0 else weights

In [29]:
def grid_search(members, testX, testY, n_members):
    """Search a coarse weight grid for the ensemble weighting with the best R^2.

    Every combination of the values 0.0, 0.1, ..., 1.0 (one value per member)
    is L1-normalised and scored on (testX, testY). Combinations whose entries
    are all identical are skipped. Each improvement is printed as it is found.

    Parameters
    ----------
    members : sequence
        Fitted models exposing ``predict``.
    testX, testY : array-like
        Inputs and targets the candidate weightings are scored on.
    n_members : int
        The search stops as soon as ``n_members + 1`` improvements have been
        recorded.

    Returns
    -------
    list
        The best weights found, one per member. If no candidate could be
        scored (e.g. a single member, where every combination is skipped),
        equal weights are returned.

    Raises
    ------
    ValueError
        If `members` is empty.
    """
    if not members:
        raise ValueError("grid_search needs at least one ensemble member")

    grid = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

    # A member's predictions do not depend on the candidate weights, so run
    # every model once up front instead of once per grid point.
    yhats = array([model.predict(testX) for model in members])

    improvements = 0
    # Start from -inf rather than 0.0: R^2 can be negative, and the best of a
    # set of poor candidates must still be selectable.
    best_score, best_weights = float("-inf"), None
    for weights in product(grid, repeat=len(members)):
        # All-equal vectors are skipped: (0, ..., 0) cannot be normalised and
        # the others all collapse to the plain equal-weight average.
        if len(set(weights)) == 1:
            continue

        weights = normalize(weights)
        # Weighted sum over the member axis, then score against the targets.
        score = r2_score(testY, tensordot(yhats, weights, axes=((0), (0))))
        if score > best_score:
            best_score, best_weights = score, weights
            print('>%s %.3f' % (best_weights, best_score))
            improvements += 1

        # NOTE(review): this cap ends the search long before the grid is
        # exhausted, so the result is not guaranteed to be the grid optimum --
        # confirm the early stop is intentional.
        if improvements == n_members + 1:
            break

    if best_weights is None:
        # Nothing was scored; fall back to the equal-weight ensemble instead
        # of failing on list(None).
        return [1.0 / len(members)] * len(members)
    return list(best_weights)
 

In [16]:
# Random 80/20 split: sample 80% of the rows for training (fixed seed so the
# split is repeatable) and keep every row that was not sampled for testing.
train_dataset = df.sample(frac=0.8,random_state=0)
test_dataset = df.drop(train_dataset.index)
    
# Summary statistics of the training rows only. The target column is removed
# so the stats cover just the input features, and the table is transposed so
# each feature becomes a row with 'mean' / 'std' columns.
train_stats = train_dataset.describe()
train_stats.pop("Solar Irradiance")
train_stats = train_stats.transpose()
    
# Separate the target from the features; pop() removes the column in place,
# so this must run after the statistics above have been computed.
train_labels = train_dataset.pop('Solar Irradiance')
test_labels = test_dataset.pop('Solar Irradiance')

In [17]:
def norm1(x):
    """Standardise `x` using the mean and std stored in the global train_stats."""
    centered = x - train_stats['mean']
    return centered / train_stats['std']

# Standardise both splits with the same statistics: norm1 reads train_stats,
# which was computed from the training rows.
normed_train_data = norm1(train_dataset)
normed_test_data = norm1(test_dataset)

In [18]:
# Drop the pandas wrappers and keep plain NumPy arrays:
# *_final are the standardised features, train / test are the targets.
train_final= normed_train_data.values
test_final = normed_test_data.values
train=train_labels.values
test=test_labels.values    

In [19]:
from sklearn.decomposition import PCA
# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share of the variance, so the output width depends on the data.
pca = PCA(.95)
# Fit on the training features only, then apply the same projection to both splits.
pca.fit(train_final)
    
pca_train = pca.transform(train_final)
pca_test = pca.transform(test_final) 

In [20]:
# Append a trailing axis of length 1, (rows, components) -> (rows, components, 1),
# giving the 3-D input that the LSTM layers built in fit_model are fed.
X_train = np.expand_dims(pca_train, axis=2)
X_test = np.expand_dims(pca_test, axis=2)

In [21]:
# Train the ensemble: n_members separate calls to fit_model on the same
# training data, each building and fitting a fresh network.
# NOTE(review): no random seed is set in the cells shown, so the members (and
# every score below) will vary from run to run.
n_members = 5
members = [fit_model(X_train, train) for _ in range(n_members)]


Epoch: 0, loss:206966.0000,  mae:341.3239,  mse:206966.0000,  val_loss:42034.8398,  val_mae:156.2465,  val_mse:42034.8398,  
....................................................................................................
Epoch: 100, loss:7878.8892,  mae:49.5870,  mse:7878.8892,  val_loss:8195.6270,  val_mae:53.0977,  val_mse:8195.6270,  
......................
Epoch: 0, loss:213095.6406,  mae:344.8398,  mse:213095.6406,  val_loss:38577.7734,  val_mae:145.4905,  val_mse:38577.7734,  
....................................................................................................
Epoch: 100, loss:8505.0439,  mae:50.6888,  mse:8505.0439,  val_loss:10129.0010,  val_mae:53.3184,  val_mse:10129.0010,  
.....................................
Epoch: 0, loss:205668.5156,  mae:339.5919,  mse:205668.5156,  val_loss:44122.9219,  val_mae:160.2603,  val_mse:44122.9219,  
....................................................................................................
Epoch: 100, loss:787

In [22]:
# Baseline: R^2 of each ensemble member on its own, on the held-out split.
for i in range(n_members):
    yhat = members[i].predict(X_test)
    acc = r2_score(test, yhat)
    # Members are reported 1-based.
    print('Model %d: R2 %.3f' % (i+1, acc))

Model 1: R2 0.928
Model 2: R2 0.924
Model 3: R2 0.926
Model 4: R2 0.923
Model 5: R2 0.922


In [23]:
# Reference point for the weight search: the plain average, i.e. every member
# weighted 1 / n_members.
weights = [1.0/n_members for _ in range(n_members)]
score = evaluate_r2(members, weights, X_test, test)
print('Equal Weights Score: %.3f' % score)

Equal Weights Score: 0.931


In [30]:
# Search for ensemble weights on the held-out split.
# NOTE(review): grid_search as defined in this notebook makes no
# joblib.Parallel calls of its own, so this backend context likely has no
# effect on how the search runs -- confirm before relying on Dask here.
with joblib.parallel_backend('dask'):
    weights = grid_search(members, X_test, test, n_members)

>[0. 0. 0. 0. 1.] 0.922
>[0. 0. 0. 1. 0.] 0.923
>[0.  0.  0.  0.5 0.5] 0.928
>[0.         0.         0.         0.53333333 0.46666667] 0.928
>[0.         0.         0.         0.52941176 0.47058824] 0.928
>[0.         0.         0.         0.52631579 0.47368421] 0.928


In [25]:
# Score the ensemble with the weights returned by the grid search.
# NOTE(review): those weights were selected on this same X_test / test split,
# so this number is an optimistic estimate; a separate validation split would
# be needed for an unbiased comparison with the equal-weight score.
score = evaluate_r2(members, weights, X_test, test)
print('Grid Search Weights: %s, Score: %.3f' % (weights, score))

Grid Search Weights: [0.0, 0.0, 0.0, 0.5294117647058824, 0.47058823529411764], Score: 0.928
