# This notebook explores a variety of training methods

## Imports 
<hr>

In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib widget 
import math
import pandas as pd

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.svm import SVR

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [4]:
def plotRegression(truth, pred):
    """Scatter-plot predictions against ground truth with a y = x reference line.

    Args:
        truth: Array-like of true target values (1-D, or an (n, 1) column).
        pred: Array-like of predicted values, one per entry of ``truth``.

    Points lying on the red diagonal are perfect predictions.
    """
    # np.min/np.max reduce over every element, so the diagonal's endpoints are
    # plain scalars even when ``truth`` is an (n, 1) column array.
    lo, hi = np.min(truth), np.max(truth)

    plt.figure(figsize=(10,10))
    plt.scatter(truth, pred)
    plt.grid()
    plt.xlabel("Truth")
    plt.ylabel("Predicted")
    # The y axis shows predictions, so the title names both quantities.
    plt.title("Predicted value plotted against truth")
    plt.plot([lo, hi], [lo, hi], 'r')
    plt.show()
    
def computeAverageError(pred, y):
    """Return the mean absolute relative error of ``pred`` with respect to ``y``.

    Each term is ``|y[i] - pred[i]| / |y[i] + 1e-6|``; the small constant keeps
    the division defined when a target is exactly zero.

    Args:
        pred: Indexable sequence of predictions.
        y: Indexable sequence of targets, at least as long as ``pred``.
    """
    count = len(pred)
    ratios = [abs((y[k] - pred[k]) / (y[k] + 1e-6)) for k in range(count)]
    return sum(ratios) / len(ratios)

## Load the data and split into training and testing 
<hr>

In [5]:
# Raw dataset: nothing in this file has been normalized or scaled yet.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
dataUnscaled_filename = '/Users/brad/Desktop/CS6620/Project/Data/combined_processed_data.csv' 
data = pd.read_csv(dataUnscaled_filename)

# Columns kept out of the feature matrix (this includes the target column).
non_feature_columns = ['Unnamed: 0', 'date_time', 'Solar_average', 'Solar_total', 'Count']
x = data.drop(columns=non_feature_columns).to_numpy()
# Regression target.
y = data['Solar_average'].to_numpy()

In [6]:
# Sequential (unshuffled) split: the first 85% of the rows are used for
# training, the remaining rows for testing.
train_length = int(x.shape[0] * 0.85)
n_columns = x.shape[1]

x_train = x[:train_length].reshape(-1, n_columns)
x_test = x[train_length:].reshape(-1, n_columns)
# Targets are turned into (n, 1) columns.
y_train = y[:train_length].reshape(-1, 1)
y_test = y[train_length:].reshape(-1, 1)

print('After split and reshape')
for split_array in (x_train, y_train, x_test, y_test):
    print(split_array.shape)

After split and reshape
(4738, 39)
(4738, 1)
(837, 39)
(837, 1)


In [7]:
# Histogram of the unscaled target for each split, one figure per split.
for split_targets, caption in ((y_train, "Training"), (y_test, "Testing")):
    plt.figure()
    plt.hist(split_targets, edgecolor='black')
    plt.title(caption)
    plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Explore different methods of scaling 
<hr>

In [8]:
# Scale 
# sc_X = StandardScaler()
# sc_y = StandardScaler()
# x_train = sc_X.fit_transform(x_train)
# x_test = sc_X.fit_transform(x_test)
# y_train = sc_y.fit_transform(y_train)
# y_test = sc_y.fit_transform(y_test)

In [9]:
# # MinMaxScale a different way
# scalerX = MinMaxScaler()
# scalerY = MinMaxScaler()
# x_train = scalerX.fit_transform(x_train)
# x_test = scalerX.fit_transform(x_test)
# y_train = scalerY.fit_transform(y_train)
# y_test = scalerY.fit_transform(y_test)

In [10]:
# Best results came from using quantile uniform (applied to the targets below);
# the features are min-max scaled.
# Both scalers are fitted on the training split only and then applied to the test split.
# NOTE: this cell overwrites x_train/x_test/y_train/y_test, so re-running it
# rescales data that has already been scaled.

# scalerX = QuantileTransformer(output_distribution='uniform')
scalerX = MinMaxScaler().fit(x_train)
x_train, x_test = scalerX.transform(x_train), scalerX.transform(x_test)

scalerY = QuantileTransformer(output_distribution='uniform').fit(y_train)
y_train, y_test = scalerY.transform(y_train), scalerY.transform(y_test)



In [35]:
# Distribution of the training targets after the quantile transform.
fig_train, ax_train = plt.subplots()
ax_train.hist(y_train, edgecolor='black')
ax_train.set_xlabel("Solar Generation")
ax_train.set_title("Training data after Quantile Transform")
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [36]:
# Distribution of the test targets after the quantile transform.
fig_test, ax_test = plt.subplots()
ax_test.hist(y_test, edgecolor='black')
ax_test.set_title("Test data after Quantile Transform")
ax_test.set_xlabel("Solar Generation")
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Fit with KNN
<hr>

In [12]:
from sklearn import neighbors
from sklearn.metrics import mean_squared_error
from math import sqrt


In [13]:
# k-nearest-neighbours regressor (k = 9) fitted on the scaled training split.
KNNmodel = neighbors.KNeighborsRegressor(n_neighbors = 9).fit(x_train, y_train)

# Predictions for both splits, then truth-vs-prediction plots (training first).
train_pred_knn = KNNmodel.predict(x_train)
test_pred_knn = KNNmodel.predict(x_test)
for knn_truth, knn_pred in ((y_train, train_pred_knn), (y_test, test_pred_knn)):
    plotRegression(knn_truth, knn_pred)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
# R^2 of the KNN model: training split first, then test split.
for knn_truth, knn_pred in ((y_train, train_pred_knn), (y_test, test_pred_knn)):
    print( r2_score(knn_truth, knn_pred) )

0.8961013408013512
0.6035353144777108


## Try fitting the data with RandomForestRegressors
<hr>

In [15]:
# Random forest with 100 trees; random_state is fixed so the fit is repeatable.
regressor = RandomForestRegressor(n_estimators=100, random_state=2)
# y_train is an (n, 1) column at this point; scikit-learn expects a 1-D target
# here and warns about column vectors, so hand it a flattened view.
regressor.fit(x_train, y_train.ravel())

  regressor.fit(x_train, y_train)


RandomForestRegressor(random_state=2)

In [16]:
# R^2 of the random forest on the split it was trained on.
y_train_pred = regressor.predict(x_train)
r2_score(y_true=y_train, y_pred=y_train_pred)

0.989731095483329

In [17]:
# R^2 of the random forest on the held-out test split.
y_test_pred = regressor.predict(x_test)
r2_score(y_true=y_test, y_pred=y_test_pred)

0.767518666955209

In [18]:
# Random forest, training split: undo the target scaling and divide by 1000
# before plotting truth against prediction.
rf_train_truth = scalerY.inverse_transform(y_train.reshape(-1,1))/1000
rf_train_pred = scalerY.inverse_transform(y_train_pred.reshape(-1,1))/1000
plotRegression(rf_train_truth, rf_train_pred)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [19]:
# Random forest, test split: undo the target scaling and divide by 1000
# before plotting truth against prediction.
rf_test_truth = scalerY.inverse_transform(y_test.reshape(-1,1))/1000
rf_test_pred = scalerY.inverse_transform(y_test_pred.reshape(-1,1))/1000
plotRegression(rf_test_truth, rf_test_pred)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Support Vector Regression 
<hr>

In [20]:
# sc_X = StandardScaler()
# sc_y = StandardScaler()
# x_train = sc_X.fit_transform(x_train)
# x_test = sc_X.fit_transform(x_test)
# y_train = sc_y.fit_transform(y_train)
# y_test = sc_y.fit_transform(y_test)

### Before transform, data has range (); after transform, it ranges from (-1, 5) (using StandardScaler)
### Using MinMaxScaler gives range between 0-1


In [21]:
# Features as 2-D (samples, features); targets flattened to 1-D.
svr_feature_count = x_train.shape[1]
x_train = x_train.reshape(-1, svr_feature_count)
x_test = x_test.reshape(-1, svr_feature_count)

# Reshape Y 
print('Before reshaping: ')
for target_split in (y_train, y_test):
    print(target_split.shape)

y_train, y_test = y_train.flatten(), y_test.flatten()
print('After reshaping: ')
for target_split in (y_train, y_test):
    print(target_split.shape)

Before reshaping: 
(4738, 1)
(837, 1)
After reshaping: 
(4738,)
(837,)


In [22]:
# Support vector regressor with a polynomial kernel; all other settings are
# left at their defaults. fit() returns the estimator itself.
regressorSVR = SVR(kernel='poly').fit(x_train, y_train)
regressorSVR

SVR(kernel='poly')

In [23]:
# SVR predictions for the training split and the test split, in that order.
y_train_pred_svr, y_test_pred_svr = (
    regressorSVR.predict(features) for features in (x_train, x_test)
)

In [24]:
# R^2 of the SVR: training split first, then test split.
for svr_truth, svr_pred in ((y_train, y_train_pred_svr), (y_test, y_test_pred_svr)):
    print(r2_score(svr_truth, svr_pred))

0.9317904655192941
0.7582883335904214


In [25]:
# SVR on the training split, in scaled units.
plotRegression(y_train, y_train_pred_svr)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [26]:
# SVR on the test split, in scaled units.
plotRegression(y_test, y_test_pred_svr)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Using Keras RNN and ANN
<hr>

In [27]:
import tensorflow as tf 
from tensorflow import keras
from keras import backend as K
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.layers.experimental import preprocessing

In [28]:
def coeff_determination(y_true, y_pred):
    """Keras metric: coefficient of determination (R^2) of the given tensors.

    Evaluates ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` the sum of squared deviations of ``y_true`` from
    its own mean. ``K.epsilon()`` is added to the denominator so a constant
    ``y_true`` does not divide by zero.

    NOTE(review): the original author was not sure this is right. The formula
    is the standard R^2 for whatever tensors it receives; TODO confirm how
    Keras aggregates the per-call values before comparing with r2_score.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    deviation = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(deviation))
    return 1 - residual_ss / (total_ss + K.epsilon())

In [29]:
# x_train = x_train.reshape(-1,x_train.shape[1],)
# x_test = x_test.reshape(-1,x_train.shape[1],)
# scalerX = MinMaxScaler()
# scalerX.fit(x_train)
# x_train = scalerX.transform(x_train)
# x_test = scalerX.transform(x_test)

In [30]:
# LSTM model: the input is declared as x_train.shape[1] steps of one value each.
lstm_layers = [
    LSTM(10, input_shape=(x_train.shape[1],1), return_sequences=True),
    Dropout(0.2),
    # LSTM(128, activation='relu', return_sequences=True),
    # Dropout(0.2),
    LSTM(10, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='relu'),
    Dropout(0.2),
    # Single ReLU output unit, so predictions are never negative.
    Dense(1, activation='relu'),
]
model = Sequential(lstm_layers)

# Adam optimizer; candidate losses were mean_squared_error and
# mean_absolute_error, and the latter is the one in use.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-5)
model.compile(
    loss='mean_absolute_error',
    optimizer=opt,
    metrics=[coeff_determination],
)

In [31]:
def plot_loss(history):
    """Plot training and validation curves from a Keras fit result.

    Draws two figures: first the loss curves, then the ``coeff_determination``
    metric curves.

    Args:
        history: Object whose ``history`` dict holds the keys ``loss``,
            ``val_loss``, ``coeff_determination`` and
            ``val_coeff_determination`` (one value per epoch).
    """
    curves = history.history

    # Figure 1: loss per epoch (with grid).
    plt.figure()
    for key in ('loss', 'val_loss'):
        plt.plot(curves[key], label=key)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Figure 2: R^2 metric per epoch (no grid).
    plt.figure()
    metric_curves = (
        ('coeff_determination', 'R^2'),
        ('val_coeff_determination', 'validation R^2'),
    )
    for key, legend_label in metric_curves:
        plt.plot(curves[key], label=legend_label)
    plt.xlabel('Epoch')
    plt.ylabel('R^2')
    plt.legend()
    plt.show()

In [32]:
# Add a trailing axis of length 1: (samples, features) -> (samples, features, 1),
# matching the input_shape declared for the first LSTM layer.
lstm_steps = x_train.shape[1]
x_train = x_train.reshape(-1, lstm_steps, 1)
x_test = x_test.reshape(-1, lstm_steps, 1)

In [33]:
# Targets back to (n, 1) columns for the Keras models.
y_train, y_test = y_train.reshape(-1,1), y_test.reshape(-1,1)

In [34]:
# Train the LSTM for 100 epochs in batches of 50; the test split doubles as
# the validation set, so val_* curves are test-set curves.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=100,
    batch_size=50,
    verbose=1,
)
plot_loss(history)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [37]:
# LSTM R^2 on the training split ...
y_pred_train_LSTM = model.predict(x_train)
print(r2_score(y_true=y_train, y_pred=y_pred_train_LSTM))
# ... and on the test split.
y_pred_test_LSTM = model.predict(x_test)
print(r2_score(y_true=y_test, y_pred=y_pred_test_LSTM))

0.7366018259955913
0.5322781740413931


In [38]:
# LSTM truth-vs-prediction plots in scaled units: training split, then test split.
for lstm_truth, lstm_pred in ((y_train, y_pred_train_LSTM), (y_test, y_pred_test_LSTM)):
    plotRegression(lstm_truth, lstm_pred)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [64]:
# ann Model: two hidden dense layers (20 and 10 units) with dropout after each.
ann_layers = [
    Dense(20, input_dim=x_train.shape[1], activation='relu'),
    Dropout(0.2),
    Dense(10, activation='relu'),
    Dropout(0.2),
    # Dense(10, activation='relu'),
    # Dropout(0.2),
    # Dense(10, activation='relu'),
    # Single ReLU output unit, so predictions are never negative.
    Dense(1, activation='relu'),
]
ann_model = Sequential(ann_layers)

# Adam optimizer; candidate losses were mean_squared_error and
# mean_absolute_error, and the latter is the one in use.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
ann_model.compile(
    loss='mean_absolute_error',
    optimizer=opt,
    metrics=[coeff_determination],
)

In [65]:
# Train the dense network for 100 epochs in batches of 50. Inputs are reshaped
# to flat (samples, features) rows and targets to 1-D; the test split is used
# as validation data.
ann_width = x_train.shape[1]
History = ann_model.fit(
    x=x_train.reshape(-1, ann_width),
    y=y_train.reshape(-1),
    validation_data=(x_test.reshape(-1, ann_width), y_test.reshape(-1)),
    epochs=100,
    batch_size=50,
    verbose=0,
)

In [66]:
# Loss and R^2 curves of the dense network's training run.
plot_loss(history=History)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Keep training the dense network: 10 rounds of 100 epochs (batch size 64),
# plotting the curves and printing train/test R^2 after every round.
num = 0
while num < 10:
    # Flat (samples, features) view of the inputs for the dense network.
    ann_train_x = x_train.reshape(-1, x_train.shape[1])
    ann_test_x = x_test.reshape(-1, x_train.shape[1])

    history = ann_model.fit(
        x=ann_train_x,
        y=y_train.reshape(-1,),
        epochs=100,
        validation_data=(ann_test_x, y_test.reshape(-1,)),
        batch_size=64,
        verbose=0,
    )
    plot_loss(history)

    # Score ann_model, the network trained above -- not `model`, which is the
    # LSTM and would report R^2 values unrelated to this training loop.
    y_pred_train_ann = ann_model.predict(ann_train_x)
    print(r2_score(y_train, y_pred_train_ann))
    y_pred_test_ann = ann_model.predict(ann_test_x)
    print(r2_score(y_test, y_pred_test_ann))
    num += 1


In [67]:
# Dense-network R^2: inputs are reshaped to flat (samples, features) rows first.
ann_feature_count = x_train.shape[1]
y_pred_train_ann = ann_model.predict(x_train.reshape(-1, ann_feature_count))
print(r2_score(y_true=y_train, y_pred=y_pred_train_ann))
y_pred_test_ann = ann_model.predict(x_test.reshape(-1, ann_feature_count))
print(r2_score(y_true=y_test, y_pred=y_pred_test_ann))

0.8933351876894882
0.7761286851756322


In [68]:
# Dense-network truth-vs-prediction plots in scaled units: training, then test.
for ann_truth, ann_pred in ((y_train, y_pred_train_ann), (y_test, y_pred_test_ann)):
    plotRegression(ann_truth, ann_pred)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [69]:
# Same plots with the target scaling undone and values divided by 1000:
# training split first, then test split.
for scaled_truth, scaled_pred in ((y_train, y_pred_train_ann), (y_test, y_pred_test_ann)):
    plotRegression(
        scalerY.inverse_transform(scaled_truth.reshape(-1,1))/1000,
        scalerY.inverse_transform(scaled_pred.reshape(-1,1))/1000,
    )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …