In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Activation, Flatten, AveragePooling2D

from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
import time
import kerastuner as kt

In [2]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14679340344073288516
]


In [3]:
data = pd.read_csv("temp_matrix (25).csv")
#check the data types:
data.dtypes

timestamp        int64
date            object
time            object
day             object
irradiance     float64
temperature    float64
0              float64
1              float64
2              float64
3              float64
4              float64
5              float64
6              float64
7              float64
8              float64
9              float64
10             float64
11             float64
12             float64
13             float64
14             float64
15             float64
16             float64
17             float64
18             float64
19             float64
20             float64
21             float64
22             float64
23             float64
24             float64
25             float64
26             float64
27             float64
28             float64
29             float64
30             float64
31             float64
pmax           float64
SCC              int64
panel           object
dtype: object

In [4]:
#dusty data:
dusty = data[data.panel=='dusty']

#seperating data into frames:

#These are columns that don't need to be normalized
dusty1 = dusty[['timestamp','date','time','day','panel']]

#everything else that is a float or int (minus the timestamp column) does need to be normalized
dusty2 = dusty.select_dtypes(include = ['float64','int64'])
dusty2 = dusty2.drop(columns=['timestamp'])
dusty2

Unnamed: 0,irradiance,temperature,0,1,2,3,4,5,6,7,...,24,25,26,27,28,29,30,31,pmax,SCC
1,557.80,28.21,27.705,,,26.074,,26.949,26.3620,,...,,25.648,25.880,,20.549,,,25.961,7964.932,630
3,787.00,33.99,46.665,,,44.952,,46.651,46.8200,,...,,47.396,48.307,,0.000,,,45.714,40853.528,4447
5,916.77,32.31,51.571,,,50.763,,51.238,52.0330,,...,,52.833,53.888,,0.000,,,50.882,48581.400,5223
7,1033.98,34.46,55.550,,,53.838,,54.083,54.5820,,...,,54.876,54.620,,0.000,,,51.626,52712.484,5837
9,1030.84,35.25,54.839,,,52.371,,52.890,52.9630,,...,,52.607,51.958,,0.000,,,48.639,62129.308,5906
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3011,3.14,38.30,56.714,,,56.089,,54.851,55.2430,,...,,54.383,54.065,,52.932,,,51.983,22067.880,2611
3013,3.14,38.30,50.577,,,50.938,,49.331,49.8450,,...,,49.208,49.596,,47.563,,,47.101,15904.834,1721
3015,3.14,38.30,44.139,,,44.475,,43.627,43.8820,,...,,43.363,43.519,,0.000,,,42.301,7421.639,792
3017,3.14,38.30,38.558,,,38.739,,38.527,38.6200,,...,,38.102,38.406,,37.970,,,38.015,671.760,140


In [5]:
#these columns are broken sensors, need to be replaced with 0's:
#01,02,04,07,12,15,16,19,24,27,29,30

dusty2.update(dusty2[['1','2','4','7','12','15','16','19','24','27','29','30']].fillna(0))
dusty2.head()

Unnamed: 0,irradiance,temperature,0,1,2,3,4,5,6,7,...,24,25,26,27,28,29,30,31,pmax,SCC
1,557.8,28.21,27.705,0.0,0.0,26.074,0.0,26.949,26.362,0.0,...,0.0,25.648,25.88,0.0,20.549,0.0,0.0,25.961,7964.932,630
3,787.0,33.99,46.665,0.0,0.0,44.952,0.0,46.651,46.82,0.0,...,0.0,47.396,48.307,0.0,0.0,0.0,0.0,45.714,40853.528,4447
5,916.77,32.31,51.571,0.0,0.0,50.763,0.0,51.238,52.033,0.0,...,0.0,52.833,53.888,0.0,0.0,0.0,0.0,50.882,48581.4,5223
7,1033.98,34.46,55.55,0.0,0.0,53.838,0.0,54.083,54.582,0.0,...,0.0,54.876,54.62,0.0,0.0,0.0,0.0,51.626,52712.484,5837
9,1030.84,35.25,54.839,0.0,0.0,52.371,0.0,52.89,52.963,0.0,...,0.0,52.607,51.958,0.0,0.0,0.0,0.0,48.639,62129.308,5906


In [6]:
#replacing the remainder NaNs with meaningful values (not 0s)
#by default this will be a linear interpolation unless a different method is specified

new_dusty2 = dusty2.interpolate()
new_dusty2

Unnamed: 0,irradiance,temperature,0,1,2,3,4,5,6,7,...,24,25,26,27,28,29,30,31,pmax,SCC
1,557.80,28.21,27.705,0.0,0.0,26.074,0.0,26.949,26.3620,0.0,...,0.0,25.648,25.880,0.0,20.549,0.0,0.0,25.961,7964.932,630
3,787.00,33.99,46.665,0.0,0.0,44.952,0.0,46.651,46.8200,0.0,...,0.0,47.396,48.307,0.0,0.000,0.0,0.0,45.714,40853.528,4447
5,916.77,32.31,51.571,0.0,0.0,50.763,0.0,51.238,52.0330,0.0,...,0.0,52.833,53.888,0.0,0.000,0.0,0.0,50.882,48581.400,5223
7,1033.98,34.46,55.550,0.0,0.0,53.838,0.0,54.083,54.5820,0.0,...,0.0,54.876,54.620,0.0,0.000,0.0,0.0,51.626,52712.484,5837
9,1030.84,35.25,54.839,0.0,0.0,52.371,0.0,52.890,52.9630,0.0,...,0.0,52.607,51.958,0.0,0.000,0.0,0.0,48.639,62129.308,5906
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3011,3.14,38.30,56.714,0.0,0.0,56.089,0.0,54.851,55.2430,0.0,...,0.0,54.383,54.065,0.0,52.932,0.0,0.0,51.983,22067.880,2611
3013,3.14,38.30,50.577,0.0,0.0,50.938,0.0,49.331,49.8450,0.0,...,0.0,49.208,49.596,0.0,47.563,0.0,0.0,47.101,15904.834,1721
3015,3.14,38.30,44.139,0.0,0.0,44.475,0.0,43.627,43.8820,0.0,...,0.0,43.363,43.519,0.0,0.000,0.0,0.0,42.301,7421.639,792
3017,3.14,38.30,38.558,0.0,0.0,38.739,0.0,38.527,38.6200,0.0,...,0.0,38.102,38.406,0.0,37.970,0.0,0.0,38.015,671.760,140


In [7]:
#concatinating the dataframes:
dusty_df = pd.concat([dusty1, new_dusty2], axis=1)

In [8]:
#seperating the newly revised data into frames for the independent & dependent variables
#the colon indicates all rows, and '00':'31' indicates all the columns from 00 to 31

dusty_ind = dusty_df.loc[:,'0':'31'] 
dusty_dep = dusty_df.loc[:,'pmax']

#reshaping the independent dataframe into 2D
dusty_ind = dusty_ind.to_numpy() #converting it to a numpy array
dusty_ind[0].reshape(-1,4)

print(dusty_ind.ndim)

#reshaping the dependent dataframes into 1D with the first value matching the first value of the 2D array
dusty_dep = dusty_dep.to_numpy() #converting it to a numpy array
dusty_dep = dusty_dep.reshape(-1,1)

print(dusty_dep.ndim)

2
2


In [9]:
#Splitting the data into the Training and the Testing set
X_train,X_test,y_train,y_test,=train_test_split(dusty_ind,dusty_dep,test_size=0.2,random_state=0)

In [10]:
print(X_train)

[[60.183  0.     0.    ...  0.     0.    58.815]
 [68.493  0.     0.    ...  0.     0.    63.699]
 [34.455  0.     0.    ...  0.     0.    34.69 ]
 ...
 [55.982  0.     0.    ...  0.     0.    57.771]
 [57.69   0.     0.    ...  0.     0.    51.42 ]
 [35.8    0.     0.    ...  0.     0.    35.61 ]]


In [11]:
#Feature scaleing using the fit transform function to calibrate the measurement so that the training and test data have a similar shape

#create MinMaxScaler objects
MinMaxScaler = preprocessing.MinMaxScaler(feature_range=(0,1))
X_train = MinMaxScaler.fit_transform(X_train)
X_test = MinMaxScaler.fit_transform(X_test)

y_train = MinMaxScaler.fit_transform(y_train)
y_test = MinMaxScaler.fit_transform(y_test)



In [12]:
X_train = X_train.reshape(X_train.shape[0], 8,4,1)
X_train.ndim

4

In [13]:
X_test = X_test.reshape(X_test.shape[0], 8,4,1)
X_test.ndim

4

In [14]:
y_train.ndim

2

In [15]:
y_test.ndim

2

In [23]:
print(X_train.dtype)

float64


In [17]:
#custome loss functions for r_square
def r_square(y_true, y_pred):
    from keras import backend as K
    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 
    return (1 - SS_res/(SS_tot + K.epsilon()))

def r_square_loss(y_true, y_pred):
    from keras import backend as K
    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 
    return 1 - ( 1 - SS_res/(SS_tot + K.epsilon()))

In [24]:
LOG_DIR = f"{int(time.time())}"

def build_model(hp):
    #Instantiate an empty model
    model = keras.models.Sequential()

    # C1 Convolutional Layer
    model.add(Conv2D(6, kernel_size=(2, 2), strides=(1, 1), activation='relu', input_shape=(8,4,1), padding="same"))

    # S2 Pooling Layer
    model.add(AveragePooling2D(pool_size=(2, 2), strides=(1, 1), padding='valid'))

    # C3 Convolutional Layer
    model.add(Conv2D(16, kernel_size=(2, 2), strides=(1, 1), activation='relu', padding='valid'))

    # C5 Fully Connected Convolutional Layer
    model.add(Conv2D(120, kernel_size=(2, 2), strides=(1, 1), activation='relu', padding='valid'))

    #Flatten the CNN output so that we can connect it with fully connected layers
    model.add(Flatten())

    # FC6 Fully Connected Layer
    model.add(Dense(84, activation='relu'))

    #Output Layer with softmax activation
    model.add(Dense(1, activation='relu'))
    
    #Tune the learning rate for the optimizer
    #Choose an optimal value from 0.01 or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values = float64)
    
    # Compile the model
    model.compile(optimizer=keras.optimizers.Adam(learning_rate = hp_learning_rate), metrics=["mse",r_square], loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True)) 
    
    return model


In [25]:
#tuner object
tuner = RandomSearch(
    build_model, objective = "mse", max_trials = 1, executions_per_trial = 1, directory = LOG_DIR
)

#tuner search
hist = tuner.search(x=X_train, y=y_train, epochs=10, batch_size=10, validation_data=(X_test,y_test), verbose=1)

Traceback (most recent call last):
  File "C:\Users\aisha\miniconda3\envs\tensorflow\lib\site-packages\kerastuner\engine\hypermodel.py", line 105, in build
    model = self.hypermodel.build(hp)
  File "<ipython-input-24-812c7f665762>", line 33, in build_model
    hp_learning_rate = hp.Choice('learning_rate', values = float64)
NameError: name 'float64' is not defined


Traceback (most recent call last):
  File "C:\Users\aisha\miniconda3\envs\tensorflow\lib\site-packages\kerastuner\engine\hypermodel.py", line 105, in build
    model = self.hypermodel.build(hp)
  File "<ipython-input-24-812c7f665762>", line 33, in build_model
    hp_learning_rate = hp.Choice('learning_rate', values = float64)
NameError: name 'float64' is not defined


Traceback (most recent call last):
  File "C:\Users\aisha\miniconda3\envs\tensorflow\lib\site-packages\kerastuner\engine\hypermodel.py", line 105, in build
    model = self.hypermodel.build(hp)
  File "<ipython-input-24-812c7f665762>", line 33, in build_model
    hp_learning_rate = hp.Choice('learning_rate', values = float64)
NameError: name 'float64' is not defined


Traceback (most recent call last):
  File "C:\Users\aisha\miniconda3\envs\tensorflow\lib\site-packages\kerastuner\engine\hypermodel.py", line 105, in build
    model = self.hypermodel.build(hp)
  File "<ipython-input-24-812c7f665762>", line 33, in build_model
    hp_learning_rate = hp.Choice('learning_rate', values = float64)
NameError: name 'float64' is not defined


Traceback (most recent call last):
  File "C:\Users\aisha\miniconda3\envs\tensorflow\lib\site-packages\kerastuner\engine\hypermodel.py", line 105, in build
    model = self.hypermodel.build(hp)
  File "<ipython-input-24-812c7f665762>", line 33, in build_model
    hp_learning_rate = hp.Choice('learning_rate', values = float64)
NameError: name 'float64' is not defined


Traceback (most recent call last):
  File "C:\Users\aisha\miniconda3\envs\tensorflow\lib\site-packages\kerastuner\engine\hypermodel.py", line 105, in build
    model = self.hypermodel.build(hp)
  File "<ipython-input-24-812c7f665762>", line 33, in build_model
    hp_learning_rate = hp.Choice('learning_rate', values = float64)
NameError: name 'float64' is not defined


RuntimeError: Too many failed attempts to build model.

In [None]:
print(hist)

In [None]:
print(tuner.get_best_hyperparameters()[0].values)
print(tuner.results_summary())
print(tuner.get_best_models()[0].summary())

In [None]:
hp = kt.HyperParameters()
model = build_model(hp)
test_score = model.evaluate(X_test, y_test)
print("Test loss {:.4f}, accuracy {:.2f}%".format(test_score[0], test_score[1] * 100))

In [None]:
f, ax = plt.subplots()
ax.plot([None] + model.history['mse'], 'o-')
ax.plot([None] + model.history['val_mse'], 'x-')

# Plot legend and use the best location automatically: loc = 0.
ax.legend(['Train mse', 'Validation mse'], loc = 0)
ax.set_title('Training/Validation acc per Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('mse')

In [None]:
f, ax = plt.subplots()
ax.plot([None] + hist.history['loss'], 'o-')
ax.plot([None] + hist.history['val_loss'], 'x-')

# Plot legend and use the best location automatically: loc = 0.
ax.legend(['Train Loss', 'Validation Loss'], loc = 0)
ax.set_title('Training/Validation Loss per Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')

In [None]:
#when you compile the model, add your custom loss function as a metric
model.compile(loss='mse', optimizer='adam', metrics=['mse'])

In [None]:
plot from history
plt.plot(hist.history['r_square'])
plt.plot(hist.history['val_r_square'])
plt.title('R Square')
plt.ylabel('r_square')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')