In [1]:
# import library
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam
import IPython
import kerastuner as kt
from kerastuner import HyperParameters
import matplotlib.pyplot as plt

In [2]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8513793104071128899
]


In [3]:
data = pd.read_csv("temp_matrix (25).csv")
#check the data types
data.dtypes

timestamp        int64
date            object
time            object
day             object
irradiance     float64
temperature    float64
0              float64
1              float64
2              float64
3              float64
4              float64
5              float64
6              float64
7              float64
8              float64
9              float64
10             float64
11             float64
12             float64
13             float64
14             float64
15             float64
16             float64
17             float64
18             float64
19             float64
20             float64
21             float64
22             float64
23             float64
24             float64
25             float64
26             float64
27             float64
28             float64
29             float64
30             float64
31             float64
pmax           float64
SCC              int64
panel           object
dtype: object

In [4]:
#clean data:
clean = data[data.panel=='clean']

#seperating data into frames:

#These are columns that don't need to be normalized
clean1 = clean[['timestamp','date','time','day','panel']]

#everything else minus the timestamp column does need to be normalized
clean2 = clean.select_dtypes(include = ['float64','int64'])
clean2 = clean2.drop(columns=['timestamp'])
clean2

Unnamed: 0,irradiance,temperature,0,1,2,3,4,5,6,7,...,24,25,26,27,28,29,30,31,pmax,SCC
0,557.80,28.21,31.463,,,28.36625,,30.656,29.605,,...,,26.068,25.936,,26.624,,,25.9125,8801.920,673
2,787.00,33.99,45.556,,,46.98625,,47.513,48.333,,...,,46.507,47.690,,44.890,,,47.0325,56452.320,4520
4,916.77,32.31,49.921,,,52.91900,,52.556,53.696,,...,,51.588,53.025,,49.340,,,52.6700,61858.730,5299
6,1033.98,34.46,55.106,,,,,56.144,56.764,,...,,55.483,56.408,,53.876,,,,66282.240,5963
8,1030.84,35.25,55.494,,,55.81500,,55.433,55.040,,...,,53.158,53.786,,51.770,,,52.9100,67181.928,5990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3010,3.14,38.30,,,,,,,,,...,,,,,,,,,43498.806,4026
3012,3.14,38.30,,,,,,,,,...,,,,,,,,,32653.666,2777
3014,3.14,38.30,,,,,,,,,...,,,,,,,,,9643.520,779
3016,3.14,38.30,,,,,,,,,...,,,,,,,,,1429.268,224


In [5]:
#these columns are broken sensors, need to be replaced with 0's:
#01,02,04,07,12,15,16,19,24,27,29,30
clean2.update(clean2[['1','2','4','7','12','15','16','19','24','27','29','30']].fillna(0))
clean2.head()

Unnamed: 0,irradiance,temperature,0,1,2,3,4,5,6,7,...,24,25,26,27,28,29,30,31,pmax,SCC
0,557.8,28.21,31.463,0.0,0.0,28.36625,0.0,30.656,29.605,0.0,...,0.0,26.068,25.936,0.0,26.624,0.0,0.0,25.9125,8801.92,673
2,787.0,33.99,45.556,0.0,0.0,46.98625,0.0,47.513,48.333,0.0,...,0.0,46.507,47.69,0.0,44.89,0.0,0.0,47.0325,56452.32,4520
4,916.77,32.31,49.921,0.0,0.0,52.919,0.0,52.556,53.696,0.0,...,0.0,51.588,53.025,0.0,49.34,0.0,0.0,52.67,61858.73,5299
6,1033.98,34.46,55.106,0.0,0.0,,0.0,56.144,56.764,0.0,...,0.0,55.483,56.408,0.0,53.876,0.0,0.0,,66282.24,5963
8,1030.84,35.25,55.494,0.0,0.0,55.815,0.0,55.433,55.04,0.0,...,0.0,53.158,53.786,0.0,51.77,0.0,0.0,52.91,67181.928,5990


In [6]:
#replacing the remainder NaNs with meaningful values (not 0s)
#by default this will be a linear interpolation unless a different method is specified

new_clean2 = clean2.interpolate()
new_clean2

Unnamed: 0,irradiance,temperature,0,1,2,3,4,5,6,7,...,24,25,26,27,28,29,30,31,pmax,SCC
0,557.80,28.21,31.4630,0.0,0.0,28.36625,0.0,30.6560,29.6050,0.0,...,0.0,26.068,25.9360,0.0,26.624,0.0,0.0,25.9125,8801.920,673
2,787.00,33.99,45.5560,0.0,0.0,46.98625,0.0,47.5130,48.3330,0.0,...,0.0,46.507,47.6900,0.0,44.890,0.0,0.0,47.0325,56452.320,4520
4,916.77,32.31,49.9210,0.0,0.0,52.91900,0.0,52.5560,53.6960,0.0,...,0.0,51.588,53.0250,0.0,49.340,0.0,0.0,52.6700,61858.730,5299
6,1033.98,34.46,55.1060,0.0,0.0,54.36700,0.0,56.1440,56.7640,0.0,...,0.0,55.483,56.4080,0.0,53.876,0.0,0.0,52.7900,66282.240,5963
8,1030.84,35.25,55.4940,0.0,0.0,55.81500,0.0,55.4330,55.0400,0.0,...,0.0,53.158,53.7860,0.0,51.770,0.0,0.0,52.9100,67181.928,5990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3010,3.14,38.30,35.4225,0.0,0.0,35.53250,0.0,35.3775,35.3775,0.0,...,0.0,35.095,35.0775,0.0,35.190,0.0,0.0,34.9875,43498.806,4026
3012,3.14,38.30,35.4225,0.0,0.0,35.53250,0.0,35.3775,35.3775,0.0,...,0.0,35.095,35.0775,0.0,35.190,0.0,0.0,34.9875,32653.666,2777
3014,3.14,38.30,35.4225,0.0,0.0,35.53250,0.0,35.3775,35.3775,0.0,...,0.0,35.095,35.0775,0.0,35.190,0.0,0.0,34.9875,9643.520,779
3016,3.14,38.30,35.4225,0.0,0.0,35.53250,0.0,35.3775,35.3775,0.0,...,0.0,35.095,35.0775,0.0,35.190,0.0,0.0,34.9875,1429.268,224


In [7]:
#concatinating the dataframes:

clean_df = pd.concat([clean1, new_clean2], axis=1)

In [8]:
#seperating the newly revised data into frames for the independent & dependent variables
#the colon indicates all rows, and '00':'31' indicates all the columns from 00 to 31

clean_ind = clean_df.loc[:,'0':'31'] 
clean_dep = clean_df.loc[:,'pmax']

#reshaping the independent dataframe into 2D
clean_ind = clean_ind.to_numpy() #converting it to a numpy array
clean_ind[0].reshape(-1,4)


print(clean_ind.ndim)

#reshaping the dependent dataframes into 1D with the first value matching the first value of the 2D array
clean_dep = clean_dep.to_numpy() #converting it to a numpy array
clean_dep = clean_dep.reshape(-1,1)
print(clean_dep.ndim)

2
2


In [9]:
#Splitting the data into the Training and the Testing set
X_train,X_test,y_train,y_test,=train_test_split(clean_ind,clean_dep,test_size=0.2,random_state=0)

In [10]:
print(X_train)

[[58.983       0.          0.         ...  0.          0.
  61.57777778]
 [35.4225      0.          0.         ...  0.          0.
  34.9875    ]
 [35.4225      0.          0.         ...  0.          0.
  34.9875    ]
 ...
 [35.4225      0.          0.         ...  0.          0.
  34.9875    ]
 [57.513       0.          0.         ...  0.          0.
  53.935     ]
 [36.1425      0.          0.         ...  0.          0.
  35.6875    ]]


In [11]:
#Feature scaleing using the fit transform function to calibrate the measurement so that the training and test data have a similar shape

#create MinMaxScaler objects
MinMaxScaler = preprocessing.MinMaxScaler(feature_range=(0,1))
X_train = MinMaxScaler.fit_transform(X_train)
X_test = MinMaxScaler.fit_transform(X_test)

y_train = MinMaxScaler.fit_transform(y_train)
y_test = MinMaxScaler.fit_transform(y_test)

In [12]:
X_train = X_train.reshape(X_train.shape[0], 8,4,1)
X_train.ndim

4

In [13]:
X_test = X_test.reshape(X_test.shape[0], 8,4,1)
X_test.ndim

4

In [14]:
y_train.ndim

2

In [15]:
y_test.ndim

2

In [16]:
#Reserving 10000 samples for validation
X_val = X_train [-10000:]
y_val = y_train [-10000:]

In [17]:
#custome loss functions for r_square
def r_square(y_true, y_pred):
    from keras import backend as K
    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 
    return (1 - SS_res/(SS_tot + K.epsilon()))

def r_square_loss(y_true, y_pred):
    from keras import backend as K
    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 
    return 1 - ( 1 - SS_res/(SS_tot + K.epsilon()))

In [18]:
#Instantiate an empty model
def get_compiled_model(hp):
    model = Sequential() 
    # C1 Convolutional Layer
    model.add(Conv2D(6, kernel_size=(2, 2), strides=(1, 1), activation='relu', input_shape=(8,4,1), padding="same"))
    
    # C3 Convolutional Layer
    model.add(Conv2D(16, kernel_size=(2, 2), strides=(1, 1), activation='relu', padding='valid'))

    # C5 Fully Connected Convolutional Layer
    model.add(Conv2D(120, kernel_size=(2, 2), strides=(1, 1), activation='relu', padding='valid'))

    #Flatten the CNN output so that we can connect it with fully connected layers
    model.add(Flatten())
    
    #FC6 Fully Connected Layer
        #tune the number of units in the first Dense layer
    model.add(Dense(84, activation='relu'))
    
    #Output Layer with softmax activation
    model.add(Dense(1, activation='relu'))
    
    model.compile(
        loss=r_square, 
        optimizer=keras.optimizers.Adam(), 
        metrics=["mse", r_square]
        )
        
    return model

In [19]:
#create a model instance
model = get_compiled_model(hp)

#create the tuner instance
#tuner= kt.Hyperband(model_builder,
 #                   objective = 'val_accuracy',
 #                   max_epochs=10,
 #                   factor=3,
 #                   directory='checkpoint_dir',
 #                   project_name='Solar_Panel_Monitoring')
                    

NameError: name 'hp' is not defined

In [None]:
#create a callback that saved the model's weights
cp_callback=tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                              verbose=1)
#train the model with callback
hist = model.fit(
    X_train,
    y_train, 
    epochs=100, 
    batch_size= 10,
    validation_data=(X_val, y_val), 
    callbacks=[cp_callback],
    verbose=1)

In [None]:
#evaluating the model on the rest data
test_score = model.evaluate(X_test, y_test, batch_size=20)
print("Test loss {:.4f}, accuracy {:.2f}%".format(test_score[0], test_score[1] * 100))

In [None]:
#generate predications
print("Generate predictions for 3 samples:")
predictions = model.predict(X_test[:3])
print("predictions shape:", predictions.shape)
model.summary()

In [None]:
f, ax = plt.subplots()
ax.plot([None] + hist.history['mse'], 'o-')
ax.plot([None] + hist.history['val_mse'], 'x-')
# Plot legend and use the best location automatically: loc = 0.
ax.legend(['Train mse', 'Validation mse'], loc = 0)
ax.set_title('Training/Validation acc per Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('mse')

In [None]:
f, ax = plt.subplots()
ax.plot([None] + hist.history['loss'], 'o-')
ax.plot([None] + hist.history['val_loss'], 'x-')
# Plot legend and use the best location automatically: loc = 0.
ax.legend(['Train Loss', 'Validation Loss'], loc = 0)
ax.set_title('Training/Validation Loss per Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')

In [None]:
#plot from history
plt.plot(hist.history['r_square'])
plt.plot(hist.history['val_r_square'])
plt.title('R Square')
plt.ylabel('r_square')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')

In [None]:
ls {checkpoint_dir}

In [None]:
latest = tf.train.latest_checkpoint(checkpoint_dir)
latest

In [None]:
#load the weights
model.load_weights(latest)

#Re-evaluate the model
loss, acc = model.evaluate(X_test, y_test, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100*acc))

In [None]:
X_test.shape

In [None]:
y_test.shape