In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Flow of Code:
#### 1. Read both datasets, and do data preparation: Scaling, One-hot Encoding
#### 2. For limited data, train  model on Temp, Hum, Dsw(4), Shade(4), Encoding input features (find hyperparameters analytically)
#### 3. For big data, train model on Temp, Hum, Encoding 
#### 4. Fine tune weather station model on microclimate data and compare performance
#### 5. Work on adding artificial features and dropout on microclimate model

In [50]:
# Load the TensorBoard notebook extension.

%reload_ext tensorboard
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.keras import initializers
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import SGD
from tensorflow.python.keras.optimizer_v2.adam import Adam
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from tensorflow.keras import backend as K
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import pathlib
import pickle
import pathlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from datetime import datetime
import seaborn as sns
import tensorboard
tensorboard.__version__

'2.6.0'

In [55]:
class NeuralNetwork():
    def __init__(self,activation_type, hidden_layers, loss, kernel_initializer, bias_initializer, data_type,epochs):
        self.activation_type = activation_type
        self.hidden_layers = hidden_layers
        self.loss = loss
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer 
        self.data_type = data_type
        self.epochs = epochs
    def huber_loss(y_true, y_pred, clip_delta=1.0):
        error = y_true - y_pred
        cond  = tf.keras.backend.abs(error) < clip_delta
        squared_loss = 0.5 * tf.keras.backend.square(error)
        linear_loss  = clip_delta * (tf.keras.backend.abs(error) - 0.5 * clip_delta)
        return tf.where(cond, squared_loss, linear_loss)
    def huber_loss_mean(y_true, y_pred, clip_delta=1.0):
        return tf.keras.backend.mean(huber_loss(y_true, y_pred, clip_delta))
    def prepare_data(self, dataset, func):
        Y = dataset['CHWTON/SQFT']
        if self.data_type == 'WeatherStation'  and (func == 'test' or func == 'train'):
            X = dataset[['Air Temp', 'Abs Hum']]
        if self.data_type == 'Microclimate' and func == 'train':
            X = dataset[['Air Temp', 
                         'Abs Hum', 
                         'DSW North', 
                         'DSW South', 
                         'DSW East', 
                         'DSW West', 
                         'Shade North', 
                         'Shade East', 
                         'Shade West',
                         'Shade South']]
        if self.data_type == 'Microclimate' and func == 'test':
            X = dataset[['Air Temp', 
                         'Abs Hum']]
        X_bldg_name = dataset[['bldgname']]
        Y = Y.values.reshape(-1, 1)
        #create one-hot encoding for all buildings
        one_hot_encoder = OneHotEncoder(sparse=False)
        one_hot_encoder.fit(X_bldg_name)
        X_bldg_name_encoded = one_hot_encoder.transform(X_bldg_name)
        X_bldg_name_encoded = pd.DataFrame(data=X_bldg_name_encoded, columns=one_hot_encoder.categories_)
        X_bldg_name_encoded = np.asarray(X_bldg_name_encoded).astype('float32')
        # Normalize input and target values
        scaler1 = StandardScaler()
        X_scaled = scaler1.fit_transform(X)
        scaler2 = StandardScaler()
        Y_scaled = scaler2.fit_transform(Y)
        # add categorical to numeric data
        X_final = np.concatenate((X_bldg_name_encoded, X_scaled), axis =1)
        X_final = np.asarray(X_final).astype('float32')
        Y_scaled = np.asarray(Y_scaled).astype('float32')
        X_train, X_test, y_train, y_test = train_test_split(X_final, Y_scaled, 
                                                           test_size=0.2, 
                                                           random_state=20)
        return X_train, X_test, y_train, y_test
    def define_model(self):
        if self.data_type == 'WeatherStation' :
            model = Sequential()
            if self.activation_type == 'leaky_relu':
                model.add(Dense(20, 
                                input_dim=13, 
                                kernel_initializer=self.kernel_initializer, 
                                bias_initializer=self.bias_initializer, 
                                activation= self.activation_type))
                for i in range(0,(self.hidden_layers-2)):
                    model.add(Dense(30, activation = self.activation_type))
                model.add(Dense(20, activation = self.activation_type))
                model.add(Dense(1, activation = 'linear'))
            else:
                model.add(Dense(30, 
                                input_dim=13, 
                                kernel_initializer=self.kernel_initializer, 
                                bias_initializer=self.bias_initializer, 
                                activation= self.activation_type))
                for i in range(0,(self.hidden_layers-2)):
                    model.add(Dense(40, activation = self.activation_type))
                model.add(Dense(20, activation = self.activation_type))
                model.add(Dense(1, activation = 'linear'))
            
            
        if self.data_type == 'Microclimate':
            model = Sequential()
            if self.activation_type == 'leaky_relu':
                model.add(Dense(30, 
                                input_dim=21, 
                                kernel_initializer=self.kernel_initializer, 
                                bias_initializer=self.bias_initializer, 
                                activation= self.activation_type))
                for i in range(0,(self.hidden_layers-2)):
                    model.add(Dense(40, activation = self.activation_type))
                model.add(Dense(20, activation = self.activation_type))
                model.add(Dense(1, activation = 'linear'))
            else:
                model.add(Dense(30, 
                                input_dim=21, 
                                kernel_initializer=self.kernel_initializer, 
                                bias_initializer=self.bias_initializer, 
                                activation= self.activation_type))
                for i in range(0,(self.hidden_layers-2)):
                    model.add(Dense(40, activation = self.activation_type))
                model.add(Dense(20, activation = self.activation_type))
                model.add(Dense(1, activation = 'linear'))
        model.compile( loss = self.loss, 
                      optimizer = 'adam')
        model.summary()
        return model
    def train(self, dataset):
        X_train, X_test, y_train, y_test = self.prepare_data(dataset,'train')
        self.model = self.define_model()
        # Define the Keras TensorBoard callback.
        logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
        tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
        self.history = self.model.fit(X_train,y_train, 
                            batch_size=128, 
                            epochs=self.epochs, 
                            verbose=0, 
                            validation_split=0.2, 
                            callbacks=[tensorboard_callback])
    def fine_tune(self, dataset,alpha=0.001):
        self.model.trainable = True
        fine_tune_at = 2
        for layer in self.model.layers[:fine_tune_at]:
            layer.trainable =  False
        opt= Adam(learning_rate=alpha)
        self.model.compile(loss=self.loss, 
                           optimizer=opt)
        X_train, X_test, y_train, y_test = self.prepare_data(dataset,'test')
        # Define the Keras TensorBoard callback.
        logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
        tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
        self.history = self.model.fit(X_train,y_train, 
                            batch_size=128, 
                            epochs=self.epochs, 
                            verbose=0, 
                            validation_split=0.2, 
                            callbacks=[tensorboard_callback])
        
        
    def plot_curve(self):
        plt.plot(self.history.history['loss'])
        plt.plot(self.history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()
    def evaluate(self, dataset, data_type ):
        call_type = 'test'
        if data_type == self.data_type:
            call_type = 'train'
        _, X_test, _, y_test = self.prepare_data(dataset,call_type)
        Y_preds = self.model.predict(X_test)
        RMSE = np.sqrt(metrics.mean_squared_error(y_test, Y_preds))
        R2_score = metrics.r2_score(y_test, Y_preds)
        print("The RMSE score for test dataset of "+ data_type +" is:", RMSE)
        print("The R2 score for test dataset of "+ data_type +" is:", R2_score)
        return RMSE,R2_score
        
                

In [46]:
MicroclimateData = pd.read_csv('/content/drive/Shareddrives/Microclimate-Building Energy/Data/microclimate_model/Combined/all_buildings_limited.csv')
WeatherStationData = pd.read_csv('/content/drive/Shareddrives/Microclimate-Building Energy/Data/NN_big_data/Combined/all_buildings_big.csv')
if __name__ == "__main__":
    # Train microclimate Model
    MicroclimateModel = NeuralNetwork('relu',4, 'huber_loss', 'he_normal','he_normal', 'Microclimate', 200)
    MicroclimateModel.train(MicroclimateData)
    _,_ = MicroclimateModel.evaluate(MicroclimateData, 'Microclimate')
    
    _,xTest,_,yTest = MicroclimateModel.prepare_data(MicroclimateData,'train')
    yPred = MicroclimateModel.model.predict(xTest)
    
    


Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_70 (Dense)             (None, 30)                660       
_________________________________________________________________
dense_71 (Dense)             (None, 40)                1240      
_________________________________________________________________
dense_72 (Dense)             (None, 40)                1640      
_________________________________________________________________
dense_73 (Dense)             (None, 20)                820       
_________________________________________________________________
dense_74 (Dense)             (None, 1)                 21        
Total params: 4,381
Trainable params: 4,381
Non-trainable params: 0
_________________________________________________________________
The RMSE score for test dataset of Microclimateis: 0.13632365
The R2 score for test dataset of Microclimateis: 0.9808

In [47]:
    y_tes = [x[0] for x in yTest]    
    y_pre = [x[0] for x in yPred]
    ModelPred = pd.DataFrame({'Actual CHWTON':y_tes, 'Predicted CHWTON':y_pre})
    ModelPred

Unnamed: 0,Actual CHWTON,Predicted CHWTON
0,-1.031620,-1.103264
1,1.685585,1.559486
2,1.434169,1.219953
3,1.121287,1.256245
4,0.741267,0.856180
...,...,...
1913,-0.843508,-0.863981
1914,1.545410,1.773315
1915,1.882579,2.260026
1916,1.285109,1.196050


In [48]:
# Train Weather Station Model
WeatherStationModel = NeuralNetwork('relu',4, 'huber_loss', 'he_normal','he_normal', 'WeatherStation', 100)
WeatherStationModel.train(WeatherStationData)
_,_ = WeatherStationModel.evaluate(MicroclimateData, 'Microclimate')


Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_75 (Dense)             (None, 30)                420       
_________________________________________________________________
dense_76 (Dense)             (None, 40)                1240      
_________________________________________________________________
dense_77 (Dense)             (None, 40)                1640      
_________________________________________________________________
dense_78 (Dense)             (None, 20)                820       
_________________________________________________________________
dense_79 (Dense)             (None, 1)                 21        
Total params: 4,141
Trainable params: 4,141
Non-trainable params: 0
_________________________________________________________________
The RMSE score for test dataset of Microclimateis: 0.5034203
The R2 score for test dataset of Microclimateis: 0.73845

In [59]:
# Fine Tune and compare results
l_rates = [0.001,0.00125,0.0015,0.00175,0.002,0.003,0.01]
R2_max=0
RMSE_min=1
lr=0
for a in l_rates:
  print("For learning rate :"+str(a))
  WeatherStationModel.fine_tune(MicroclimateData,alpha=a)
  RMSE_fine_tune,R2_fine_tune = WeatherStationModel.evaluate(MicroclimateData, 'Microclimate')
  if R2_fine_tune>R2_max and RMSE_fine_tune<RMSE_min:
    lr=a
    R2_max=R2_fine_tune
    RMSE_min=RMSE_fine_tune
print("For final learning rate :"+str(lr))
#MicroClimate Data
print("The RMSE score for test dataset of Microclimate is:", RMSE_min)
print("The R2 score for test dataset of Microclimate is:", R2_max)
#Weather Data
Rm,R2=WeatherStationModel.evaluate(WeatherStationData, 'Weatherstation')



For learning rate :0.001
The RMSE score for test dataset of Microclimateis: 0.14760561
The R2 score for test dataset of Microclimateis: 0.9775149409672151
For learning rate :0.00125
The RMSE score for test dataset of Microclimateis: 0.14565828
The R2 score for test dataset of Microclimateis: 0.9781043068730914
For learning rate :0.0015
The RMSE score for test dataset of Microclimateis: 0.14662717
The R2 score for test dataset of Microclimateis: 0.9778120499774631
For learning rate :0.00175
The RMSE score for test dataset of Microclimateis: 0.14871268
The R2 score for test dataset of Microclimateis: 0.9771763929077267
For learning rate :0.002
The RMSE score for test dataset of Microclimateis: 0.14461781
The R2 score for test dataset of Microclimateis: 0.9784160056388996
For learning rate :0.003
The RMSE score for test dataset of Microclimateis: 0.14986454
The R2 score for test dataset of Microclimateis: 0.9768214586978001
For learning rate :0.01
The RMSE score for test dataset of Microc

In [None]:
%tensorboard --logdir logs