## Flow of Code:
#### 1. Read both datasets, and do data preparation: Scaling, One-hot Encoding
#### 2. For the limited (microclimate) data, train a model on Temp, Hum, DSW (4), Shade (4) and the one-hot building encoding as input features (find hyperparameters analytically)
#### 3. For the big (weather station) data, train a model on Temp, Hum and the one-hot building encoding
#### 4. Fine tune weather station model on microclimate data and compare performance
#### 5. Work on adding artificial features and dropout on microclimate model

In [3]:
# Load the TensorBoard notebook extension.

%reload_ext tensorboard
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.keras import initializers
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from tensorflow.keras import backend as K
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import pathlib
import pickle
import pathlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from datetime import datetime
import seaborn as sns
import tensorboard
tensorboard.__version__

'2.4.0'

In [30]:
class NeuralNetwork():
    """Fully connected Keras regressor for the 'CHWTON/SQFT' column.

    One instance wraps one model.  ``data_type`` selects both the numeric
    input columns and the input width of the network:

    * ``'WeatherStation'`` -- 'Air Temp' and 'Abs Hum' only.
    * ``'Microclimate'``   -- additionally the four 'DSW *' and four
      'Shade *' columns when preparing data for training.

    In both cases the one-hot encoded 'bldgname' column is prepended to the
    scaled numeric features.

    Args:
        activation_type: Activation passed to every hidden ``Dense`` layer.
            NOTE(review): whether the string 'leaky_relu' is accepted depends
            on the installed Keras version -- confirm before using it.
        hidden_layers: Total number of hidden layers (first layer,
            ``hidden_layers - 2`` middle layers, and a final 20-unit layer).
        loss: Loss identifier or callable handed to ``model.compile``.
        kernel_initializer: Kernel initializer of the first layer.
        bias_initializer: Bias initializer of the first layer.
        data_type: ``'WeatherStation'`` or ``'Microclimate'``.
        epochs: Number of epochs used by ``train`` and ``fine_tune``.
    """

    # Numeric input columns shared by both model kinds.
    _BASE_FEATURES = ('Air Temp', 'Abs Hum')
    # Full numeric input columns of a microclimate model (order matters:
    # it fixes the column order of the network input).
    _MICROCLIMATE_FEATURES = ('Air Temp',
                              'Abs Hum',
                              'DSW North',
                              'DSW South',
                              'DSW East',
                              'DSW West',
                              'Shade North',
                              'Shade East',
                              'Shade West',
                              'Shade South')
    # Width of the network input per model kind.
    # NOTE(review): presumably 11 one-hot building columns plus 2 (or 10)
    # numeric features -- confirm against the datasets.
    _INPUT_DIMS = {'WeatherStation': 13, 'Microclimate': 21}

    def __init__(self, activation_type, hidden_layers, loss, kernel_initializer, bias_initializer, data_type, epochs):
        self.activation_type = activation_type
        self.hidden_layers = hidden_layers
        self.loss = loss
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.data_type = data_type
        self.epochs = epochs
        # Populated by train(); None means "not trained yet".
        self.model = None
        self.history = None

    @staticmethod
    def huber_loss(y_true, y_pred, clip_delta=1.0):
        """Return the element-wise Huber loss between ``y_true`` and ``y_pred``.

        Errors smaller than ``clip_delta`` in absolute value are penalised
        quadratically, larger ones linearly.
        """
        error = y_true - y_pred
        cond = tf.keras.backend.abs(error) < clip_delta
        squared_loss = 0.5 * tf.keras.backend.square(error)
        linear_loss = clip_delta * (tf.keras.backend.abs(error) - 0.5 * clip_delta)
        return tf.where(cond, squared_loss, linear_loss)

    @staticmethod
    def huber_loss_mean(y_true, y_pred, clip_delta=1.0):
        """Return the mean of ``huber_loss`` over all elements."""
        # Qualified call: a bare ``huber_loss`` is not visible from inside
        # a method body and would raise NameError.
        return tf.keras.backend.mean(
            NeuralNetwork.huber_loss(y_true, y_pred, clip_delta))

    def _feature_columns(self, func):
        """Return the numeric input columns for this model kind and ``func``."""
        if func not in ('train', 'test'):
            raise ValueError("func must be 'train' or 'test', got %r" % (func,))
        if self.data_type == 'WeatherStation':
            return list(self._BASE_FEATURES)
        if self.data_type == 'Microclimate':
            if func == 'train':
                return list(self._MICROCLIMATE_FEATURES)
            return list(self._BASE_FEATURES)
        raise ValueError("Unsupported data_type: %r" % (self.data_type,))

    def _require_model(self):
        """Return the trained model or raise if ``train`` was never called."""
        model = getattr(self, 'model', None)
        if model is None:
            raise RuntimeError("Model has not been trained yet; call train() first.")
        return model

    def prepare_data(self, dataset, func):
        """Build scaled, one-hot-augmented train/test arrays from ``dataset``.

        Args:
            dataset: DataFrame with the columns 'CHWTON/SQFT', 'bldgname'
                and the numeric feature columns selected by ``data_type``.
            func: ``'train'`` or ``'test'``.  Only matters for a
                ``'Microclimate'`` model, where ``'test'`` restricts the
                numeric inputs to 'Air Temp' and 'Abs Hum'.

        Returns:
            ``(X_train, X_test, y_train, y_test)`` as float32 arrays from an
            80/20 split with a fixed ``random_state``.

        Raises:
            ValueError: If ``data_type`` or ``func`` is not recognised.
        """
        # Validate first so a bad configuration fails with a clear message
        # instead of an unbound local further down.
        X = dataset[self._feature_columns(func)]
        Y = dataset['CHWTON/SQFT'].values.reshape(-1, 1)
        X_bldg_name = dataset[['bldgname']]
        # create one-hot encoding for all buildings; the default (sparse)
        # output is densified explicitly so this works regardless of how the
        # installed scikit-learn spells the dense-output keyword.
        one_hot_encoder = OneHotEncoder()
        X_bldg_name_encoded = one_hot_encoder.fit_transform(X_bldg_name).toarray()
        X_bldg_name_encoded = np.asarray(X_bldg_name_encoded).astype('float32')
        # Normalize input and target values.
        # NOTE(review): both scalers are fit on the full dataset before the
        # split (test rows influence the statistics) and are re-fit on every
        # call, so each dataset is scaled by its own mean/variance.
        scaler1 = StandardScaler()
        X_scaled = scaler1.fit_transform(X)
        scaler2 = StandardScaler()
        Y_scaled = scaler2.fit_transform(Y)
        # add categorical to numeric data
        X_final = np.concatenate((X_bldg_name_encoded, X_scaled), axis=1)
        X_final = np.asarray(X_final).astype('float32')
        Y_scaled = np.asarray(Y_scaled).astype('float32')
        X_train, X_test, y_train, y_test = train_test_split(X_final, Y_scaled,
                                                            test_size=0.2,
                                                            random_state=20)
        return X_train, X_test, y_train, y_test

    def _layer_plan(self):
        """Return ``(input_dim, first_units, middle_units)`` for this model."""
        if self.data_type not in self._INPUT_DIMS:
            raise ValueError("Unsupported data_type: %r" % (self.data_type,))
        # Only the weather-station model with leaky ReLU uses the narrower
        # 20/30 stack; every other combination uses 30/40.
        narrow = (self.data_type == 'WeatherStation'
                  and self.activation_type == 'leaky_relu')
        first_units, middle_units = (20, 30) if narrow else (30, 40)
        return self._INPUT_DIMS[self.data_type], first_units, middle_units

    def define_model(self):
        """Build, compile (Adam optimizer) and summarise the network.

        Returns:
            The compiled ``Sequential`` model.

        Raises:
            ValueError: If ``data_type`` is not recognised.
        """
        input_dim, first_units, middle_units = self._layer_plan()
        model = Sequential()
        # Only the first layer receives the configured initializers.
        model.add(Dense(first_units,
                        input_dim=input_dim,
                        kernel_initializer=self.kernel_initializer,
                        bias_initializer=self.bias_initializer,
                        activation=self.activation_type))
        for _ in range(self.hidden_layers - 2):
            model.add(Dense(middle_units, activation=self.activation_type))
        model.add(Dense(20, activation=self.activation_type))
        model.add(Dense(1, activation='linear'))
        model.compile(loss=self.loss,
                      optimizer='adam')
        model.summary()
        return model

    def _fit(self, X_train, y_train):
        """Fit ``self.model`` with TensorBoard logging; store the history."""
        # Define the Keras TensorBoard callback; one timestamped run
        # directory per fit under logs/fit/.
        logdir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
        tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
        self.history = self.model.fit(X_train, y_train,
                                      batch_size=128,
                                      epochs=self.epochs,
                                      verbose=0,
                                      validation_split=0.2,
                                      callbacks=[tensorboard_callback])

    def train(self, dataset):
        """Create a fresh model and train it on the training split of ``dataset``."""
        X_train, X_test, y_train, y_test = self.prepare_data(dataset, 'train')
        self.model = self.define_model()
        self._fit(X_train, y_train)

    def fine_tune(self, dataset, fine_tune_at=2):
        """Continue training the existing model on ``dataset``.

        The first ``fine_tune_at`` layers are frozen, the model is
        recompiled, and training runs on the ``'test'`` feature set
        ('Air Temp', 'Abs Hum' plus the building encoding).

        Args:
            dataset: DataFrame in the format expected by ``prepare_data``.
            fine_tune_at: Number of leading layers to freeze (default 2).

        Raises:
            RuntimeError: If ``train`` has not been called yet.
        """
        model = self._require_model()
        model.trainable = True
        for layer in model.layers[:fine_tune_at]:
            layer.trainable = False
        # Recompile so the changed trainable flags take effect.
        model.compile(loss=self.loss,
                      optimizer='adam')
        X_train, X_test, y_train, y_test = self.prepare_data(dataset, 'test')
        self._fit(X_train, y_train)

    def plot_curve(self):
        """Plot training and validation loss of the most recent fit.

        Raises:
            RuntimeError: If no fit has been run yet.
        """
        history = getattr(self, 'history', None)
        if history is None:
            raise RuntimeError("No training history available; call train() first.")
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

    def evaluate(self, dataset, data_type):
        """Print RMSE and R2 of the model on the test split of ``dataset``.

        Args:
            dataset: DataFrame in the format expected by ``prepare_data``.
            data_type: Kind of ``dataset``.  When it equals the model's own
                ``data_type`` the 'train' feature set is used, otherwise the
                'test' feature set.  It is also used as the label in the
                printed messages.

        Raises:
            RuntimeError: If ``train`` has not been called yet.
        """
        model = self._require_model()
        call_type = 'train' if data_type == self.data_type else 'test'
        _, X_test, _, y_test = self.prepare_data(dataset, call_type)
        Y_preds = model.predict(X_test)
        # Both scores are computed on the scaled target values.
        RMSE = np.sqrt(metrics.mean_squared_error(y_test, Y_preds))
        R2_score = metrics.r2_score(y_test, Y_preds)
        print("The RMSE score for test dataset of " + data_type + " is:", RMSE)
        print("The R2 score for test dataset of " + data_type + " is:", R2_score)

In [31]:
# Load both datasets up front: the "limited" file feeds the microclimate
# model, the "big" file feeds the weather-station model.
MicroclimateData = pd.read_csv('./all_buildings_limited.csv')
WeatherStationData = pd.read_csv('./all_buildings_big.csv')
if __name__ == "__main__":
    # Microclimate model: 4 hidden ReLU layers, 200 epochs, evaluated on
    # the held-out split of the same dataset.
    MicroclimateModel = NeuralNetwork(
        activation_type='relu',
        hidden_layers=4,
        loss='huber_loss',
        kernel_initializer='he_normal',
        bias_initializer='he_normal',
        data_type='Microclimate',
        epochs=200,
    )
    MicroclimateModel.train(dataset=MicroclimateData)
    MicroclimateModel.evaluate(dataset=MicroclimateData, data_type='Microclimate')
    
    
    


Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_55 (Dense)             (None, 30)                660       
_________________________________________________________________
dense_56 (Dense)             (None, 40)                1240      
_________________________________________________________________
dense_57 (Dense)             (None, 40)                1640      
_________________________________________________________________
dense_58 (Dense)             (None, 20)                820       
_________________________________________________________________
dense_59 (Dense)             (None, 1)                 21        
Total params: 4,381
Trainable params: 4,381
Non-trainable params: 0
_________________________________________________________________


2021-10-29 00:15:59.265556: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-10-29 00:15:59.265636: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.




2021-10-29 00:16:00.102251: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-10-29 00:16:00.102322: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.
2021-10-29 00:16:00.157408: I tensorflow/core/profiler/internal/gpu/device_tracer.cc:223]  GpuTracer has collected 0 callback api events and 0 activity events. 
2021-10-29 00:16:00.160692: I tensorflow/core/profiler/rpc/client/save_profile.cc:176] Creating directory: logs/fit/20211029-001559/train/plugins/profile/2021_10_29_00_16_00
2021-10-29 00:16:00.162209: I tensorflow/core/profiler/rpc/client/save_profile.cc:182] Dumped gzipped tool data for trace.json.gz to logs/fit/20211029-001559/train/plugins/profile/2021_10_29_00_16_00/biswas.trace.json.gz
2021-10-29 00:16:00.163823: I tensorflow/core/profiler/rpc/client/save_profile.c

The RMSE score for test dataset of Microclimateis: 0.13961779
The R2 score for test dataset of Microclimateis: 0.9798826969943879


In [32]:
# Weather-station model: trained on the big dataset for 100 epochs, then
# scored on the microclimate dataset to get the pre-fine-tuning baseline.
WeatherStationModel = NeuralNetwork(
    activation_type='relu',
    hidden_layers=4,
    loss='huber_loss',
    kernel_initializer='he_normal',
    bias_initializer='he_normal',
    data_type='WeatherStation',
    epochs=100,
)
WeatherStationModel.train(dataset=WeatherStationData)
WeatherStationModel.evaluate(dataset=MicroclimateData, data_type='Microclimate')


Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_60 (Dense)             (None, 30)                420       
_________________________________________________________________
dense_61 (Dense)             (None, 40)                1240      
_________________________________________________________________
dense_62 (Dense)             (None, 40)                1640      
_________________________________________________________________
dense_63 (Dense)             (None, 20)                820       
_________________________________________________________________
dense_64 (Dense)             (None, 1)                 21        
Total params: 4,141
Trainable params: 4,141
Non-trainable params: 0
_________________________________________________________________


2021-10-29 00:17:00.914003: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-10-29 00:17:00.914059: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.




2021-10-29 00:17:01.688106: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-10-29 00:17:01.688213: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.
2021-10-29 00:17:01.720885: I tensorflow/core/profiler/internal/gpu/device_tracer.cc:223]  GpuTracer has collected 0 callback api events and 0 activity events. 
2021-10-29 00:17:01.730258: I tensorflow/core/profiler/rpc/client/save_profile.cc:176] Creating directory: logs/fit/20211029-001700/train/plugins/profile/2021_10_29_00_17_01
2021-10-29 00:17:01.737240: I tensorflow/core/profiler/rpc/client/save_profile.cc:182] Dumped gzipped tool data for trace.json.gz to logs/fit/20211029-001700/train/plugins/profile/2021_10_29_00_17_01/biswas.trace.json.gz
2021-10-29 00:17:01.741221: I tensorflow/core/profiler/rpc/client/save_profile.c

The RMSE score for test dataset of Microclimateis: 0.5009891
The R2 score for test dataset of Microclimateis: 0.7409731764138219


In [33]:
# Fine-tune the weather-station model on the microclimate data, then score
# it on both datasets to compare against the earlier results.
WeatherStationModel.fine_tune(dataset=MicroclimateData)
WeatherStationModel.evaluate(dataset=MicroclimateData, data_type='Microclimate')
WeatherStationModel.evaluate(dataset=WeatherStationData, data_type='WeatherStation')

2021-10-29 00:22:04.974229: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-10-29 00:22:04.974303: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.




2021-10-29 00:22:05.736087: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-10-29 00:22:05.736177: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1441] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI could not be loaded or symbol could not be found.
2021-10-29 00:22:05.783886: I tensorflow/core/profiler/internal/gpu/device_tracer.cc:223]  GpuTracer has collected 0 callback api events and 0 activity events. 
2021-10-29 00:22:05.787130: I tensorflow/core/profiler/rpc/client/save_profile.cc:176] Creating directory: logs/fit/20211029-002204/train/plugins/profile/2021_10_29_00_22_05
2021-10-29 00:22:05.788484: I tensorflow/core/profiler/rpc/client/save_profile.cc:182] Dumped gzipped tool data for trace.json.gz to logs/fit/20211029-002204/train/plugins/profile/2021_10_29_00_22_05/biswas.trace.json.gz
2021-10-29 00:22:05.790039: I tensorflow/core/profiler/rpc/client/save_profile.c

The RMSE score for test dataset of Microclimateis: 0.15505601
The R2 score for test dataset of Microclimateis: 0.975187784048241
The RMSE score for test dataset of WeatherStationis: 0.5142654
The R2 score for test dataset of WeatherStationis: 0.7355653470084045


In [24]:
# Open TensorBoard inline on the "logs" directory; the training runs write
# their event files below it, in logs/fit/<timestamp>.
%tensorboard --logdir logs

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
Traceback (most recent call last):
  File "/usr/local/bin/tensorboard", line 8, in <module>
    from tensorboard.tensorboard.main import *
ModuleNotFoundError: No module named 'tensorboard'