In [176]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
import random
import tensorflow.keras.utils as utils
import pydot
from tensorflow.keras import regularizers
from tensorflow.keras import layers
from sklearn.ensemble import RandomForestRegressor
class Mics_Model:
    """Multi-input regression experiment over randomly grouped feature columns.

    The feature columns of a CSV dataset are split into ``group_number``
    random groups. Each group gets its own Keras input and, optionally, its
    own small encoder sub-model; the (encoded) groups are concatenated and fed
    to a dense regression head.

    ASSUMPTION about the CSV layout: column 0 is the row index, column 1 holds
    the labels, and every remaining column is a feature.
    """

    def __init__(self, dataset_dir, use_encoder=True, sampling_method="Vanilla", global_model="NN", group_number = 3):
        """Store the experiment configuration; no data is loaded here.

        Args:
            dataset_dir: Path of the CSV file read by get_raw_data().
            use_encoder: When truthy, every feature group passes through its
                own encoder sub-model before the groups are concatenated.
            sampling_method: Encoder flavour, "Vanilla" or "Variational".
                Only consulted when use_encoder is truthy.
            global_model: Stored on the instance; not read by any method of
                this class.
            group_number: Number of feature groups (= number of model inputs).
        """
        self.dataset_dir = dataset_dir
        self.use_encoder = use_encoder
        self.sampling_method = sampling_method
        self.global_model = global_model
        self.group_number = group_number
        self.raw_data = None

    def _ensure_raw_data(self):
        """Return the cached dataset, loading it on first use."""
        if self.raw_data is None:
            self.get_raw_data()
        return self.raw_data

    def get_raw_data(self):
        """Load the CSV, impute missing values, shuffle the rows and cache the result.

        Missing values are replaced by the column mean and the rows are
        shuffled with a fixed random_state so the later 80/20 split is
        repeatable.
        NOTE(review): the means are computed over the whole file, i.e. before
        the train/test split performed in get_features_and_labels().

        Returns:
            The prepared DataFrame (also stored in ``self.raw_data``).
        """
        raw_data = pd.read_csv(self.dataset_dir, index_col=0)
        # numeric_only=True keeps mean() from raising on non-numeric columns
        # with pandas >= 2.0; such columns are simply left un-imputed.
        raw_data = raw_data.fillna(raw_data.mean(numeric_only=True))
        raw_data = raw_data.sample(frac=1, random_state=41)
        self.raw_data = raw_data
        return raw_data

    def get_input_group_lenthgs(self):
        """Split the feature count into ``group_number`` near-equal group sizes.

        The feature count is the number of columns minus one (the label
        column). Example: 28 columns -> 27 features, 4 groups -> [7, 7, 6, 7].
        The result can be passed to get_nn_model() as ``inp_sizes``.
        (The misspelled method name is kept so existing callers keep working.)

        Returns:
            list[int]: One size per group; the sizes sum to the feature count.

        Raises:
            ValueError: If group_number is smaller than 1 or larger than the
                number of features (which would create empty groups).
        """
        count = self.group_number
        remaining = len(self._ensure_raw_data().columns) - 1
        if count < 1 or count > remaining:
            raise ValueError(
                "group_number must be between 1 and the number of feature "
                "columns (%d), got %r" % (remaining, count)
            )
        input_sizes = []
        for i in range(count):
            # Divide what is left evenly among the groups still to be filled.
            group_size = round(remaining / (count - i))
            input_sizes.append(group_size)
            remaining -= group_size
        return input_sizes

    def get_grouped_feature_cols(self, seed=None):
        """Randomly partition the feature column positions into groups.

        Positions are 0-based and count feature columns only (the label column
        is excluded), e.g. [[1, 4, 5], [2, 3, 6]]. Group sizes come from
        get_input_group_lenthgs().

        Args:
            seed: Optional seed for a private random generator, making the
                grouping reproducible. With None (the default) the global
                ``random`` module state is used.

        Returns:
            list[list[int]]: One list of column positions per group; every
            position appears in exactly one group.
        """
        rng = random if seed is None else random.Random(seed)
        inp_sizes = self.get_input_group_lenthgs()
        feature_num = len(self.raw_data.columns) - 1
        available = list(range(feature_num))
        grouped_feature_cols = []
        for size in inp_sizes:
            chosen = rng.sample(available, size)
            grouped_feature_cols.append(chosen)
            # Drop the drawn positions in one pass, preserving the order of the rest.
            taken = set(chosen)
            available = [col for col in available if col not in taken]
        return grouped_feature_cols

    def get_features_and_labels(self, groups):
        """Build scaled per-group train/test feature arrays plus label arrays.

        The first 80% of the (already shuffled) rows form the training set and
        the rest the test set. Features are standardised with statistics
        fitted on the training rows only.

        Args:
            groups: List of lists of feature column positions, e.g.
                [[1, 4, 5], [2, 3, 6]], as returned by
                get_grouped_feature_cols().

        Returns:
            [[train_x1, train_x2, ..., train_xn, train_y],
             [test_x1,  test_x2,  ..., test_xn,  test_y]]
            where every entry is a NumPy array and n == len(groups).
        """
        data = self._ensure_raw_data()
        split = int(0.8 * len(data.index))

        trainx_df = data.iloc[:split, 1:]
        trainy_df = data.iloc[:split, 0]
        testx_df = data.iloc[split:, 1:]
        testy_df = data.iloc[split:, 0]

        scaler = StandardScaler()
        trainx_scaled = pd.DataFrame(scaler.fit_transform(trainx_df), columns=trainx_df.columns, index=trainx_df.index)
        testx_scaled = pd.DataFrame(scaler.transform(testx_df), columns=testx_df.columns, index=testx_df.index)

        # Size the result from `groups` itself so a grouping whose length
        # differs from self.group_number cannot leave gaps or be overwritten
        # by the labels.
        train_parts = [trainx_scaled.iloc[:, group].values for group in groups]
        test_parts = [testx_scaled.iloc[:, group].values for group in groups]
        train_parts.append(trainy_df.values)
        test_parts.append(testy_df.values)
        return [train_parts, test_parts]

    def get_vanilla_encoder_model(self, inp_size):
        """Build a deterministic encoder: Dense(10, relu) -> Dense(inp_size, relu).

        Args:
            inp_size: Number of features in the group; also the output width.

        Returns:
            keras.Model mapping a (batch, inp_size) input to a
            (batch, inp_size) output.
        """
        inputs = keras.layers.Input(shape=(inp_size,))
        # A single hidden layer: an earlier duplicate Dense whose output was
        # immediately discarded (and so never part of the model) is removed.
        hidden = keras.layers.Dense(10, activation="relu")(inputs)
        outputs = keras.layers.Dense(inp_size, activation="relu")(hidden)
        return keras.Model(inputs, outputs)

    class Sampling(layers.Layer):
        """Draw a sample from a Gaussian given its mean and log-variance."""

        def call(self, inputs):
            """Return z_mean + std * epsilon with epsilon ~ N(0, 1).

            Args:
                inputs: Pair ``(z_mean, z_log_var)`` of 2-D tensors.
            """
            z_mean, z_log_var = inputs
            batch = tf.shape(z_mean)[0]
            dim = tf.shape(z_mean)[1]
            epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
            # exp(0.5 * log_var) is the standard deviation.
            return z_mean + tf.exp(0.5 * z_log_var) * epsilon

    def get_variatonal_encoder_model(self, inp_size):
        """Build a stochastic encoder that samples its output via Sampling.

        A Dense(10, relu) layer feeds two linear heads (``z_mean`` and
        ``z_log_var``), from which the output is sampled. No KL term is added
        to the model by this method.

        Args:
            inp_size: Number of features in the group; also the output width.

        Returns:
            keras.Model mapping a (batch, inp_size) input to a sampled
            (batch, inp_size) output.
        """
        inputs = layers.Input(shape=(inp_size,))
        hidden = layers.Dense(10, activation="relu")(inputs)
        z_mean = layers.Dense(inp_size, name="z_mean")(hidden)
        z_log_var = layers.Dense(inp_size, name="z_log_var")(hidden)
        outputs = self.Sampling()([z_mean, z_log_var])
        return keras.Model(inputs, outputs)
    # New sampling methods can be added here (and registered in get_nn_model).

    def get_nn_model(self, inp_sizes, drop_out=0.25, hidden_num = 4, hidden_size=32):
        """Build the multi-input regression network.

        Args:
            inp_sizes: Feature count of every input group, e.g. the output of
                get_input_group_lenthgs(). Inputs are named "input_0",
                "input_1", ...
            drop_out: Dropout rate applied after every hidden Dense layer.
            hidden_num: Number of hidden blocks added after the first one, so
                the head has ``hidden_num + 1`` Dense+Dropout blocks in total.
            hidden_size: Width of every hidden Dense layer.

        Returns:
            keras.Model with one input per group and a single-unit output.

        Raises:
            ValueError: If inp_sizes is empty, or if use_encoder is set and
                sampling_method is not a known encoder type.
        """
        inp_sizes = list(inp_sizes)
        if not inp_sizes:
            raise ValueError("inp_sizes must contain at least one group size")

        inputs = [
            keras.layers.Input(shape=(size,), name="input_" + str(i))
            for i, size in enumerate(inp_sizes)
        ]

        if self.use_encoder:
            # Extend this table when new sampling methods are added.
            encoder_builders = {
                "Vanilla": self.get_vanilla_encoder_model,
                "Variational": self.get_variatonal_encoder_model,
            }
            if self.sampling_method not in encoder_builders:
                raise ValueError(
                    "Unknown sampling_method %r; expected one of %s"
                    % (self.sampling_method, sorted(encoder_builders))
                )
            build_encoder = encoder_builders[self.sampling_method]
            encoders = [build_encoder(size) for size in inp_sizes]
            branches = [encoder(inp) for encoder, inp in zip(encoders, inputs)]
        else:
            branches = inputs

        # Concatenation needs several tensors; a single group is used as-is.
        if len(branches) == 1:
            global_input = branches[0]
        else:
            global_input = keras.layers.concatenate(branches)

        h = global_input
        for _ in range(hidden_num + 1):
            h = keras.layers.Dense(
                hidden_size,
                activation="relu",
                kernel_regularizer=regularizers.l1_l2(l1=1e-4, l2=1e-3),
            )(h)
            h = keras.layers.Dropout(drop_out)(h)

        # relu on the output restricts predictions to non-negative values.
        outputs = keras.layers.Dense(1, activation="relu")(h)
        return keras.Model(inputs=inputs, outputs=outputs)

    def default_exp(self, epochs=300, batch_size=300, learning_rate=0.01):
        """Run the default experiment: group features, train, report best val loss.

        Trains with Adam and mean squared error, early stopping (patience 50)
        and learning-rate reduction on plateau (factor 0.8, patience 30).
        NOTE(review): the held-out 20% split is used as validation data for
        the callbacks and is also the source of the reported score.

        Args:
            epochs: Maximum number of training epochs.
            batch_size: Mini-batch size.
            learning_rate: Initial Adam learning rate.

        Returns:
            The lowest validation loss seen during training (also printed).
        """
        inp_sizes = self.get_input_group_lenthgs()
        groups = self.get_grouped_feature_cols()
        train_set, test_set = self.get_features_and_labels(groups)
        MICS_model = self.get_nn_model(inp_sizes=inp_sizes)
        callback = [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50),
            keras.callbacks.ReduceLROnPlateau("val_loss", factor=0.8, patience=30,
                                              verbose=2, mode="auto",
                                              min_lr=1e-6),
        ]
        MICS_model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                           loss=keras.losses.MeanSquaredError())
        # The last entry of each set is the label array; the rest are the group inputs.
        history = MICS_model.fit(x=train_set[:-1], y=train_set[-1],
                                 validation_data=(test_set[:-1], test_set[-1]),
                                 epochs=epochs, batch_size=batch_size, callbacks=callback)
        training_val_loss = history.history["val_loss"]
        best_row_index = np.argmin(training_val_loss)
        best_val_loss = training_val_loss[best_row_index]
        print(best_val_loss)
        return best_val_loss
                


In [182]:
# Path of the dataset CSV, relative to the notebook's working directory.
dataset_dir = "./Datasets/energydata_use.csv"
# use_encoder=False: the three feature groups are concatenated directly,
# without a per-group encoder sub-model.
deneyelim = Mics_Model(dataset_dir, use_encoder=False, group_number=3)
# Read, impute and shuffle the CSV into deneyelim.raw_data, which default_exp() reads.
deneyelim.get_raw_data()
# Train the default network and print the best validation loss.
deneyelim.default_exp()

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

Unnamed: 0_level_0,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,...,T8,RH_8,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-04-30 15:10:00,90,0,21.89,35.633333,21.2,34.256667,24.493333,35.393333,20.1,37.066667,...,22.7,38.09,19.39,38.363333,10.533333,759.65,59.5,5.166667,40.0,2.933333
2016-05-15 18:30:00,100,30,23.6,37.59,22.18,37.09,24.29,35.564286,24.2,34.59,...,25.79,35.0,22.79,33.718,11.7,760.95,60.0,3.0,34.5,4.15
2016-03-10 19:50:00,150,20,22.0,39.693333,19.79,40.4,21.23,39.363333,21.39,35.9,...,22.426667,36.73,18.133333,39.933333,6.083333,761.283333,67.333333,2.0,65.0,0.45
2016-01-18 04:10:00,40,0,20.633333,40.53,19.0,40.4,20.26,40.29,20.0,38.0,...,20.89,47.59,17.6,40.933333,-4.0,761.3,85.666667,3.0,27.0,-6.133333
2016-01-25 11:40:00,30,0,20.066667,44.09,19.39,43.326667,20.2,44.29,18.2,45.56,...,18.088889,48.322778,16.633333,48.59,11.6,763.3,60.0,4.0,40.0,3.933333


In [129]:
# Draw a random partition of the feature column positions (label excluded)
# into group_number groups and display it.
groups=deneyelim.get_grouped_feature_cols()
groups

[[8, 19, 15, 0, 2, 10, 24, 9],
 [14, 11, 18, 13, 16, 6, 20, 4],
 [17, 12, 3, 1, 22, 23, 5, 7, 21]]

In [134]:
# Build the scaled per-group train/test arrays (plus labels) for the grouping in `groups`.
r = deneyelim.get_features_and_labels(groups)

9

In [106]:
# Display the whole nested [train, test] structure.
# NOTE(review): the saved output shows DataFrames, while the method now returns
# `.values` arrays — presumably left over from an earlier version of the class;
# re-run to refresh, and prefer inspecting shapes over dumping everything.
deneyelim.get_features_and_labels(groups)

[[                         RH_6        T4        T8    RH_out        T5  \
  date                                                                    
  2016-01-11 17:00:00  0.769623 -0.761611 -1.924584  0.786817 -1.385479   
  2016-01-11 17:10:00  0.762633 -0.761611 -1.924584  0.786817 -1.385479   
  2016-01-11 17:20:00  0.729854 -0.810232 -1.924584  0.786817 -1.385479   
  2016-01-11 17:30:00  0.739495 -0.834542 -1.983431  0.786817 -1.385479   
  2016-01-11 17:40:00  0.792640 -0.834542 -1.983431  0.786817 -1.359694   
  ...                       ...       ...       ...       ...       ...   
  2016-04-30 07:30:00 -0.294374 -0.569336 -0.106203  1.096424  0.110056   
  2016-04-30 07:40:00 -0.287143 -0.609117 -0.106203  1.070624  0.058486   
  2016-04-30 07:50:00 -0.281118 -0.569336 -0.159166  1.044823  0.110056   
  2016-04-30 08:00:00 -0.300159 -0.569336 -0.159166  1.019022  0.161626   
  2016-04-30 08:10:00 -0.329323 -0.569336 -0.159166  0.928720  0.257031   
  
                      

In [56]:
# Scratch check of list aliasing: both entries of b refer to the same list object a.
a = [None]*3
b = [a,a]


In [57]:
# Show b before any element is assigned.
b

[[None, None, None], [None, None, None]]

In [58]:
# Writing through b[0] also changes b[1], because both rows are the same list.
b[0][1] = 5
b

[[None, 5, None], [None, 5, None]]

In [61]:
# Outer list multiplication repeats the reference: both rows are the same inner list.
a=[[None]*3]*2
a

[[None, None, None], [None, None, None]]

In [62]:
# The write appears in both rows, confirming the rows are aliased.
a[0][2]=5
a

[[None, None, 5], [None, None, 5]]

In [63]:
# A nested comprehension builds a distinct inner list for every row
# (the construction used for the nested train/test result structure).
x = [[None for _ in range(3)] for _ in range(2)]
x

[[None, None, None], [None, None, None]]

In [64]:
# Only row 1 changes: the rows are independent lists.
x[1][2]=2
x

[[None, None, None], [None, None, 2]]

In [107]:
# Confirm that a bracket literal is a plain list.
type([1,2,3])

list

In [112]:
# [:-1] drops the last element — the slice default_exp() uses to separate the
# feature arrays from the trailing label array.
a = [1,4,5]
(a[:-1])

[1, 4]

In [115]:
# [-1] picks the last element of the selected row — how default_exp() reads the
# label array out of each train/test list.
c = [[2,4,6],[1,3,5]]
c[1][-1]

5