In [117]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
import random
import tensorflow.keras.utils as utils
import pydot
from tensorflow.keras import regularizers
from tensorflow.keras import layers
from sklearn.ensemble import RandomForestRegressor
class Mics_Model:
    def __init__(self, dataset_dir, use_encoder=True, sampling_method="Vanilla", global_model="NN", group_number = 3):
        self.dataset_dir = dataset_dir
        self.use_encoder = use_encoder
        self.sampling_method = sampling_method
        self.global_model = global_model
        self.group_number = group_number
        self.raw_data = None
            
    #ASSUMPTION: label column is in the end and all other columns are features. There is no index column in the data.
    def get_raw_data(self):
        raw_data = pd.read_csv(self.dataset_dir, index_col=0)
        raw_data = raw_data.fillna(raw_data.mean())
        self.raw_data = raw_data
        
    #This method assigns the feature number = column number - 1 (exclude label column). After that, it returns a list of
    #input feature numbers according to group count. Ex: for 28 cols, 27 features, 4 group_num: returns [7,7,6,7] 
    #Output of this function can be fed to get_model methods as inp_sizes input.
    def get_input_group_lenthgs(self):
        count = self.group_number
        input_sizes = [None]*count
        feature_num = len(self.raw_data.columns) - 1
        for i in range(count):
            group_size = round(feature_num/(count-i))
            input_sizes[i] = group_size
            feature_num = feature_num - group_size
        return input_sizes
    
    #This method returns grouped column numbers
    #[[1,4,5],[2,3,6]]
    def get_grouped_feature_cols(self):
        grouped_feature_cols = [None]*self.group_number
        feature_num = len(self.raw_data.columns) - 1
        inp_sizes = self.get_input_group_lenthgs()
        total_nums = [i for i in range(feature_num)]
        for j in range(len(inp_sizes)):
            size = inp_sizes[j]
            temp_list = random.sample(total_nums, size)
            grouped_feature_cols[j] = temp_list
            for k in temp_list:
                total_nums.remove(k)
        return grouped_feature_cols
    
    #groups is a list of lists [[1,4,5], [2,3,6]] which is output of get_grouped_feature_cols method
    #returns: [[train_x1, train_x2..., train_xn, train_y],
    #          [test_x1, test_x2..., test_xn, test_y]]
    def get_features_and_labels(self, groups):
        row_num = len(self.raw_data.index)
        
        trainx_df = self.raw_data.iloc[:int(0.8*row_num), 1:]
        trainy_df = self.raw_data.iloc[:int(0.8*row_num), 0]
        testx_df = self.raw_data.iloc[int(0.8*row_num):, 1:]
        testy_df = self.raw_data.iloc[int(0.8*row_num):, 0]        
        
        scaler = StandardScaler()
        trainx_scaled = pd.DataFrame(scaler.fit_transform(trainx_df), columns = trainx_df.columns, index = trainx_df.index)
        textx_scaled = pd.DataFrame(scaler.transform(testx_df), columns = testx_df.columns, index = testx_df.index)
        
        features_and_labels = [[None for _ in range(self.group_number + 1)] for _ in range(2)]
        
        for index, group in enumerate(groups):
            train_temp = trainx_scaled.iloc[:,group]
            features_and_labels[0][index] = train_temp.values
            test_temp = textx_scaled.iloc[:,group]
            features_and_labels[1][index] = test_temp.values            
        features_and_labels[0][self.group_number] = trainy_df.values
        features_and_labels[1][self.group_number] = testy_df.values   
        return features_and_labels
    
    #return [[train_x1, train_x2..., train_xn, train_y],
    #        [test_x1, test_x2..., test_xn, test_y]]
    
    def get_vanilla_encoder_model(self, inp_size):
        inputs = keras.layers.Input(shape=(inp_size))
        h1 = keras.layers.Dense(10, activation="relu")(inputs)
        h1 = keras.layers.Dense(10, activation="relu")(inputs)        
        outputs = keras.layers.Dense(inp_size, activation="relu")(h1)
        return keras.Model(inputs,outputs)
    
    class Sampling(layers.Layer):
        def call(self, inputs):
            z_mean, z_log_var = inputs
            batch = tf.shape(z_mean)[0]
            dim = tf.shape(z_mean)[1]
            epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
            return z_mean + tf.exp(0.5 * z_log_var) * epsilon #multiplies with std
    
    def get_variatonal_encoder_model(self, inp_size):
        inputs = layers.Input(shape=(inp_size))
        h1 = layers.Dense(10, activation="relu")(inputs)
        z_mean = layers.Dense(inp_size, name="z_mean")(h1)
        z_log_var = layers.Dense(inp_size, name="z_log_var")(h1)
        outputs = Sampling()([z_mean, z_log_var])
        return keras.Model(inputs,outputs)
    #New sampling methods can be added here 
    
    def get_nn_model(self, inp_sizes, drop_out=0.25, hidden_num = 4, hidden_size=32):
        inp_group_count = len(inp_sizes)
        inputs = [None]*inp_group_count
        for i in range(inp_group_count):
            inputs[i] = keras.layers.Input(shape=(inp_sizes[i]), name="input_"+str(i))
        if self.use_encoder == True:
            encoders = [None]*inp_group_count
            if self.sampling_method == "Vanilla":
                for j in range(inp_group_count):
                    encoders[j] = self.get_vanilla_encoder_model(inp_sizes[j])
            elif self.sampling_method == "Variational":
                for j in range(inp_group_count):
                    encoders[j] = self.get_variatonal_encoder_model(inp_sizes[j])
            #This place can be extended if new sampling methods are added.
            global_inputs = [None]*inp_group_count
            for k in range(inp_group_count):
                global_inputs[k] = encoders[k](inputs[k])
                global_input = keras.layers.concatenate(global_inputs)
        else:
            global_input = keras.layers.concatenate(inputs)
            
        h = keras.layers.Dense(hidden_size, activation="relu", kernel_regularizer=regularizers.l1_l2(l1=1e-4, l2=1e-3))(global_inp)
        h = keras.layers.Dropout(drop_out)(h)
        for hidden in range(hidden_num):
            h = keras.layers.Dense(hidden_size, activation="relu", kernel_regularizer=regularizers.l1_l2(l1=1e-4, l2=1e-3))(h)
            h = keras.layers.Dropout(drop_out)(h) 

        outputs = keras.layers.Dense(1, activation="relu")(h)    
        return keras.Model(inputs=[inputs_A, inputs_B, inputs_C], outputs = outputs) 
    
    def default_exp(self):
        inp_sizes = self.get_input_group_lenthgs()
        groups = self.get_grouped_feature_cols()
        features_and_labels = self.get_features_and_labels(groups)
        MICS_model = self.get_nn_model(inp_sizes=inp_sizes)
        callback = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50), 
                keras.callbacks.ReduceLROnPlateau("val_loss", factor = 0.8, patience=30,
                                                 verbose = 2, mode = "auto", 
                                                  min_lr = 1e-6)]
        MICS_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01), loss=keras.losses.MeanSquaredError())
        history = MICS_model.fit(x = features_and_labels[0][:-1], y = features_and_labels[0][-1].values,  
                                 validation_data = (features_and_labels[1][:-1], features_and_labels[1][-1]),
                                 epochs=300, batch_size = 300, callbacks=callback)
        training_val_loss = history.history["val_loss"]
        best_row_index = np.argmin(training_val_loss)
        best_val_loss = training_val_loss[best_row_index]
                


In [101]:
import random
def get_grouped_feature_cols(group_number, col_num, inp_sizes):
    grouped_feature_cols = [None]*group_number
    feature_num = col_num - 1
    total_nums = [i for i in range(1,feature_num+1)]
    for j in range(len(inp_sizes)):
        size = inp_sizes[j]
        temp_list = random.sample(total_nums, size)
        grouped_feature_cols[j] = temp_list
        for k in temp_list:
            total_nums.remove(k)
    return grouped_feature_cols  
res = get_grouped_feature_cols(4, 24, [6,6,5,6])
res

[[7, 18, 16, 11, 3, 4],
 [2, 20, 10, 14, 13, 12],
 [5, 6, 1, 15, 22],
 [21, 23, 9, 8, 19, 17]]

In [119]:
dataset_dir = "./Datasets/energydata_use.csv"
deneyelim = Mics_Model(dataset_dir)
deneyelim.get_raw_data()
deneyelim.default_exp()


NameError: name 'selg' is not defined

In [103]:
deneyelim.raw_data

Unnamed: 0_level_0,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,...,T8,RH_8,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-11 17:00:00,60,30,19.890000,47.596667,19.200000,44.790000,19.790000,44.730000,19.000000,45.566667,...,18.2000,48.900000,17.033333,45.5300,6.600000,733.5,92.000000,7.000000,63.000000,5.300000
2016-01-11 17:10:00,60,30,19.890000,46.693333,19.200000,44.722500,19.790000,44.790000,19.000000,45.992500,...,18.2000,48.863333,17.066667,45.5600,6.483333,733.6,92.000000,6.666667,59.166667,5.200000
2016-01-11 17:20:00,50,30,19.890000,46.300000,19.200000,44.626667,19.790000,44.933333,18.926667,45.890000,...,18.2000,48.730000,17.000000,45.5000,6.366667,733.7,92.000000,6.333333,55.333333,5.100000
2016-01-11 17:30:00,50,40,19.890000,46.066667,19.200000,44.590000,19.790000,45.000000,18.890000,45.723333,...,18.1000,48.590000,17.000000,45.4000,6.250000,733.8,92.000000,6.000000,51.500000,5.000000
2016-01-11 17:40:00,60,40,19.890000,46.333333,19.200000,44.530000,19.790000,45.000000,18.890000,45.530000,...,18.1000,48.590000,17.000000,45.4000,6.133333,733.9,92.000000,5.666667,47.666667,4.900000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-05-27 17:20:00,100,0,25.566667,46.560000,25.890000,42.025714,27.200000,41.163333,24.700000,45.590000,...,24.7000,50.074000,23.200000,46.7900,22.733333,755.2,55.666667,3.333333,23.666667,13.333333
2016-05-27 17:30:00,90,0,25.500000,46.500000,25.754000,42.080000,27.133333,41.223333,24.700000,45.590000,...,24.7000,49.790000,23.200000,46.7900,22.600000,755.2,56.000000,3.500000,24.500000,13.300000
2016-05-27 17:40:00,270,10,25.500000,46.596667,25.628571,42.768571,27.050000,41.690000,24.700000,45.730000,...,24.7000,49.660000,23.200000,46.7900,22.466667,755.2,56.333333,3.666667,25.333333,13.266667
2016-05-27 17:50:00,420,10,25.500000,46.990000,25.414000,43.036000,26.890000,41.290000,24.700000,45.790000,...,24.6625,49.518750,23.200000,46.8175,22.333333,755.2,56.666667,3.833333,26.166667,13.233333


In [104]:
groups = deneyelim.get_grouped_feature_cols()

In [105]:
groups

[[12, 7, 15, 21, 9, 22, 17, 8],
 [1, 24, 0, 19, 18, 6, 2, 5],
 [14, 13, 10, 4, 16, 23, 11, 3, 20]]

In [106]:
deneyelim.get_features_and_labels(groups)

[[                         RH_6        T4        T8    RH_out        T5  \
  date                                                                    
  2016-01-11 17:00:00  0.769623 -0.761611 -1.924584  0.786817 -1.385479   
  2016-01-11 17:10:00  0.762633 -0.761611 -1.924584  0.786817 -1.385479   
  2016-01-11 17:20:00  0.729854 -0.810232 -1.924584  0.786817 -1.385479   
  2016-01-11 17:30:00  0.739495 -0.834542 -1.983431  0.786817 -1.385479   
  2016-01-11 17:40:00  0.792640 -0.834542 -1.983431  0.786817 -1.359694   
  ...                       ...       ...       ...       ...       ...   
  2016-04-30 07:30:00 -0.294374 -0.569336 -0.106203  1.096424  0.110056   
  2016-04-30 07:40:00 -0.287143 -0.609117 -0.106203  1.070624  0.058486   
  2016-04-30 07:50:00 -0.281118 -0.569336 -0.159166  1.044823  0.110056   
  2016-04-30 08:00:00 -0.300159 -0.569336 -0.159166  1.019022  0.161626   
  2016-04-30 08:10:00 -0.329323 -0.569336 -0.159166  0.928720  0.257031   
  
                      

In [56]:
a = [None]*3
b = [a,a]


In [57]:
b

[[None, None, None], [None, None, None]]

In [58]:
b[0][1] = 5
b

[[None, 5, None], [None, 5, None]]

In [61]:
a=[[None]*3]*2
a

[[None, None, None], [None, None, None]]

In [62]:
a[0][2]=5
a

[[None, None, 5], [None, None, 5]]

In [63]:
x = [[None for _ in range(3)] for _ in range(2)]
x

[[None, None, None], [None, None, None]]

In [64]:
x[1][2]=2
x

[[None, None, None], [None, None, 2]]

In [107]:
type([1,2,3])

list

In [112]:
a = [1,4,5]
(a[:-1])

[1, 4]

In [115]:
c = [[2,4,6],[1,3,5]]
c[1][-1]

5