# CatBoost

In [None]:
from catboost import CatBoostRegressor

# Gradient-boosting regressor: 2000 iterations, depth 10, learning rate 3e-4.
model = CatBoostRegressor(iterations=2000, learning_rate=3e-4, depth=10)

# Fit on the training split, then predict the test split.
model.fit(X_train, y_train)
preds = model.predict(X_test)

In [None]:
# Score the predictions held in `preds` against the test targets.
print("R2 Score", r2_score(y_test, preds))
mse = mean_squared_error(y_test, preds)
# The printed value is the square root of the MSE, so it is labelled RMSE
# (it was previously printed under the label "MSE").
print("RMSE", np.sqrt(mse))

# PyTorch

In [None]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
# import imageio


torch.manual_seed(1)    # reproducible


class myDataset(Data.Dataset):
    """Map-style dataset that pairs each feature entry with its target.

    ``X_train`` and ``y_train`` are stored exactly as passed in and are
    indexed with the same key; every lookup converts the selected entries
    to float tensors. ``X_train`` must expose ``.shape`` because the dataset
    length is taken from ``X_train.shape[0]``.
    """

    def __init__(self, X_train, y_train):
        self.X_train, self.y_train = X_train, y_train

    def __len__(self):
        # Number of samples = size of the leading axis of the features.
        return self.X_train.shape[0]

    def __getitem__(self, idx):
        features = torch.tensor(self.X_train[idx], dtype=torch.float)
        target = torch.tensor(self.y_train[idx], dtype=torch.float)
        return features, target

# Wrap the train and test splits as datasets and serve them in batches of
# 128; neither loader shuffles, so batches follow dataset order.
my_data = myDataset(X_train, y_train)
my_data_test = myDataset(X_test, y_test)

loader = Data.DataLoader(my_data, batch_size=128, shuffle=False)
loader_test = Data.DataLoader(my_data_test, batch_size=128, shuffle=False)
# this is one way to define a network
class Net(torch.nn.Module):
    """Fully connected network: six ReLU hidden layers and a linear output.

    Args:
        n_feature: size of each input sample.
        n_hidden: width shared by all six hidden layers.
        n_output: size of each output sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        Linear = torch.nn.Linear
        # Layers are created in forward order; attribute names are kept so
        # parameter names in state_dict stay the same.
        self.hidden = Linear(n_feature, n_hidden)    # input -> hidden
        self.hidden1 = Linear(n_hidden, n_hidden)
        self.hidden2 = Linear(n_hidden, n_hidden)
        self.hidden3 = Linear(n_hidden, n_hidden)
        self.hidden4 = Linear(n_hidden, n_hidden)
        self.hidden5 = Linear(n_hidden, n_hidden)
        self.predict = Linear(n_hidden, n_output)    # hidden -> output

    def forward(self, x):
        hidden_stack = (
            self.hidden,
            self.hidden1,
            self.hidden2,
            self.hidden3,
            self.hidden4,
            self.hidden5,
        )
        # ReLU after every hidden layer; the output layer stays linear.
        for layer in hidden_stack:
            x = F.relu(layer(x))
        return self.predict(x)

# Build the regressor, its optimizer and the loss.
# The input width is read from the last axis of X_train instead of being
# hard-coded, so the first layer keeps matching the feature matrix when
# columns are added or removed.
net = Net(n_feature=X_train.shape[-1], n_hidden=20, n_output=1)
# print(net)  # net architecture
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
loss_func = torch.nn.MSELoss()  # mean squared error, for regression

def train(net, loader, optimizer, loss_func):
    """Run one optimisation pass over ``loader``.

    For every ``(x, y)`` batch the network output is scored against the
    target with ``loss_func``, gradients are back-propagated and
    ``optimizer`` takes one step. The mean of the per-batch losses is
    printed and also returned.

    Args:
        net: module mapping a batch ``x`` to predictions.
        loader: iterable yielding ``(x, y)`` batches.
        optimizer: optimizer that updates ``net``'s parameters.
        loss_func: callable ``loss_func(prediction, target)`` returning a
            scalar tensor.

    Returns:
        The mean batch loss as a float, or ``nan`` if ``loader`` yielded no
        batches.
    """
    net.train()
    losses = []
    for x, y in loader:
        prediction = net(x)
        # A (batch,) target against a (batch, 1) prediction would be
        # broadcast to (batch, batch) by element-wise losses, which silently
        # optimises the wrong quantity. Align the target to the prediction
        # whenever both hold the same number of elements.
        if y.shape != prediction.shape and y.numel() == prediction.numel():
            y = y.reshape(prediction.shape)
        loss = loss_func(prediction, y)     # (1. nn output, 2. target)
        optimizer.zero_grad()   # clear gradients left from the previous step
        loss.backward()         # backpropagation, compute gradients
        optimizer.step()        # apply gradients
        losses.append(loss.item())

    # Guard the average so an empty loader does not divide by zero.
    mean_loss = sum(losses) / len(losses) if losses else float("nan")
    print(mean_loss)
    return mean_loss
    
def test(net, loader):
    """Collect predictions and targets for every batch in ``loader``.

    The network is run in evaluation mode with gradient tracking disabled;
    its previous train/eval mode is restored afterwards.

    Args:
        net: module mapping a batch ``x`` to predictions.
        loader: iterable yielding ``(x, y)`` batches.

    Returns:
        ``(pred, y_true)``: two flat float64 NumPy arrays holding the
        predictions and the targets in the order the loader yielded them.
        Both are empty if the loader yields nothing.
    """
    pred_chunks, true_chunks = [], []
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                # Appending keeps loader order. The earlier
                # np.append(new, old) call put each new batch in front, so
                # results came back in reverse batch order.
                pred_chunks.append(net(x).detach().cpu().numpy().ravel())
                true_chunks.append(y.detach().cpu().numpy().ravel())
    finally:
        net.train(was_training)

    # One concatenation at the end instead of re-copying after every batch.
    # The empty float64 seed keeps the result float64 and covers the
    # no-batch case.
    seed = np.zeros(0)
    pred = np.concatenate([seed, *pred_chunks])
    y_true = np.concatenate([seed, *true_chunks])
    return pred, y_true


# The name starts with "test"; mark the helper so pytest-style collectors do
# not try to run it as a test case.
test.__test__ = False

    
# Number of passes over the training loader.
EPOCHS = 200

# One train() call per epoch; each call prints its mean batch loss.
for _epoch in range(EPOCHS):
    train(net, loader, optimizer, loss_func)

# a: predictions, b: targets, both gathered over the test loader.
a, b = test(net, loader_test)
    
#     # plot and show learning process
#     plt.cla()
#     ax.set_title('Regression Analysis', fontsize=35)
#     ax.set_xlabel('Independent variable', fontsize=24)
#     ax.set_ylabel('Dependent variable', fontsize=24)
#     ax.set_xlim(-1.05, 1.5)
#     ax.set_ylim(-0.25, 1.25)
#     ax.scatter(x.data.numpy(), y.data.numpy(), color = "orange")
#     ax.plot(x.data.numpy(), prediction.data.numpy(), 'g-', lw=3)
#     ax.text(1.0, 0.1, 'Step = %d' % t, fontdict={'size': 24, 'color':  'red'})
#     ax.text(1.0, 0, 'Loss = %.4f' % loss.data.numpy(),
#             fontdict={'size': 24, 'color':  'red'})

#     # Used to return the plot as an image array 
#     # (https://ndres.me/post/matplotlib-animated-gifs-easily/)
#     fig.canvas.draw()       # draw the canvas, cache the renderer
#     image = np.frombuffer(fig.canvas.tostring_rgb(), dtype='uint8')
#     image  = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))

#     my_images.append(image)

# TensorFlow

In [None]:
%matplotlib inline
def load_dataset(flatten=False):
    """Load MNIST via Keras, rescale the images and split off a validation set.

    Image arrays are cast to float and divided by 255. The last 10000
    training examples become the validation split. The raw training labels
    are printed right after loading.

    Args:
        flatten: when true, each example is reshaped to a 1-D vector.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
    print(y_train)

    # normalize x
    X_train, X_test = (
        images.astype(float) / 255. for images in (X_train, X_test)
    )

    # Hold out the final 10000 training examples for validation.
    n_val = 10000
    X_val, y_val = X_train[-n_val:], y_train[-n_val:]
    X_train, y_train = X_train[:-n_val], y_train[:-n_val]

    if flatten:
        X_train, X_val, X_test = (
            images.reshape([images.shape[0], -1])
            for images in (X_train, X_val, X_test)
        )
    return X_train, y_train, X_val, y_val, X_test, y_test
# Load all six splits; flatten keeps its default (False), so images stay 2-D.
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
## Printing dimensions
print(X_train.shape, y_train.shape)
## Visualizing the first digit
# (the trailing semicolon keeps the notebook from echoing the return value)
plt.imshow(X_train[0], cmap="Greys");

In [None]:
## Changing dimension of input images from N*28*28 to N*784
# reshape(N, -1) gives the same result as multiplying the trailing
# dimensions by hand, and it is a no-op on arrays that are already flat, so
# this step no longer fails with an IndexError when it is run a second time.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
print(X_train)
print(X_test)
print('Train dimension:')
print(X_train.shape)
print('Test dimension:')
print(X_test.shape)
## Binarize the labels; the binarizer is fitted on the training labels only
## and then reused for the test labels
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)
print('Train labels dimension:')
print(y_train.shape)
print('Test labels dimension:')
print(y_test.shape)
y_train.shape
# y_test
# y_test

In [None]:
## Create the interactive session used by the graph-mode cells below
# Eager execution is switched off before the session is created, because
# placeholders and sessions need graph mode. The tf.compat.v1 spelling is the
# one this notebook already uses for disable_eager_execution.
tf.compat.v1.disable_eager_execution()
s = tf.compat.v1.InteractiveSession()

In [None]:
## Sizes and rates for the MLP built in the following cells:
## num_features -> num_layers_0 -> num_layers_1 -> num_output units
# NOTE(review): with both hidden widths set to 10 this is not a
# 784-512-256-10 model — confirm the intended layer sizes.
# num_classes = y_train.shape[1]
num_features = X_train.shape[1]
num_output = y_train.shape[1]
num_layers_0 = 10
num_layers_1 = 10
starter_learning_rate = 0.001
# NOTE(review): in the visible part of the file regularizer_rate is only
# referenced by a commented-out loss expression.
regularizer_rate = 0.1

In [None]:
# Placeholders for the input data
tf.compat.v1.disable_eager_execution()
input_X = tf.compat.v1.placeholder('float32', shape=(None, num_features), name="input_X")
# The target placeholder needs one column per output unit. The training loop
# feeds rows of y_train, whose width is num_output, and predicted_y has that
# width too. A fixed width of 1 rejects any wider label matrix at feed time.
input_y = tf.compat.v1.placeholder('float32', shape=(None, num_output), name='input_Y')
## for dropout layer (probability of keeping a unit)
keep_prob = tf.compat.v1.placeholder(tf.float32)

In [None]:
## Weights initialized by random normal function with std_dev = 1/sqrt(number of input features)
## (the input width of each layer); biases use tf.random_normal with its default parameters.
# Layer 0: num_features -> num_layers_0
weights_0 = tf.Variable(tf.random_normal([num_features,num_layers_0], stddev=(1/tf.sqrt(float(num_features)))))
bias_0 = tf.Variable(tf.random_normal([num_layers_0]))
# Layer 1: num_layers_0 -> num_layers_1
weights_1 = tf.Variable(tf.random_normal([num_layers_0,num_layers_1], stddev=(1/tf.sqrt(float(num_layers_0)))))
bias_1 = tf.Variable(tf.random_normal([num_layers_1]))
# Output layer: num_layers_1 -> num_output
weights_2 = tf.Variable(tf.random_normal([num_layers_1,num_output], stddev=(1/tf.sqrt(float(num_layers_1)))))
bias_2 = tf.Variable(tf.random_normal([num_output]))

In [None]:
## Forward pass: two ReLU hidden layers, each followed by dropout driven by
## keep_prob, then a linear output layer
hidden_output_0 = tf.nn.relu(tf.matmul(input_X,weights_0)+bias_0)
hidden_output_0_0 = tf.nn.dropout(hidden_output_0, keep_prob)
hidden_output_1 = tf.nn.relu(tf.matmul(hidden_output_0_0,weights_1)+bias_1)
hidden_output_1_1 = tf.nn.dropout(hidden_output_1, keep_prob)
# No activation on the output: predicted_y is the raw linear response.
predicted_y = tf.matmul(hidden_output_1_1,weights_2) + bias_2

In [None]:
## Defining the loss function
# Disabled alternative: softmax cross-entropy plus an L2 penalty on the
# hidden-layer biases scaled by regularizer_rate.
# loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=predicted_y,labels=input_y)) \
#         + regularizer_rate*(tf.reduce_sum(tf.square(bias_0)) + tf.reduce_sum(tf.square(bias_1)))

# Active loss: root-mean-square error between the fed targets and the network
# output, averaged over every element. No regularization term is included.
loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(input_y, predicted_y))))

In [None]:
## Variable learning rate
# Arguments: initial rate, global step, decay steps (5), decay rate (0.85).
# NOTE(review): the global step passed here is the constant 0 and no step
# variable is handed to minimize(), so as written the schedule is always
# evaluated at step 0 and the rate stays at starter_learning_rate. Pass a
# step variable here and to minimize(global_step=...) if decay is wanted.
learning_rate = tf.train.exponential_decay(starter_learning_rate, 0, 5, 0.85, staircase=True)
## Adam optimizer for finding the right weights; only the variables listed in var_list are updated
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss,var_list=[weights_0,weights_1,weights_2,
                                                                         bias_0,bias_1,bias_2])

In [None]:
## Metrics definition
# Compare the predicted class with the labels fed through input_y. The metric
# then follows whatever batch is supplied in feed_dict, instead of being tied
# to the full y_train array (which also only matched when the whole training
# set was fed as input_X).
correct_prediction = tf.equal(tf.argmax(input_y, 1), tf.argmax(predicted_y, 1))
# Fraction of rows whose predicted class equals the labelled class.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
## Training parameters
batch_size = 128
epochs = 14
dropout_prob = 0.6  # fed as keep_prob, i.e. the probability of keeping a unit
training_accuracy = []
training_loss = []
testing_accuracy = []
s.run(tf.global_variables_initializer())

n_train = X_train.shape[0]
for epoch in range(epochs):
    # Visit the training rows in a fresh random order every epoch.
    arr = np.arange(n_train)
    np.random.shuffle(arr)
    for index in range(0, n_train, batch_size):
        batch_rows = arr[index:index + batch_size]
        s.run(optimizer, {input_X: X_train[batch_rows],
                          input_y: y_train[batch_rows],
                          keep_prob: dropout_prob})

    # Evaluate on the training set with dropout disabled (keep_prob = 1).
    # Accuracy and loss are fetched in a single run, so the full training set
    # goes through the network once per epoch instead of twice.
    train_acc, train_loss = s.run([accuracy, loss],
                                  {input_X: X_train,
                                   input_y: y_train,
                                   keep_prob: 1})
    training_accuracy.append(train_acc)
    training_loss.append(train_loss)

    ## Evaluation of model
    test_scores = s.run(predicted_y, {input_X: X_test, keep_prob: 1})
    testing_accuracy.append(accuracy_score(y_test.argmax(1),
                                           test_scores.argmax(1)))
    print("Epoch:{0}, Train loss: {1:.2f} Train acc: {2:.3f}, Test acc:{3:.3f}".format(
        epoch,
        training_loss[-1],
        training_accuracy[-1],
        testing_accuracy[-1]))

In [None]:
## Plotting chart of training and testing accuracy as a function of iterations
iterations = list(range(epochs))
plt.plot(iterations, training_accuracy, label='Train')
plt.plot(iterations, testing_accuracy, label='Test')
plt.ylabel('Accuracy')
plt.xlabel('iterations')
# The labels given to plot() are only drawn once a legend is requested.
plt.legend()
plt.show()
# Final-epoch values.
print("Train Accuracy: {0:.2f}".format(training_accuracy[-1]))
print("Test Accuracy:{0:.2f}".format(testing_accuracy[-1]))

In [8]:
import pandas as pd
import numpy as np
import dask.dataframe as dd

# Read every file matching data/Electricity/MRIMMeter/* with dask and
# materialise the result as a single pandas dataframe.
df = dd.read_csv('data/Electricity/MRIMMeter/*').compute()
# Renumber the rows from 0 and discard the 'index' column that reset_index adds.
df = df.reset_index().drop(columns='index')

In [None]:
# def area(row):
#     if (row['ServiceProvider'] == 'CITIPOWER'):
#         val = 'Inner Melbourne'
#     elif (row['ServiceProvider'] == 'POWERCOR'):
#         val = 'West Victoria'
#     elif (row['ServiceProvider'] == 'TXU'):
#         val = 'East & Northeast Victoria'
#     elif (row['ServiceProvider'] == 'UNITED'):
#         val = 'Southeast Victoria'
        
#     else:
#         val = 'Northwest Victoria'
        
#     return val

# merged['ProfileArea'] = merged.apply(area, axis=1)

In [9]:
# Export the combined dataframe as csv, without the row index
# (comment this line out to skip the export)
df.to_csv('data/mrim_data.csv', index=False)

In [10]:
# Display the dataframe (the notebook echoes the cell's last expression)
df

Unnamed: 0,PROFILEAREA,SETTD,DCTC,DAILYT,VAL01,VAL02,VAL03,VAL04,VAL05,VAL06,...,VAL39,VAL40,VAL41,VAL42,VAL43,VAL44,VAL45,VAL46,VAL47,VAL48
0,CITIPOWER,01/01/2016,MRIM,4683409.892,112853.074,104363.755,97343.792,91889.809,87655.710,84326.314,...,98183.323,98045.639,98624.974,96991.198,93626.362,89399.151,89023.848,90937.854,93384.260,88587.762
1,CITIPOWER,02/01/2016,MRIM,4144130.925,77929.991,71042.793,66741.556,64142.109,62648.539,61436.392,...,97929.623,96730.314,97220.276,95825.309,92681.934,88599.713,88396.739,90509.929,93627.695,88861.021
2,CITIPOWER,03/01/2016,MRIM,4041606.415,77963.111,70789.426,65956.136,63012.869,61428.158,59990.272,...,97361.935,97577.840,98754.239,97170.341,92879.757,87043.133,85519.369,86854.749,89913.379,86008.214
3,CITIPOWER,04/01/2016,MRIM,4616195.881,75279.507,68426.319,63795.872,61056.812,59522.298,58509.989,...,102678.321,101282.623,101068.884,98636.803,93252.262,86986.361,84871.066,86469.874,88806.064,84316.744
4,CITIPOWER,05/01/2016,MRIM,4741002.102,73777.193,67108.140,62688.207,60262.346,58984.282,58108.533,...,107847.093,106699.428,107972.371,106113.647,100409.381,93305.393,90399.650,90884.729,93075.553,88424.217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7755,VICAGL,27/03/2020,MRIM,3664284.471,67989.754,62345.774,58427.129,55754.684,53828.163,52736.694,...,112983.899,107637.483,102132.287,96617.361,90491.404,84066.124,77659.121,71903.767,80753.975,75458.512
7756,VICAGL,28/03/2020,MRIM,3550058.705,68166.934,62361.536,58282.243,55563.968,53556.083,51940.222,...,117317.812,111252.409,105175.338,99322.338,93706.852,87625.480,81024.166,75160.148,83452.236,78270.962
7757,VICAGL,29/03/2020,MRIM,3768120.264,70987.767,64965.571,60702.308,57468.594,55047.343,53473.954,...,109857.531,105285.624,100358.961,94778.876,88539.298,81712.953,74875.006,68942.722,78092.730,73797.197
7758,VICAGL,30/03/2020,MRIM,3630628.975,67004.949,61331.331,57437.539,54699.460,52482.356,51285.551,...,109994.933,104739.457,99656.454,94486.382,87854.683,81217.427,74501.693,68247.746,77265.842,72810.983
