In [1]:
import numpy as np
import pandas as pd
import math
import cPickle as pickle

In [2]:
file = '/Users/JH/Desktop/NTU/NTU_Research/data/NEM_Load_Forecasting_Database.xls'

QLD = 'Actual_Data_QLD'
NSW = 'Actual_Data_NSW'
VIC = 'Actual_Data_VIC'
SA = 'Actual_Data_SA'
TAS = 'Actual_Data_TAS'

In [3]:
# Set Classes as Data Container

class Structure:
    def __init__(self):
        self._feature = []
        self._target = []

    @property
    def feature(self):
        return self._feature

    @property
    def target(self):
        return self._target

    @feature.setter
    def feature(self, value):
        self._feature = value

    @target.setter
    def target(self, value):
        self._target = value


class Data:
    def __init__(self):
        pass

    class Train(Structure):
        def __init__(self):
            pass

    class Test(Structure):
        def __init__(self):
            pass


class DataSet:
    def __init__(self):
        pass

    class Raw:
        def __init__(self):
            pass

        class Train(Structure):
            def __init__(self):
                pass

        class Test(Structure):
            def __init__(self):
                pass

    class PreProcessed:
        def __init__(self):
            pass

        class Train(Structure):
            def __init__(self):
                pass

        class Test(Structure):
            def __init__(self):
                pass

In [4]:
# Set Functions

def normalization(data):
    return (data - min(data)) / (max(data) - min(data))


def data_splitter(data, ratio=0.8):
    """
    split data into training data & testing data
    :param data:

    :param ratio:
        training data ratio
    :return:
        train_data, test_data
    """
    splitter = int(len(data) * ratio)
    return np.array(data[:splitter]), np.array(data[splitter + 1:])


def preprocessing_filter(data, nominator, denominator):
    return normalization(data) ** (nominator / denominator)


def preprocessing(data_present, temperature_max, temperature_mean, denominator):
    data_present = list(data_present) + list(
        preprocessing_filter(np.array(data_present), temperature_max, denominator)) + list(
        preprocessing_filter(np.array(data_present), temperature_mean, denominator))

    return np.array(data_present)


def data_alloter(df):
    dataset = DataSet()
    denominator = df['Mean Tem.'].min()

    raw_feature = []
    raw_target = []
    preprocessed_feature = []
    preprocessed_target = []

    for row in range(0, len(df)):
        # if both MaxTemp and MeanTemp are not nan
        if not math.isnan(df['Max Tem.'][row]) and not math.isnan(df['Mean Tem.'][row]):
            if not math.isnan(df['Max Tem.'][row + 1]) and not math.isnan(df['Mean Tem.'][row + 1]):
                powerload_present = normalization(np.array(df.loc[row][5:53]))
                powerload_future = normalization(np.array(df.loc[row + 1][5:53]))

                raw_feature.append(np.array(
                    list(powerload_present) + list([df['Max Tem.'][row + 1]]) + list([df['Mean Tem.'][row + 1]])))
                raw_target.append(np.array(powerload_future))

                preprocessed_powerload_present = preprocessing(powerload_present,
                                                               df['Max Tem.'][row + 1],
                                                               df['Mean Tem.'][row + 1],
                                                               denominator)

                preprocessed_feature.append(preprocessed_powerload_present)
                preprocessed_target.append(np.array(powerload_future))

    dataset.Raw.Train.feature, dataset.Raw.Test.feature = data_splitter(raw_feature)
    dataset.Raw.Train.target, dataset.Raw.Test.target = data_splitter(raw_target)

    dataset.PreProcessed.Train.feature, dataset.PreProcessed.Test.feature = data_splitter(preprocessed_feature)
    dataset.PreProcessed.Train.target, dataset.PreProcessed.Test.target = data_splitter(preprocessed_target)

    return dataset

In [5]:
df = pd.read_excel(file, sheetname=QLD)
dataset = data_alloter(df)

In [6]:
import tensorflow as tf

In [7]:
# Parameters
batch_size = 100
num_steps = 4000
data_showing_step = 100

# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 144 # MNIST data input (img shape: 28*28)
n_classes = 48 # MNIST total classes (0-9 digits)

# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
learning_rate_decayed = tf.placeholder(tf.float32, shape=[])

In [8]:
# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer


# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_decayed).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()

In [10]:
# Launch the graph
sess = tf.InteractiveSession()
sess.run(init)

# Training cycle
for step in range(num_steps):
    
    avg_cost = 0.
    total_batch = int(dataset.PreProcessed.Train.feature.shape[0]/batch_size)
    
    if 30000 < step < 80000:
        learning_rate = 0.01 / 10
    elif 80000 <= step < 200000:
        learning_rate = 0.01 / 100
    else:
        learning_rate = 0.01

    # set a offset
    offset = (step * batch_size) % (dataset.PreProcessed.Train.target.shape[0] - batch_size)

    # Generate a minibatch.
    batch_x = dataset.PreProcessed.Train.feature[offset:(offset + batch_size), :]
    batch_y = dataset.PreProcessed.Train.target[offset:(offset + batch_size), :]
    
    feed_dict = {x: batch_x, 
                 y: batch_y,
                 learning_rate_decayed: learning_rate}
    
    _, c = sess.run([optimizer, cost], feed_dict=feed_dict)
    
    # Compute average loss
    avg_cost += c / total_batch
    
    if (step % data_showing_step == 0):
        print "step:", '%04d' % (step+batch_size), "cost=", "{:.9f}".format(avg_cost)

print "Optimization Finished!"

step: 0100 cost= 20987.307291667
step: 0200 cost= 1807950.500000000
step: 0300 cost= 5200889.333333333
step: 0400 cost= 8340340.666666667
step: 0500 cost= 10821931.333333334
step: 0600 cost= 21421960.000000000
step: 0700 cost= 32708048.000000000
step: 0800 cost= 37048056.000000000
step: 0900 cost= 59911461.333333336
step: 1000 cost= 100745301.333333328
step: 1100 cost= 90215946.666666672
step: 1200 cost= 134468256.000000000
step: 1300 cost= 192034645.333333343
step: 1400 cost= 147097760.000000000
step: 1500 cost= 215002944.000000000
step: 1600 cost= 272006080.000000000
step: 1700 cost= 238145258.666666657
step: 1800 cost= 297213781.333333313
step: 1900 cost= 396737962.666666687
step: 2000 cost= 302026880.000000000
step: 2100 cost= 415295146.666666687
step: 2200 cost= 498045952.000000000
step: 2300 cost= 387208704.000000000
step: 2400 cost= 493296298.666666687
step: 2500 cost= 593644928.000000000
step: 2600 cost= 467237930.666666687
step: 2700 cost= 522500906.666666687
step: 2800 cost= 