In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import cPickle as pickle
import random

# Data Parsing


In [2]:
file = '/Users/JH/Desktop/NTU/NTU_Research/data/NEM_Load_Forecasting_Database.xls'

concatenate_number = 2*5*5

QLD = 'Actual_Data_QLD'
NSW = 'Actual_Data_NSW'
VIC = 'Actual_Data_VIC'
SA = 'Actual_Data_SA'
TAS = 'Actual_Data_TAS'

In [3]:
# Set Classes as Data Container

class Data:
    def __init__(self, train, test):
        self.train = train
        self.test = test


class DataSet(object):
    class Temperature(Data):
        def __init__(self, train, test):
            super(self.__class__, self).__init__(train, test)

    class PowerLoad(Data):
        def __init__(self, train, test):
            super(self.__class__, self).__init__(train, test)

In [4]:
# Set Functions

def normalize(array):
    return (array - min(array)) / (max(array) - min(array))


def data_splitter(data, ratio=0.8):
    """
    split data into training data & testing data
    :param data:

    :param ratio:
        training data ratio
    :return:
        train_data, test_data
    """
    splitter = int(len(data) * ratio)
    return data[:splitter], data[splitter + 1:]


def extract_feature(df, dataset):
    """
    assign data to designed data container
    :param df:
        pandas data-frame read from excel data format
    :param dataset:
        defined data container
    :return:
        N/A
    """
    temperature_max_scanner = []
    temperature_mean_scanner = []
    temperature_collector = []

    powerload_scanner = []
    powerload_collector = []

    for row in xrange(0, len(df)):
        if not math.isnan(df['Max Tem.'][row]) and not math.isnan(df['Mean Tem.'][row]):
            temperature_max_scanner.append(df['Max Tem.'][row])
            temperature_mean_scanner.append(df['Mean Tem.'][row])

        if len(temperature_max_scanner) is concatenate_number:
            temperature_collector.append(normalize(np.array(temperature_max_scanner + temperature_mean_scanner)))
            temperature_max_scanner.pop(0)
            temperature_mean_scanner.pop(0)

            for col in xrange(5, 53):
                powerload_scanner.append(df.loc[row][col])
            powerload_collector.append(normalize(np.array(powerload_scanner)))
            del (powerload_scanner[:])

    dataset.Temperature.train, dataset.Temperature.test = data_splitter(np.array(temperature_collector))
    dataset.PowerLoad.train, dataset.PowerLoad.test = data_splitter(np.array(powerload_collector))

In [5]:
df = pd.read_excel(file, sheetname=QLD)
dataset = DataSet()
extract_feature(df, dataset)

### checking parsed data

In [6]:
# Temperature Training Data
print 'data: ',
print dataset.Temperature.train  

print 'shape: ', 
print dataset.Temperature.train.shape # (concatenate_number*2, )

print 'type: ',
print type(dataset.Temperature.train) # numpy.ndarray

print 'length: ',
print len(dataset.Temperature.train)

data:  [[ 0.77848101  0.63291139  0.60126582 ...,  0.24050633  0.09810127
   0.2278481 ]
 [ 0.63291139  0.60126582  0.44936709 ...,  0.09810127  0.2278481
   0.28481013]
 [ 0.60126582  0.44936709  0.24050633 ...,  0.2278481   0.28481013
   0.23734177]
 ..., 
 [ 1.          0.84466019  0.81877023 ...,  0.09708738  0.26213592
   0.3236246 ]
 [ 1.          0.96934866  0.92337165 ...,  0.31034483  0.38314176
   0.25287356]
 [ 0.97683398  0.93050193  1.         ...,  0.38610039  0.25482625
   0.41698842]]
shape:  (302, 100)
type:  <type 'numpy.ndarray'>
length:  302


In [7]:
# Temperature Test Data
print 'data: ',
print dataset.Temperature.test   

print 'shape: ', 
print dataset.Temperature.test.shape # (concatenate_number*2, )

print 'type: ',
print type(dataset.Temperature.test) # numpy.ndarray

print 'length: ',
print len(dataset.Temperature.test)

data:  [[ 1.          0.86100386  0.87644788 ...,  0.41698842  0.35907336
   0.33590734]
 [ 0.8745098   0.89019608  0.89019608 ...,  0.36470588  0.34117647  0.4       ]
 [ 0.89019608  0.89019608  0.89803922 ...,  0.34117647  0.4         0.4627451 ]
 ..., 
 [ 0.33656174  0.37530266  0.52058111 ...,  0.31719128  0.43825666
   0.42857143]
 [ 0.37530266  0.52058111  0.2251816  ...,  0.43825666  0.42857143
   0.57142857]
 [ 0.52058111  0.2251816   0.23970944 ...,  0.42857143  0.57142857
   0.11864407]]
shape:  (75, 100)
type:  <type 'numpy.ndarray'>
length:  75


In [8]:
# PowerLoad Training Data
print 'data: ',
print dataset.PowerLoad.train

print 'shape: ',
print dataset.PowerLoad.train.shape # (48, )

print 'type: ',
print type(dataset.PowerLoad.train) # numpy.ndarray

print 'length: ',
print len(dataset.PowerLoad.train)

data:  [[ 0.38226771  0.29778805  0.25486959 ...,  0.65397454  0.53706761
   0.46696746]
 [ 0.41441512  0.32697913  0.25947223 ...,  0.64816857  0.55084679
   0.49239858]
 [ 0.42177199  0.32388648  0.26732594 ...,  0.62799485  0.54084258
   0.48468403]
 ..., 
 [ 0.51149971  0.42691504  0.3495058  ...,  0.52309447  0.45295571
   0.41703098]
 [ 0.27789055  0.2277305   0.19529237 ...,  0.58314466  0.52474822
   0.46428293]
 [ 0.34871836  0.26742205  0.21171903 ...,  0.6062755   0.52085892
   0.45830197]]
shape:  (302, 48)
type:  <type 'numpy.ndarray'>
length:  302


In [9]:
# PowerLoad Testing Data
print 'data: ',
print dataset.PowerLoad.test

print 'shape: ',
print dataset.PowerLoad.test.shape # (48, )

print 'type: ',
print type(dataset.PowerLoad.test) # numpy.ndarray

print 'length: ',
print len(dataset.PowerLoad.test)

data:  [[ 0.35862207  0.27923092  0.23440817 ...,  0.55433607  0.48503905
   0.42455438]
 [ 0.39805698  0.28611439  0.22244103 ...,  0.60618791  0.5605444
   0.52190276]
 [ 0.54951991  0.43613119  0.31637717 ...,  0.56521739  0.54261517
   0.52702557]
 ..., 
 [ 0.54468085  0.43734602  0.30795073 ...,  0.58566629  0.57206047
   0.54496081]
 [ 0.43033554  0.3437927   0.25947669 ...,  0.53697049  0.47613746
   0.42876664]
 [ 0.31184273  0.24531056  0.18465897 ...,  0.61703224  0.54225866
   0.48886123]]
shape:  (75, 48)
type:  <type 'numpy.ndarray'>
length:  75


# DeepLearning Core

## Training

In [10]:
# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 128
display_step = 10
concatenate_number = 13

# Network Parameters
n_input = 2 * concatenate_number  # Temperature data as input (input matrix shape: 48*concatenate_number)
n_output = 48  # Predicted Power data as output  (48-points)
dropout = 0.75  # Dropout, probability to keep units

In [11]:
# tf Graph input
temperature_map = tf.placeholder(tf.float32, [None, n_input], name='temperature_input')
power_vector_ = tf.placeholder(tf.float32, [None, n_output], name='powerload_output')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # dropout (keep probability)

In [12]:
tensor_map = {temperature_map: dataset.Temperature.train, 
              power_vector_: dataset.PowerLoad.train, 
              keep_prob: dropout}

In [13]:
# Store layers weight & bias
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32]), name='wc1'),
    
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64]), name='wc2'),
    
    # fully connected, 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([7 * 7 * 64, 1024]), name='wd1'),
    
    # 1024 inputs, 48 outputs
    'out': tf.Variable(tf.random_normal([1024, n_output]), name='wo1')
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32]), name='bc1'),
    'bc2': tf.Variable(tf.random_normal([64]), name='bc2'),
    'bd1': tf.Variable(tf.random_normal([1024]), name='bd1'),
    'out': tf.Variable(tf.random_normal([n_output]), name='bo1')
}

In [14]:
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

In [18]:
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    with tf.name_scope('array_reshape') as array_reshape:
        x = tf.reshape(x, shape=[-1, 2*5, 2*5, 1])

    # Convolution Layer
    with tf.name_scope('conv_layer1') as conv_layer1:
        conv1 = conv2d(x, weights['wc1'], biases['bc1'])
        # Max Pooling (down-sampling)
        conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    with tf.name_scope('conv_layer2') as conv_layer2:
        conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    with tf.name_scope('fc_layer') as fc_layer:
        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        fc1 = tf.nn.relu(fc1)
        # Apply Dropout
        fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    with tf.name_scope('output_layer') as output_layer:
        out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    
    return out

In [21]:
# Construct model
pred = conv_net(temperature_map, weights, biases, keep_prob)

# Define loss and optimizer
with tf.name_scope('cost'):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=power_vector_))
    tf.summary.scalar('cost/', cost)
    
with tf.name_scope('optimization'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()

In [None]:
with tf.Session as sess:
    sess.run(init)
    
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./summary', sess.graph)
    
    step = 1
    
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = next_batch(temperature_map, power_vector, batch_size)
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={temperature_map: batch_x, power_vector: batch_y,
                                       keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={temperature_map: batch_x,
                                                              power_vector: batch_y,
                                                              keep_prob: 1.})
            print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))
        step += 1
        
    print("Optimization Finished!")