In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import cPickle as pickle

# Data Parsing


In [2]:
file = '/Users/JH/Desktop/NTU/NTU_Research/data/NEM_Load_Forecasting_Database.xls'

concatenate_number = 2*4*4

QLD = 'Actual_Data_QLD'
NSW = 'Actual_Data_NSW'
VIC = 'Actual_Data_VIC'
SA = 'Actual_Data_SA'
TAS = 'Actual_Data_TAS'

In [3]:
# Set Classes as Data Container

class Data:
    def __init__(self, train, test):
        self.train = train
        self.test = test


class DataSet(object):
    class Temperature(Data):
        def __init__(self, train, test):
            super(self.__class__, self).__init__(train, test)

    class PowerLoad(Data):
        def __init__(self, train, test):
            super(self.__class__, self).__init__(train, test)

In [4]:
# Set Functions

def normalize(array):
    return (array - min(array)) / (max(array) - min(array))


def data_splitter(data, ratio=0.8):
    """
    split data into training data & testing data
    :param data:

    :param ratio:
        training data ratio
    :return:
        train_data, test_data
    """
    splitter = int(len(data) * ratio)
    return data[:splitter], data[splitter + 1:]


def extract_feature_vector(df, dataset):
    """
    assign data to designed data container
    :param df:
        pandas data-frame read from excel data format
    :param dataset:
        defined data container
    :return:
        N/A
    """
    temperature_max_scanner = []
    temperature_mean_scanner = []
    temperature_collector = []

    powerload_scanner = []
    powerload_collector = []

    for row in xrange(0, len(df)):
        if not math.isnan(df['Max Tem.'][row]) and not math.isnan(df['Mean Tem.'][row]):
            temperature_max_scanner.append(df['Max Tem.'][row])
            temperature_mean_scanner.append(df['Mean Tem.'][row])

        if len(temperature_max_scanner) is concatenate_number:
            temperature_collector.append(normalize(np.array(temperature_max_scanner + temperature_mean_scanner)))
            temperature_max_scanner.pop(0)
            temperature_mean_scanner.pop(0)

            for col in xrange(5, 53):
                powerload_scanner.append(df.loc[row][col])
            powerload_collector.append(normalize(np.array(powerload_scanner)))
            del (powerload_scanner[:])

    dataset.Temperature.train, dataset.Temperature.test = data_splitter(np.array(temperature_collector))
    dataset.PowerLoad.train, dataset.PowerLoad.test = data_splitter(np.array(powerload_collector))

In [5]:
df = pd.read_excel(file, sheetname=QLD)
dataset = DataSet()
extract_feature_vector(df, dataset)

### checking parsed data

In [6]:
# Temperature Training Data
print 'data: ',
print dataset.Temperature.train  
print 'shape: ', 
print dataset.Temperature.train.shape # (concatenate_number*2, )
print 'type: ',
print type(dataset.Temperature.train) # numpy.ndarray
print

print 'data: ',
print dataset.Temperature.train[0]
print 'shape: ', 
print dataset.Temperature.train[0].shape
print 'type: ',
print type(dataset.Temperature.train[0])
print 

print 'length: ',
print len(dataset.Temperature.train)

data:  [[ 0.77848101  0.63291139  0.60126582 ...,  0.26265823  0.27531646
   0.58544304]
 [ 0.63291139  0.60126582  0.44936709 ...,  0.27531646  0.58544304
   0.33544304]
 [ 0.60126582  0.44936709  0.24050633 ...,  0.58544304  0.33544304
   0.26582278]
 ..., 
 [ 0.90960452  0.70621469  0.46892655 ...,  0.29943503  0.23163842
   0.03389831]
 [ 0.79607843  0.63137255  0.85098039 ...,  0.46666667  0.32941176  0.        ]
 [ 0.63137255  0.85098039  0.9372549  ...,  0.32941176  0.          0.11764706]]
shape:  (316, 64)
type:  <type 'numpy.ndarray'>

data:  [ 0.77848101  0.63291139  0.60126582  0.44936709  0.24050633  0.5443038
  0.56962025  0.28481013  0.24050633  0.43037975  0.42405063  0.51898734
  0.62658228  0.81012658  0.75316456  0.37974684  0.5443038   0.79113924
  0.63291139  0.63291139  0.62658228  0.47468354  0.17721519  0.08860759
  0.37974684  0.44303797  0.33544304  0.17721519  0.46835443  0.48734177
  0.53164557  1.          0.4335443   0.40506329  0.34493671  0.25
  0.167721

In [7]:
# Temperature Test Data
print 'data: ',
print dataset.Temperature.test   

print 'shape: ', 
print dataset.Temperature.test.shape # (concatenate_number*2, )

print 'type: ',
print type(dataset.Temperature.test) # numpy.ndarray

print 'length: ',
print len(dataset.Temperature.test)

data:  [[ 0.9372549   0.89803922  1.         ...,  0.11764706  0.31764706
   0.39215686]
 [ 0.89803922  1.          1.         ...,  0.31764706  0.39215686
   0.25882353]
 [ 1.          1.          0.86666667 ...,  0.39215686  0.25882353
   0.42352941]
 ..., 
 [ 0.29100529  0.28042328  0.25396825 ...,  0.25396825  0.38624339
   0.37566138]
 [ 0.28042328  0.25396825  0.31746032 ...,  0.38624339  0.37566138
   0.53174603]
 [ 0.25396825  0.31746032  0.34391534 ...,  0.37566138  0.53174603
   0.03703704]]
shape:  (79, 64)
type:  <type 'numpy.ndarray'>
length:  79


In [8]:
# PowerLoad Testing Data
print 'data: ',
print dataset.PowerLoad.test
print 'shape: ',
print dataset.PowerLoad.test.shape # 
print 'type: ',
print type(dataset.PowerLoad.test) # numpy.ndarray
print

print 'data: ',
print dataset.PowerLoad.test[0]
print 'shape: ',
print dataset.PowerLoad.test[0].shape # 
print 'type: ',
print type(dataset.PowerLoad.test[0])

print 'length: ',
print len(dataset.PowerLoad.test)

data:  [[ 0.51149971  0.42691504  0.3495058  ...,  0.52309447  0.45295571
   0.41703098]
 [ 0.27789055  0.2277305   0.19529237 ...,  0.58314466  0.52474822
   0.46428293]
 [ 0.34871836  0.26742205  0.21171903 ...,  0.6062755   0.52085892
   0.45830197]
 ..., 
 [ 0.54468085  0.43734602  0.30795073 ...,  0.58566629  0.57206047
   0.54496081]
 [ 0.43033554  0.3437927   0.25947669 ...,  0.53697049  0.47613746
   0.42876664]
 [ 0.31184273  0.24531056  0.18465897 ...,  0.61703224  0.54225866
   0.48886123]]
shape:  (79, 48)
type:  <type 'numpy.ndarray'>

data:  [  5.11499715e-01   4.26915035e-01   3.49505797e-01   2.46626117e-01
   1.76962555e-01   1.08201863e-01   6.36285877e-02   2.53754039e-02
   0.00000000e+00   3.80155864e-04   4.22923399e-03   3.52119369e-02
   6.59570424e-02   1.46692644e-01   2.08135335e-01   3.45989356e-01
   4.92349363e-01   5.78074511e-01   6.42796046e-01   6.75489451e-01
   6.54723437e-01   6.46169930e-01   6.04067668e-01   5.65909523e-01
   5.32265729e-01   4.87

# DeepLearning Core

## Training

In [9]:
# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 128
display_step = 10

# Network Parameters
n_input = 2 * concatenate_number  # Temperature data as input (input matrix shape: ???)
n_output = 48  # Predicted Power data as output  (48-points)
dropout = 0.75  # Dropout, probability to keep units

In [10]:
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input], name='temperature_input')
y = tf.placeholder(tf.float32, [None, n_output], name='powerload_output')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # dropout (keep probability)

In [11]:
tensor_map = {x: dataset.Temperature.train, 
              y: dataset.PowerLoad.train, 
              keep_prob: 1}

In [12]:
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

In [13]:
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    with tf.name_scope('array_reshape') as array_reshape:
        x = tf.reshape(x, shape=[-1, 2*4, 2*4, 1])

    # Convolution Layer
    with tf.name_scope('conv_layer1') as conv_layer1:
        conv1 = conv2d(x, weights['wc1'], biases['bc1'])
        # Max Pooling (down-sampling)
        conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    with tf.name_scope('conv_layer2') as conv_layer2:
        conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    with tf.name_scope('fc_layer') as fc_layer:
        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        fc1 = tf.nn.relu(fc1)
        # Apply Dropout
        fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    with tf.name_scope('output_layer') as output_layer:
        out = tf.nn.sigmoid(tf.add(tf.matmul(fc1, weights['out']), biases['out']))
    
    return out

In [14]:
# Store layers weight & bias
weights = {
    # 3x3 conv, 1 input, 32 outputs(number of filter = 32)
    'wc1': tf.Variable(tf.truncated_normal([3, 3, 1, 32]), name='wc1'),
    
    # 3x3 conv, 32 inputs, 64 outputs(number of filter = 64)
    'wc2': tf.Variable(tf.truncated_normal([3, 3, 32, 64]), name='wc2'),
    
    # fully connected, width*height*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.truncated_normal([4 * 4 * 64, 1024]), name='wd1'),
    
    # 1024 inputs, 48 outputs
    'out': tf.Variable(tf.truncated_normal([1024, n_output]), name='wo1')
}

biases = {
    'bc1': tf.Variable(tf.truncated_normal([32]), name='bc1'),
    'bc2': tf.Variable(tf.truncated_normal([64]), name='bc2'),
    'bd1': tf.Variable(tf.truncated_normal([1024]), name='bd1'),
    'out': tf.Variable(tf.truncated_normal([n_output]), name='bo1')
}

In [15]:
# Construct model
pred = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
with tf.name_scope('cost'):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    tf.summary.scalar('cost/', cost)
    
with tf.name_scope('optimization'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Ecaluation model
with tf.name_scope('evaluation'):
    pass

# Initializing the variables
init = tf.global_variables_initializer()

In [16]:
# next batch data selector
def next_batch(dataset, batch_size, step):
    """
    pick data and build next batch for training
    :dataset
    :param batch_size:
    :return:
    """
    start_idx = (step - 1) * batch_size
    end_idx = step * batch_size
    
    index_checker = end_idx - len(dataset)
    
    if index_checker < 0:
        batch_data = dataset[start_idx : end_idx]
    else:
        batch_data = np.concatenate((dataset[start_idx : len(dataset)], dataset[0 : index_checker]), axis=0)
        
    if len(batch_data) == batch_size:
        return batch_data

In [17]:
with tf.Session() as sess:
    # Initializing the variables
    init = tf.global_variables_initializer()
    sess.run(init)
    
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./summary', sess.graph)
    
    step = 1
    
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
#         batch_x = next_batch(temperature_map, batch_size, step)
#         batch_y = next_batch(power_vector_, batch_size, step)

        batch_x = dataset.Temperature.train
        batch_y = dataset.PowerLoad.train
        
    
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
        
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={temperature_map: batch_x, power_vector_: batch_y, keep_prob: 1.})
            print loss
            #print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + \
            #     "{:.6f}".format(loss) + ", Training Accuracy= " + \
            #    "{:.5f}".format(acc))
        step += 1
        
    print("Optimization Finished!")

InvalidArgumentError: Input to reshape is a tensor with 51744 values, but the requested shape requires a multiple of 64
	 [[Node: array_reshape/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_temperature_input_0, array_reshape/Reshape/shape)]]

Caused by op u'array_reshape/Reshape', defined at:
  File "/Users/JH/anaconda2/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/Users/JH/anaconda2/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/traitlets/config/application.py", line 589, in launch_instance
    app.start()
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 405, in start
    ioloop.IOLoop.instance().start()
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 260, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 212, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 370, in execute_request
    user_expressions, allow_stdin)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 175, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2723, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2825, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/JH/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2885, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-94faebf0460a>", line 2, in <module>
    pred = conv_net(x, weights, biases, keep_prob)
  File "<ipython-input-13-b9ca54e5c289>", line 5, in conv_net
    x = tf.reshape(x, shape=[-1, 2*4, 2*4, 1])
  File "/Users/JH/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2630, in reshape
    name=name)
  File "/Users/JH/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/Users/JH/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/JH/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 51744 values, but the requested shape requires a multiple of 64
	 [[Node: array_reshape/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_temperature_input_0, array_reshape/Reshape/shape)]]


In [None]:
print x