In [1]:
import numpy as np
import pandas as pd
import math
import cPickle as pickle

# Data Parsing


In [2]:
file = '/Users/JH/Desktop/NTU/NTU_Research/data/NEM_Load_Forecasting_Database.xls'

concatenate_number = 2*4*4

QLD = 'Actual_Data_QLD'
NSW = 'Actual_Data_NSW'
VIC = 'Actual_Data_VIC'
SA = 'Actual_Data_SA'
TAS = 'Actual_Data_TAS'

In [3]:
# Set Classes as Data Container

class Structure:
    def __init__(self):
        self._feature = []
        self._target = []

    @property
    def feature(self):
        return self._feature

    @property
    def target(self):
        return self._target

    @feature.setter
    def feature(self, value):
        self._feature = value

    @target.setter
    def target(self, value):
        self._target = value


class Data:
    def __init__(self):
        pass

    class Train(Structure):
        def __init__(self):
            pass

    class Test(Structure):
        def __init__(self):
            pass


class DataSet:
    def __init__(self):
        pass

    class Raw:
        def __init__(self):
            pass

        class Train(Structure):
            def __init__(self):
                pass

        class Test(Structure):
            def __init__(self):
                pass

    class PreProcessed:
        def __init__(self):
            pass

        class Train(Structure):
            def __init__(self):
                pass

        class Test(Structure):
            def __init__(self):
                pass

In [4]:
# Set Functions

def normalization(data):
    return (data - min(data)) / (max(data) - min(data))


def data_splitter(data, ratio=0.8):
    """
    split data into training data & testing data
    :param data:

    :param ratio:
        training data ratio
    :return:
        train_data, test_data
    """
    splitter = int(len(data) * ratio)
    return np.array(data[:splitter]), np.array(data[splitter + 1:])


def preprocessing_filter(data, nominator, denominator):
    return normalization(data) ** (nominator / denominator)


def preprocessing(data_present, temperature_max, temperature_mean, denominator):
    data_present = list(data_present) + list(
        preprocessing_filter(np.array(data_present), temperature_max, denominator)) + list(
        preprocessing_filter(np.array(data_present), temperature_mean, denominator))

    return np.array(data_present)


def data_alloter(df):
    dataset = DataSet()
    denominator = df['Mean Tem.'].min()

    raw_feature = []
    raw_target = []
    preprocessed_feature = []
    preprocessed_target = []

    for row in range(0, len(df)):
        # if both MaxTemp and MeanTemp are not nan
        if not math.isnan(df['Max Tem.'][row]) and not math.isnan(df['Mean Tem.'][row]):
            if not math.isnan(df['Max Tem.'][row + 1]) and not math.isnan(df['Mean Tem.'][row + 1]):
                powerload_present = normalization(np.array(df.loc[row][5:53]))
                powerload_future = normalization(np.array(df.loc[row + 1][5:53]))

                raw_feature.append(np.array(
                    list(powerload_present) + list([df['Max Tem.'][row + 1]]) + list([df['Mean Tem.'][row + 1]])))
                raw_target.append(np.array(powerload_future))

                preprocessed_powerload_present = preprocessing(powerload_present,
                                                               df['Max Tem.'][row + 1],
                                                               df['Mean Tem.'][row + 1],
                                                               denominator)

                preprocessed_feature.append(preprocessed_powerload_present)
                preprocessed_target.append(np.array(powerload_future))

    dataset.Raw.Train.feature, dataset.Raw.Test.feature = data_splitter(raw_feature)
    dataset.Raw.Train.target, dataset.Raw.Test.target = data_splitter(raw_target)

    dataset.PreProcessed.Train.feature, dataset.PreProcessed.Test.feature = data_splitter(preprocessed_feature)
    dataset.PreProcessed.Train.target, dataset.PreProcessed.Test.target = data_splitter(preprocessed_target)

    return dataset

In [5]:
df = pd.read_excel(file, sheetname=QLD)
dataset = data_alloter(df)

### Parsed Data Checking

# DeepLearning Core

## Training

In [6]:
import tensorflow as tf

In [7]:
# Parameters
learning_rate = 0.01
training_iters = 200000
    #batch_size = 128
display_step = 50

# Network Parameters
n_input = 144  # feature data as input (input matrix shape: ???)
n_output = 48  # target data as output  (48-points)
dropout = 0.75  # Dropout, probability to keep units

In [8]:
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input], name='feature_input')
y = tf.placeholder(tf.float32, [None, n_output], name='target_output')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # dropout (keep probability)

In [9]:
tensor_map = {x: dataset.PreProcessed.Train.feature, 
              y: dataset.PreProcessed.Train.target, 
              keep_prob: 1}

In [10]:
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

In [11]:
dataset.PreProcessed.Train.feature[0].shape

(144,)

In [12]:
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    with tf.name_scope('array_reshape') as array_reshape:
        x = tf.reshape(x, shape=[-1, 12, 12, 1])

    # Convolution Layer
    with tf.name_scope('conv_layer1') as conv_layer1:
        conv1 = conv2d(x, weights['wc1'], biases['bc1'])
        # Max Pooling (down-sampling)
        conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    with tf.name_scope('conv_layer2') as conv_layer2:
        conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    with tf.name_scope('fc_layer') as fc_layer:
        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        fc1 = tf.nn.relu(fc1)
        # Apply Dropout
        fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    with tf.name_scope('output_layer') as output_layer:
        out = tf.nn.sigmoid(tf.add(tf.matmul(fc1, weights['out']), biases['out']))
    
    return out

In [13]:
# Store layers weight & bias
weights = {
    # 3x3 conv, 1 input, 32 outputs(number of filter = 32)
    'wc1': tf.Variable(tf.truncated_normal([3, 3, 1, 32]), name='wc1'),
    
    # 3x3 conv, 32 inputs, 64 outputs(number of filter = 64)
    'wc2': tf.Variable(tf.truncated_normal([3, 3, 32, 64]), name='wc2'),
    
    # fully connected, width*height*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.truncated_normal([3 * 3 * 64, 64*9]), name='wd1'),
    
    # 1024 inputs, 48 outputs
    'out': tf.Variable(tf.truncated_normal([64*9, n_output]), name='wo1')
}

biases = {
    'bc1': tf.Variable(tf.truncated_normal([32]), name='bc1'),
    'bc2': tf.Variable(tf.truncated_normal([64]), name='bc2'),
    'bd1': tf.Variable(tf.truncated_normal([64*9]), name='bd1'),
    'out': tf.Variable(tf.truncated_normal([n_output]), name='bo1')
}

In [14]:
# Construct model
pred = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
with tf.name_scope('cost'):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    tf.summary.scalar('cost/', cost)
    
with tf.name_scope('optimization'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluation model
with tf.name_scope('evaluation'):
    rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(y, pred))))

# Initializing the variables
init = tf.global_variables_initializer()

In [15]:
# next batch data selector
def next_batch(dataset, batch_size, step):
    """
    pick data and build next batch for training
    :dataset
    :param batch_size:
    :return:
    """
    start_idx = (step - 1) * batch_size
    end_idx = step * batch_size
    
    index_checker = end_idx - len(dataset)
    
    if index_checker < 0:
        batch_data = dataset[start_idx : end_idx]
    else:
        batch_data = np.concatenate((dataset[start_idx : len(dataset)], dataset[0 : index_checker]), axis=0)
        
    if len(batch_data) == batch_size:
        return batch_data

In [16]:
with tf.Session() as sess:
    sess.run(init)
    
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./summary', sess.graph)
    
    step = 1
    
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
#         batch_x = next_batch(temperature_map, batch_size, step)
#         batch_y = next_batch(power_vector_, batch_size, step)

        batch_x = dataset.PreProcessed.Train.feature
        batch_y = dataset.PreProcessed.Train.target
        
    
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
        
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
            acc = sess.run(rmse, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
            
            print "Iter " + str(step),
            print "cost : " + "{:.6f}".format(loss),
            print "rmse : " + "{:.6f}".format(acc)
            #print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + \
            #     "{:.6f}".format(loss) + ", Training Accuracy= " + \
            #    "{:.5f}".format(acc))
        step += 1
        
    print("Optimization Finished!")
    
    print("Testing Accuracy [RMSE] :", \
        sess.run(rmse, feed_dict={x: dataset.PreProcessed.Test.feature,
                                  y: dataset.PreProcessed.Test.target,
                                  keep_prob: 1.}))

Iter 2500 cost : 109.265778 rmse : 0.503096
Iter 5000 cost : 108.769264 rmse : 0.487693
Iter 7500 cost : 108.697159 rmse : 0.494173
Iter 10000 cost : 108.321022 rmse : 0.473012
Iter 12500 cost : 108.321022 rmse : 0.473012
Iter 15000 cost : 108.321022 rmse : 0.473012
Iter 17500 cost : 108.321022 rmse : 0.473012
Iter 20000 cost : 108.321022 rmse : 0.473012
Iter 22500 cost : 108.321022 rmse : 0.473012
Iter 25000 cost : 108.321022 rmse : 0.473012
Iter 27500 cost : 108.321022 rmse : 0.473012
Iter 30000 cost : 108.321022 rmse : 0.473012
Iter 32500 cost : 108.321022 rmse : 0.473012
Iter 35000 cost : 108.321022 rmse : 0.473012
Iter 37500 cost : 108.321022 rmse : 0.473012
Iter 40000 cost : 108.321022 rmse : 0.473012
Iter 42500 cost : 108.321022 rmse : 0.473012
Iter 45000 cost : 108.321022 rmse : 0.473012
Iter 47500 cost : 108.321022 rmse : 0.473012
Iter 50000 cost : 108.321022 rmse : 0.473012
Iter 52500 cost : 108.321022 rmse : 0.473012
Iter 55000 cost : 108.321022 rmse : 0.473012
Iter 57500 co

## Testing

model load -> predict -> draw graph