In [1]:
import pandas as pd
import os
#import matplotlib.pyplot as plt
import tensorflow as tf
from datetime import datetime
from tensorflow.contrib.layers import fully_connected, batch_norm
import math
import numpy as np
import random

In [2]:
df = pd.read_csv('./DL_final_project/DL_Taiwan_data/sinica/201701_Taiwan.csv',
                 usecols=['Date','Time','PM2.5','PM10','PM1','Temperature','Humidity','lon','lat'])
#把201701_Taiwan.csv的header的' lat',' lon'改成'lat','lon' (多了空格)
train_for_taiwan = True # filter out non-Taiwan
#len(df)

In [3]:
if train_for_taiwan:
    df = df[(df['lat']>= 22 )& (df['lat'] <= 25)&(df['lon']>=120)&(df['lon']<=122)]
df = df.assign(Timestamp = pd.to_datetime(df['Date']+' '+df['Time']))
df = df.assign(Hour = df['Timestamp'].dt.hour)

In [4]:
df_X = df[['Hour','PM10','PM1','Temperature','Humidity','lon','lat']]
df_X = df_X.values
df_Y = df[['PM2.5']]
df_Y = df_Y.values

In [5]:
root_logdir = "tf_logs"
batch_log_step = 50
early_stopping_epochs = 10

In [6]:
def dnn(X_1, y_1, X_2, y_2, X_3, y_3):
    tf.reset_default_graph()
    
    # check folder
    export_check_num = 1
    export_dir = './final_model/'
    while os.path.exists(export_dir):
        export_check_num += 1
        export_dir = './final_model_'+str(export_check_num)+'/'
    
    print("model will be exported in",export_dir)
    
    # logs
    start_time = datetime.now()
    now = start_time.strftime("%Y%m%d_%H%M%S")
    logdir = "./{}/run-{}".format(root_logdir, now)
    print("now=",now)
    
    # dnn graph defs
    n_input = 7
    n_epochs = 100
    n_hidden = [100,100,10]
    act_fn = tf.nn.sigmoid
    learning_rate = 0.001
    batch_normalization = False
    batch_size = 10000
    #batch_size = 1000 #小範圍測試用
    mult_bias = 1000

    # I/O
    with tf.name_scope("Input"):
        X = tf.placeholder(tf.float32, [None, n_input], name="X")
        is_training = tf.placeholder(tf.bool, shape=(), name="is_training")
    with tf.name_scope("Output"):
        y = tf.placeholder(tf.float32, [None, 1], name="y")
        y_biased = y/mult_bias
    
    # batch norm 
    he_init = tf.contrib.layers.variance_scaling_initializer()
    with tf.name_scope("BatchNormArgs"):
        bn_params = {
            'is_training': is_training,
            'decay': 0.99,
            'updates_collections': None,
            'scale': True
        }
    
    # DNN
    with tf.name_scope("DNN"):
        with tf.contrib.framework.arg_scope(
                [fully_connected],
                weights_initializer = he_init,
                normalizer_fn = batch_norm if batch_normalization else None,
                normalizer_params = bn_params if batch_normalization else None
                ):
            h1=fully_connected(X ,n_hidden[0],activation_fn=act_fn,scope="h1")
            h2=fully_connected(h1,n_hidden[1],activation_fn=act_fn,scope="h2")
            h3=fully_connected(h2,n_hidden[2],activation_fn=act_fn,scope="h3")
            logits=fully_connected(h3, 1, activation_fn=act_fn,scope="out")
    
    with tf.name_scope("Cost"):
        #cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y), name="cost")
        cost = tf.losses.mean_squared_error(logits, y_biased)
    with tf.name_scope("AdamOptimizer"):
        optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
        minimizer = optimizer.minimize(cost)
    
    init = tf.global_variables_initializer()
    with tf.name_scope("ModelSaver"):
        saver = tf.train.Saver()
    
    with tf.name_scope("Predict"):
        predict = tf.multiply(logits, mult_bias, name="predict")
        
    with tf.name_scope("Error"):
        ave_of_batch_y = tf.reduce_mean(y_biased)
        error = tf.abs(logits - y_biased)/ave_of_batch_y
        relative_err = tf.reduce_mean(error)
    
    with tf.name_scope("Summaries-Train"):
        cost_summary = tf.summary.scalar('cost_function',cost)
        error_summary = tf.summary.scalar('relative_err',relative_err)
    with tf.name_scope("Summaries-Validation"):
        v_cost_summary = tf.summary.scalar('v_cost_function',cost)
        v_error_summary = tf.summary.scalar('v_relative_err',relative_err)
    file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    
    with tf.Session() as sess:
        sess.run(init)
        
        total_batch = len(X_1)//batch_size
        #total_batch = validation_idx_start//batch_size
        print("Total batch:",total_batch)
        #X_va, y_va = dfa_X[validation_idx_start:test_idx_start], dfa_Y[validation_idx_start:test_idx_start]
        X_va = X_2
        y_va = y_2
        X_test = X_3
        y_test = y_3
        
        best_va_err_triggered = False
        best_va_err = 0
        early_stopping_triggered = False
        stopping_epoch = 0
        step = 0
        save_path = ""
        best_save_path = ""
        
        perm = np.arange(len(X_1))
        np.random.shuffle(perm)
        X_t = X_1[perm]
        y_t = y_1[perm]
        
        for epoch in range(n_epochs):
            for batch in range(total_batch):
                X_ba = X_t[batch*batch_size: (batch+1)*batch_size]
                y_ba = y_t[batch*batch_size: (batch+1)*batch_size]
                _, c = sess.run([minimizer, cost], feed_dict = {is_training: True, X:X_ba, y:y_ba})
                
                if batch % batch_log_step == 0:
                    print("Epoch %4d batch %5d"%(epoch,batch))
                    cost_summary_str = cost_summary.eval(feed_dict={is_training:False, X:X_ba, y:y_ba})
                    step = epoch * total_batch + batch
                    file_writer.add_summary(cost_summary_str, step)
                    error_summary_str = error_summary.eval(feed_dict={is_training:False, X:X_ba, y:y_ba})
                    file_writer.add_summary(error_summary_str, step)
            
            save_path = saver.save(sess, "./checkpoint/model_"+now+".ckpt")
            va_err, va_cost = sess.run([relative_err, cost], feed_dict={is_training:False, X:X_va, y:y_va})
            va_l = sess.run(predict, feed_dict={is_training:False, X:X_va, y:y_va})
            rnd_result_idx = random.randint(0, len(X_va)-1)
            
            print(va_l[rnd_result_idx], y_va[rnd_result_idx]) # print 1st prediction result
            v_error_summary_str = v_error_summary.eval(feed_dict={is_training:False, X:X_va, y:y_va})
            file_writer.add_summary(v_error_summary_str, step)
            v_cost_summary_str = v_cost_summary.eval(feed_dict={is_training:False, X:X_va, y:y_va})
            file_writer.add_summary(v_cost_summary_str, step)
            print("Epoch %4d val.cost %3.6f val.err %3.2f%%"%(epoch,va_cost,va_err*100),end=" ")
            
            if best_va_err_triggered:
                if va_err < best_va_err:
                    print("best")
                    stopping_epoch = 0
                    best_va_err = va_err
                    
                    best_save_path = saver.save(sess, "./best_model/model_"+now)
                else:
                    stopping_epoch += 1
                    print("stopping %3d"%stopping_epoch)
                if stopping_epoch >= early_stopping_epochs:
                    early_stopping_triggered = True
                    print("Early stopping triggered: Step: %10d, val.err %3.2f%%"%(step, va_err*100))
            else:
                best_va_err = va_err
                best_va_err_triggered = True
                print("best")
                best_save_path = saver.save(sess, "./best_model/model_"+now)
            
            if early_stopping_triggered:
                break
        
        finish_time = datetime.now()
        print("best model saved to:", best_save_path)
        file_writer.close()
        elapse_time = finish_time - start_time
        total_seconds = elapse_time.total_seconds()
        print("Total time:", total_seconds)
        
        #X_test = dfa_X[test_idx_start:idx_end]
        #y_test = dfa_Y[test_idx_start:idx_end]
        saver.restore(sess, "./best_model/model_"+now)
        best_err = relative_err.eval({is_training: False, X: X_test, y: y_test})
        print("Test Err: %3.2f%%"%(best_err*100))
        
        
        builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
        builder.add_meta_graph_and_variables(sess, ["tag"], signature_def_map= {
            "model": tf.saved_model.signature_def_utils.predict_signature_def(
                inputs= {"X": X, "is_training": is_training},
                outputs= {"predict": predict})
        })
        builder.save()
        
        """
        print("Predicting...")
        predict_values = sess.run(predict, feed_dict={is_training: False, X: X_w})
        print("Predict End")
        return predict_values
        """  

In [7]:
# df2: validation
# df3: test
df_v = pd.read_csv('./DL_final_project/DL_Taiwan_data/sinica/201702_Taiwan.csv',
                 usecols=['Date','Time','PM2.5','PM10','PM1','Temperature','Humidity','lon','lat'])
test_idx_start = math.floor(len(df_v)*0.5)
df2 = df_v[:test_idx_start]
df3 = df_v[test_idx_start:]
if train_for_taiwan:
    df2 = df2[(df2['lat']>= 22 )& (df2['lat'] <= 25)&(df2['lon']>=120)&(df2['lon']<=122)]
df2 = df2.assign(Timestamp = pd.to_datetime(df2['Date']+' '+df2['Time']))
df3 = df3.assign(Timestamp = pd.to_datetime(df3['Date']+' '+df3['Time']))
df2 = df2.assign(Hour = df2['Timestamp'].dt.hour)
df3 = df3.assign(Hour = df3['Timestamp'].dt.hour)
df2_X = df2[['Hour','PM10','PM1','Temperature','Humidity','lon','lat']]
df2_y = df2[['PM2.5']]
df3_X = df3[['Hour','PM10','PM1','Temperature','Humidity','lon','lat']]
df3_y = df3[['PM2.5']]
df2_X = df2_X.values
df2_y = df2_y.values
df3_X = df3_X.values
df3_y = df3_y.values

In [8]:
dnn(df_X, df_Y, df2_X, df2_y, df3_X, df3_y)

model will be exported in ./final_model/
now= 20180608_010508
Total batch: 493
Epoch    0 batch     0
Epoch    0 batch    50
Epoch    0 batch   100
Epoch    0 batch   150
Epoch    0 batch   200
Epoch    0 batch   250
Epoch    0 batch   300
Epoch    0 batch   350
Epoch    0 batch   400
Epoch    0 batch   450
[ 40.47767639] [33]
Epoch    0 val.cost 0.000362 val.err 28.16% best
Epoch    1 batch     0
Epoch    1 batch    50
Epoch    1 batch   100
Epoch    1 batch   150
Epoch    1 batch   200
Epoch    1 batch   250
Epoch    1 batch   300
Epoch    1 batch   350
Epoch    1 batch   400
Epoch    1 batch   450
[ 31.44128609] [24]
Epoch    1 val.cost 0.000252 val.err 20.28% best
Epoch    2 batch     0
Epoch    2 batch    50
Epoch    2 batch   100
Epoch    2 batch   150
Epoch    2 batch   200
Epoch    2 batch   250
Epoch    2 batch   300
Epoch    2 batch   350
Epoch    2 batch   400
Epoch    2 batch   450
[ 27.41244507] [1]
Epoch    2 val.cost 0.000231 val.err 18.08% best
Epoch    3 batch     0
Ep

Epoch   27 batch   100
Epoch   27 batch   150
Epoch   27 batch   200
Epoch   27 batch   250
Epoch   27 batch   300
Epoch   27 batch   350
Epoch   27 batch   400
Epoch   27 batch   450
[ 45.41850281] [43]
Epoch   27 val.cost 0.000170 val.err 11.53% best
Epoch   28 batch     0
Epoch   28 batch    50
Epoch   28 batch   100
Epoch   28 batch   150
Epoch   28 batch   200
Epoch   28 batch   250
Epoch   28 batch   300
Epoch   28 batch   350
Epoch   28 batch   400
Epoch   28 batch   450
[ 41.8649292] [42]
Epoch   28 val.cost 0.000170 val.err 11.49% best
Epoch   29 batch     0
Epoch   29 batch    50
Epoch   29 batch   100
Epoch   29 batch   150
Epoch   29 batch   200
Epoch   29 batch   250
Epoch   29 batch   300
Epoch   29 batch   350
Epoch   29 batch   400
Epoch   29 batch   450
[ 55.21756744] [56]
Epoch   29 val.cost 0.000169 val.err 11.46% best
Epoch   30 batch     0
Epoch   30 batch    50
Epoch   30 batch   100
Epoch   30 batch   150
Epoch   30 batch   200
Epoch   30 batch   250
Epoch   30 b