## Data Processing done

Data:
- Order: calculate supply and demand
- Weather: PM 2.5, weather etc.
- POI: point of interest, refers to facilities such as resturant, theatres etc.
- Traffic: four levels of traffic jam.
- cluster: Map district hash to district id.

All data are precocessed at this point.

intuitively, weather, taffic, POI should all affect didi's request volume.

In [1]:
import numpy as np
import pandas as pd
import os
import string


In [2]:
cols = ['time','weather','temperature','PM25','district_ID','date','week','req','ans','gap']
df = pd.read_csv('/resources/data/DIDI/summaries/output.csv', 
                 names = cols, )

droping = ['req','ans', 'date']
df.drop(droping, axis=1, inplace=True)

df = df.rename(columns={'gap' : 'y'})

## Look at our data

In [3]:
df.head()

Unnamed: 0,time,weather,temperature,PM25,district_ID,week,y
0,1,1,4.0,177,1,4,9.0
1,1,1,4.0,177,2,4,1.0
2,1,1,4.0,177,3,4,1.0
3,1,1,4.0,177,4,4,3.0
4,1,1,4.0,177,5,4,0.0


In [4]:
#utilitiy functions

def onehot_encode(df, cols):
    return pd.get_dummies(df, columns=cols)

def normalize(df, df_ref, columns):
    for col in columns:
        df[col] = (df[col] - df_ref[col].mean()) / df_ref[col].std()
    return df

def MAPE(D, pred, act):
    D['pred'] = pred
    D['act'] = act
    D['delta'] = [np.abs((D.iloc[i]['act'] - D.iloc[i]['pred'])/D.iloc[i]['act']) if D.iloc[i]['act']> 0 else 0
                  for i in xrange(D.shape[0])]
    subMAPE = D['delta'].mean()
    return subMAPE

In [5]:
normal_col = ['PM25', 'temperature']
normalizers = set(df[normal_col])

#normalize none categorical columns.
#df = normalize(df,df,normalizers)

df.head()

Unnamed: 0,time,weather,temperature,PM25,district_ID,week,y
0,1,1,4.0,177,1,4,9.0
1,1,1,4.0,177,2,4,1.0
2,1,1,4.0,177,3,4,1.0
3,1,1,4.0,177,4,4,3.0
4,1,1,4.0,177,5,4,0.0


In [6]:
# one-hot ecoding catoegorical columns.
df = onehot_encode(df, ['week'])

df.head()

Unnamed: 0,time,weather,temperature,PM25,district_ID,y,week_0,week_1,week_2,week_3,week_4,week_5,week_6
0,1,1,4.0,177,1,9.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,1,1,4.0,177,2,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,1,1,4.0,177,3,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1,1,4.0,177,4,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1,1,4.0,177,5,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [7]:
#cols = df.co['time','weather','temperature','PM25','districtID', 'week']
cols = (list(set(df.columns).difference(['y'])))
index = np.array(df[['district_ID', 'time']])

X = np.array(df[cols])
y = np.array(df['y'])


# Prepare the Training, Validation and Test Data

## Objective Function

$$ MAPE = \frac 1 D \sum_{d=1}^D \frac 1 T \sum_{t=1}^T abs \left( \frac {\hat y_{dt} - y_{dt}} {y_{dt}} \right) \forall y_{dt} > 0$$ where $D$ is the total number of districts, $T$ is the total number of time intervals for a given district, $\hat y_{dt}$ is our estimate of the gap and $y_{dt}$ the actual gap for district $d$ at time $t$. Notice that we exclude all data points with zero actual gap from the calculation of MAPE, and that we take the sum over the time intervals within a district before we sum over districts. 

Mape is non convex and hard to differentiate, approximate it with abs distance.

In [8]:
X.shape

(317526, 12)

In [9]:
y.shape

(317526,)

## train/validation split.

In [10]:
X_train = X[:280000,]
y_train = y[:280000,]
index_train = index[:280000,]

X_train.shape

(280000, 12)

In [11]:
X_val = X[280000:317526,]
y_val = y[280000:317526,]
index_val = index[280000:317526,]
index_val = pd.DataFrame(index_val, columns=['district_ID', 'UTC'])

X_val.shape

(37526, 12)

In [12]:
df.head()

Unnamed: 0,time,weather,temperature,PM25,district_ID,y,week_0,week_1,week_2,week_3,week_4,week_5,week_6
0,1,1,4.0,177,1,9.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,1,1,4.0,177,2,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,1,1,4.0,177,3,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1,1,4.0,177,4,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1,1,4.0,177,5,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Just guessing 1s

In [13]:
l = y_val.shape[0]
MAPE(index_val, np.ones(l), y_val)

0.23467365435568543

# One-layer network, 200 neurons.

## Activation function candidates:
- Linear
- RELU 
- ELU
- Sigmoid
- Softmax <<<<

In [14]:
def I(X):
    return X

import tensorflow as tf
    
num_labels = 1
pop_size = y_train.shape[0]
features = X_train.shape[1]
batch_size = 128

num_hidden_nodes = 200

with tf.Graph().as_default() as graph:

    X = tf.placeholder(tf.float32, shape=(None, features))
    y = tf.placeholder(tf.float32, shape=(None, ))
    Xv = tf.constant(X_val, tf.float32)
    #Xt = tf.constant(X_test, tf.float32)
    
    '''
    Choose the flavor of your neuron
        set F = flavor, where flavor is the neuron flavor. Choose the flavor from the following list
            linear: I
            RELU: tf.nn.relu
            ELU: tf.nn.elu
            sigmoid: tf.nn.sigmoid
            softmax: tf.nn.softmax
    '''
    F = tf.nn.softmax
        
    with tf.name_scope('hidden'):
        fros = features
        tos = num_hidden_nodes
        weights = tf.Variable(tf.truncated_normal([fros, tos], stddev= 1/ tf.sqrt(float(fros)),  
                                                  name='weights'))
        biases = tf.Variable(tf.zeros([tos]),  name='biases')
        H = F(tf.matmul(X, weights) + biases)
        Hv = F(tf.matmul(Xv, weights) + biases)
        #Ht = F(tf.matmul(Xt, weights) + biases)
    
    with tf.name_scope('output'):
        F = tf.nn.elu
        fros = num_hidden_nodes
        tos = num_labels
        weights = tf.Variable(tf.truncated_normal([fros, tos], stddev= 1/ tf.sqrt(float(fros)),  
                                                  name='weights'))
        biases = tf.Variable(tf.zeros([tos]),  name='biases')
        yfit = F(tf.matmul(H, weights) + biases)
        yval = F(tf.matmul(Hv, weights) + biases)
        #ytest = F(tf.matmul(Ht, weights) + biases)        
    
    # Optimizer.
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(0.1, global_step, 1000, 0.5)
    loss = tf.reduce_mean(tf.abs((y - yfit)/(y + 1./y)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
import time
num_steps = 20001
start = time.time()
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    for step in xrange(num_steps):
        # Generate a minibatch.
        indices = np.random.choice(pop_size, batch_size, replace=False)
        batch_data = X_train[indices, :]
        batch_labels = y_train[indices]
        # prep and feed
        feed_dict = {X : batch_data, y : batch_labels}
        _, l, predictions = session.run([optimizer, loss, yfit], feed_dict=feed_dict)
        
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Validation MAPE: %.5f" % MAPE((index_val.copy()), yval.eval(), y_val))
    #print np.mean(ytest.eval())
    #print np.std(ytest.eval())

print ('This took %.2f seconds' % (time.time()-start))

Initialized
Minibatch loss at step 0: 0.422364
Validation MAPE: 0.48064
Minibatch loss at step 500: 0.216691
Validation MAPE: 0.23651
Minibatch loss at step 1000: 0.202043
Validation MAPE: 0.23589
Minibatch loss at step 1500: 0.271510
Validation MAPE: 0.23534
Minibatch loss at step 2000: 0.266236
Validation MAPE: 0.23542
Minibatch loss at step 2500: 0.222944
Validation MAPE: 0.23503
Minibatch loss at step 3000: 0.225983
Validation MAPE: 0.23486
Minibatch loss at step 3500: 0.301049
Validation MAPE: 0.23482
Minibatch loss at step 4000: 0.257672
Validation MAPE: 0.23473
Minibatch loss at step 4500: 0.248577
Validation MAPE: 0.23474
Minibatch loss at step 5000: 0.221139
Validation MAPE: 0.23475
Minibatch loss at step 5500: 0.253877
Validation MAPE: 0.23472
Minibatch loss at step 6000: 0.254884
Validation MAPE: 0.23469
Minibatch loss at step 6500: 0.257774
Validation MAPE: 0.23472
Minibatch loss at step 7000: 0.282337
Validation MAPE: 0.23469
Minibatch loss at step 7500: 0.270973
Validatio

# Two-layer network, 200 neurons each.  

## activation function candidates:

- Linear
- RELU <<<<
- ELU
- Sigmoid
- Softmax

In [14]:
def I(X):
    return X

import tensorflow as tf
    
num_labels = 1
pop_size = y_train.shape[0]
features = X_train.shape[1]
batch_size = 128

num_hidden_nodes = 200

with tf.Graph().as_default() as graph:
    
    X = tf.placeholder(tf.float32, shape=(None, features))
    y = tf.placeholder(tf.float32, shape=(None, ))
    Xv = tf.constant(X_val, tf.float32)
    #Xt = tf.constant(X_test, tf.float32)
    
    
    '''
    Choose the flavor of your neuron in the first line of each name scope below
        set F = flavor, where flavor is the neuron flavor. Choose the flavor from the following list
            linear: I
            RELU: tf.nn.relu
            ELU: tf.nn.elu
            sigmoid: tf.nn.sigmoid
            softmax: tf.nn.softmax
    '''
    
    with tf.name_scope('hidden1'):
        F = tf.nn.softmax
        fros = features
        tos = num_hidden_nodes
        weights = tf.Variable(tf.truncated_normal([fros, tos], stddev= 1/ tf.sqrt(float(fros)),  
                                                  name='weights'))
        biases = tf.Variable(tf.zeros([tos]),  name='biases')
        H = F(tf.matmul(X, weights) + biases)
        Hv = F(tf.matmul(Xv, weights) + biases)
        #Ht = F(tf.matmul(Xt, weights) + biases)

        
    with tf.name_scope('hidden2'):
        F = tf.nn.relu
        fros = num_hidden_nodes
        tos = num_hidden_nodes
        weights = tf.Variable(tf.truncated_normal([fros, tos], stddev= 1/ tf.sqrt(float(fros)),  
                                                  name='weights'))
        biases = tf.Variable(tf.zeros([tos]),  name='biases')
        H = F(tf.matmul(H, weights) + biases)
        Hv = F(tf.matmul(Hv, weights) + biases)
        #Ht = F(tf.matmul(Ht, weights) + biases)

        
    with tf.name_scope('output'):
        F = tf.nn.elu
        fros = num_hidden_nodes
        tos = num_labels
        weights = tf.Variable(tf.truncated_normal([fros, tos], stddev= 1/ tf.sqrt(float(fros)),  
                                                  name='weights'))
        biases = tf.Variable(tf.zeros([tos]),  name='biases')
        yfit = F(tf.matmul(H, weights) + biases)
        yval = F(tf.matmul(Hv, weights) + biases)
        #ytest = F(tf.matmul(Ht, weights) + biases)        
    
    # Optimizer.
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(0.1, global_step, 1000, 0.5)
    loss = tf.reduce_mean(tf.abs((y - yfit)/(y + 1./y)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
import time
num_steps = 20001
start = time.time()
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    for step in xrange(num_steps):
        # Generate a minibatch.
        indices = np.random.choice(pop_size, batch_size, replace=False)
        batch_data = X_train[indices, :]
        batch_labels = y_train[indices]
        # prep and feed
        feed_dict = {X : batch_data, y : batch_labels}
        _, l, predictions = session.run([optimizer, loss, yfit], feed_dict=feed_dict)
        
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Validation MAPE: %.5f" % MAPE((index_val.copy()), yval.eval(), y_val))
    #print np.mean(ytest.eval())
    #print np.std(ytest.eval())

print ('This took %.2f seconds' % (time.time()-start))

Initialized
Minibatch loss at step 0: 0.345919
Validation MAPE: 0.46793
Minibatch loss at step 500: 0.197590
Validation MAPE: 0.23552
Minibatch loss at step 1000: 0.274812
Validation MAPE: 0.23672


KeyboardInterrupt: 