In [None]:
# coding=utf-8

# Baseline methods: MLP, Random Forest, Decision Tree, and SVM (NuSVR)

from __future__ import print_function
import os

import numpy as np
import sklearn
import torch

from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.tree import DecisionTreeRegressor as DTR
from sklearn.svm import NuSVR
from torch.utils.data import Dataset, DataLoader

from utils.metrics import get_MSE, get_RMSE, get_MAE, get_NRMSE


class config:
    # Mutable namespace for run-wide settings; later cells attach more
    # attributes to it. Documented with a comment rather than a docstring so
    # that the `config.__dict__` printed below is left untouched.
    city_name = 'shanghai'

# Number of time slots used for the selected city.
setattr(config, 'time_slots', {'beijing': 1440, 'shanghai': 336}[config.city_name])

# Chronological 70% / 15% / 15% split of the time-slot indices into
# train / validation / test ranges.
_bounds = [int(config.time_slots * fraction) for fraction in (0, 0.7, 0.85, 1)]
train_list, valid_list, test_list = [
    np.arange(start, stop) for start, stop in zip(_bounds[:-1], _bounds[1:])
]
print(config.__dict__)

# Category indices and their display names (14 entries each).
cate_list = range(14)
cateName = np.array([
    'Restaurant', 'Shopping', 'Recreation & Sports', 'Hotel', 'Business',
    'Residence', 'Life Service', 'Transport', 'Car Service', 'Education',
    'Medical Care', 'Scenic Spot', 'Government', 'Factory',
])

In [None]:
# Root folder that holds one sub-directory of .npy arrays per city. The
# original cluster path stays the default; set the DFG_DATASET_ROOT environment
# variable to point the notebook at another copy without editing this cell.
config.dataset_path = os.path.join(
    os.environ.get('DFG_DATASET_ROOT', '/data2/shaoerzhuo/DeepFlowGen/Dataset'),
    config.city_name)


def _load_city_array(kind):
    """Load ``<dataset_path>/<city>_<kind>.npy`` with its first row removed.

    Row 0 is skipped for every array -- presumably a placeholder region;
    TODO confirm against the dataset description.
    """
    path = '{0}/{1}_{2}.npy'.format(config.dataset_path, config.city_name, kind)
    return np.load(path)[1:]


# [num_regions, time_slots]
# Each flow is scaled by its own global maximum. The maxima are kept on
# `config` so that errors measured on the scaled flows can be converted back to
# the original units later on.
config.inflow = _load_city_array('inflow')[:, :config.time_slots]
config.inflow_max = config.inflow.max()
config.inflow /= config.inflow_max
# [num_regions, time_slots]
config.outflow = _load_city_array('outflow')[:, :config.time_slots]
config.outflow_max = config.outflow.max()
config.outflow /= config.outflow_max

# [num_regions, time_slots, num_cate]
config.checkin_inside = _load_city_array('checkin_inside')[:, :config.time_slots, cate_list]
config.checkin_outside = _load_city_array('checkin_outside')[:, :config.time_slots, cate_list]

# [num_regions, num_cate]
config.poi_inside = _load_city_array('poi_inside')[:, cate_list]
config.poi_outside = _load_city_array('poi_outside')[:, cate_list]
# Divide every category column by its mean over regions. The loop bound follows
# the selected categories instead of a hard-coded 14, and the loop variable no
# longer shadows the builtin `iter`.
for cate_idx in range(len(cate_list)):
    config.poi_inside[:, cate_idx] /= config.poi_inside[:, cate_idx].mean()
    config.poi_outside[:, cate_idx] /= config.poi_outside[:, cate_idx].mean()
    
class DFGDataset(Dataset):
    """Map-style dataset with one sample per (region, time slot) pair.

    Samples are ordered region-major: indices ``0 .. len(time_list) - 1``
    belong to region 0, the next ``len(time_list)`` indices to region 1, and so
    on.

    Args:
        config: object exposing the arrays ``inflow``, ``outflow``,
            ``checkin_inside``, ``checkin_outside``, ``poi_inside`` and
            ``poi_outside``; the first axis of each is indexed by region.
        time_list: sequence of time-slot indices that this split covers.
    """

    # The raw slot index is reduced modulo this value for the 't' field;
    # presumably the number of slots in one day (both configured totals, 336
    # and 1440, are multiples of 48) -- TODO confirm.
    SLOTS_PER_DAY = 48

    def __init__(self, config, time_list):
        super(DFGDataset, self).__init__()
        self.config = config
        self.time_list = time_list
        self.num_regions = self.config.inflow.shape[0]

    def __getitem__(self, index):
        """Return the dict of tensors for the ``index``-th (region, slot) pair."""
        # Integer arithmetic instead of float division + truncation.
        region, offset = divmod(int(index), len(self.time_list))
        t = self.time_list[offset]
        # Read from the config handed to the constructor, not from whatever
        # module-level `config` happens to exist in the kernel.
        cfg = self.config
        return {
            'inflow': torch.Tensor([cfg.inflow[region, t]]),
            'outflow': torch.Tensor([cfg.outflow[region, t]]),
            'checkin_inside': torch.Tensor(cfg.checkin_inside[region, t]),
            'checkin_outside': torch.Tensor(cfg.checkin_outside[region, t]),
            'poi_inside': torch.Tensor(cfg.poi_inside[region]),
            'poi_outside': torch.Tensor(cfg.poi_outside[region]),
            't': torch.LongTensor([t % self.SLOTS_PER_DAY]),
        }

    def __len__(self):
        """Number of (region, slot) pairs in this split."""
        return len(self.time_list) * self.num_regions
    
# Build each split's dataset once so the batch sizes can be derived from it.
train_set = DFGDataset(config, train_list)
valid_set = DFGDataset(config, valid_list)
test_set = DFGDataset(config, test_list)

# Training uses a single full batch. `len(train_set)` equals
# len(train_list) * num_regions, which is what the previous expression tried to
# compute through an undefined module-level `self`.
train_loader = DataLoader(train_set, num_workers=5, batch_size=len(train_set), shuffle=True)

# `config.batch_size` is not assigned anywhere in the visible notebook. It is
# honoured when present; otherwise each evaluation split is served as one full
# batch, mirroring the training loader.
eval_batch_size = getattr(config, 'batch_size', None)
valid_loader = DataLoader(valid_set, num_workers=5, batch_size=eval_batch_size or len(valid_set), shuffle=False)
test_loader = DataLoader(test_set, num_workers=5, batch_size=eval_batch_size or len(test_set), shuffle=False)

In [None]:
def regress(regressor, trainInput, trainTarget, testInput, testTarget, inflow_max):
    """Fit ``regressor`` on the training split and score it on both splits.

    Args:
        regressor: estimator exposing ``fit(X, y)`` and ``predict(X)``.
        trainInput: feature matrix used for fitting and for the train scores.
        trainTarget: training targets; flattened to 1-D before use.
        testInput: feature matrix used for the test scores.
        testTarget: test targets; flattened to 1-D before use.
        inflow_max: factor multiplied into RMSE and MAE (NMSE is returned as
            computed).

    Returns:
        Tuple ``(NMSE_train, RMSE_train, MAE_train, NMSE_test, RMSE_test,
        MAE_test)``.

    NOTE(review): ``nmse``, ``rmse`` and ``mae`` are looked up at call time and
    are not defined in the visible part of this notebook (the import cell only
    brings in ``get_MSE``/``get_RMSE``/``get_MAE``/``get_NRMSE``) -- confirm
    where they are supposed to come from.
    """
    # Flatten the targets once and use the flattened arrays everywhere. The
    # previous code flattened only for `fit`; scoring an (N, 1) target against
    # the (N,) output of `predict` broadcasts to an (N, N) error matrix and
    # silently produces wrong metrics.
    trainTarget = np.ravel(trainTarget)
    testTarget = np.ravel(testTarget)

    regressor.fit(trainInput, trainTarget)

    def _score(target, predicted):
        # NMSE stays on the scaled data; RMSE and MAE are multiplied by inflow_max.
        return (nmse(target, predicted),
                rmse(target, predicted) * inflow_max,
                mae(target, predicted) * inflow_max)

    train_scores = _score(trainTarget, regressor.predict(trainInput))
    test_scores = _score(testTarget, regressor.predict(testInput))
    return train_scores + test_scores

In [None]:
# The performance of RF on each city.
#
# One forest is trained per (city, feature set) pair. Going by the printed
# labels, the N*/O* inputs are the feature sets without/with outside POI and
# the S/B suffix selects shanghai/beijing.
# NOTE(review): the train*/test*/inflow_max_* arrays are not created in the
# visible part of this notebook and must already exist in the kernel.
# The misspelling 'outsise' in the labels is kept so the printed output stays
# comparable with earlier runs.

# (label, train inputs, train targets, test inputs, test targets, flow scale)
rf_cases = [
    ('Random Forest - without outsise POI in shanghai:', trainInputNS, trainTargetS, testInputNS, testTargetS, inflow_max_s),
    ('Random Forest - with outsise POI in shanghai:', trainInputOS, trainTargetS, testInputOS, testTargetS, inflow_max_s),
    ('Random Forest - without outsise POI in beijing:', trainInputNB, trainTargetB, testInputNB, testTargetB, inflow_max_b),
    ('Random Forest - with outsise POI in beijing:', trainInputOB, trainTargetB, testInputOB, testTargetB, inflow_max_b),
]

for rf_label, rf_train_x, rf_train_y, rf_test_x, rf_test_y, rf_scale in rf_cases:
    # Fresh forest per case; the fixed seed makes the reported numbers
    # reproducible across kernel restarts.
    rfr = RFR(n_estimators=50, random_state=0)
    NMSE_train, RMSE_train, MAE_train, NMSE_test, RMSE_test, MAE_test = regress(
        rfr, trainInput=rf_train_x, trainTarget=rf_train_y,
        testInput=rf_test_x, testTarget=rf_test_y, inflow_max=rf_scale)
    print(rf_label)
    print([round(each, 6) for each in [NMSE_train, RMSE_train, MAE_train, NMSE_test, RMSE_test, MAE_test]])

In [None]:
# The performance of decision tree on each city.
#
# One tree is trained per (city, feature set) pair. Going by the printed
# labels, the N*/O* inputs are the feature sets without/with outside POI and
# the S/B suffix selects shanghai/beijing.
# NOTE(review): the train*/test*/inflow_max_* arrays are not created in the
# visible part of this notebook and must already exist in the kernel.
# The misspelling 'outsise' in the labels is kept so the printed output stays
# comparable with earlier runs.

# (label, train inputs, train targets, test inputs, test targets, flow scale)
dt_cases = [
    ('Decision Tree - without outsise POI in shanghai:', trainInputNS, trainTargetS, testInputNS, testTargetS, inflow_max_s),
    ('Decision Tree - with outsise POI in shanghai:', trainInputOS, trainTargetS, testInputOS, testTargetS, inflow_max_s),
    ('Decision Tree - without outsise POI in beijing:', trainInputNB, trainTargetB, testInputNB, testTargetB, inflow_max_b),
    ('Decision Tree - with outsise POI in beijing:', trainInputOB, trainTargetB, testInputOB, testTargetB, inflow_max_b),
]

for dt_label, dt_train_x, dt_train_y, dt_test_x, dt_test_y, dt_scale in dt_cases:
    # Fresh tree per case; a fixed seed removes the run-to-run variation that
    # scikit-learn trees can show when several splits tie.
    dtr = DTR(random_state=0)
    NMSE_train, RMSE_train, MAE_train, NMSE_test, RMSE_test, MAE_test = regress(
        dtr, trainInput=dt_train_x, trainTarget=dt_train_y,
        testInput=dt_test_x, testTarget=dt_test_y, inflow_max=dt_scale)
    print(dt_label)
    print([round(each, 6) for each in [NMSE_train, RMSE_train, MAE_train, NMSE_test, RMSE_test, MAE_test]])

In [None]:
# The performance of support vector machine on each city.
#
# One NuSVR model is trained per (city, feature set) pair. Going by the printed
# labels, the N*/O* inputs are the feature sets without/with outside POI and
# the S/B/Z suffix selects shanghai/beijing/shenzhen.
# NOTE(review): the train*/test*/inflow_max_* arrays are not created in the
# visible part of this notebook and must already exist in the kernel; shenzhen
# in particular has no entry in the time-slot table of the config cell.
# The misspelling 'outsise' in the labels is kept so the printed output stays
# comparable with earlier runs.

# (label, train inputs, train targets, test inputs, test targets, flow scale)
svm_cases = [
    ('SVM - without outsise POI in shanghai:', trainInputNS, trainTargetS, testInputNS, testTargetS, inflow_max_s),
    ('SVM - with outsise POI in shanghai:', trainInputOS, trainTargetS, testInputOS, testTargetS, inflow_max_s),
    ('SVM - without outsise POI in beijing:', trainInputNB, trainTargetB, testInputNB, testTargetB, inflow_max_b),
    ('SVM - with outsise POI in beijing:', trainInputOB, trainTargetB, testInputOB, testTargetB, inflow_max_b),
    ('SVM - without outsise POI in shenzhen:', trainInputNZ, trainTargetZ, testInputNZ, testTargetZ, inflow_max_z),
    ('SVM - with outsise POI in shenzhen:', trainInputOZ, trainTargetZ, testInputOZ, testTargetZ, inflow_max_z),
]

for svm_label, svm_train_x, svm_train_y, svm_test_x, svm_test_y, svm_scale in svm_cases:
    # Fresh model per case so that no fitted state carries over.
    svm = NuSVR()
    NMSE_train, RMSE_train, MAE_train, NMSE_test, RMSE_test, MAE_test = regress(
        svm, trainInput=svm_train_x, trainTarget=svm_train_y,
        testInput=svm_test_x, testTarget=svm_test_y, inflow_max=svm_scale)
    print(svm_label)
    print([round(each, 6) for each in [NMSE_train, RMSE_train, MAE_train, NMSE_test, RMSE_test, MAE_test]])