# In[ ]:
#coding=utf-8
from __future__ import print_function
import setproctitle
setproctitle.setproctitle('BeijingFlow@shaoerzhuo')

import os, tqdm, torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from utils.datasets import DFGDataset, collate_fn
import utils.metrics as metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
import lightgbm as gbm

# In[ ]:
# Tree-based baselines: fit a random forest and/or a LightGBM regressor on the
# first batch of the training set and report validation MAE / RMSE / NRMSE.
use_LightGBM = True
use_RF = True

# Per-city, per-direction factor that MAE and RMSE are multiplied by before
# printing.  NOTE(review): presumably the maximum flow used to normalise the
# targets -- confirm against the dataset preparation code.
FLOW_MAX = {
    'shanghai': {'inflow': 8179.6667, 'outflow': 9417.3333},
    'beijing': {'inflow': 1583.9167, 'outflow': 1925.8333},
}


def _first_batch_arrays(loader):
    """Return ``(features, target)`` NumPy arrays built from the first batch.

    ``features`` is the column-wise concatenation of the batch's ``'poi'`` and
    ``'t'`` tensors; ``target`` is column 0 of the batch's ``'flow'`` tensor.
    Only the first batch is consumed, so samples beyond the loader's
    ``batch_size`` are not used.

    Raises:
        ValueError: if the loader yields no batches.
    """
    for batch in loader:
        features = np.concatenate(
            [batch['poi'].numpy(), batch['t'].numpy()], axis=1)
        target = batch['flow'].numpy()[:, 0]
        return features, target
    raise ValueError('data loader yielded no batches')


for city in ['beijing']:
    # Named `flow_dir` rather than `iter`: the loop variable lives at
    # module/notebook scope and would otherwise shadow the builtin `iter`.
    for flow_dir in ['inflow', 'outflow']:
        for poi_cate in [14]:
            for beta in [1]:
                flow_max = FLOW_MAX[city][flow_dir]
                print('{0} - {1} - {2}'.format(flow_dir, poi_cate, city))

                class config:
                    cuda_num = 0
                    cityname = city
                    io = flow_dir
                    time_slice = 48
                    epoch_num = 10000
                    batch_size = 32
                    interval = 50

                # Not read in this cell; kept in case later cells rely on it.
                checkin_cate = 14

                dataset_config = {
                    'cityname': config.cityname,
                    'dataset_path': os.path.join('/data2/shaoerzhuo/DeepFlowGen/Dataset', config.cityname, 'dataset'),
                    'beta': beta,
                    'i/o': config.io,
                    'poi_cate': poi_cate,
                }

                train_dataset = DFGDataset(dataset_config, 'train')
                valid_dataset = DFGDataset(dataset_config, 'valid')
                test_dataset = DFGDataset(dataset_config, 'test')

                # One very large batch per split; only the training split is
                # shuffled.  The test loader is built but not consumed here.
                train_loader = DataLoader(train_dataset, num_workers=5, batch_size=100000, shuffle=True, collate_fn=collate_fn)
                valid_loader = DataLoader(valid_dataset, num_workers=5, batch_size=100000, shuffle=False, collate_fn=collate_fn)
                test_loader = DataLoader(test_dataset, num_workers=5, batch_size=100000, shuffle=False, collate_fn=collate_fn)

                train_input, train_target = _first_batch_arrays(train_loader)
                valid_input, valid_target = _first_batch_arrays(valid_loader)

                # Builders are lazy so each model is only constructed right
                # before it is fitted, in the order RF -> LightGBM.
                model_builders = []
                if use_RF:
                    model_builders.append(
                        lambda: RandomForestRegressor(n_estimators=50))
                if use_LightGBM:
                    model_builders.append(
                        lambda: gbm.LGBMRegressor(n_estimators={'beijing': 1000, 'shanghai': 1000}[city]))

                for build_model in model_builders:
                    model = build_model()
                    model.fit(train_input, train_target)
                    valid_pred = model.predict(valid_input)
                    # MAE / RMSE are rescaled by flow_max; NRMSE is printed as
                    # returned by the metrics helper.
                    MAE = metrics.get_MAE(valid_pred, valid_target) * flow_max
                    RMSE = metrics.get_RMSE(valid_pred, valid_target) * flow_max
                    NRMSE = metrics.get_NRMSE(valid_pred, valid_target)
                    print('Epoch={0}\tMAE=\t{1:.4f}\tRMSE=\t{2:.4f}\tNRMSE=\t{3:.4f}'.format(0, MAE, RMSE, NRMSE))