In [None]:
#coding=utf-8
import setproctitle
setproctitle.setproctitle('BeijingFlow@shaoerzhuo')
from __future__ import print_function
import os, tqdm, torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from utils.datasets import DFGDataset, collate_fn
import utils.metrics as metrics

class config:
    # Plain namespace holding the run settings. Documented with comments only
    # (no docstring) so the namespace printed below stays exactly the same.
    cuda_num = 0            # CUDA device index handed to every .cuda(...) call
    cityname = 'beijing'    # picks the dataset directory and the flow_max entry
    io = 'inflow'           # flow direction; also a key of the flow_max table
    time_slice = 48         # not referenced in the visible cells; presumably time slots per day -- TODO confirm
    epoch_num = 10000       # number of passes over the training loader
    batch_size = 4096       # 2**12 samples per DataLoader batch
    interval = 50           # validation runs once every `interval` epochs


# Log the settings; this dumps the raw class namespace, dunder entries included.
print(vars(config))

# Category counts. `poi_cate` is forwarded to the dataset and sizes the MLP
# input layer; `checkin_cate` is not referenced in the visible cells.
checkin_cate = 14
poi_cate = 14

# Settings shared by every DFGDataset split.
# NOTE(review): the dataset root is an absolute, machine-specific path.
_dataset_path = os.path.join('/data2/shaoerzhuo/DeepFlowGen/Dataset', config.cityname, 'dataset')
dataset_config = {
    'cityname': config.cityname,
    'dataset_path': _dataset_path,
    'i/o': config.io,
    'poi_cate': poi_cate,
}

# Per-city, per-direction constant used to rescale MAE/RMSE when reporting.
# Presumably the maximum flow used to normalise the targets -- TODO confirm.
_FLOW_MAX_TABLE = {
    'shanghai': {'inflow': 8179.6667, 'outflow': 9417.3333},
    'beijing': {'inflow': 1583.9167, 'outflow': 1925.8333},
}
flow_max = _FLOW_MAX_TABLE[config.cityname][config.io]

# One dataset per split, built in train / valid / test order.
train_dataset, valid_dataset, test_dataset = (
    DFGDataset(dataset_config, split) for split in ('train', 'valid', 'test')
)

# All loaders share worker count, batch size and collate function; only the
# training loader shuffles.
_loader_kwargs = dict(num_workers=10, batch_size=config.batch_size, collate_fn=collate_fn)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
valid_loader = DataLoader(valid_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

In [None]:
class MLP(nn.Module):
    """Fully connected regressor over a time-slot embedding and POI features.

    The time index is embedded, concatenated with the POI feature vector and
    passed through a stack of ``Linear`` + ``Sigmoid`` layers ending in a
    single sigmoid unit, so every prediction lies in (0, 1).

    Args:
        input_dim: Width of ``batch['poi']``. When ``None`` (default) it is
            ``poi_cate * 2``, read from the notebook globals.
        num_time_slices: Number of rows of the time embedding table, i.e. the
            exclusive upper bound for the values in ``batch['t']``.
        cuda_num: Value stored in ``self.cuda_num``. When ``None`` (default)
            it is ``config.cuda_num``. Kept for backward compatibility only;
            ``forward`` follows the device the parameters live on.
    """

    def __init__(self, input_dim=None, num_time_slices=48, cuda_num=None):
        super(MLP, self).__init__()
        self.len_time_vec = 24
        self.hidden_dims = 128
        # Fall back to the notebook-level settings so that `MLP()` behaves
        # exactly as before.
        self.cuda_num = config.cuda_num if cuda_num is None else cuda_num
        if input_dim is None:
            input_dim = poi_cate * 2

        # Layers are created in the same order and under the same attribute
        # names as before, so state-dict keys and seeded initialisation match.
        self.time_embedding = nn.Embedding(num_time_slices, self.len_time_vec)
        self.dense1 = nn.Sequential(nn.Linear(input_dim + self.len_time_vec, self.hidden_dims), nn.Sigmoid())
        self.dense_block = nn.Sequential(
            nn.Linear(self.hidden_dims, self.hidden_dims), nn.Sigmoid(),
            nn.Linear(self.hidden_dims, self.hidden_dims), nn.Sigmoid(),
            nn.Linear(self.hidden_dims, self.hidden_dims), nn.Sigmoid(),
            nn.Linear(self.hidden_dims, self.hidden_dims), nn.Sigmoid(),
        )
        self.dense2 = nn.Sequential(nn.Linear(self.hidden_dims, 1), nn.Sigmoid())

    def forward(self, batch):
        """Predict one value in (0, 1) per sample.

        Args:
            batch: Mapping with a ``'poi'`` feature tensor and a ``'t'`` tensor
                of integer time indices. Assumes ``poi`` is
                ``(N, input_dim)`` and ``t`` is ``(N, 1)`` -- TODO confirm
                against ``collate_fn``.

        Returns:
            dict with the single key ``'total_crowd_flow'`` holding the
            prediction tensor (one column per sample row).
        """
        # Follow the module's own device instead of a hard-coded CUDA index,
        # so the model also works on CPU or after being moved to another GPU.
        device = self.time_embedding.weight.device
        poi = batch['poi'].to(device)
        t = batch['t'].to(device)

        # Keep the embedding of the first index along dim 1.
        time_vec = self.time_embedding(t)[:, 0]
        out = torch.cat([time_vec, poi], dim=1)
        out = self.dense1(out)
        out = self.dense_block(out)
        out = self.dense2(out)

        return {'total_crowd_flow' : out}

In [None]:
# Build the model on the configured GPU. The loss and the optimizer are kept
# as plain attributes on the model object.
model = MLP().cuda(config.cuda_num)
model.criterion = nn.MSELoss().cuda(config.cuda_num)
lr = 1e-4

# Not used by the code visible in this cell.
valid_list = [np.inf]
min_string = ''

for epoch in tqdm.tqdm(range(config.epoch_num), ncols=70, ascii=True):
    if epoch % 500 == 0:
        # Every 500 epochs (epoch 0 included, so the first rate actually used
        # is 5e-5) halve the learning rate and restart Adam with fresh moment
        # estimates. The decayed `lr` is passed to the optimizer; with a
        # literal 1e-4 here the halving would never take effect.
        lr /= 2
        model.optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-8)
    for batch in train_loader:
        model.optimizer.zero_grad()
        return_dict = model(batch)
        # NOTE(review): assumes batch['flow'] has the same shape as the model
        # output; otherwise MSELoss would broadcast the two -- confirm.
        loss = model.criterion(return_dict['total_crowd_flow'], batch['flow'].cuda(config.cuda_num))
        loss.backward()
        model.optimizer.step()
    # Validate on the last epoch of every `config.interval`-epoch window.
    if epoch % config.interval == config.interval-1:
        pred_total = []
        target_total = []
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory on the large validation batches.
        with torch.no_grad():
            for batch in valid_loader:
                pred_total.append(model(batch)['total_crowd_flow'].cpu().numpy())
                target_total.append(batch['flow'].numpy())
        pred_total = np.concatenate(pred_total, axis=0)
        target_total = np.concatenate(target_total, axis=0)
        # MAE and RMSE are multiplied by flow_max (presumably to report them in
        # the original flow units -- confirm); NRMSE is reported as returned.
        MAE = metrics.get_MAE(pred_total, target_total) * flow_max
        RMSE = metrics.get_RMSE(pred_total, target_total) * flow_max
        NRMSE = metrics.get_NRMSE(pred_total, target_total)
        print('Epoch={0}\tMAE=\t{1:.4f}\tRMSE=\t{2:.4f}\tNRMSE=\t{3:.4f}'.format(epoch, MAE, RMSE, NRMSE))