In [None]:
#coding=utf-8
from __future__ import print_function
import setproctitle
# Give this process a recognisable title so it can be spotted in process
# listings (e.g. ps/top) on a shared machine.
setproctitle.setproctitle('BeijingFlow@shaoerzhuo')

import os, tqdm, torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from utils.datasets import DFGDataset, collate_fn
import utils.metrics as metrics
from model.correlation import get_CORR_numpy, get_CORR_numpy2, get_CORR_torch, get_CORR_torch2

class config:
    """Run-level settings for this notebook.

    The class is used as a plain namespace (never instantiated); settings are
    read as ``config.<name>``.
    """
    cuda_num = 0            # presumably the CUDA device index -- not read by the cells shown here
    cityname = 'shanghai'   # selects the dataset directory and the normalisation constants
    io = 'outflow'          # flow direction; key into the 'inflow'/'outflow' tables
    time_slice = 48         # presumably the number of time slices per sample -- TODO confirm
    epoch_num = 10000       # not read by the cells shown here
    batch_size = 2**12      # batch size handed to every DataLoader
    interval = 50           # not read by the cells shown here

# Log only the user-defined settings; the raw class __dict__ also contains
# interpreter internals (__module__, __doc__, ...) that just add noise.
print({name: value for name, value in vars(config).items() if not name.startswith('__')})

# Presumably the number of check-in categories and POI categories -- TODO confirm.
checkin_cate = 14
poi_cate = 14

# Root directory of the datasets. The original absolute path stays the default;
# set DFG_DATASET_ROOT to run the notebook on a machine with a different layout.
dataset_root = os.environ.get('DFG_DATASET_ROOT', '/data2/shaoerzhuo/DeepFlowGen/Dataset')

# Settings handed to DFGDataset for every split.
dataset_config = {
    'cityname' : config.cityname,
    'dataset_path' : os.path.join(dataset_root, config.cityname, 'dataset'),
    'i/o' : config.io,
    # Per-city / per-direction constant; also used below to rescale the reported RMSE.
    'max_value' : {'beijing':{'inflow':1587.8500, 'outflow':1929.5500}, 'shanghai':{'inflow':8179.6667, 'outflow':9417.3333}}[config.cityname][config.io],
    'poi_cate' : poi_cate,
    'beta' : 1
}

# NOTE(review): the 'beijing' entries here (1583.9167 / 1925.8333) differ from
# the ones in dataset_config['max_value'] (1587.8500 / 1929.5500); the
# 'shanghai' entries agree. Confirm which table is authoritative. `flow_max`
# is not read by the cells shown here.
flow_max = {'shanghai':{'inflow':8179.6667, 'outflow':9417.3333}, 'beijing':{'inflow':1583.9167, 'outflow':1925.8333}}[config.cityname][config.io]

# Build the three dataset splits from the shared configuration.
train_dataset, valid_dataset, test_dataset = [
    DFGDataset(dataset_config, split) for split in ('train', 'valid', 'test')
]

# Loader settings common to every split; only the training split is shuffled.
loader_kwargs = dict(num_workers=10, batch_size=config.batch_size, collate_fn=collate_fn)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
valid_loader = DataLoader(valid_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Column-normalise the first `poi_cate` POI features so that every column sums
# to 1. The result is a NEW array: the dataset's own `poi` is left untouched,
# so re-running this cell (or iterating the loader later) sees unmodified data.
# Assumes `dataset.poi` is a 2-D array-like of shape (regions, features) -- TODO confirm.
poi_raw = np.asarray(train_loader.dataset.poi)[:, :poi_cate]
col_totals = poi_raw.sum(axis=0)
# An all-zero column would otherwise become NaN and make the NMF fit fail;
# dividing it by 1 keeps it at zero instead.
col_totals[col_totals == 0] = 1
# true_divide: real division whatever the dtype (integer counts included).
poi = np.true_divide(poi_raw, col_totals)

flow = train_loader.dataset.flow
checkin = train_loader.dataset.checkin

In [None]:
from sklearn.decomposition import NMF
from copy import deepcopy  # kept in the namespace in case other cells rely on it

# Shared NMF settings. random_state is pinned so that repeated runs of the
# notebook produce the same factors (see the `random_state` parameter of NMF).
nmf_params = dict(n_components=14, max_iter=1000, random_state=0)

# Factorise flow.T ~= W_flow @ H_flow with a dedicated estimator, so its
# components_ cannot be affected by the second fit below.
flow_nmf = NMF(**nmf_params)
v = flow_nmf.fit_transform(flow.T).T   # transposed transform output (W_flow.T)
u = flow_nmf.components_.T             # transposed components (H_flow.T)

# Factorise poi.T the same way. The name `nmf` is kept for this estimator
# because it is the object the original cell left bound to `nmf`.
nmf = NMF(**nmf_params)
p = nmf.fit_transform(poi.T).T
u_new = nmf.components_.T

In [None]:
# Predicted flow: POI-derived loadings (u_new) combined with the flow-derived
# factors (v), then rescaled so the prediction has the same global mean as the
# observed flow.
flow_pred = np.matmul(u_new, v)
flow_pred *= (flow.mean() / flow_pred.mean())

# Per-component contribution u[n, k] * v[k, t], with the axes reordered so the
# component axis comes last: iacf[n, t, k].
iacf = (u[:, :, np.newaxis] * v[np.newaxis]).transpose([0, 2, 1])

# Output buffer for the per-time-slice regression. Dimensions come from the
# named settings instead of repeating the literals 48 and 14.
checkin_pred = np.zeros([poi.shape[0], config.time_slice, checkin_cate])

In [None]:
from sklearn.linear_model import LinearRegression

# Fit one independent linear model per time slice, mapping the component
# contributions of that slice onto the observed check-ins, and store its
# predictions for the same samples it was fitted on.
for slot in range(checkin_pred.shape[1]):
    slot_features = iacf[:, slot]
    lr = LinearRegression().fit(slot_features, checkin[:, slot])
    checkin_pred[:, slot] = lr.predict(slot_features)

# Flow metrics: RMSE multiplied by the dataset's max_value (presumably to
# undo the dataset's normalisation -- TODO confirm), then NRMSE.
flow_rmse = metrics.get_RMSE(flow_pred, flow)
print(flow_rmse * dataset_config['max_value'])
print(metrics.get_NRMSE(flow_pred, flow))
# Correlation between predicted and observed check-ins.
print(get_CORR_numpy2(checkin_pred, checkin))