In [3]:
import copy
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Lasso, SGDRegressor
from sklearn.neural_network import MLPRegressor

# from nnpred import nn_train, nn_pred, nn_finetune
from nnpred import *

# from statsmodels.tsa.arima.model import ARIMA
from pmdarima.arima import auto_arima



from utils import  MAPE, SMAPE, RMSE, MAE

### read data

In [4]:
city_list =['Perth', 'HK', 'PALO', 'DUNDEE', 'BOULDER','Auchterarder','Kinross','Pitlochry','Crieff','Aberfeldy','Dunkeld','Blairgowrie']
city_num = 12

data_list = []
file_list = [
    './Data/STATION/A1-PERTH.csv',
    './Data/STATION/A2-XIANGGANG.csv',
    './Data/STATION/A3-PALO.csv',
    './Data/STATION/A4-DUNDEE.csv',
    './Data/STATION/A5-BOULDER.csv',
    './Data/STATION/A6-Auchterarder.csv',
    './Data/STATION/A7-Kinross.csv',
    './Data/STATION/A8-Pitlochry.csv',
    './Data/STATION/A9-Crieff.csv',
    './Data/STATION/A10-Aberfeldy.csv',
    './Data/STATION/A11-Dunkeld.csv',
    './Data/STATION/A12-Blairgowrie.csv'

]

for file in file_list:

    data = pd.read_csv(file).values
    data_list.append(data[:,1:])
    

# prediction settings

his_len = 6
pred_len = 3

# models

# model_name = 'RF'
# model_name = 'MLP'
# model_name = 'LASSO'
# model_name = 'SGD'
model_name = 'LSTM'
# model_name = 'GRU'

In [5]:
for i in range(city_num):

    print(city_list[i], data_list[i].mean())

Perth 1.2238425925925926
HK 1.1412760416666667
PALO 0.7468253968253968
DUNDEE 1.0719832735961767
BOULDER 0.22690058479532163
Auchterarder 0.26666666666666666
Kinross 0.3506944444444444
Pitlochry 0.45092592592592595
Crieff 0.5555555555555556
Aberfeldy 0.5513888888888889
Dunkeld 0.24444444444444444
Blairgowrie 0.26944444444444443


### get train and test data

In [6]:
def get_train_test (data, mode, his_len = 12, pred_len = 3, train_rate = 0.8):

    train_num = int(data.shape[0] * train_rate)
    XS, YS = [], []
    if mode == 'train':
        for i in range(train_num - pred_len - his_len + 1):
            x = data[i:i + his_len, :]
            y = data[i + his_len:i + his_len + pred_len, :]
            XS.append(x), YS.append(y)
    elif mode == 'test':
        for i in range(train_num - his_len,
                       data.shape[0] - pred_len - his_len + 1):
            x = data[i:i + his_len, :]
            y = data[i + his_len:i + his_len + pred_len, :]
            XS.append(x), YS.append(y)

    XS, YS = np.array(XS), np.array(YS)
    XS, YS = np.squeeze(XS), np.squeeze(YS)
    
    return XS, YS

In [7]:
# train test split

train_list = []
test_list = []
scaler_list = []
sensor_num_list = []

for data in data_list:

    # data = np.reshape(data, (-1,1))
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    
    train_x, train_y = get_train_test (data, 'train', his_len, pred_len, train_rate = 0.8 )
    test_x, test_y = get_train_test (data, 'test', his_len, pred_len, train_rate = 0.8)

    sensor_num_list.append(data.shape[1])

    train_x = np.reshape(train_x, (-1, his_len))
    train_y = np.reshape(train_y, (-1, pred_len))
    test_x = np.reshape(test_x, (-1, his_len))
    test_y = np.reshape(test_y, (-1, pred_len))



    train_list.append([train_x, train_y])
    test_list.append([test_x, test_y])
    scaler_list.append(scaler)


In [8]:
train_list[0][0].shape

(3408, 6)

### training and testing

In [43]:

# source, target, metric
transfer_rec = np.zeros((city_num, city_num, 4))
wo_transfer_rec = np.zeros((city_num, 4))

for source_idx in range(city_num):
    # train on the source city
    # source_idx = 0
    print('-------------------------------')
    print('-------Source city: {:s}-------'.format(city_list[source_idx]))
    print('-------------------------------')

    source_x, source_y = train_list[source_idx]
    print(source_x.shape, source_y.shape)


    if model_name == 'RF':
        model = RandomForestRegressor(max_depth=2, random_state=0, warm_start=True)
        model.fit(source_x, source_y)
    elif model_name == 'MLP':
        model = MLPRegressor(random_state=0, warm_start=True)
        model.fit(source_x, source_y)
    elif model_name == 'LASSO':
        model = Lasso(random_state=0, warm_start=True)
        model.fit(source_x, source_y)
    elif model_name == 'SGD':
        model = SGDRegressor(random_state=0, warm_start=True)
        model.fit(source_x, np.mean(source_y, axis=1))
    elif model_name == 'LSTM' or model_name == 'GRU':
        model = nn_train(source_x, source_y, model_name, learning_rate=1e-2)

    else:
        pass

    # test on the source city
    test_x, test_y = test_list[source_idx]

    if model_name == 'RF' or model_name == 'MLP' or model_name == 'LASSO':
        pred_y = model.predict(test_x)
    elif model_name == 'SGD':
        pred_y = model.predict(test_x)
        pred_y = np.reshape(pred_y, (-1, 1))
        pred_y = pred_y.repeat(pred_len ,axis=1)
    elif model_name == 'LSTM' or model_name == 'GRU':
        pred_y = nn_pred(test_x, test_y, model)

    else:
        pass

    test_y = np.reshape(test_y, (-1, sensor_num_list[source_idx], pred_len))
    pred_y = np.reshape(pred_y, (-1, sensor_num_list[source_idx], pred_len))

    test_y = scaler_list[source_idx].inverse_transform(test_y.transpose(0,2,1))
    pred_y = scaler_list[source_idx].inverse_transform(pred_y.transpose(0,2,1))
    pred_y = np.around(pred_y.astype(np.float32))
    pred_y [pred_y < 0] = 0

    mape, smape, rmse, mae = MAPE(test_y, pred_y), SMAPE(test_y, pred_y), RMSE(test_y, pred_y), MAE(test_y, pred_y)

    print('Test MAPE1: SMAPE: RMSE:  MAE: | {:.4f} | {:.4f} | {:.4f} | {:.4f}'.format(mape, smape, rmse, mae))

    transfer_rec[source_idx, source_idx, 0] = mape
    transfer_rec[source_idx, source_idx, 1] = smape 
    transfer_rec[source_idx, source_idx, 2] = rmse 
    transfer_rec[source_idx, source_idx, 3] = mae 


    # transfer to other cities
    for target_idx in range(city_num):

        if target_idx == source_idx:
            continue
    # target_idx = 1
        print('-------Target city: {:s}--------'.format(city_list[target_idx]))


        ###################
        #transfer
        ###################
        source_model = copy.copy(model)

        target_x, target_y = train_list[target_idx]
        # target_x = target_x[-24+his_len:, ]
        # target_y = target_y[-24+his_len:, ]
        target_x = target_x[(-24+his_len)*sensor_num_list[target_idx]:, ]
        target_y = target_y[(-24+his_len)*sensor_num_list[target_idx]:, ]
        print(target_x.shape, target_y.shape)

        test_x, test_y = test_list[target_idx]
        # transfer train and test
        if model_name == 'RF' or model_name == 'MLP'  or model_name == 'LASSO':
            source_model.fit(target_x, target_y)
            pred_y = source_model.predict(test_x)

        elif model_name == 'SGD':
            source_model.fit(target_x, np.mean(target_y, axis=1))
            pred_y = source_model.predict(test_x)
            pred_y = np.reshape(pred_y, (-1, 1))
            pred_y = pred_y.repeat(pred_len ,axis=1)
        elif model_name == 'LSTM' or model_name == 'GRU':
            source_model = nn_finetune(test_x, test_y, source_model)
            pred_y = nn_pred(test_x, test_y, source_model)

        else:
            pass

        test_y = np.reshape(test_y, (-1, sensor_num_list[target_idx], pred_len))
        pred_y = np.reshape(pred_y, (-1, sensor_num_list[target_idx], pred_len))
        test_y = scaler_list[target_idx].inverse_transform(test_y.transpose(0,2,1))
        pred_y = scaler_list[target_idx].inverse_transform(pred_y.transpose(0,2,1))
        pred_y = np.around(pred_y.astype(np.float32))
        pred_y [pred_y < 0] = 0

        mape, smape, rmse, mae = MAPE(test_y, pred_y), SMAPE(test_y, pred_y), RMSE(test_y, pred_y), MAE(test_y, pred_y)

        print('Transfer Test MAPE1: SMAPE: RMSE:  MAE: | {:.4f} | {:.4f} | {:.4f} | {:.4f}'.format(mape, smape, rmse, mae))

        transfer_rec[source_idx, target_idx, 0] = mape
        transfer_rec[source_idx, target_idx, 1] = smape 
        transfer_rec[source_idx, target_idx, 2] = rmse 
        transfer_rec[source_idx, target_idx, 3] = mae 


        ###################
        # with out transfer
        ###################
        test_x, test_y = test_list[target_idx]

        if model_name == 'RF':
            wo_trans_model = RandomForestRegressor(max_depth=2, random_state=0)
            wo_trans_model.fit(target_x, target_y)
            pred_y = wo_trans_model.predict(test_x)

        elif model_name == 'MLP':
            wo_trans_model = MLPRegressor(random_state=0)
            wo_trans_model.fit(target_x, target_y)
            pred_y = wo_trans_model.predict(test_x)

        elif model_name == 'LASSO':
            wo_trans_model = MLPRegressor(random_state=0)
            wo_trans_model.fit(target_x, target_y)
            pred_y = wo_trans_model.predict(test_x)
        elif model_name == 'SGD':
            wo_trans_model = SGDRegressor(random_state=0)
            wo_trans_model.fit(target_x, np.mean(target_y, axis=1))
            pred_y = wo_trans_model.predict(test_x)
            pred_y = np.reshape(pred_y, (-1, 1))
            pred_y = pred_y.repeat(pred_len ,axis=1)
        elif model_name == 'LSTM' or model_name == 'GRU':
            wo_trans_model = nn_train(target_x, target_y, model_name, learning_rate=1e-2)
            pred_y = nn_pred(test_x, test_y, wo_trans_model)

        else:
            pass

        test_y = np.reshape(test_y, (-1, sensor_num_list[target_idx], pred_len))
        pred_y = np.reshape(pred_y, (-1, sensor_num_list[target_idx], pred_len))

        test_y = scaler_list[target_idx].inverse_transform(test_y.transpose(0,2,1))
        pred_y = scaler_list[target_idx].inverse_transform(pred_y.transpose(0,2,1))
        pred_y = np.around(pred_y.astype(np.float32))
        pred_y [pred_y < 0] = 0

        mape, smape, rmse, mae = MAPE(test_y, pred_y), SMAPE(test_y, pred_y), RMSE(test_y, pred_y), MAE(test_y, pred_y)

        print('Wo Transfer Test MAPE1: SMAPE: RMSE:  MAE: | {:.4f} | {:.4f} | {:.4f} | {:.4f}'.format(mape, smape, rmse, mae))

        wo_transfer_rec[target_idx, 0] = mape
        wo_transfer_rec[target_idx, 1] = smape 
        wo_transfer_rec[target_idx, 2] = rmse 
        wo_transfer_rec[target_idx, 3] = mae 

-------------------------------
-------Source city: Perth-------
-------------------------------
(3408, 6) (3408, 3)
Early stopping at epoch: 69
epoch 69 train loss: 0.5080679432924233 validation loss: 0.5376105904579163
Test MAPE1: SMAPE: RMSE:  MAE: | 0.2824 | 0.4091 | 0.8395 | 0.4811
-------Target city: HK--------
(180, 6) (180, 3)
Early stopping at epoch: 55
epoch 55 train loss: 0.8996727693150615 validation loss: 0.47997573018074036
Transfer Test MAPE1: SMAPE: RMSE:  MAE: | 0.2075 | 0.1908 | 0.8220 | 0.5421
Early stopping at epoch: 24
epoch 24 train loss: 0.8132673501968384 validation loss: 0.9421013593673706
Wo Transfer Test MAPE1: SMAPE: RMSE:  MAE: | 0.3196 | 0.4577 | 0.7618 | 0.5827
-------Target city: PALO--------
(126, 6) (126, 3)
Early stopping at epoch: 23
epoch 23 train loss: 0.8119732737541199 validation loss: 1.0458014011383057
Transfer Test MAPE1: SMAPE: RMSE:  MAE: | 0.5027 | 0.8253 | 1.0028 | 0.7739
Early stopping at epoch: 45
epoch 45 train loss: 0.6855440735816956 

### write result to file

In [44]:
metric_list = ['mape', 'smape', 'rmse', 'mae']

for i in range(len(metric_list)):

    save_df = pd.DataFrame(transfer_rec[:,:,i], index = city_list, columns= city_list)
    save_df.index.name = 'source \ target'
    save_df.loc['wo_trans'] = wo_transfer_rec[:,i]

    save_df.to_csv('res/station/' + model_name + '_pred_' + str(pred_len) + '_' + metric_list[i] + '.csv')