In [2]:
import pandas as pd
import torch
import torch.utils.data as utils
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import math
import numpy as np
import pandas as pd
import time
import visdom
import pickle
from sklearn import neighbors
from sklearn import linear_model
from sklearn import svm
from sklearn import multioutput
from sklearn import neural_network
from sklearn import tree
from sklearn import ensemble
from sklearn.cluster import KMeans
from tqdm import tqdm, tqdm_notebook
from pathlib import Path
import csv

from numba import njit, prange, jit

import multiprocessing
from joblib import Parallel, delayed

from fastprogress import master_bar, progress_bar
import matplotlib.pyplot as plt

In [2]:
class Novel_Model(nn.Module):
    """Recurrent regressor (GRU or LSTM cell) with decay-based handling of missing inputs.

    Features are split into two groups:

    * "active" columns (``active_col``): a missing value is replaced by a learned
      sigmoid-gated blend of the last observation and an externally imputed value.
    * "decayed" columns (all others): a missing value is replaced by a blend of the
      last observation and ``global_mean``, weighted by ``exp(-max(0, W * delta))``.

    When ``active_col`` is empty or covers every column (``is_together``), both
    estimates are computed for all columns and merged by two linear layers.

    Before every recurrent step the hidden state is multiplied by a decay factor
    derived from the time gaps.

    Parameters
    ----------
    input_size : int
        Number of feature columns per time step.
    output_size : int
        Width of the final linear layer.
    hidden_size : int
        Width of the recurrent state. As written, the hidden-state decay and the
        congestion filter multiply tensors of width ``input_size`` with tensors of
        width ``hidden_size``, so the two sizes must be equal (or broadcastable).
    active_col : sequence of int
        Indices of the "active" feature columns.
    global_mean : array-like
        Per-column mean, length ``input_size``.
    base : {"GRU", "LSTM"}
        Recurrent cell type.
    filtered : bool
        If True, channel 5 of the input gates two linear maps of the hidden state.
    ratio_cal : bool
        If True, ``out[:,0]*out[:,2] + out[:,1]*(1-out[:,2])`` is appended as an
        extra output column (needs ``output_size >= 3``).

    Raises
    ------
    ValueError
        If ``base`` is neither "GRU" nor "LSTM".
    """

    def __init__(self, input_size, output_size, hidden_size, active_col, global_mean, base="GRU", filtered=True, ratio_cal=False):
        super(Novel_Model,self).__init__()

        # Fail early instead of hitting an undefined `hidden` inside forward().
        if base not in ("GRU", "LSTM"):
            raise ValueError("base must be 'GRU' or 'LSTM', got %r" % (base,))

        active_size = len(active_col)
        decayed_size = input_size - active_size
        # Fixed: this used `inputs.shape[-1]`, but no `inputs` exists in __init__;
        # it only worked when a global named `inputs` happened to be alive in the kernel.
        decayed_col = [x for x in range(input_size) if x not in active_col]

        self.hidden_size = hidden_size
        self.base = base
        self.filtered = filtered
        self.ratio_cal = ratio_cal

        # The cell consumes the completed features concatenated with the mask.
        if base=="GRU":
            self.gru = nn.GRUCell(input_size + input_size, hidden_size, bias = True)
        else:
            self.lstm = nn.LSTMCell(input_size + input_size, hidden_size, bias = True)

        self.is_together = (input_size == active_size) or (active_size == 0)

        self.linear = nn.Linear(hidden_size,output_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)

        # NOTE: the constant tensors below are plain attributes, not registered
        # buffers, so `.to()` / `.cuda()` on the module will not move them.
        global_mean = np.array(global_mean)
        if self.is_together:
            self.gamma_x_l = nn.Linear(input_size, input_size)
            self.ratio1 = nn.Linear(input_size + input_size, input_size)
            self.ratio2 = nn.Linear(input_size, input_size)
            self.gamma_x_weight = torch.ones(input_size)
            self.gamma_x_sig = nn.Linear(input_size, input_size)
            self.global_mean = torch.tensor(global_mean).type(torch.FloatTensor)
        else:
            self.gamma_x_l_a = nn.Linear(active_size, active_size)
            self.gamma_x_l_d = nn.Linear(decayed_size, decayed_size)
            self.gamma_x_sig = nn.Linear(active_size, active_size)
            self.gamma_x_weight = torch.ones(active_size)
            self.zeros_d = torch.zeros(decayed_size)
            self.global_mean = torch.tensor(global_mean[decayed_col]).type(torch.FloatTensor)

        self.gamma_h_l = nn.Linear(hidden_size, hidden_size)

        self.zeros = torch.zeros(input_size)

        if self.filtered:
            self.cong_filter = nn.Linear(hidden_size, hidden_size)
            self.free_filter = nn.Linear(hidden_size, hidden_size)
        self.filter = nn.Linear(hidden_size, hidden_size)

        self.active_col = active_col

    def init_hidden(self,batch_size):
        """Return the initial state: all-ones hidden (plus all-zeros cell for LSTM)."""
        hidden = torch.ones(batch_size,self.hidden_size)
        if self.base=="GRU":
            return hidden
        elif self.base == "LSTM":
            cell = torch.zeros(batch_size,self.hidden_size)
            return hidden, cell

    def forward(self,inputs):
        """Run the recurrence over all time steps and project the last hidden state.

        ``inputs`` is indexed as (batch, channel, step, feature) with channels read as
        0 = raw values, 1 = last observation, 2 = mask, 3 = time gap,
        4 = imputed values and, when ``filtered``, 5 = filter indicator.

        NOTE(review): the dataloaders visible in this notebook stack channels as
        (data, mask, last observation, delta, imputed, filter), i.e. channels 1 and 2
        are swapped relative to what is read here - confirm which order is intended.
        """
        step_size = inputs.size(2)

        active_col = self.active_col
        decayed_col = [x for x in range(inputs.shape[-1]) if x not in active_col]

        # Integer indexing already drops the channel axis. The former
        # squeeze(dim=1) was a no-op except for step_size == 1, where it removed
        # the time axis and broke the per-step slicing below.
        X = inputs[:,0,:,:]
        X_last_obsv = inputs[:,1,:,:]
        Mask = inputs[:,2,:,:]
        Delta = inputs[:,3,:,:]
        x_imputed = inputs[:,4,:,:]

        if not self.is_together:
            im_active = x_imputed[:,:,active_col]

            in_decayed = X[:,:,decayed_col]
            in_active = X[:,:,active_col]

            lo_decayed = X_last_obsv[:,:,decayed_col]
            lo_active = X_last_obsv[:,:,active_col]

            ma_decayed = Mask[:,:,decayed_col]
            ma_active = Mask[:,:,active_col]

            dt_decayed = Delta[:,:,decayed_col]
            dt_active = Delta[:,:,active_col]

        if self.filtered:
            h_filter = inputs[:,5,:,:]

        if self.base=="GRU":
            hidden = self.init_hidden(inputs.size(0))
        else:
            (hidden, cell) = self.init_hidden(inputs.size(0))

        for i in range(step_size):

            if self.is_together:
                x = X[:,i,:]
                last_ob = X_last_obsv[:,i,:]
                mask = Mask[:,i,:]
                delta = Delta[:,i,:]
                x_imp = x_imputed[:,i,:]

                # Learned per-feature gate, independent of the data.
                delta_sig = torch.sigmoid(self.gamma_x_sig(self.gamma_x_weight))
                # Decay factors in (0, 1] driven by the time gaps.
                delta_x = torch.exp(-torch.max(self.zeros, self.gamma_x_l(delta)))
                delta_h = torch.exp(-torch.max(self.zeros, self.gamma_h_l(delta)))

                # Two candidate completions, merged by ratio1 / ratio2.
                x_a = (delta_sig * last_ob) + ((1-delta_sig) * x_imp)
                x_d = mask * x + (1-mask) * (delta_x * last_ob + (1-delta_x) * self.global_mean)
                x_before = torch.cat((x_a, x_d),1)
                x1 = self.ratio1(x_before)
                x = self.ratio2(x1)

                x_input = torch.cat((x, mask),1)
            else:
                in_d = in_decayed[:,i,:]
                in_a = in_active[:,i,:]

                lo_d = lo_decayed[:,i,:]
                lo_a = lo_active[:,i,:]

                ma_d = ma_decayed[:,i,:]
                ma_a = ma_active[:,i,:]

                dt_d = dt_decayed[:,i,:]
                dt_a = dt_active[:,i,:]
                dt = torch.cat((dt_a, dt_d), 1)

                im_a = im_active[:,i,:]

                delta_xa = torch.sigmoid(self.gamma_x_l_a(self.gamma_x_weight))
                delta_xd = torch.exp(-torch.max(self.zeros_d, self.gamma_x_l_d(dt_d)))
                delta_h = torch.exp(-torch.max(self.zeros, self.gamma_h_l(dt)))

                # Observed values pass through; missing ones are blended.
                x_a = ma_a * in_a + (1-ma_a) * (delta_xa * lo_a + (1-delta_xa) * im_a)
                x_d = ma_d * in_d + (1-ma_d) * (delta_xd * lo_d + (1-delta_xd) * self.global_mean)
                # Columns are reordered here: active block first, decayed block second.
                in_concat = torch.cat((x_a,x_d), 1)
                ma_concat = torch.cat((ma_a, ma_d), 1)

                x_input = torch.cat((in_concat, ma_concat),1)

            if self.filtered:
                h_filter_cong = h_filter[:,i,:]
                h_filter_free = 1 - h_filter_cong

            # Decay the previous hidden state before feeding the cell.
            hidden = delta_h * hidden

            if self.base=="GRU":
                hidden = self.gru(x_input, hidden)
            else:
                (hidden, cell)= self.lstm(x_input, (hidden,cell))

            if self.filtered:
                adj_hidden_cong = self.cong_filter(hidden)
                adj_hidden_free = self.free_filter(hidden)
                hidden = adj_hidden_cong * h_filter_cong + adj_hidden_free * h_filter_free

            hidden = self.filter(hidden)

        output = self.linear(hidden)

        if self.ratio_cal:
            # Extra column: mix of outputs 0 and 1 weighted by output 2.
            add = (output[:,0]*output[:,2])+(output[:,1]*(1-output[:,2]))
            add_col = add.unsqueeze(1)
            output_add = torch.cat((output, add_col),1)
            return output_add
        else:
            return output

In [24]:
class PrepareDataset_renew:
    
    def __init__(self, 
                 low_speed_matrix, high_speed_matrix, mean_speed_matrix,
                 low_count_matrix, high_count_matrix, all_count_matrix,                 
                 input_links, label_link,
                 thresholds,
                 train_proportion = 0.6, valid_proportion = 0.2, time_unit = 3, seed = 1024,
                 batch = 1000, seq_length = 7
                ):
        
        
        self.input_links = input_links
        self.label_link = label_link
        self.input_links.append(self.label_link)
        self.time_unit = time_unit
        
        empty_df = pd.DataFrame(index = all_count_matrix.index, columns = all_count_matrix.columns)[self.input_links]
                
        self.seed = seed
        self.train_proportion = train_proportion
        self.valid_proportion = valid_proportion
        self.batch = batch
        self.seq_length = seq_length
        
        self.low_speed = empty_df.copy()
        self.low_speed.update(low_speed_matrix)
        
        self.high_speed = empty_df.copy()
        self.high_speed.update(high_speed_matrix)
        
        self.mean_speed = empty_df.copy()
        self.mean_speed.update(mean_speed_matrix)
        
        self.low_count = empty_df.copy()
        self.low_count.update(low_count_matrix)
        
        self.high_count = empty_df.copy()
        self.high_count.update(high_count_matrix)
        
        self.all_count = empty_df.copy()
        self.all_count.update(all_count_matrix)
        
        self.low_ratio = self.low_count.fillna(0)/self.all_count
        self.label_nan_index = self.mean_speed[self.mean_speed[self.label_link].isnull()].index
        
        self.threshold = thresholds.loc[self.input_links, "threshold"].to_numpy()
        
    def delta_time(self, inputs):
        result = inputs.copy()
        result = result.isnull().astype(int)
        result.iloc[0,:]=0
        for i in tqdm_notebook(range(1, result.shape[0]), desc="Delta Time"):
            delta = (result.index[i]-result.index[i-1]).total_seconds()/60    
            result.iloc[i,:] = result.iloc[i,:]*(delta + result.iloc[i-1,:])            
        return result
    
    def data_cut(self, inputs, name="Data Cut"):
        #input: Pandas DF
        cutted = np.zeros([inputs.shape[0]-self.seq_length+1, self.seq_length, inputs.shape[1]])
        for i in tqdm_notebook(range(cutted.shape[0]), desc=name):
            cutted[i] = inputs.iloc[i:i+self.seq_length]
        return cutted
    
    def random_forest(self,inputs):
        """Impute missing cells of ``inputs`` with per-pattern random-forest regressors.

        The frame is cut into flattened windows of ``seq_length`` rows. For every
        distinct missingness pattern found among the windows, a regressor is fit
        on the fully observed windows (observed columns -> missing columns) and
        used to fill the windows showing that pattern. The filled windows are
        then mapped back onto the original rows and the overlapping estimates
        for each cell are averaged.

        Returns a DataFrame with the index and columns of ``inputs``.
        Uses the global NumPy RNG (``np.random.choice``), so results are only
        reproducible if the caller seeds it.
        """
        # One row per window, windows flattened to seq_length * n_columns features.
        fit_input = self.data_cut(inputs)
        fit_size = fit_input.shape[1]*fit_input.shape[2]
        fit_input = fit_input.reshape(-1, fit_size)
        fit_df = pd.DataFrame(fit_input)
        fit_isnan = np.isnan(fit_input)
        # Distinct missingness patterns across all windows.
        fit_isnan_uni = np.unique(fit_isnan, axis=0)
        # `init` is the "nothing missing" pattern; address_init holds the row
        # positions of the fully observed windows.
        init = np.zeros((fit_input.shape[1],)).astype(bool)
        address_init = np.where((fit_isnan==init).all(axis=1))
        # NOTE(review): both branches select the same rows (indexing with the
        # 1-tuple from np.where equals indexing with its only element), so the
        # 10000 threshold currently has no effect.
        if address_init[0].shape[0]>10000:   
            fit_init = fit_input[address_init[0]]
        else:
            fit_init = fit_input[address_init]
        pbar = tqdm_notebook(range(fit_isnan_uni.shape[0]))
        pbar.set_description("random forest ")
        for isnan in fit_isnan_uni:
            if (isnan==init).all():
                # Fully observed pattern: nothing to impute.
                pass
            elif isnan.sum()==fit_size:
                # Fully missing pattern: no features to predict from.
                pass            
            else:           
                # Windows that show exactly this pattern.
                address = np.where((fit_isnan==isnan).all(axis=1))[0]                
                # Train on the complete windows: observed columns -> missing columns.
                fit_X = fit_init[:,np.where(1-isnan)[0]]
                fit_Y = fit_init[:,np.where(isnan)[0]]
                # A single target column is flattened to 1-D for the regressor.
                if fit_Y.shape[0]==fit_Y.size:
                    fit_Y = fit_Y.ravel()
                regressor = ensemble.RandomForestRegressor(n_estimators=50, n_jobs=-1) 
                # Random 40%-sized training sample (np.random.choice draws with
                # replacement by default).
                index = np.random.choice(fit_X.shape[0], int(fit_X.shape[0]*0.4))
                regressor.fit(fit_X[index], fit_Y[index])
                pred_X = fit_input[:, np.where(1-isnan)[0]][address]
                pred_Y = regressor.predict(pred_X)
                # Restore a column axis so the assignment below is 2-D.
                if pred_Y.shape[0]==pred_Y.size:
                    pred_Y = pred_Y.reshape(pred_Y.size,1)
                # Write predictions into the missing columns of the matching windows.
                fit_df.loc[address, isnan]=pred_Y
            pbar.update()        
        
        # result_1 has one block of columns per window position; block i is
        # shifted down by i rows so each row lines up with its original timestamp.
        result_1 = pd.DataFrame(index = inputs.index, columns = fit_df.columns)
        result_2 = pd.DataFrame(index = inputs.index, columns = inputs.columns)
        for i in range(self.seq_length):
            result_1.iloc[i:i+fit_df.shape[0],i*inputs.shape[1]:(i+1)*inputs.shape[1]] = fit_df.iloc[:,i*inputs.shape[1]:(i+1)*inputs.shape[1]].to_numpy()

        # For each original column, average its seq_length shifted copies.
        for j in range(result_2.columns.shape[0]):    
            result_2.iloc[:,j] = result_1.iloc[:,list(range(j,result_2.columns.shape[0]*self.seq_length,result_2.columns.shape[0]))].mean(axis=1).to_numpy()        
        
        return result_2
    
    def moving_average(self,inputs):
        result = inputs.copy()
        for j in range(result.shape[1]):
            for i in range(self.seq_length):
                if pd.isnull(result.iloc[i+self.seq_length,j]):
                    result.iloc[i,j] = result.iloc[:i,j].mean()
                
                
        for j in range(result.shape[1]):
            for i in range(result.shape[0]-self.seq_length):                
                if pd.isnull(result.iloc[i+self.seq_length,j]):
                    result.iloc[i+self.seq_length,j] = result.iloc[i:i+self.seq_length,j].mean()
                    
        return result
    
    def last_observ(self, inputs):
        result = inputs.copy()
        result.iloc[0,:] = result.iloc[0,:].fillna(0)
        for i in tqdm_notebook(range(1, result.shape[0]), desc="Last Observed"):            
            result.iloc[i,:] = result.iloc[i-1,:] * result.iloc[i,:].isnull() + result.iloc[i,:].fillna(0)            
        return result

    def estm_prepare_matrix(self,inputs):
        fit_input = self.data_cut(inputs, "Estimation Prepare")
        size_1d = fit_input.shape[1]
        size_2d = fit_input.shape[2]
        fit_size = size_1d * size_2d
        fit_input = fit_input.reshape(-1, fit_size)
        fit_Y = fit_input[:,-1]
        fit_X = fit_input[:,0:-1]
        regressor = ensemble.RandomForestRegressor(n_estimators=50, n_jobs=-1)                
        index = np.random.choice(fit_X.shape[0], int(fit_X.shape[0]*0.6))
        regressor.fit(fit_X[index], fit_Y[index])
        new_label = regressor.predict(fit_X)
        new_input = np.concatenate((fit_X, new_label.reshape(-1,1)),axis=1)
        new_input = new_input.reshape(-1, size_1d, size_2d)
        print("Random Forest MAE: ", np.round(np.abs(new_label-fit_Y).mean(),5))
        
        return new_input, fit_Y
    
    
    def fore_prepare_matrix(self, inputs):
        fit_input = self.data_cut(inputs, "Forecast Prepare")
        size_1d = fit_input.shape[1]
        size_2d = fit_input.shape[2]
        fit_size = size_1d * size_2d
        fit_input = fit_input.reshape(-1, fit_size)
        fit_Y = fit_input[1:,-1]
        fit_X = fit_input[:-1,:]         
        new_input = fit_X.reshape(-1,size_1d, size_2d)        
        return new_input, fit_Y
    
    def pre_prepare(self, low_spd_rf=None, hig_spd_rf=None, low_rto_rf=None, all_spd_rf=None, result=False, max_speed=120):
        self.low_spd = self.low_speed.drop(self.label_nan_index)[self.input_links]
        self.hig_spd = self.high_speed.drop(self.label_nan_index)[self.input_links]
        self.low_rto = self.low_ratio.drop(self.label_nan_index)[self.input_links]
        self.all_spd = self.mean_speed.drop(self.label_nan_index)[self.input_links]
        
        self.low_spd_im = pd.DataFrame(index = self.all_spd.index, columns = self.all_spd.columns)
        self.hig_spd_im = pd.DataFrame(index = self.all_spd.index, columns = self.all_spd.columns)
        self.low_rto_im = pd.DataFrame(index = self.all_spd.index, columns = self.all_spd.columns)
        self.all_spd_im = pd.DataFrame(index = self.all_spd.index, columns = self.all_spd.columns)
        
        if low_spd_rf is None:
            df = self.random_forest(self.low_spd)
            df = self.moving_average(df)
            self.low_spd_im.update(df)
        else:
            self.low_spd_im.update(low_spd_rf[self.input_links])
            
        if hig_spd_rf is None:
            df = self.random_forest(self.hig_spd)
            df = self.moving_average(df)
            self.low_spd_im.update(df)
        else:
            self.hig_spd_im.update(hig_spd_rf[self.input_links])
            
        if low_rto_rf is None:
            df = self.random_forest(self.low_rto)
            df = self.moving_average(df)
            self.low_spd_im.update(df)
        else:
            self.low_rto_im.update(low_rto_rf[self.input_links])
            
        if all_spd_rf is None:
            df = self.random_forest(self.all_spd)
            df = self.moving_average(df)
            self.low_spd_im.update(df)
        else:
            self.all_spd_im.update(all_spd_rf[self.input_links])
            
        self.low_spd_ma = (~self.low_spd.isnull()).astype(int)
        self.hig_spd_ma = (~self.hig_spd.isnull()).astype(int)
        self.low_rto_ma = (~self.low_rto.isnull()).astype(int)
        self.all_spd_ma = (~self.all_spd.isnull()).astype(int)
        
        self.filter = (self.all_spd_im>=self.threshold).astype(int)
        
        self.concat_df = pd.concat((self.low_spd/max_speed, self.hig_spd/max_speed, self.low_rto, self.all_spd/max_speed), axis=1)
        self.concat_ma = (~self.concat_df.isnull()).astype(int)
        self.concat_lo = self.last_observ(self.concat_df)
        self.concat_dt = self.delta_time(self.concat_df)     
        self.concat_im = pd.concat((self.low_spd_im/max_speed, self.hig_spd_im/max_speed, self.low_rto_im, self.all_spd_im/max_speed), axis=1)
        self.concat_ft = pd.concat((self.filter, self.filter, self.filter, self.filter), axis = 1)
        
        
        
        self.low_spd_im_st = self.low_spd_im/max_speed
        self.hig_spd_im_st = self.hig_spd_im/max_speed        
        self.all_spd_im_st = self.all_spd_im/max_speed
        
        each = int(self.concat_df.shape[1]/4)
        
        self.low_spd_lo = self.concat_lo.iloc[:,0*each:1*each]
        self.hig_spd_lo = self.concat_lo.iloc[:,1*each:2*each]
        self.low_rto_lo = self.concat_lo.iloc[:,2*each:3*each]
        self.all_spd_lo = self.concat_lo.iloc[:,3*each:4*each]        
        
        self.low_spd_dt = self.concat_dt.iloc[:,0*each:1*each]
        self.hig_spd_dt = self.concat_dt.iloc[:,1*each:2*each]
        self.low_rto_dt = self.concat_dt.iloc[:,2*each:3*each]
        self.all_spd_dt = self.concat_dt.iloc[:,3*each:4*each]
        
        self.global_mean = self.concat_df.mean()
        
    def make_dataloader_estm(self):
        self.data_input = np.nan_to_num(self.data_cut(self.concat_df, "Data Input"))
        self.mask_input = self.data_cut(self.concat_ma, "Mask Input")
        self.laob_input = self.data_cut(self.concat_lo, "Last Observation")
        self.delt_input = self.data_cut(self.concat_dt, "Delta Time")      
        self.filt_input = self.data_cut(self.concat_ft, "Filter")
        
        
        #label data input에서 제거
        '''
        self.data_input[:, self.data_input.shape[1]-1, self.data_input.shape[2]-1] = 0
        self.mask_input[:, self.data_input.shape[1]-1, self.data_input.shape[2]-1] = 0
        self.laob_input[:, self.data_input.shape[1]-1, self.data_input.shape[2]-1] = self.laob_input[:, self.data_input.shape[1]-2, self.data_input.shape[2]-1]
        self.delt_input[:, self.data_input.shape[1]-1, self.data_input.shape[2]-1] = self.delt_input[:, self.data_input.shape[1]-2, self.data_input.shape[2]-1]+self.time_unit        
        '''
        #2,5,8,11
        indexing = len(self.input_links)
        index_list = [indexing-1, 2*indexing-1, 3*indexing-1, 4*indexing-1]
        
        self.data_input[:, self.data_input.shape[1]-1, index_list] = 0
        self.mask_input[:, self.data_input.shape[1]-1, index_list] = 0
        self.laob_input[:, self.data_input.shape[1]-1, index_list] = self.laob_input[:, self.data_input.shape[1]-2, index_list]
        self.delt_input[:, self.data_input.shape[1]-1, index_list] = self.delt_input[:, self.data_input.shape[1]-2, index_list]+self.time_unit        
        
        self.impu_spd_low, self.spdlw_label = self.estm_prepare_matrix(self.low_spd_im_st)
        self.impu_spd_hig, self.spdhi_label = self.estm_prepare_matrix(self.hig_spd_im_st)
        self.impu_low_rto, self.lowrt_label = self.estm_prepare_matrix(self.low_rto_im)
        self.impu_spd_all, self.speed_label = self.estm_prepare_matrix(self.all_spd_im_st)
        
        self.impu_input = np.concatenate((self.impu_spd_low, self.impu_spd_hig, self.impu_low_rto, self.impu_spd_all), axis=2)       
        
        
        sample_size = self.data_input.shape[0]
        train_index = int(np.floor(sample_size * self.train_proportion))
        valid_index = int(np.floor(sample_size * (self.train_proportion + self.valid_proportion)))
        
        
        train_i = self.concat_df.index[self.seq_length-1:train_index+self.seq_length-1]
        valid_i = self.concat_df.index[train_index+self.seq_length-1:valid_index+self.seq_length-1]
        test_i = self.concat_df.index[valid_index+self.seq_length-1:]
        
        train_size = int(np.floor(train_i.size/self.batch)*self.batch)
        valid_size = int(np.floor(valid_i.size/self.batch)*self.batch)
        test_size = int(np.floor((test_i.size-self.seq_length+1)/self.batch)*self.batch)
        
        train_i = train_i[:train_size]
        valid_i = valid_i[:valid_size]
        test_i = test_i[:test_size]
        
        data_branch = np.expand_dims(self.data_input, axis = 1)
        mask_branch = np.expand_dims(self.mask_input, axis = 1)
        laob_branch = np.expand_dims(self.laob_input, axis = 1)
        delt_branch = np.expand_dims(self.delt_input, axis = 1)
        impu_branch = np.expand_dims(self.impu_input, axis = 1)
        filt_branch = np.expand_dims(self.filt_input, axis = 1)
        
        total_dataset = np.concatenate((data_branch, mask_branch, laob_branch, delt_branch, impu_branch, filt_branch), axis=1)
        
        train_data, train_label = total_dataset[:train_index], self.speed_label[:train_index]
        valid_data, valid_label = total_dataset[train_index:valid_index], self.speed_label[train_index:valid_index]
        test_data, test_label = total_dataset[valid_index:], self.speed_label[valid_index:]

        train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
        valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
        test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)

        train_dataset = utils.TensorDataset(train_data, train_label)
        valid_dataset = utils.TensorDataset(valid_data, valid_label)
        test_dataset = utils.TensorDataset(test_data, test_label)

        train_dataloader = utils.DataLoader(train_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        valid_dataloader = utils.DataLoader(valid_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        test_dataloader = utils.DataLoader(test_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        
        return train_dataloader, valid_dataloader, test_dataloader, train_i, valid_i, test_i
        
    
    def make_dataloader_fore(self):
        self.data_input = np.nan_to_num(self.data_cut(self.concat_df, "Data Input"))[:-1]
        self.mask_input = self.data_cut(self.concat_ma, "Mask Input")[:-1]
        self.laob_input = self.data_cut(self.concat_lo, "Last Observation")[:-1]
        self.delt_input = self.data_cut(self.concat_dt, "Delta Time")[:-1]
        self.filt_input = self.data_cut(self.concat_ft, "Filter")[:-1]
        
        self.impu_spd_low, self.spdlw_label = self.fore_prepare_matrix(self.low_spd_im_st)
        self.impu_spd_hig, self.spdhi_label = self.fore_prepare_matrix(self.hig_spd_im_st)
        self.impu_low_rto, self.lowrt_label = self.fore_prepare_matrix(self.low_rto_im)
        self.impu_spd_all, self.speed_label = self.fore_prepare_matrix(self.all_spd_im_st)
        
        self.impu_input = np.concatenate((self.impu_spd_low, self.impu_spd_hig, self.impu_low_rto, self.impu_spd_all), axis=2)
        
        self.global_mean = self.concat_df.mean()
        
        sample_size = self.data_input.shape[0]
        train_index = int(np.floor(sample_size * self.train_proportion))
        valid_index = int(np.floor(sample_size * (self.train_proportion + self.valid_proportion)))
        
        train_i = self.concat_df.index[:-1][self.seq_length-1:train_index+self.seq_length-1]
        valid_i = self.concat_df.index[:-1][train_index+self.seq_length-1:valid_index+self.seq_length-1]
        test_i = self.concat_df.index[:-1][valid_index+self.seq_length-1:]
        
        train_size = int(np.floor(train_i.size/self.batch)*self.batch)
        valid_size = int(np.floor(valid_i.size/self.batch)*self.batch)
        test_size = int(np.floor((test_i.size-self.seq_length+1)/self.batch)*self.batch)
        
        train_i = train_i[:train_size]
        valid_i = valid_i[:valid_size]
        test_i = test_i[:test_size]
        
        data_branch = np.expand_dims(self.data_input, axis = 1)
        mask_branch = np.expand_dims(self.mask_input, axis = 1)
        laob_branch = np.expand_dims(self.laob_input, axis = 1)
        delt_branch = np.expand_dims(self.delt_input, axis = 1)
        impu_branch = np.expand_dims(self.impu_input, axis = 1)
        filt_branch = np.expand_dims(self.filt_input, axis = 1)
        
        total_dataset = np.concatenate((data_branch, mask_branch, laob_branch, delt_branch, impu_branch, filt_branch), axis=1)
        
        train_data, train_label = total_dataset[:train_index], self.speed_label[:train_index]
        valid_data, valid_label = total_dataset[train_index:valid_index], self.speed_label[train_index:valid_index]
        test_data, test_label = total_dataset[valid_index:], self.speed_label[valid_index:]

        train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
        valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
        test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)

        train_dataset = utils.TensorDataset(train_data, train_label)
        valid_dataset = utils.TensorDataset(valid_data, valid_label)
        test_dataset = utils.TensorDataset(test_data, test_label)

        train_dataloader = utils.DataLoader(train_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        valid_dataloader = utils.DataLoader(valid_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        test_dataloader = utils.DataLoader(test_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        
        return train_dataloader, valid_dataloader, test_dataloader, train_i, valid_i, test_i
    
    
   
    def one_step_prepare_matrix(self, inputs):
        new_input = inputs.loc[:,self.label_link].iloc[self.seq_length-1:,:-1].to_numpy().astype(float)
        new_label = inputs.loc[:,self.label_link].iloc[self.seq_length-1:,-1].to_numpy().astype(float)
        
        return new_input, new_label
    
    def make_dataloader_one_step(self):
        
        total_dataset, self.speed_label = self.one_step_prepare_matrix(self.concat_im)
        
        sample_size = total_dataset.shape[0]
        train_index = int(np.floor(sample_size * self.train_proportion))
        valid_index = int(np.floor(sample_size * (self.train_proportion + self.valid_proportion)))
        
        train_data, train_label = total_dataset[:train_index], self.speed_label[:train_index]
        valid_data, valid_label = total_dataset[train_index:valid_index], self.speed_label[train_index:valid_index]
        test_data, test_label = total_dataset[valid_index:], self.speed_label[valid_index:]
        
        train_i = self.concat_im.index[:train_index]
        valid_i = self.concat_im.index[train_index:valid_index]
        test_i = self.concat_im.index[:valid_index:]
        
        train_size = int(np.floor(train_i.size/self.batch)*self.batch)
        valid_size = int(np.floor(valid_i.size/self.batch)*self.batch)
        test_size = int(np.floor(test_i.size/self.batch)*self.batch)
        
        train_i = train_i[:train_size]
        valid_i = valid_i[:valid_size]
        test_i = test_i[:test_size]

        train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
        valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
        test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)

        train_dataset = utils.TensorDataset(train_data, train_label)
        valid_dataset = utils.TensorDataset(valid_data, valid_label)
        test_dataset = utils.TensorDataset(test_data, test_label)

        train_dataloader = utils.DataLoader(train_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        valid_dataloader = utils.DataLoader(valid_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        test_dataloader = utils.DataLoader(test_dataset, batch_size=self.batch, shuffle=False, drop_last=True)
        
        return train_dataloader, valid_dataloader, test_dataloader, train_i, valid_i, test_i
    
    def two_step_prepare_matrix(self,inputs):
        inputs = inputs.iloc[:,:-len(self.input_links)]
        fit_input = self.data_cut(inputs, "Two Step Prepare")        
        size_1d = fit_input.shape[1]
        size_2d = fit_input.shape[2]
        fit_size = size_1d * size_2d
        fit_input = fit_input.reshape(-1, fit_size)
        Y_index = [fit_size-int(size_2d/3)*2-1, fit_size-int(size_2d/3)-1, fit_size-1]
        X_index = [x for x in range(fit_size) if x not in Y_index]        
        fit_Y = fit_input[:,Y_index]
        fit_X = fit_input[:,X_index]
        regressor = ensemble.RandomForestRegressor(n_estimators=50, n_jobs=-1)                
        index = np.random.choice(fit_X.shape[0], int(fit_X.shape[0]*0.6))
        regressor.fit(fit_X[index], fit_Y[index])
        new_label = regressor.predict(fit_X)
        new_input = fit_input.copy()
        new_input[:,Y_index] = new_label        
        #new_input = np.concatenate((fit_X, new_label.reshape(-1,3),axis=1)        
        new_input = new_input.reshape(-1, size_1d, size_2d)
        print("Random Forest MAE: ", np.round(np.abs(new_label-fit_Y).mean(),5))
        
        return new_input, fit_Y
    
    def make_dataloader_two_step(self):
        """Build train/validation/test DataLoaders for the two-step setup.

        Every source frame has its trailing ``len(self.input_links)`` columns
        removed and is windowed with ``self.data_cut``.  The imputed channel and
        the labels come from ``self.two_step_prepare_matrix``.  The six arrays
        are stacked on a new axis 1 in the order data, mask, last observation,
        delta time, imputed, filter.

        Side effects: stores ``data_twost``, ``mask_twost``, ``laob_twost``,
        ``delt_twost``, ``filt_twost``, ``impu_twost``, ``concat_labels`` and
        ``mean_twost`` on ``self``.

        Returns
        -------
        tuple
            ``(train_dataloader, valid_dataloader, test_dataloader,
            train_i, valid_i, test_i)``; the ``*_i`` values are slices of
            ``self.concat_df.index`` trimmed to a whole number of batches.
        """
        link_count = len(self.input_links)

        def without_tail(frame):
            # Positional slice that removes the last ``link_count`` columns.
            return frame.iloc[:, :-link_count]

        self.data_twost = np.nan_to_num(self.data_cut(without_tail(self.concat_df), "Data Input"))
        self.mask_twost = self.data_cut(without_tail(self.concat_ma), "Mask Input")
        self.laob_twost = self.data_cut(without_tail(self.concat_lo), "Last Observation")
        self.delt_twost = self.data_cut(without_tail(self.concat_dt), "Delta Time")
        self.filt_twost = self.data_cut(without_tail(self.concat_ft), "Filter")

        self.impu_twost, self.concat_labels = self.two_step_prepare_matrix(self.concat_im)
        self.mean_twost = without_tail(self.concat_df).mean()

        # Last column of each of the first three blocks of ``link_count``
        # columns (2, 5, 8 when there are three links).
        target_cols = [block * link_count - 1 for block in (1, 2, 3)]
        last_step = self.data_twost.shape[1] - 1
        prev_step = last_step - 1

        # At the final step the target columns are blanked in data and mask,
        # the last observation is carried over from the previous step, and the
        # delta time is the previous step's value plus one time unit.
        self.data_twost[:, last_step, target_cols] = 0
        self.mask_twost[:, last_step, target_cols] = 0
        self.laob_twost[:, last_step, target_cols] = self.laob_twost[:, prev_step, target_cols]
        self.delt_twost[:, last_step, target_cols] = self.delt_twost[:, prev_step, target_cols] + self.time_unit

        sample_count = self.data_twost.shape[0]
        train_index = int(np.floor(sample_count * self.train_proportion))
        valid_index = int(np.floor(sample_count * (self.train_proportion + self.valid_proportion)))

        # Index labels for each split, shifted by the window length minus one.
        shift = self.seq_length - 1
        stamps = self.concat_df.index[:-1]
        train_i = stamps[shift:train_index + shift]
        valid_i = stamps[train_index + shift:valid_index + shift]
        test_i = stamps[valid_index + shift:]

        def whole_batches(count):
            # Largest multiple of the batch size that does not exceed ``count``.
            return int(np.floor(count / self.batch) * self.batch)

        train_keep = whole_batches(train_i.size)
        valid_keep = whole_batches(valid_i.size)
        test_keep = whole_batches(test_i.size - self.seq_length + 1)

        train_i = train_i[:train_keep]
        valid_i = valid_i[:valid_keep]
        test_i = test_i[:test_keep]

        # One channel per source array, stacked on a new axis 1.
        channels = (
            self.data_twost,
            self.mask_twost,
            self.laob_twost,
            self.delt_twost,
            self.impu_twost,
            self.filt_twost,
        )
        total_dataset = np.stack(channels, axis=1)

        loaders = []
        for start, stop in ((None, train_index), (train_index, valid_index), (valid_index, None)):
            features = torch.Tensor(total_dataset[start:stop])
            targets = torch.Tensor(self.concat_labels[start:stop])
            split = utils.TensorDataset(features, targets)
            loaders.append(utils.DataLoader(split, batch_size=self.batch, shuffle=False, drop_last=True))
        train_dataloader, valid_dataloader, test_dataloader = loaders

        return train_dataloader, valid_dataloader, test_dataloader, train_i, valid_i, test_i
    
    def calcul_prepare_matrix(self,inputs):
        """Replace four target columns of each window with random-forest predictions.

        ``inputs`` is windowed by ``self.data_cut`` (no columns are dropped
        first) and every window is flattened into a single row.  Four flattened
        columns act as regression targets: the very last one and the ones
        ``int(width/4)``, ``2*int(width/4)`` and ``3*int(width/4)`` positions
        before it.  All remaining columns are the regression features.

        Parameters
        ----------
        inputs : pandas.DataFrame
            Frame handed to ``self.data_cut`` as is.

        Returns
        -------
        tuple
            ``(new_input, fit_Y)`` where ``new_input`` is the windowed data,
            reshaped back to ``(-1, steps, width)``, with the target entries
            overwritten by the forest's predictions, and ``fit_Y`` holds the
            original values of those target entries.
        """
        # NOTE(review): data_cut is assumed to return a 3-D array
        # (samples, steps, features); shape[1] and shape[2] are read below.
        windows = self.data_cut(inputs, "Two Step Prepare")
        steps = windows.shape[1]
        width = windows.shape[2]
        flat_width = steps * width
        flat = windows.reshape(-1, flat_width)

        # Target columns, counted backwards from the end of the flattened row.
        group = int(width / 4)
        target_cols = [flat_width - 1 - back * group for back in (3, 2, 1, 0)]
        excluded = set(target_cols)
        feature_cols = [col for col in range(flat_width) if col not in excluded]

        targets = flat[:, target_cols]
        features = flat[:, feature_cols]

        forest = ensemble.RandomForestRegressor(n_estimators=50, n_jobs=-1)
        # Draw 60% as many row ids as there are rows; np.random.choice samples
        # with replacement by default, so the same row can appear repeatedly.
        row_count = features.shape[0]
        picked = np.random.choice(row_count, int(row_count * 0.6))
        forest.fit(features[picked], targets[picked])

        # Predict for every row, including the rows used for fitting.
        predicted = forest.predict(features)
        imputed = flat.copy()
        imputed[:, target_cols] = predicted
        print("Random Forest MAE: ", np.round(np.abs(predicted-targets).mean(),5))

        return imputed.reshape(-1, steps, width), targets
    
    def make_dataloader_calcul(self):
        """Build train/validation/test DataLoaders for the "calcul" setup.

        Every source frame has its trailing ``len(self.input_links)`` columns
        removed and is windowed with ``self.data_cut``.  The imputed channel and
        the labels come from ``self.calcul_prepare_matrix``, which receives the
        untrimmed ``self.concat_im``; the imputed array therefore has its
        trailing ``len(self.input_links)`` feature columns sliced off before it
        is stacked with the others.  Channel order on axis 1 is data, mask,
        last observation, delta time, imputed, filter.

        Side effects: stores ``data_twost``, ``mask_twost``, ``laob_twost``,
        ``delt_twost``, ``filt_twost``, ``impu_twost``, ``concat_labels`` and
        ``mean_twost`` on ``self``.

        Returns
        -------
        tuple
            ``(train_dataloader, valid_dataloader, test_dataloader,
            train_i, valid_i, test_i)``; the ``*_i`` values are slices of
            ``self.concat_df.index`` trimmed to a whole number of batches.
        """
        link_count = len(self.input_links)

        def without_tail(frame):
            # Positional slice that removes the last ``link_count`` columns.
            return frame.iloc[:, :-link_count]

        self.data_twost = np.nan_to_num(self.data_cut(without_tail(self.concat_df), "Data Input"))
        self.mask_twost = self.data_cut(without_tail(self.concat_ma), "Mask Input")
        self.laob_twost = self.data_cut(without_tail(self.concat_lo), "Last Observation")
        self.delt_twost = self.data_cut(without_tail(self.concat_dt), "Delta Time")
        self.filt_twost = self.data_cut(without_tail(self.concat_ft), "Filter")

        self.impu_twost, self.concat_labels = self.calcul_prepare_matrix(self.concat_im)
        self.mean_twost = without_tail(self.concat_df).mean()

        # Last column of each of the first three blocks of ``link_count``
        # columns (2, 5, 8 when there are three links).
        target_cols = [block * link_count - 1 for block in (1, 2, 3)]
        last_step = self.data_twost.shape[1] - 1
        prev_step = last_step - 1

        # At the final step the target columns are blanked in data and mask,
        # the last observation is carried over from the previous step, and the
        # delta time is the previous step's value plus one time unit.
        self.data_twost[:, last_step, target_cols] = 0
        self.mask_twost[:, last_step, target_cols] = 0
        self.laob_twost[:, last_step, target_cols] = self.laob_twost[:, prev_step, target_cols]
        self.delt_twost[:, last_step, target_cols] = self.delt_twost[:, prev_step, target_cols] + self.time_unit

        sample_count = self.data_twost.shape[0]
        train_index = int(np.floor(sample_count * self.train_proportion))
        valid_index = int(np.floor(sample_count * (self.train_proportion + self.valid_proportion)))

        # Index labels for each split, shifted by the window length minus one.
        shift = self.seq_length - 1
        stamps = self.concat_df.index[:-1]
        train_i = stamps[shift:train_index + shift]
        valid_i = stamps[train_index + shift:valid_index + shift]
        test_i = stamps[valid_index + shift:]

        def whole_batches(count):
            # Largest multiple of the batch size that does not exceed ``count``.
            return int(np.floor(count / self.batch) * self.batch)

        train_keep = whole_batches(train_i.size)
        valid_keep = whole_batches(valid_i.size)
        test_keep = whole_batches(test_i.size - self.seq_length + 1)

        train_i = train_i[:train_keep]
        valid_i = valid_i[:valid_keep]
        test_i = test_i[:test_keep]

        # One channel per source array, stacked on a new axis 1.  The imputed
        # array is trimmed to the same feature width as the other channels.
        channels = (
            self.data_twost,
            self.mask_twost,
            self.laob_twost,
            self.delt_twost,
            self.impu_twost[:, :, :-link_count],
            self.filt_twost,
        )
        total_dataset = np.stack(channels, axis=1)

        loaders = []
        for start, stop in ((None, train_index), (train_index, valid_index), (valid_index, None)):
            features = torch.Tensor(total_dataset[start:stop])
            targets = torch.Tensor(self.concat_labels[start:stop])
            split = utils.TensorDataset(features, targets)
            loaders.append(utils.DataLoader(split, batch_size=self.batch, shuffle=False, drop_last=True))
        train_dataloader, valid_dataloader, test_dataloader = loaders

        return train_dataloader, valid_dataloader, test_dataloader, train_i, valid_i, test_i