In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
from sklearn.metrics import mean_absolute_percentage_error as mape

In [2]:
n_pred_t = 5

In [3]:
def mase(y_true, y_pred):
    n_samples = len(y_true)
    diff = np.abs(np.diff(y_true))  # 计算分母
    scale = np.sum(diff) / (n_samples - 1)
    error = np.abs(y_true - y_pred) # 计算分子
    mase = np.mean(error / scale)
    return mase

In [4]:
df = pd.read_csv("China_tourist.csv",encoding = 'utf-8',usecols=['time','Japan', 'Korea', 'Singapore','出境游'])
col = df.columns
time = df.iloc[:,0]

In [5]:
df.出境游 =[12.58801072, 15.94186903, 14.12392481, 10.77129966, 12.98798976,
       14.65532027, 16.71567184, 16.91461645, 15.14439181, 13.76879549,
       13.85497123, 14.17556064, 14.57302275, 13.84585365, 15.74285615,
       14.72574786, 14.92740008, 15.79515993, 16.42627757, 16.33169033,
       14.47822046, 14.23553347, 14.02198903, 14.38812131, 13.89046825,
       14.87659086, 15.65035524, 14.99816637, 14.74728945, 15.57458648,
       15.8868293 , 15.54038165, 13.27676353, 12.37191989, 12.33090131,
       12.49814567, 11.08444987,  8.76364472,  6.95113917,  4.97812542,
        3.97042339,  3.89424768,  3.60294413,  3.58219753,  2.9175151 ,
        2.98820059,  3.04431712,  3.14921897,  2.3728255 ,  2.57593706,
        2.96124693,  2.4611323 ,  1.83105417,  1.61948817,  1.90949814,
        1.63693581,  1.2136655 ,  1.13287022,  1.3225591 ,  0.8986272 ,
        0.79444609,  1.69685212,  1.59643327,  1.94836067,  2.04393939,
        1.42456502,  1.53760031,  1.21367606,  1.0001705 ,  1.15304488,
        1.12908332,  1.28249738,  2.34787776]

In [6]:
# 标准化
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
df = scaler.fit_transform(df.iloc[:,1:])
df = pd.DataFrame(df)
df.columns=col[1:]
df['time'] = time

In [7]:
# 出境游平稳化
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Perform ADFuller to test for Stationarity of given series and print report"""
    r = adfuller(series, autolag='AIC')
    output = {'test_statistic':round(r[0], 4), 'pvalue':round(r[1], 4), 'n_lags':round(r[2], 4), 'n_obs':r[3]}
    p_value = output['pvalue'] 
    def adjust(val, length= 6): return str(val).ljust(length)

    # Print Summary
    print(f'    Augmented Dickey-Fuller Test on "{name}"', "\n   ", '-'*47)
    print(f' Null Hypothesis: Data has unit root. Non-Stationary.')
    print(f' Significance Level    = {signif}')
    print(f' Test Statistic        = {output["test_statistic"]}')
    print(f' No. Lags Chosen       = {output["n_lags"]}')

    for key,val in r[4].items():
        print(f' Critical value {adjust(key)} = {round(val, 3)}')

    if p_value <= signif:
        print(f" => P-Value = {p_value}. Rejecting Null Hypothesis.")
        print(f" => Series is Stationary.")
    else:
        print(f" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.")
        print(f" => Series is Non-Stationary.")    

In [8]:
begin = 36 # 202001
end = 60 # 202201
yuce = 68 # 202209

In [9]:
df["出境游差分"] = df.iloc[:,3:4].diff().fillna(0)

In [10]:
adfuller_test(df.出境游差分, name="出境游差分")

    Augmented Dickey-Fuller Test on "出境游差分" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -7.0018
 No. Lags Chosen       = 0
 Critical value 1%     = -3.525
 Critical value 5%     = -2.903
 Critical value 10%    = -2.589
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


In [11]:
df["Japan差分"] = df.iloc[:,0:1].diff().fillna(0)
adfuller_test(df.Japan差分, name="Japan差分")

    Augmented Dickey-Fuller Test on "Japan差分" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -9.1925
 No. Lags Chosen       = 0
 Critical value 1%     = -3.525
 Critical value 5%     = -2.903
 Critical value 10%    = -2.589
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


In [12]:
df["Korea差分"] = df.iloc[:,1:2].diff().fillna(0)
adfuller_test(df.Korea差分, name="Korea差分")

    Augmented Dickey-Fuller Test on "Korea差分" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -6.9856
 No. Lags Chosen       = 0
 Critical value 1%     = -3.525
 Critical value 5%     = -2.903
 Critical value 10%    = -2.589
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


In [13]:
df["Singapore差分"] = df.iloc[:,2:3].diff().fillna(0)
adfuller_test(df.Singapore差分, name="Singapore差分")

    Augmented Dickey-Fuller Test on "Singapore差分" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -3.0768
 No. Lags Chosen       = 5
 Critical value 1%     = -3.532
 Critical value 5%     = -2.906
 Critical value 10%    = -2.59
 => P-Value = 0.0283. Rejecting Null Hypothesis.
 => Series is Stationary.


In [14]:
df['x1'] = np.nan
df['x2'] = np.nan
df['x3'] = np.nan

In [15]:
from skforecast.ForecasterAutoreg import ForecasterAutoreg
import xgboost as xgb
forecaster = ForecasterAutoreg(
                regressor = xgb.XGBRegressor(),
                lags      = 9
             )
train = df.出境游差分.iloc[0:begin]

forecaster.fit(train)
steps = len(df.iloc[begin:,])
forecast_x1 = forecaster.predict(steps=steps)
#forecast_x1
df.x1.iloc[:begin] = train
df.x1.iloc[begin:] = forecast_x1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.x1.iloc[:begin] = train
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.x1.iloc[begin:] = forecast_x1


In [16]:
# 再把x2后面的值给预测了
train = df.出境游差分.iloc[begin:end]
forecaster.fit(train)
steps = len(df.iloc[end:,])
forecast_x2 = forecaster.predict(steps=steps)
df.x2.iloc[begin:end] = train
df.x2.iloc[end:] = forecast_x2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.x2.iloc[begin:end] = train
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.x2.iloc[end:] = forecast_x2


In [17]:
df.x3.iloc[end:] = df.出境游差分.iloc[end:]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.x3.iloc[end:] = df.出境游差分.iloc[end:]


In [18]:
# 开始建模

In [19]:
import statsmodels.api as sm
df_y = df[['Japan差分','Korea差分','Singapore差分']]
mod1 = sm.tsa.VARMAX(df_y.iloc[:begin,0:3], order=(1,0), trend='n', exog=df.x1.iloc[:begin,])
res1 = mod1.fit()

In [20]:
mod2 = sm.tsa.VARMAX(df_y.iloc[begin:end,0:3], order=(1,0), trend='n', exog=df.x2.iloc[begin:end,])
res2 = mod2.fit()



# 三阶段

In [21]:
mod3 = sm.tsa.VARMAX(df_y.iloc[end:yuce,0:3], order=(1,0),exog=df.x3.iloc[end:yuce,])
res3 = mod3.fit()
y_pred33 = res3.predict()
y_pred4 = res3.forecast(n_pred_t,exog=df.x3.iloc[yuce:yuce+n_pred_t])



In [22]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,1]
y_pred = y_pred4.iloc[:,1]

df1 = pd.DataFrame([rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)],index = ['RMSE','MAPE','MASE'],columns = ['未组合'])

In [23]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,0]
y_pred = y_pred4.iloc[:,0]

df0 = pd.DataFrame([rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)],index = ['RMSE','MAPE','MASE'],columns = ['未组合'])

In [24]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,2]
y_pred = y_pred4.iloc[:,2]

df2 = pd.DataFrame([rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)],index = ['RMSE','MAPE','MASE'],columns = ['未组合'])

In [25]:
# x1预测36-72
y_pred14 = res1.forecast(len(df.iloc[begin:,]),exog=df.x1.iloc[begin:,])
# x2预测60-72
y_pred24 = res2.forecast(len(df.iloc[end:,]),exog=df.x2.iloc[end:,])

In [26]:
#mod1和mod2和mod3的OLS组合，300：450
from sklearn.ensemble import RandomForestRegressor
X_m1m2m3_0 = pd.concat([y_pred14['Japan差分'].loc[end:yuce-1],y_pred24['Japan差分'].loc[end:yuce-1],y_pred33['Japan差分'].loc[end:]],axis = 1)
X_m1m2m3_0.columns = ['Japan_x1','Japan_x2','Japan_x3']

regressor1 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor1.fit(X_m1m2m3_0, df_y['Japan差分'][end:yuce])
# 预测
X_m1m2m3_00 = pd.concat([y_pred14['Japan差分'].loc[yuce:yuce+n_pred_t-1],y_pred24['Japan差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Japan差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_00.columns = ['Japan_x1','Japan_x2','Japan_x3']
ypred0 = regressor1.predict(X_m1m2m3_00)


X_m1m2m3_1 = pd.concat([y_pred14['Korea差分'].loc[end:yuce-1],y_pred24['Korea差分'].loc[end:yuce-1],y_pred33['Korea差分'].loc[end:]],axis = 1)
X_m1m2m3_1.columns = ['Korea_x1','Korea_x2','Korea_x3']
regressor2 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor2.fit(X_m1m2m3_1, df_y['Korea差分'][end:yuce])
# 预测
X_m1m2m3_11 = pd.concat([y_pred14['Korea差分'].loc[yuce:yuce+n_pred_t-1],y_pred24['Korea差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Korea差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_11.columns = ['Korea_x1','Korea_x2','Korea_x3']
ypred1 = regressor2.predict(X_m1m2m3_11)


X_m1m2m3_2 = pd.concat([y_pred14['Singapore差分'].loc[end:yuce-1],y_pred24['Singapore差分'].loc[end:yuce-1],y_pred33['Singapore差分'].loc[end:]],axis = 1)
X_m1m2m3_2.columns = ['Singapore_x1','Singapore_x2','Singapore_x3']
regressor3 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor3.fit(X_m1m2m3_2, df_y['Singapore差分'][end:yuce])
# 预测
X_m1m2m3_22 = pd.concat([y_pred14['Singapore差分'].loc[yuce:yuce+n_pred_t-1],y_pred24['Singapore差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Singapore差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_22.columns = ['Singapore_x1','Singapore_x2','Singapore_x3']
ypred2 = regressor3.predict(X_m1m2m3_22)

In [27]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,0].values
y_pred = ypred0

df0['组合123'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [28]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,1].values
y_pred = ypred1

df1['组合123'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [29]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,2].values
y_pred = ypred2

df2['组合123'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [30]:
#mod2和mod3的OLS组合，300：450
from sklearn.ensemble import RandomForestRegressor
X_m1m2m3_0 = pd.concat([y_pred24['Japan差分'].loc[end:yuce-1],y_pred33['Japan差分'].loc[end:]],axis = 1)
X_m1m2m3_0.columns = ['Japan_x2','Japan_x3']

regressor1 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor1.fit(X_m1m2m3_0, df_y['Japan差分'][end:yuce])
# 预测
X_m1m2m3_00 = pd.concat([y_pred24['Japan差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Japan差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_00.columns = ['Japan_x2','Japan_x3']
ypred0 = regressor1.predict(X_m1m2m3_00)


X_m1m2m3_1 = pd.concat([y_pred24['Korea差分'].loc[end:yuce-1],y_pred33['Korea差分'].loc[end:]],axis = 1)
X_m1m2m3_1.columns = ['Korea_x2','Korea_x3']
regressor2 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor2.fit(X_m1m2m3_1, df_y['Korea差分'][end:yuce])
# 预测
X_m1m2m3_11 = pd.concat([y_pred24['Korea差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Korea差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_11.columns = ['Korea_x2','Korea_x3']
ypred1 = regressor2.predict(X_m1m2m3_11)

X_m1m2m3_2 = pd.concat([y_pred24['Singapore差分'].loc[end:yuce-1],y_pred33['Singapore差分'].loc[end:]],axis = 1)
X_m1m2m3_2.columns = ['Singapore_x2','Singapore_x3']
regressor3 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor3.fit(X_m1m2m3_2, df_y['Singapore差分'][end:yuce])
# 预测
X_m1m2m3_22 = pd.concat([y_pred24['Singapore差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Singapore差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_22.columns = ['Singapore_x2','Singapore_x3']
ypred2 = regressor3.predict(X_m1m2m3_22)

In [31]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,0].values
y_pred = ypred0

df0['组合23'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [32]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,1].values
y_pred = ypred1

df1['组合23'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [33]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,2].values
y_pred = ypred2

df2['组合23'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [34]:
#mod1和mod3的OLS组合，300：450
from sklearn.ensemble import RandomForestRegressor
X_m1m2m3_0 = pd.concat([y_pred14['Japan差分'].loc[end:yuce-1],y_pred33['Japan差分'].loc[end:]],axis = 1)
X_m1m2m3_0.columns = ['Japan_x1','Japan_x3']

regressor1 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor1.fit(X_m1m2m3_0, df_y['Japan差分'][end:yuce])
# 预测
X_m1m2m3_00 = pd.concat([y_pred14['Japan差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Japan差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_00.columns = ['Japan_x1','Japan_x3']
ypred0 = regressor1.predict(X_m1m2m3_00)


X_m1m2m3_1 = pd.concat([y_pred14['Korea差分'].loc[end:yuce-1],y_pred33['Korea差分'].loc[end:]],axis = 1)
X_m1m2m3_1.columns = ['Korea_x1','Korea_x3']
regressor2 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor2.fit(X_m1m2m3_1, df_y['Korea差分'][end:yuce])
# 预测
X_m1m2m3_11 = pd.concat([y_pred14['Korea差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Korea差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_11.columns = ['Korea_x1','Korea_x3']
ypred1 = regressor2.predict(X_m1m2m3_11)


X_m1m2m3_2 = pd.concat([y_pred14['Singapore差分'].loc[end:yuce-1],y_pred33['Singapore差分'].loc[end:]],axis = 1)
X_m1m2m3_2.columns = ['Singapore_x1','Singapore_x3']
regressor3 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor3.fit(X_m1m2m3_2, df_y['Singapore差分'][end:yuce])
# 预测
X_m1m2m3_22 = pd.concat([y_pred14['Singapore差分'].loc[yuce:yuce+n_pred_t-1],y_pred4['Singapore差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_22.columns = ['Singapore_x1','Singapore_x3']
ypred2 = regressor3.predict(X_m1m2m3_22)

In [35]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,0].values
y_pred = ypred0

df0['组合13'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [36]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,1].values
y_pred = ypred1

df1['组合13'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [37]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,2].values
y_pred = ypred2

df2['组合13'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [38]:
#mod1和mod2，300：450
from sklearn.ensemble import RandomForestRegressor
X_m1m2m3_0 = pd.concat([y_pred14['Japan差分'].loc[end:yuce-1],y_pred24['Japan差分'].loc[end:yuce-1]],axis = 1)
X_m1m2m3_0.columns = ['Japan_x1','Japan_x2']

regressor1 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor1.fit(X_m1m2m3_0, df_y['Japan差分'][end:yuce])
# 预测
X_m1m2m3_00 = pd.concat([y_pred14['Japan差分'].loc[yuce:yuce+n_pred_t-1],y_pred24['Japan差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_00.columns = ['Japan_x1','Japan_x2']
ypred0 = regressor1.predict(X_m1m2m3_00)


X_m1m2m3_1 = pd.concat([y_pred14['Korea差分'].loc[end:yuce-1],y_pred24['Korea差分'].loc[end:yuce-1]],axis = 1)
X_m1m2m3_1.columns = ['Korea_x1','Korea_x2']
regressor2 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor2.fit(X_m1m2m3_1, df_y['Korea差分'][end:yuce])
# 预测
X_m1m2m3_11 = pd.concat([y_pred14['Korea差分'].loc[yuce:yuce+n_pred_t-1],y_pred24['Korea差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_11.columns = ['Korea_x1','Korea_x2']
ypred1 = regressor2.predict(X_m1m2m3_11)


X_m1m2m3_2 = pd.concat([y_pred14['Singapore差分'].loc[end:yuce-1],y_pred24['Singapore差分'].loc[end:yuce-1]],axis = 1)
X_m1m2m3_2.columns = ['Singapore_x1','Singapore_x2']
regressor3 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor3.fit(X_m1m2m3_2, df_y['Singapore差分'][end:yuce])
# 预测
X_m1m2m3_22 = pd.concat([y_pred14['Singapore差分'].loc[yuce:yuce+n_pred_t-1],y_pred24['Singapore差分'].loc[yuce:yuce+n_pred_t-1]],axis = 1)
X_m1m2m3_22.columns = ['Singapore_x1','Singapore_x2']
ypred2 = regressor3.predict(X_m1m2m3_22)

In [39]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,1].values
y_pred = ypred1

df1['组合12'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [40]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,0].values
y_pred = ypred0

df0['组合12'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [41]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,2].values
y_pred = ypred2

df2['组合12'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [42]:
# 如果不用组合，直接用x1x2x3呢

In [43]:
mod4 = sm.tsa.VARMAX(df_y.iloc[end:yuce,], order=(1,0),exog=df.iloc[end:yuce,9:12])
res4 = mod4.fit()
y_pred5 = res4.forecast(n_pred_t,exog=df.iloc[yuce:yuce+n_pred_t,9:12])



In [44]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,1].values
y_pred = y_pred5.iloc[:,1].values

df1['VAR_x1x2x3'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [45]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,0].values
y_pred = y_pred5.iloc[:,0].values

df0['VAR_x1x2x3'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [46]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,2].values
y_pred = y_pred5.iloc[:,2].values

df2['VAR_x1x2x3'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [47]:
# 如果用所有出境游，不分段呢

In [48]:
mod5 = sm.tsa.VARMAX(df_y.iloc[0:yuce,], order=(1,0),exog=df.iloc[0:yuce,5])
res5 = mod5.fit()
y_pred6 = res5.forecast(n_pred_t,exog=df.iloc[yuce:yuce+n_pred_t,5])



In [49]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,0].values
y_pred = y_pred6.iloc[:,0].values

df0['VAR_出境游all'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [50]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,1].values
y_pred = y_pred6.iloc[:,1].values

df1['VAR_出境游all'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

In [51]:
y_true = df_y.iloc[yuce:yuce+n_pred_t,2].values
y_pred = y_pred6.iloc[:,2].values

df2['VAR_出境游all'] = [rmse(y_true,y_pred),mape(y_true,y_pred),mase(y_true,y_pred)]

# VAR

In [52]:
model = VAR(df_y[0:yuce])
model_fitted = model.fit(1)

In [53]:
forecast_input = df_y[0:yuce].values
# Forecast
fc = model_fitted.forecast(y=forecast_input, steps=n_pred_t)
df_forecast = pd.DataFrame(fc, index=df_y.index[yuce:yuce+n_pred_t], columns=df_y.columns + '_2d')
df_forecast

Unnamed: 0,Japan差分_2d,Korea差分_2d,Singapore差分_2d
68,-0.013051,-0.021235,-0.039454
69,-0.022775,-0.036935,-0.027104
70,-0.025879,-0.037206,-0.031877
71,-0.025212,-0.03743,-0.031289
72,-0.025357,-0.037478,-0.031267


In [54]:
# 组合的rmse
rmse(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Japan差分_2d'])
mape(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Japan差分_2d'])
mase(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Japan差分_2d'])

df0['VAR'] = [rmse(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Japan差分_2d']),
              mape(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Japan差分_2d']),
              mase(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Japan差分_2d'])]

In [55]:
# 组合的rmse
rmse(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Korea差分_2d'])
mape(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Korea差分_2d'])
mase(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Korea差分_2d'])

df1['VAR'] = [rmse(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Korea差分_2d']),
              mape(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Korea差分_2d']),
              mase(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Korea差分_2d'])]

In [56]:
# 组合的rmse
rmse(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Singapore差分_2d'])
mape(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Singapore差分_2d'])
mase(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Singapore差分_2d'])

df2['VAR'] = [rmse(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Singapore差分_2d']),
              mape(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Singapore差分_2d']),
              mase(df_y.iloc[yuce:yuce+n_pred_t,0],df_forecast.loc[yuce:yuce+n_pred_t-1,'Singapore差分_2d'])]

In [57]:
df0.columns = ["VARX3","组合123","组合23","组合13","组合12","VARX123","VARX","VAR"]

In [58]:
df1.columns = ["VARX3","组合123","组合23","组合13","组合12","VARX123","VARX","VAR"]

In [59]:
df2.columns = ["VARX3","组合123","组合23","组合13","组合12","VARX123","VARX","VAR"]

In [60]:
r = pd.concat([df0,df1,df2])
r[['VAR','VARX','VARX3','VARX123','组合123','组合12','组合13','组合23']].T

Unnamed: 0,RMSE,MAPE,MASE,RMSE.1,MAPE.1,MASE.1,RMSE.2,MAPE.2,MASE.2
VAR,0.035885,5.601342,1.411579,0.046926,8.114949,1.909427,0.04529,7.159443,1.829687
VARX,0.047723,6.391578,1.648222,0.062096,5.88612,1.881266,0.035311,3.871319,1.924692
VARX3,0.021138,1.967811,0.804644,0.024592,3.135383,0.88957,0.036092,1.268839,1.506304
VARX123,0.044713,8.334617,1.742355,0.118558,14.17561,4.670397,0.035267,4.286332,2.043572
组合123,0.009334,0.743425,0.278294,0.016373,1.292657,0.525408,0.029769,0.995147,1.272376
组合12,0.013944,2.325306,0.506096,0.018686,1.897917,0.635762,0.023785,1.297941,1.125216
组合13,0.01074,1.134112,0.344269,0.019448,1.362683,0.621356,0.02816,1.706222,1.419662
组合23,0.013085,1.482091,0.459218,0.020757,1.709878,0.695066,0.031684,1.113442,1.410923
