### 数据分区及建模

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import statsmodels.api as sm
import pandas as pd
import numpy as np
import pickle

# Load the prepared dataset and give it a clean 0..n-1 positional index.
pdata = pd.read_csv("data/pdata.csv")
pdata = pdata.reset_index(drop=True)

# Positional hold-out split: the first N_TRAIN rows are used for fitting and
# every remaining row is held out for evaluation.
# The previous label-based slices (`.loc[0:149]` and `.loc[149:]`) are inclusive
# on both ends, so row 149 ended up in BOTH sets. `iloc` slices are half-open,
# which keeps the two sets disjoint while leaving the training rows unchanged.
N_TRAIN = 150
train_set = pdata.iloc[:N_TRAIN]
test_set = pdata.iloc[N_TRAIN:]
assert len(train_set) + len(test_set) == len(pdata), "train/test split must partition pdata"

# Persist the hold-out rows so they can be reused outside this notebook.
test_set.to_csv("data/test_set.csv", index=False)

# Design matrix for the full specification. Because a plain ndarray is passed,
# the summary labels the regressors positionally:
#   x1=month, x2=r1_value, x3=r4_value, x4=r6_value, x5=r8_value, x6=r12_value
x = np.column_stack((train_set.month, train_set.r1_value, train_set.r4_value,
                     train_set.r6_value, train_set.r8_value, train_set.r12_value))
X = sm.add_constant(x)  # prepend the intercept column ("const" in the summary)

# Ordinary least squares fit of `value` on the regressors above.
model = sm.OLS(train_set.value, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,value,R-squared:,0.853
Model:,OLS,Adj. R-squared:,0.847
Method:,Least Squares,F-statistic:,138.0
Date:,"Wed, 15 Jul 2020",Prob (F-statistic):,6.39e-57
Time:,08:09:46,Log-Likelihood:,-1354.1
No. Observations:,150,AIC:,2722.0
Df Residuals:,143,BIC:,2743.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2630.7190,1907.483,1.379,0.170,-1139.788,6401.226
x1,410.6209,93.913,4.372,0.000,224.984,596.257
x2,-0.0067,0.033,-0.201,0.841,-0.072,0.059
x3,0.0730,0.035,2.076,0.040,0.004,0.142
x4,-0.0197,0.037,-0.539,0.591,-0.092,0.053
x5,0.1035,0.039,2.654,0.009,0.026,0.181
x6,0.6617,0.060,11.103,0.000,0.544,0.780

0,1,2,3
Omnibus:,1.331,Durbin-Watson:,1.868
Prob(Omnibus):,0.514,Jarque-Bera (JB):,1.01
Skew:,0.19,Prob(JB):,0.604
Kurtosis:,3.132,Cond. No.,648000.0


### 移除不显著的 x2（r1_value）和 x4（r6_value），重新构建模型

In [3]:
# Reduced specification: r1_value and r6_value are left out, keeping only
# month, r4_value, r8_value and r12_value. In the summary these appear
# positionally as x1..x4 because the design matrix is a plain ndarray.
x = np.column_stack(
    [train_set[name] for name in ("month", "r4_value", "r8_value", "r12_value")]
)
X = sm.add_constant(x)  # intercept column, reported as "const"

# Refit OLS on the training rows with the smaller regressor set.
model = sm.OLS(train_set["value"], X).fit()
model.summary()

0,1,2,3
Dep. Variable:,value,R-squared:,0.852
Model:,OLS,Adj. R-squared:,0.848
Method:,Least Squares,F-statistic:,209.3
Date:,"Wed, 15 Jul 2020",Prob (F-statistic):,3.65e-59
Time:,08:09:46,Log-Likelihood:,-1354.3
No. Observations:,150,AIC:,2719.0
Df Residuals:,145,BIC:,2734.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1741.1626,1201.878,1.449,0.150,-634.302,4116.627
x1,425.1202,86.591,4.910,0.000,253.978,596.263
x2,0.0770,0.034,2.261,0.025,0.010,0.144
x3,0.1085,0.038,2.893,0.004,0.034,0.183
x4,0.6573,0.059,11.222,0.000,0.542,0.773

0,1,2,3
Omnibus:,1.428,Durbin-Watson:,1.893
Prob(Omnibus):,0.49,Jarque-Bera (JB):,1.106
Skew:,0.2,Prob(JB):,0.575
Kurtosis:,3.128,Cond. No.,319000.0


### 加入 r4_value 的二次项、三次项，重新建模

In [4]:
# Polynomial specification: r4_value enters linearly, squared and cubed,
# alongside month, r8_value and r12_value. Column order (x1..x6 in the summary):
#   month, r4_value, r4_value**2, r4_value**3, r8_value, r12_value
# NOTE(review): the recorded summary for this fit reports a condition number of
# about 1.65e15, which suggests the raw power terms are badly scaled. Centring
# or standardising r4_value first may help, but it would change the fitted
# coefficients and the pickled model, so it is not done here.
r4 = train_set["r4_value"]
x = np.column_stack(
    [
        train_set["month"],
        r4,
        r4 ** 2,
        r4 ** 3,
        train_set["r8_value"],
        train_set["r12_value"],
    ]
)
X = sm.add_constant(x)  # intercept column, reported as "const"
model = sm.OLS(train_set["value"], X).fit()

# Serialise the fitted results object to disk for later reuse.
with open("data/model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

model.summary()

0,1,2,3
Dep. Variable:,value,R-squared:,0.859
Model:,OLS,Adj. R-squared:,0.854
Method:,Least Squares,F-statistic:,175.0
Date:,"Wed, 15 Jul 2020",Prob (F-statistic):,2.4200000000000003e-59
Time:,08:09:46,Log-Likelihood:,-1351.0
No. Observations:,150,AIC:,2714.0
Df Residuals:,144,BIC:,2732.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1528,0.030,-5.146,0.000,-0.212,-0.094
x1,438.3667,85.160,5.148,0.000,270.042,606.691
x2,0.5917,0.176,3.371,0.001,0.245,0.939
x3,-3.141e-05,1.07e-05,-2.938,0.004,-5.25e-05,-1.03e-05
x4,5.215e-10,1.8e-10,2.893,0.004,1.65e-10,8.78e-10
x5,0.1065,0.037,2.893,0.004,0.034,0.179
x6,0.6609,0.057,11.503,0.000,0.547,0.774

0,1,2,3
Omnibus:,0.868,Durbin-Watson:,1.867
Prob(Omnibus):,0.648,Jarque-Bera (JB):,0.852
Skew:,0.179,Prob(JB):,0.653
Kurtosis:,2.913,Cond. No.,1650000000000000.0
