In [572]:
# Load IPython's autoreload extension and switch it to mode 2.
# NOTE(review): the import cell below only pulls in third-party packages, so
# autoreload presumably has nothing project-local to reload here — confirm before removing.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [573]:
import datetime as dt

import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from plotly.offline import iplot

# Исходные данные

In [574]:
# Raw quarterly data pasted as tab-separated text: row number, year, quarter, IP.
# IP values use a decimal comma; the parsing cell takes the last tab-separated
# field of every row after the header and converts the comma to a dot.
word_table = """
№ 	год 	квартал 	IP
1	2007	1	137,73
2	2007	2	140,21
3	2007	3	145,53
4	2007	4	154,41
5	2008	1	146,07
6	2008	2	146,37
7	2008	3	148,42
8	2008	4	149,4
9	2009	1	123,41
10	2009	2	126,5
11	2009	3	134,09
12	2009	4	143,07
13	2010	1	133,2
14	2010	2	135,73
15	2010	3	139,67
16	2010	4	153,49
17	2011	1	139,83
18	2011	2	143,89
19	2011	3	147,05
20	2011	4	159,11
21	2012	1	145,91
22	2012	2	147,07
23	2012	3	151,93
24	2012	4	164,08
25	2013	1	144,06
26	2013	2	148,1
27	2013	3	152,69
28	2013	4	166,12
29	2014	1	145,52
30	2014	2	150,76
31	2014	3	154,83
32	2014	4	169,7
33	2015	1	144,92
34	2015	2	143,33
35	2015	3	148,34
36	2015	4	163,18
37	2016	1	143,92
38	2016	2	144,79
39	2016	3	148,12
40	2016	4	166,19
"""

In [575]:
# Skip the header row, then take the last tab-separated field of each data row
# and turn its decimal comma into a dot so float() can parse it.
data_rows = word_table.strip().split('\n')[1:]
ip_values = [
    float(line.rsplit('\t', 1)[-1].replace(',', '.'))
    for line in data_rows
]

# One quarter-end timestamp per observation, 2007Q1 through 2016Q4.
quarter_ends = pd.date_range(
    start=dt.datetime(2007, 1, 1),
    end=dt.datetime(2016, 12, 31),
    freq='Q',
)

series = pd.Series(ip_values, index=quarter_ends, name='IP')
series.head()

2007-03-31    137.73
2007-06-30    140.21
2007-09-30    145.53
2007-12-31    154.41
2008-03-31    146.07
Freq: Q-DEC, Name: IP, dtype: float64

# График

In [576]:
# Base figure: the observed IP series drawn as a single line.
# Later cells add further traces via fig.add_scatter and reassign `fig`.
fig = px.line(x=series.index, y=series.array)
iplot(fig)

# Линейная модель сезонных индексов

In [577]:
# Additive decomposition of the quarterly series; the result's `trend` and
# `seasonal` components are used by the following cells.
decomposition = sm.tsa.seasonal_decompose(
    x=series,
    model='additive',
)

In [578]:
# Overlay the trend component of the decomposition on the existing figure.
# NOTE(review): `fig` is carried over from the previous plotting cell, so re-running
# this cell presumably adds a second copy of the trace — confirm, and re-run from the
# px.line cell if the legend shows duplicates.
fig = fig.add_scatter(x=series.index, y=decomposition.trend, name='decompose trend')
iplot(fig)

In [579]:
# Remove the seasonal component, then regress what is left on a linear time
# index (1..n) plus an intercept column.
Y = series.sub(decomposition.seasonal)
X = pd.DataFrame(
    {'X': np.arange(1, len(series) + 1), 'const': 1},
    index=Y.index,
)
seasonal_linear_results = sm.OLS(Y, X).fit()

In [580]:
# Text report of the OLS fit stored in `seasonal_linear_results`.
print(seasonal_linear_results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.338
Model:                            OLS   Adj. R-squared:                  0.320
Method:                 Least Squares   F-statistic:                     19.37
Date:                Wed, 10 Nov 2021   Prob (F-statistic):           8.44e-05
Time:                        02:03:11   Log-Likelihood:                -126.17
No. Observations:                  40   AIC:                             256.3
Df Residuals:                      38   BIC:                             259.7
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
X              0.3508      0.080      4.401      0.0

## Прогноз

In [581]:
# Forecast horizon: two years of quarterly observations.
periods = 2 * 4

# Regressors for the forecast quarters: the time index continues after the last
# in-sample observation and the intercept column stays at 1.
n_obs = len(X)
future_exog = pd.DataFrame(
    {
        'const': np.repeat(1, periods),
        'X': np.arange(n_obs + 1, n_obs + periods + 1),
    },
    index=pd.date_range(dt.datetime(2017, 1, 1), periods=periods, freq='Q'),
)

# In-sample rows first, forecast rows after; concat lines the columns up by name.
exog = pd.concat([X, future_exog])
exog.head()

Unnamed: 0,X,const
2007-03-31,1,1
2007-06-30,2,1
2007-09-30,3,1
2007-12-31,4,1
2008-03-31,5,1


In [582]:
# Trend-only prediction over the in-sample and forecast rows of `exog`.
linear_predicts = seasonal_linear_results.predict(exog)

# The first four seasonal values form one full yearly pattern; repeat it enough
# times to cover every predicted quarter and cut off the surplus.
arr = decomposition.seasonal.iloc[:4].array
n_predicted = len(linear_predicts)
seasonal = np.tile(np.asarray(arr), n_predicted // 4 + 1)[:n_predicted]

# Trend plus seasonal pattern.
seasonal_predicts = linear_predicts + seasonal

In [583]:
# Add both forecasts to the figure: the trend alone, then trend plus seasonality.
forecast_traces = (
    ('T[2007, 2018]', linear_predicts),
    ('T+S[2007, 2018]', seasonal_predicts),
)
for trace_name, forecast in forecast_traces:
    fig = fig.add_scatter(
        x=forecast.index,
        y=forecast.array,
        name=trace_name,
        mode='lines',
    )

In [584]:
# Render the figure with the traces added so far.
iplot(fig)

# Линейная модель роста

In [585]:
# Working frame for the growth model: the IP column plus a 1-based time index.
df = series.to_frame().assign(t=range(1, len(series) + 1))
df.head()

Unnamed: 0,IP,t
2007-03-31,137.73,1
2007-06-30,140.21,2
2007-09-30,145.53,3
2007-12-31,154.41,4
2008-03-31,146.07,5


In [586]:
# Quarter dummies keyed on row position: d1 marks rows 0, 4, 8, ..., d2 the rows
# right after them, d3 the rows after those. Every fourth row has all three at 0.
n_rows = df.shape[0]
for offset, column in enumerate(('d1', 'd2', 'd3')):
    ones_zeroes = pd.Series(
        [int(i % 4 == offset) for i in range(n_rows)],
        index=df.index,
    )
    df[column] = ones_zeroes

df.head()

Unnamed: 0,IP,t,d1,d2,d3
2007-03-31,137.73,1,1,0,0
2007-06-30,140.21,2,0,1,0
2007-09-30,145.53,3,0,0,1
2007-12-31,154.41,4,0,0,0
2008-03-31,146.07,5,1,0,0


In [587]:
# Dependent variable, kept as a one-column frame.
Y = df.loc[:, ['IP']]
Y.head()

Unnamed: 0,IP
2007-03-31,137.73
2007-06-30,140.21
2007-09-30,145.53
2007-12-31,154.41
2008-03-31,146.07


In [588]:
# Regressors for the growth model: time index, three quarter dummies and an
# intercept column. The constant is added with .assign() so the column is created
# on a new frame rather than by assigning into a selection taken from `df`, which
# is what triggers pandas' SettingWithCopyWarning.
X = df[['t', 'd1', 'd2', 'd3']].assign(const=1)
X.head()

Unnamed: 0,t,d1,d2,d3,const
2007-03-31,1,1,0,0,1
2007-06-30,2,0,1,0,1
2007-09-30,3,0,0,1,1
2007-12-31,4,0,0,0,1
2008-03-31,5,1,0,0,1


In [589]:
# Ordinary least squares fit of IP on the trend, quarter dummies and intercept.
model = sm.OLS(endog=Y, exog=X).fit()

In [590]:
# Text report of the OLS fit stored in `model`.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                     IP   R-squared:                       0.676
Model:                            OLS   Adj. R-squared:                  0.638
Method:                 Least Squares   F-statistic:                     18.21
Date:                Wed, 10 Nov 2021   Prob (F-statistic):           3.58e-08
Time:                        02:03:12   Log-Likelihood:                -126.14
No. Observations:                  40   AIC:                             262.3
Df Residuals:                      35   BIC:                             270.7
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
t              0.3512      0.083      4.213      0.0

## Прогноз

In [591]:
# Quarter-end dates of the forecast window (2017Q1 .. 2018Q4).
forecast_index = pd.date_range(
    start=dt.date(year=2017, month=1, day=1),
    end=dt.date(year=2018, month=12, day=31),
    freq='Q',
)
n_future = len(forecast_index)

# One year of dummies is a 3x3 identity followed by a zero row for the fourth
# quarter; stack one such block per forecast year.
one_year = np.vstack([np.eye(3), np.zeros(3)])
future = pd.DataFrame(
    np.tile(one_year, (n_future // 4, 1)),
    index=forecast_index,
    columns=['d1', 'd2', 'd3'],
)
future['const'] = 1

# The time index continues right after the last in-sample value.
last_t = X['t'].iloc[-1]
future['t'] = range(last_t + 1, last_t + 1 + n_future)

# Same column order and integer dtype as the in-sample regressors, appended after them.
exog = pd.concat([X, future[X.columns].astype(int)])
exog.tail()

Unnamed: 0,t,d1,d2,d3,const
2017-12-31,44,0,0,0,1
2018-03-31,45,1,0,0,1
2018-06-30,46,0,1,0,1
2018-09-30,47,0,0,1,1
2018-12-31,48,0,0,0,1


In [592]:
# Fitted values for the sample plus forecasts for the appended rows of `exog`.
predicts = model.predict(exog=exog)

In [593]:
# Add the growth-model predictions as a long-dashed line.
fig = fig.add_scatter(
    x=predicts.index,
    y=predicts.array,
    name='модель роста',
    mode='lines',
    line=dict(dash='longdash'),
)

In [594]:
# Render the figure with the traces added so far.
iplot(fig)
