In [1]:
# Reload imported modules automatically before each cell is executed
# (mode 2 = all modules), so edits to project code are picked up live.
%load_ext autoreload
%autoreload 2

# Load the rpy2 IPython extension (R magics).
%load_ext rpy2.ipython



In [11]:
import datetime as dt

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm
from IPython.display import Markdown
from plotly.offline import iplot

from src.stats import LinearRegression, add_constant

# Исходные данные

In [12]:
# Raw input table kept as text: one header line followed by 40 quarterly rows
# (2007 Q1 - 2016 Q4). Fields are tab-separated; the last field is the IP value
# written with a decimal comma. The header columns are: row number, year,
# quarter, IP.
word_table = """
№ 	год 	квартал 	IP
1	2007	1	137,73
2	2007	2	140,21
3	2007	3	145,53
4	2007	4	154,41
5	2008	1	146,07
6	2008	2	146,37
7	2008	3	148,42
8	2008	4	149,4
9	2009	1	123,41
10	2009	2	126,5
11	2009	3	134,09
12	2009	4	143,07
13	2010	1	133,2
14	2010	2	135,73
15	2010	3	139,67
16	2010	4	153,49
17	2011	1	139,83
18	2011	2	143,89
19	2011	3	147,05
20	2011	4	159,11
21	2012	1	145,91
22	2012	2	147,07
23	2012	3	151,93
24	2012	4	164,08
25	2013	1	144,06
26	2013	2	148,1
27	2013	3	152,69
28	2013	4	166,12
29	2014	1	145,52
30	2014	2	150,76
31	2014	3	154,83
32	2014	4	169,7
33	2015	1	144,92
34	2015	2	143,33
35	2015	3	148,34
36	2015	4	163,18
37	2016	1	143,92
38	2016	2	144,79
39	2016	3	148,12
40	2016	4	166,19
"""

In [13]:
# Parse the IP column from `word_table`: skip the header line, take the last
# tab-separated field of every row and convert the decimal comma to a point.
ip_values = [
    float(row.split('\t')[-1].replace(',', '.'))
    for row in word_table.strip().split('\n')[1:]
]

# Quarter-end index starting with 2007 Q1.
# * An explicit QuarterEnd offset (quarters ending in Mar/Jun/Sep/Dec) is used
#   instead of the 'Q' string alias, which pandas 2.2+ deprecates in favour of
#   'QE'; the offset object is accepted by old and new pandas alike.
# * `periods` is taken from the parsed data, so the index always has exactly
#   one date per table row instead of relying on a hard-coded end date.
series = pd.Series(
    data=ip_values,
    index=pd.date_range(
        start=dt.datetime(2007, 1, 1),
        periods=len(ip_values),
        freq=pd.offsets.QuarterEnd(startingMonth=12),
    ),
    name='IP',
)
series.head()

2007-03-31    137.73
2007-06-30    140.21
2007-09-30    145.53
2007-12-31    154.41
2008-03-31    146.07
Freq: Q-DEC, Name: IP, dtype: float64

# График

In [15]:
# Line chart of the observed IP series; `fig` is extended with more traces in
# later cells.
ip_trace = go.Scatter(x=series.index, y=series.array, name='IP', mode='lines')
fig = go.Figure(data=[ip_trace])
iplot(fig)

# Аддитивная модель сезонных индексов

In [16]:
# Additive seasonal decomposition of the IP series. No period is passed, so
# statsmodels has to take it from the frequency of the series index.
decomposition = sm.tsa.seasonal_decompose(
    x=series,
    model='additive',
)

In [18]:
# Overlay the trend component of the decomposition on the IP chart.
# The trace is appended to the existing figure, so re-running this cell adds
# it again.
decomposition_trend_trace = go.Scatter(
    x=series.index,
    y=decomposition.trend,
    name='seasonal decompose',
)
fig = fig.add_trace(decomposition_trend_trace)
iplot(fig)

2007-03-31    137.73
2007-06-30    140.21
2007-09-30    145.53
2007-12-31    154.41
2008-03-31    146.07
2008-06-30    146.37
2008-09-30    148.42
2008-12-31    149.40
2009-03-31    123.41
2009-06-30    126.50
2009-09-30    134.09
2009-12-31    143.07
2010-03-31    133.20
2010-06-30    135.73
2010-09-30    139.67
2010-12-31    153.49
2011-03-31    139.83
2011-06-30    143.89
2011-09-30    147.05
2011-12-31    159.11
2012-03-31    145.91
2012-06-30    147.07
2012-09-30    151.93
2012-12-31    164.08
2013-03-31    144.06
2013-06-30    148.10
2013-09-30    152.69
2013-12-31    166.12
2014-03-31    145.52
2014-06-30    150.76
2014-09-30    154.83
2014-12-31    169.70
2015-03-31    144.92
2015-06-30    143.33
2015-09-30    148.34
2015-12-31    163.18
2016-03-31    143.92
2016-06-30    144.79
2016-09-30    148.12
2016-12-31    166.19
Freq: Q-DEC, Name: IP, dtype: float64


# Пример

In [70]:
# Small worked example: 16 quarterly observations starting with 2020 Q1.
example_values = [6, 4.4, 5, 9, 7.2, 4.8, 6, 10, 8, 5.6, 6.4, 11, 9, 6.6, 7, 10.8]

# An explicit QuarterEnd offset (quarters ending in Mar/Jun/Sep/Dec) replaces
# the 'Q' string alias, which pandas 2.2+ deprecates in favour of 'QE'; the
# offset object works on both old and new pandas. The number of periods is
# derived from the data so values and index cannot get out of step.
example = pd.Series(
    data=example_values,
    index=pd.date_range(
        start=dt.datetime(2020, 1, 1),
        periods=len(example_values),
        freq=pd.offsets.QuarterEnd(startingMonth=12),
    ),
)
example.head()

2020-03-31    6.0
2020-06-30    4.4
2020-09-30    5.0
2020-12-31    9.0
2021-03-31    7.2
Freq: Q-DEC, dtype: float64

In [71]:
# Line chart of the example series; later cells add fitted components to it.
observed_trace = go.Scatter(x=example.index, y=example.array, name='Y', mode='lines')
example_fig = go.Figure(data=[observed_trace])
iplot(example_fig)

In [72]:
# Seasonal decomposition of the example series ('additive' is the library
# default, spelled out here for the reader).
example_decompose = sm.tsa.seasonal_decompose(
    x=example,
    model='additive',
)

In [78]:
# Show the estimated seasonal component (the same four values repeat each year).
seasonal_component = example_decompose.seasonal
print(seasonal_component)

2020-03-31    0.581250
2020-06-30   -1.977083
2020-09-30   -1.293750
2020-12-31    2.689583
2021-03-31    0.581250
2021-06-30   -1.977083
2021-09-30   -1.293750
2021-12-31    2.689583
2022-03-31    0.581250
2022-06-30   -1.977083
2022-09-30   -1.293750
2022-12-31    2.689583
2023-03-31    0.581250
2023-06-30   -1.977083
2023-09-30   -1.293750
2023-12-31    2.689583
Freq: Q-DEC, Name: seasonal, dtype: float64


In [73]:
# Add the decomposition's trend component to the example chart.
example_trend_trace = go.Scatter(
    x=example.index,
    y=example_decompose.trend,
    name='decompose trend',
)
example_fig = example_fig.add_trace(example_trend_trace)
iplot(example_fig)

In [74]:
# Linear trend for the example: regress the seasonally adjusted series on a
# constant and the time index 1..n.
time_index = np.arange(1, len(example) + 1)
X = sm.add_constant(time_index)
Y = example - example_decompose.seasonal
example_linear = sm.OLS(endog=Y, exog=X).fit()

In [75]:
# Text report of the trend regression fitted on the example series.
example_report = example_linear.summary()
print(example_report)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.915
Model:                            OLS   Adj. R-squared:                  0.909
Method:                 Least Squares   F-statistic:                     150.6
Date:                Tue, 09 Nov 2021   Prob (F-statistic):           7.00e-09
Time:                        21:02:45   Log-Likelihood:                -1.2708
No. Observations:                  16   AIC:                             6.542
Df Residuals:                      14   BIC:                             8.087
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.7154      0.147     38.916      0.0


kurtosistest only valid for n>=20 ... continuing anyway, n=16



In [76]:
# No new regressors are supplied, so the fitted model is evaluated on the data
# it was estimated with.
predicts = example_linear.predict(exog=None)

In [77]:
# Add the fitted linear trend (T) and the trend plus seasonal component (T + S)
# to the example chart.
trend_trace = go.Scatter(x=example.index, y=predicts, name='T')
trend_seasonal_trace = go.Scatter(
    x=example.index,
    y=predicts + example_decompose.seasonal,
    name='T + S',
)
example_fig = example_fig.add_trace(trend_trace)
example_fig = example_fig.add_trace(trend_seasonal_trace)
iplot(example_fig)

# Аддитивная линейная модель роста

In [30]:
# Regression data set: the IP series as a column plus a time counter t = 1..n.
MODEL_DATA = series.to_frame().assign(t=range(1, len(series) + 1))
MODEL_DATA

Unnamed: 0,IP,t
2007-03-31,137.73,1
2007-06-30,140.21,2
2007-09-30,145.53,3
2007-12-31,154.41,4
2008-03-31,146.07,5
2008-06-30,146.37,6
2008-09-30,148.42,7
2008-12-31,149.4,8
2009-03-31,123.41,9
2009-06-30,126.5,10


In [32]:
# Seasonal dummy variables d1..d3: dq is 1 when the row's date falls in
# calendar quarter q and 0 otherwise. The fourth quarter is the baseline
# (all three dummies are zero).
#
# The dummies are read off the date index instead of being generated from row
# positions, so they stay correct even if the data does not start in Q1, and
# re-running the cell always yields the same columns.
quarters = MODEL_DATA.index.quarter
for q in (1, 2, 3):
    MODEL_DATA[f'd{q}'] = (quarters == q).astype('int64')

MODEL_DATA.head()

Unnamed: 0,IP,t,d1,d2,d3
2007-03-31,137.73,1,1,0,0
2007-06-30,140.21,2,0,1,0
2007-09-30,145.53,3,0,0,1
2007-12-31,154.41,4,0,0,0
2008-03-31,146.07,5,1,0,0


In [35]:

# Response variable, kept as a one-column DataFrame.
Y = MODEL_DATA.loc[:, ['IP']]
Y.head()

Unnamed: 0,IP
2007-03-31,137.73
2007-06-30,140.21
2007-09-30,145.53
2007-12-31,154.41
2008-03-31,146.07


In [36]:
# Design matrix: time trend and the three quarter dummies, passed through the
# project's add_constant helper (the result carries a leading `const` column).
regressor_names = ['t', 'd1', 'd2', 'd3']
X = add_constant(MODEL_DATA[regressor_names])
X.head()

Unnamed: 0,const,t,d1,d2,d3
2007-03-31,1.0,1,1,0,0
2007-06-30,1.0,2,0,1,0
2007-09-30,1.0,3,0,0,1
2007-12-31,1.0,4,0,0,0
2008-03-31,1.0,5,1,0,0


In [37]:
# Build the project's LinearRegression (src.stats) from the response Y and the
# design matrix X.
# NOTE(review): presumably the model is fitted on construction, since the next
# cells call model.summary() and model.results without an explicit fit step.
model = LinearRegression(Y, X)

In [38]:
# Text report of the trend + seasonal-dummy regression.
regression_report = model.summary()
print(regression_report)

                            OLS Regression Results                            
Dep. Variable:                     IP   R-squared:                       0.676
Model:                            OLS   Adj. R-squared:                  0.638
Method:                 Least Squares   F-statistic:                     18.21
Date:                Mon, 08 Nov 2021   Prob (F-statistic):           3.58e-08
Time:                        20:13:14   Log-Likelihood:                -126.14
No. Observations:                  40   AIC:                             262.3
Df Residuals:                      35   BIC:                             270.7
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        151.1477      2.652     56.990      0.0

## Прогноз

In [81]:
# Regressors for the forecast horizon (quarter ends of 2017-2018).
#
# An explicit QuarterEnd offset (quarters ending in Mar/Jun/Sep/Dec) is used
# instead of the 'Q' string alias, which pandas 2.2+ deprecates in favour of
# 'QE'; the offset object works on both old and new pandas.
forecast_index = pd.date_range(
    start=dt.date(year=2017, month=1, day=1),
    end=dt.date(year=2018, month=12, day=31),
    freq=pd.offsets.QuarterEnd(startingMonth=12),
)

# The time counter continues right after the last value used for estimation.
next_t = int(X['t'].iloc[-1]) + 1

# Quarter dummies are derived from the dates themselves, so the frame is valid
# for any horizon length and starting quarter (no tiling of a fixed 4-row
# pattern), and every column is numeric from the start.
exog = pd.DataFrame(
    {
        'const': 1,
        't': np.arange(next_t, next_t + len(forecast_index)),
        'd1': forecast_index.quarter == 1,
        'd2': forecast_index.quarter == 2,
        'd3': forecast_index.quarter == 3,
    },
    index=forecast_index,
)
# Same column order as the estimation matrix X, all columns as integers.
exog = exog[list(X.columns)].astype(int)
exog

Unnamed: 0,const,t,d1,d2,d3
2017-03-31,1,41,1,0,0
2017-06-30,1,42,0,1,0
2017-09-30,1,43,0,0,1
2017-12-31,1,44,0,0,0
2018-03-31,1,45,1,0,0
2018-06-30,1,46,0,1,0
2018-09-30,1,47,0,0,1
2018-12-31,1,48,0,0,0


In [82]:
# Forecast for the rows of `exog` using the fitted results object exposed by
# the project model. This rebinds `predicts`, which an earlier cell used for
# the example's fitted values.
# NOTE(review): model.results is presumably a statsmodels results object - confirm in src.stats.
predicts = model.results.predict(exog)
predicts

2017-03-31    148.184333
2017-06-30    150.402333
2017-09-30    154.794333
2017-12-31    166.602333
2018-03-31    149.589303
2018-06-30    151.807303
2018-09-30    156.199303
2018-12-31    168.007303
Freq: Q-DEC, dtype: float64

In [83]:
# Start a fresh figure with the observed IP series (replaces the earlier `fig`).
fig = px.line(
    x=series.index,
    y=series.array,
)

In [84]:
# Prepend the last observed point to the forecast so the forecast line starts
# where the observed line ends.
anchor_date, anchor_value = series.index[-1], series.iloc[-1]
plot_predicts = predicts.copy()
plot_predicts.loc[anchor_date] = anchor_value
plot_predicts = plot_predicts.sort_index()

In [85]:
# Draw the forecast (including the anchoring last observation) as a second line.
forecast_trace = go.Scatter(
    x=plot_predicts.index,
    y=plot_predicts.array,
    mode='lines',
    name='Прогноз',
)
fig = fig.add_trace(forecast_trace)

In [86]:
# Render the observed series together with the forecast.
iplot(figure_or_data=fig)

In [1]:
import numpy as np

In [7]:
# Three rows of a small integer matrix used for manual row reduction below.
matrix_rows = [
    [0, 3, 2, 1, 0, 2],
    [0, 8, 13, 0, 1, 6],
    [1, 1, 2, 0, 0, 1],
]
row_1, row_2, row_3 = (np.array(entries) for entries in matrix_rows)

In [8]:
# Subtract 8/3 of row_1 from row_2, which zeroes row_2's second entry (8 - 3 * 8/3).
row_2 - 8 * row_1 / 3

array([ 0.        ,  0.        ,  7.66666667, -2.66666667,  1.        ,
        0.66666667])

In [9]:
# Subtract 1/3 of row_1 from row_3, which zeroes row_3's second entry (1 - 3 * 1/3).
row_3 - row_1 / 3

array([ 1.        ,  0.        ,  1.33333333, -0.33333333,  0.        ,
        0.33333333])