# Small example of time series analysis with multiple groups
This notebook reproduces the results from a [Stack Overflow][1] question.

[1]: https://stackoverflow.com/questions/55545501/how-to-perform-time-series-analysis-that-contains-multiple-groups-in-python-usin

In [1]:
import pandas as pd
from prophet import Prophet

In [2]:

# Toy dataset: the same six month-start dates (Jan-Jun 2017) observed for groups A, B and C.
# NOTE(review): the amounts are string literals, so 'Amount' ends up with object dtype.
# Numeric literals would be cleaner, but that changes the dtypes later cells work with.
month_starts = ['2017-01-01', '2017-02-01', '2017-03-01',
                '2017-04-01', '2017-05-01', '2017-06-01']
amounts_by_group = {
    'A': ['12.1', '13', '15', '17', '21', '23'],
    'B': ['44.5', '43.1', '45.9', '44.5', '43.1', '45.9'],
    'C': ['150', '147', '143', '137', '136', '129'],
}

# Long format: one row per (date, group) pair, groups stacked one after another.
data = {
    'Date': month_starts * len(amounts_by_group),
    'Group': [label for label, values in amounts_by_group.items() for _ in values],
    'Amount': [value for values in amounts_by_group.values() for value in values],
}
df = pd.DataFrame(data)
df

Unnamed: 0,Date,Group,Amount
0,2017-01-01,A,12.1
1,2017-02-01,A,13.0
2,2017-03-01,A,15.0
3,2017-04-01,A,17.0
4,2017-05-01,A,21.0
5,2017-06-01,A,23.0
6,2017-01-01,B,44.5
7,2017-02-01,B,43.1
8,2017-03-01,B,45.9
9,2017-04-01,B,44.5


# Наименования колонок, как принято в prophet

In [3]:
# Switch to the column names used with Prophet: 'ds' for the date, 'y' for the value.
df = df.rename(
    columns=dict(Date='ds', Amount='y'),
)
df.head()

Unnamed: 0,ds,Group,y
0,2017-01-01,A,12.1
1,2017-02-01,A,13.0
2,2017-03-01,A,15.0
3,2017-04-01,A,17.0
4,2017-05-01,A,21.0


Познакомимся с параметрами по умолчанию в Prophet.

In [4]:
# Show the Prophet constructor signature and its default arguments (IPython help syntax).
?Prophet

[1;31mInit signature:[0m
[0mProphet[0m[1;33m([0m[1;33m
[0m    [0mgrowth[0m[1;33m=[0m[1;34m'linear'[0m[1;33m,[0m[1;33m
[0m    [0mchangepoints[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mn_changepoints[0m[1;33m=[0m[1;36m25[0m[1;33m,[0m[1;33m
[0m    [0mchangepoint_range[0m[1;33m=[0m[1;36m0.8[0m[1;33m,[0m[1;33m
[0m    [0myearly_seasonality[0m[1;33m=[0m[1;34m'auto'[0m[1;33m,[0m[1;33m
[0m    [0mweekly_seasonality[0m[1;33m=[0m[1;34m'auto'[0m[1;33m,[0m[1;33m
[0m    [0mdaily_seasonality[0m[1;33m=[0m[1;34m'auto'[0m[1;33m,[0m[1;33m
[0m    [0mholidays[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mseasonality_mode[0m[1;33m=[0m[1;34m'additive'[0m[1;33m,[0m[1;33m
[0m    [0mseasonality_prior_scale[0m[1;33m=[0m[1;36m10.0[0m[1;33m,[0m[1;33m
[0m    [0mholidays_prior_scale[0m[1;33m=[0m[1;36m10.0[0m[1;33m,[0m[1;33m
[0m    [0mchangepoint_prior_scale[0m[1;33m=[0m[1;36m0.05

Assuming that your groups are independent from each other and you want to get one prediction for each group, you can group the dataframe by "Group" column and run forecast for each group

Проведем некоторые эксперименты

In [5]:
# Experiment: check that editing a group taken from the groupby does not change `df`.
grouped = df.groupby('Group')
for g in grouped.groups:
    # .copy() makes the group an explicitly independent frame, so the edit below can
    # never reach `df` and pandas has no reason to warn about writing to a slice.
    group = grouped.get_group(g).copy()
    print(group)
    # One .loc[row, column] assignment writes into `group` itself. The chained form
    # group.loc[1]['y'] = ... may only update a temporary row object and be lost.
    group.loc[1, 'y'] = 25
    print(type(group))
    print(group.shape)
    print(group)
    print(df)
    # Only the first group is inspected; row label 1 belongs to that group.
    break
    
    

           ds Group     y
0  2017-01-01     A  12.1
1  2017-02-01     A    13
2  2017-03-01     A    15
3  2017-04-01     A    17
4  2017-05-01     A    21
5  2017-06-01     A    23
<class 'pandas.core.frame.DataFrame'>
(6, 3)
           ds Group     y
0  2017-01-01     A  12.1
1  2017-02-01     A    25
2  2017-03-01     A    15
3  2017-04-01     A    17
4  2017-05-01     A    21
5  2017-06-01     A    23
            ds Group     y
0   2017-01-01     A  12.1
1   2017-02-01     A    13
2   2017-03-01     A    15
3   2017-04-01     A    17
4   2017-05-01     A    21
5   2017-06-01     A    23
6   2017-01-01     B  44.5
7   2017-02-01     B  43.1
8   2017-03-01     B  45.9
9   2017-04-01     B  44.5
10  2017-05-01     B  43.1
11  2017-06-01     B  45.9
12  2017-01-01     C   150
13  2017-02-01     C   147
14  2017-03-01     C   143
15  2017-04-01     C   137
16  2017-05-01     C   136
17  2017-06-01     C   129


При создании датафрейма для будущих прогнозов укажем `freq='MS'` (month start), чтобы шаг прогноза был равен одному месяцу:

```
make_future_dataframe(periods=4, freq='MS')
```

In [21]:
# Fit one independent Prophet model per group and put the forecasts side by side.
grouped = df.groupby('Group')

# Columns taken from each forecast: the point forecast and its lower/upper bounds.
# To keep only the point forecasts, restrict this list to ['yhat']
# (the original Stack Overflow answer names those columns 'yhat_' + g).
forecast_columns = ['yhat', 'yhat_lower', 'yhat_upper']

per_group = []
for g in grouped.groups:
    # get_group returns the rows that belong to this group
    group = grouped.get_group(g)
    m = Prophet()
    m.fit(group)
    # freq='MS' extends the history by 4 month-start dates
    future = m.make_future_dataframe(periods=4, freq='MS')
    forecast = m.predict(future)
    # Select the needed columns BEFORE combining and rename them after the group.
    # Merging the complete forecast frames made the remaining columns, which every
    # forecast shares, collide and receive _x/_y suffixes; with four or more groups
    # the suffixed names repeat and the merge fails.
    per_group.append(
        forecast.set_index('ds')[forecast_columns].rename(
            columns={'yhat': g, 'yhat_lower': g + '_lower', 'yhat_upper': g + '_upper'}
        )
    )

# Align all groups on the 'ds' index in a single step (outer join on dates);
# the columns come out as g, g_lower, g_upper for each group in turn.
final = pd.concat(per_group, axis=1).sort_index()


12:46:22 - cmdstanpy - INFO - Chain [1] start processing
12:46:22 - cmdstanpy - INFO - Chain [1] done processing
12:46:23 - cmdstanpy - INFO - Chain [1] start processing
12:46:23 - cmdstanpy - INFO - Chain [1] done processing
12:46:24 - cmdstanpy - INFO - Chain [1] start processing
12:46:24 - cmdstanpy - INFO - Chain [1] done processing


In [22]:
# Turn the 'ds' index back into a regular column. Rebinding instead of inplace=True,
# together with the guard, keeps this cell safe to re-run: a second reset would
# otherwise add a stray 'index' column.
if 'ds' not in final.columns:
    final = final.reset_index()

In [23]:
# Combined forecast table: one row per date, with the forecast and its bounds for every group.
final

Unnamed: 0,ds,A,A_lower,A_upper,B,B_lower,B_upper,C,C_lower,C_upper
0,2017-01-01,11.101709,10.140668,12.063294,44.107559,42.760082,45.628511,150.615,149.256361,151.955897
1,2017-02-01,13.472188,12.542401,14.417521,44.2691,42.956892,45.677807,146.374754,145.015362,147.722764
2,2017-03-01,15.613267,14.636046,16.611256,44.415007,42.991298,45.792087,142.544823,141.145766,143.872275
3,2017-04-01,17.984162,17.031709,18.920805,44.576548,43.201014,45.94119,138.30371,136.938025,139.598292
4,2017-05-01,20.278577,19.273253,21.169566,44.732878,43.364655,46.22233,134.199407,132.930997,135.587629
5,2017-06-01,22.649473,21.704069,23.5977,44.894418,43.447216,46.336592,129.958294,128.743202,131.309441
6,2017-07-01,24.943888,23.992245,25.858442,45.050748,43.533124,46.430624,125.85399,124.50678,127.267223
7,2017-08-01,27.314784,26.401845,28.290046,45.212288,43.847252,46.658172,121.612877,120.151802,123.054427
8,2017-09-01,29.685679,28.774171,30.550774,45.373829,44.010849,46.758857,117.371764,115.948936,118.639647
9,2017-10-01,31.980094,31.049156,32.933598,45.530159,44.101279,46.969994,113.267461,111.967327,114.56594
