In [1]:
import pandas as pd
import numpy as np
import time
from tqdm import tqdm

from fbprophet import Prophet
from multiprocessing import Pool, cpu_count

Importing plotly failed. Interactive plots will not work.


In [2]:
# Load the training data and rename the date/sales columns to the 'ds'/'y'
# names that the rest of the notebook selects and hands to Prophet.
df_train = pd.read_csv('train.csv').rename(columns=dict(date='ds', sales='y'))

In [3]:
# Show the column labels to confirm the rename to 'ds' and 'y' took effect.
df_train.columns

Index(['ds', 'store', 'item', 'y'], dtype='object')

In [4]:
def rnd_timeseries(store, item):
    """Return the ``ds``/``y`` rows of ``df_train`` for one store/item pair.

    Rows keep their original order and index labels; a pair that does not
    occur in ``df_train`` yields an empty frame with the same two columns.
    """
    is_match = (df_train.store == store) & (df_train.item == item)
    return df_train.loc[is_match, ['ds', 'y']]

In [5]:
# Unique (store, item) pairs in order of first appearance in df_train.
# dict.fromkeys de-duplicates while preserving insertion order, so positions in
# this list (and in anything built from it) are stable across kernel restarts,
# unlike iterating a set, whose order is arbitrary.
ls_store_item = list(dict.fromkeys(zip(df_train.store, df_train.item)))

In [6]:
%%time
# One ds/y frame per (store, item) pair, in the same order as ls_store_item.
series = [rnd_timeseries(*pair) for pair in ls_store_item]

CPU times: user 3.44 s, sys: 93.8 ms, total: 3.53 s
Wall time: 3.52 s


In [7]:
def run_prophet(history_pd, periods=90, freq='d'):
    """Fit a Prophet model on one series and return its forecast frame.

    Parameters
    ----------
    history_pd : DataFrame
        Training data handed unchanged to ``Prophet.fit``. In this notebook it
        is the ``ds``/``y`` frame produced by ``rnd_timeseries``.
    periods : int, default 90
        Number of future steps passed to ``make_future_dataframe``.
    freq : str, default 'd'
        Frequency string passed to ``make_future_dataframe``.

    Returns
    -------
    The object returned by ``Prophet.predict`` for the future frame, which is
    built with ``include_history=True``.
    """
    model = Prophet(
        interval_width=0.95,
        growth='linear',
        daily_seasonality=False,
        weekly_seasonality=True,
        yearly_seasonality=True,
        seasonality_mode='multiplicative'
    )

    # fit the model
    model.fit(history_pd)

    # configure predictions: the defaults reproduce the original fixed
    # 90-step daily horizon, so single-argument callers (map/imap) are unaffected
    future_pd = model.make_future_dataframe(
        periods=periods,
        freq=freq,
        include_history=True
    )

    # make predictions
    return model.predict(future_pd)

In [9]:
%%timeit
# Time one fit-and-forecast call on the first series to gauge the per-series cost.
f = run_prophet(series[0])

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


6.95 s ± 510 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 1 hr 20 mins: Sequential baseline with map

In [55]:
# Sequential baseline: forecast every series one after another in this process.
# perf_counter is a monotonic clock, so the elapsed time cannot be skewed by
# system clock adjustments during the long run.
start_time = time.perf_counter()
result = [run_prophet(history_pd) for history_pd in tqdm(series)]
print("--- %s seconds ---" % (time.perf_counter() - start_time))

100%|██████████| 500/500 [1:20:37<00:00,  9.68s/it]  

--- 4837.546568393707 seconds ---





## 44 mins: With multiprocessing

In [51]:
# Forecast all series across one worker process per CPU core.
# The context manager guarantees the pool is torn down even if a worker raises;
# list() drains imap completely before the block exits, so no result is lost.
start_time = time.perf_counter()
with Pool(cpu_count()) as p:
    predictions = list(tqdm(p.imap(run_prophet, series), total=len(series)))
print("--- %s seconds ---" % (time.perf_counter() - start_time))

100%|██████████| 500/500 [44:30<00:00,  5.34s/it]  

--- 2673.760531425476 seconds ---





## 35 mins: With concurrent.futures

In [10]:
import concurrent.futures

In [11]:
%%time
# Same workload through a ProcessPoolExecutor; results come back in input order.
with concurrent.futures.ProcessPoolExecutor() as executor:
    lazy_forecasts = executor.map(run_prophet, series)
    output = list(tqdm(lazy_forecasts, total=len(series)))

100%|██████████| 500/500 [35:23<00:00,  4.25s/it] 

CPU times: user 4.67 s, sys: 2.75 s, total: 7.42 s
Wall time: 35min 25s





## 15 mins: With Ray

In [7]:
import ray

# ignore_reinit_error lets this cell be re-run on a live kernel; a plain
# ray.init() raises when Ray has already been started in this process.
# (ray.init also accepts log_to_driver=False to keep worker stdout out of the
# notebook output.)
ray.init(ignore_reinit_error=True)

@ray.remote
def run_prophet(history_pd):
    """Ray task: fit Prophet on one series and return its forecast.

    NOTE(review): this reuses the name ``run_prophet``, so once this cell has
    run the name refers to the Ray remote function and must be invoked as
    ``run_prophet.remote(...)``.

    ``history_pd`` is passed unchanged to ``Prophet.fit``; the return value is
    whatever ``Prophet.predict`` gives for the history plus 90 daily steps.
    """
    forecaster = Prophet(
        growth='linear',
        seasonality_mode='multiplicative',
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        interval_width=0.95,
    )
    forecaster.fit(history_pd)

    # History rows are kept, so the forecast covers the past and the future.
    horizon = forecaster.make_future_dataframe(periods=90, freq='d', include_history=True)
    return forecaster.predict(horizon)

2020-08-24 23:29:08,286	INFO resource_spec.py:223 -- Starting Ray with 3.08 GiB memory available for workers and up to 1.55 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-08-24 23:29:09,191	INFO services.py:1191 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


In [8]:
%%time
# Submit one Ray task per series, then block until every task has finished.
pending = [run_prophet.remote(history_pd) for history_pd in series]
futures = ray.get(pending)

[2m[36m(pid=8394)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8403)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8401)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8396)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8407)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8405)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8411)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8409)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8467)[0m Importing plotly failed. Interactive plots will not work.


[2m[36m(pid=8394)[0m Initial log joint probability = -32.3947
[2m[36m(pid=8394)[0m     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
[2m[36m(pid=8394)[0m       99       3370.16   4.20642e-05       87.0329       0.485       0.485      124   
[2m[36m(pid=8394)[0m     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
[2m[36m(pid=8394)[0m      106        3370.2   0.000156469       83.1894   1.536e-06       0.001      165  LS failed, Hessian reset 
[2m[36m(pid=8394)[0m      148       3370.25   0.000258214       98.1808   1.945e-06       0.001      257  LS failed, Hessian reset 
[2m[36m(pid=8394)[0m      199       3370.28   2.67403e-05       79.5758           1           1      333   
[2m[36m(pid=8394)[0m     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
[2m[36m(pid=8394)[0m      222       3370.28   1.66973e-07       86.6703      0.7359      0.7

[2m[36m(pid=8445)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8510)[0m Importing plotly failed. Interactive plots will not work.


[2m[36m(pid=8401)[0m      134       4176.85   1.96209e-07       71.6584      0.5214      0.5214      213   
[2m[36m(pid=8401)[0m Optimization terminated normally: 
[2m[36m(pid=8401)[0m   Convergence detected: relative gradient magnitude is below tolerance


[2m[36m(pid=8476)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8529)[0m Importing plotly failed. Interactive plots will not work.
[2m[36m(pid=8499)[0m Importing plotly failed. Interactive plots will not work.


[2m[36m(pid=8407)[0m Initial log joint probability = -27.9777
[2m[36m(pid=8407)[0m     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
[2m[36m(pid=8396)[0m Initial log joint probability = -15.7248
[2m[36m(pid=8396)[0m     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
[2m[36m(pid=8405)[0m Initial log joint probability = -38.7425
[2m[36m(pid=8405)[0m     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
[2m[36m(pid=8407)[0m       99        4304.2   2.64786e-05       78.9902      0.7415      0.7415      139   
[2m[36m(pid=8407)[0m     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
[2m[36m(pid=8396)[0m       83       4129.73   0.000862824       312.321   1.037e-05       0.001      151  LS failed, Hessian reset 
[2m[36m(pid=8407)[0m      116        4304.2   1.87317e-07       72.1699     0.06386         

In [13]:
# Spot-check the length of one entry (index 498) of the list returned by ray.get.
len(futures[498])

1916