<a href="https://colab.research.google.com/github/canerturkseven/ForecastFlowML/blob/master/examples/ForecastFlowML_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade pip
!pip install "git+https://github.com/canerturkseven/forecastflowml"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pip
  Downloading pip-23.0.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.0.4
    Uninstalling pip-22.0.4:
      Successfully uninstalled pip-22.0.4
Successfully installed pip-23.0.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/canerturkseven/forecastflowml
  Cloning https://github.com/canerturkseven/forecastflowml to /tmp/pip-req-build-hoympmcd
  Running command git clone --filter=blob:none --quiet https://github.com/canerturkseven/forecastflowml /tmp/pip-req-build-hoympmcd
  Resolved https://github.com/canerturkseven/forecastflowml to commit 52079ce4d5596a0adaf4f0e428ec744456f72d47
  

In [None]:
# list the installed packages and their versions (environment diagnostics)
!python -m pip list

Package                       Version
----------------------------- --------------------
absl-py                       1.4.0
aeppl                         0.0.33
aesara                        2.7.9
alabaster                     0.7.13
albumentations                1.2.1
alembic                       1.10.2
altair                        4.2.2
argon2-cffi                   21.3.0
argon2-cffi-bindings          21.2.0
arviz                         0.12.1
astropy                       4.3.1
astunparse                    1.6.3
atomicwrites                  1.4.1
attrs                         22.2.0
audioread                     3.0.0
autograd                      1.5
Babel                         2.12.1
backcall                      0.2.0
backports.zoneinfo            0.2.1
beautifulsoup4                4.6.3
bleach                        6.0.0
blis                          0.7.9
bokeh                         2.4.3
branca                        0.6.0
bs4                           0.0.1
Cache

In [None]:
# print the version of the `python` executable found on the shell PATH
!python --version

Python 3.9.16


In [None]:
import mlflow
from forecastflowml.meta_model import MetaModel
from forecastflowml.preprocessing import FeatureExtractor
from forecastflowml.data.loader import load_walmart_m5
from pyspark.sql import SparkSession
import pyspark.sql.functions as F

In [None]:
# create (or reuse) a local Spark session for the demo
spark = (
    SparkSession.builder.master("local[2]")
    .config("spark.driver.memory", "16g")
    # Spark 3.x name of the Arrow switch; the former
    # "spark.sql.execution.arrow.enabled" key is deprecated
    .config("spark.sql.execution.arrow.pyspark.enabled", "true")
    .config("spark.sql.adaptive.enabled", "false")
    .config("spark.sql.shuffle.partitions", "2")
    .getOrCreate()
)

In [None]:
# load the sample dataset through the Spark session and preview its rows
df = load_walmart_m5(spark)
df.show()

+--------------------+-----------+-------+------+--------+--------+-----+----------+---------+
|                  id|    item_id|dept_id|cat_id|store_id|state_id|sales|      date|christmas|
+--------------------+-----------+-------+------+--------+--------+-----+----------+---------+
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS|    TX_2|      TX|  2.0|2011-01-29|        0|
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS|    TX_2|      TX|  5.0|2011-01-30|        0|
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS|    TX_2|      TX|  3.0|2011-01-31|        0|
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS|    TX_2|      TX|  0.0|2011-02-01|        0|
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS|    TX_2|      TX|  0.0|2011-02-02|        0|
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS|    TX_2|      TX|  0.0|2011-02-03|        0|
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS|    TX_2|      TX|  0.0|2011-02-04|        0|
|FOODS_1_013_TX_2_...|FOODS_1_013|FOODS_1| FOODS| 

In [None]:

# configure the feature extractor for the daily "sales" series keyed by "id"
preprocessor = FeatureExtractor(
    id_col="id",
    date_col="date",
    date_frequency="day",
    target_col="sales",
    target_encodings=[
        # windows 7/14/28 at lags 7/14/21/28, mean and std
        dict(
            partition_cols=["item_id", "store_id"],
            windows=[7, 14, 28],
            lags=[7, 14, 21, 28],
            functions=["mean", "std"],
        ),
        # window of 1, mean only, at lags 7-9, 14-16, 21-23 and 28-30
        dict(
            partition_cols=["item_id", "store_id"],
            windows=[1],
            lags=[base + shift for base in (7, 14, 21, 28) for shift in (0, 1, 2)],
            functions=["mean"],
        ),
    ],
    date_features=[
        "day_of_month",
        "day_of_week",
        "week_of_year",
        "quarter",
        "month",
        "year",
    ],
    history_lengths=["item_id", ["item_id", "store_id"]],
    encode_events=dict(cols=["christmas"], window=15),
    count_consecutive_values=dict(value=0, lags=[7, 14, 21, 28]),
)

In [None]:
# run the feature extractor on the raw dataframe and preview the result
# (no checkpointing happens here; only transform() and show() are called)
df_preprocessed = preprocessor.transform(df)
df_preprocessed.show()

+----------+--------------------+--------------------------------------+------+-----------------------------------+-------------------------------------+-------------------------------------+-------------------------------------+-----------+--------------------------------------+------------------------------------+-------------------------------------+-------------------------------------+---------+--------------------------------------+------------------------------------+------------------------------------+-------------------------------------+-------------------------------------+--------------------------------------+------------------------------------+-------------------------------------+-------------------------------------+-------------------------------------+-------------------------------------+-------------------------------------+------------------------------------+--------+------------------------------------+--------------------------------------+--------------------

In [None]:
# split dataset into train and test around a single cutoff date;
# the cutoff is defined once so the two filters always stay complementary
TRAIN_END_DATE = "2016-05-22"  # last date that belongs to the training set
df_train = df_preprocessed.filter(F.col("date") <= TRAIN_END_DATE)
df_test = df_preprocessed.filter(F.col("date") > TRAIN_END_DATE)

In [None]:
# initialize meta model
model = MetaModel(
    # dataset parameters
    group_col="cat_id",  # column to slice dataframe
    id_cols=["id"],  # columns to use as time series identifier
    date_col="date",  # date column
    target_col="sales",  # target column
    date_frequency="days",  # date frequency (days, weeks, months, years) of dataset
    # model parameters
    model_horizon=7,  # horizon per model
    max_forecast_horizon=28,  # total forecast horizon
    lag_feature_range=2,  # extra lags to include as features based on model horizon
    # cross validation and optimisation parameters
    n_cv_splits=1,  # number of time-based cv splits
    cv_step_length=28,  # number of dates between each cv folds
    max_hyperparam_evals=1,  # total number of optuna trials
    scoring="neg_mean_squared_error",  # sklearn scoring metric
    # optuna hyperparameter space
    hyperparam_space_fn=lambda trial: {
        "num_leaves": trial.suggest_int("num_leaves", 20, 30)
    },
    # mlflow parameters
    # "./mlruns" is the local store a bare `mlflow ui` reads by default, so the
    # runs are visible in the UI without extra flags
    tracking_uri="./mlruns",  # MLflow tracking URI
)

In [None]:
# train the model on the training split
# launch mlflow server using command "mlflow ui" and
# examine the training progress on mlflow platform
# NOTE(review): `mlflow ui` reads ./mlruns by default; if the tracking_uri given
# to MetaModel points elsewhere, pass it with --backend-store-uri (confirm)
model.train(df_train)



In [None]:
# print the current working directory of the notebook's shell
!pwd

/content


# Predict with the trained model

In [None]:
# URI of the "meta_model" artifact logged under the training run
model_uri = f"runs:/{model.run_id}/meta_model"

# load the meta model through the generic mlflow.pyfunc interface
loaded_model = mlflow.pyfunc.load_model(model_uri)

# predict on the test split and preview the forecasts
predictions = loaded_model.predict(df_test)
predictions.show()


It is preferred to use 'applyInPandas' over this API. This API will be deprecated in the future releases. See SPARK-28264 for more details.



+--------------------+----------+-------------------+
|                  id|      date|         prediction|
+--------------------+----------+-------------------+
|FOODS_1_002_TX_1_...|2016-06-06| 0.7062990683439702|
|FOODS_1_002_TX_1_...|2016-06-07| 0.6616471263913692|
|FOODS_1_002_TX_1_...|2016-06-08| 0.6379107324666581|
|FOODS_1_002_TX_1_...|2016-06-09| 0.6336893074922046|
|FOODS_1_002_TX_1_...|2016-06-10| 0.6904708238990559|
|FOODS_1_002_TX_1_...|2016-06-11| 0.7592523916745684|
|FOODS_1_002_TX_1_...|2016-06-12|  0.821500121343374|
|FOODS_1_011_WI_2_...|2016-06-06| 0.7260964554135955|
|FOODS_1_011_WI_2_...|2016-06-07| 0.6577081195362834|
|FOODS_1_011_WI_2_...|2016-06-08| 0.6577081195362834|
|FOODS_1_011_WI_2_...|2016-06-09| 0.9665992657669733|
|FOODS_1_011_WI_2_...|2016-06-10| 0.8815223618176051|
|FOODS_1_011_WI_2_...|2016-06-11|  0.887622059104613|
|FOODS_1_011_WI_2_...|2016-06-12| 1.3184426702033463|
|FOODS_1_026_TX_1_...|2016-06-06|0.47930805741719723|
|FOODS_1_026_TX_1_...|2016-0

In [None]:
# display the cross-validation forecast graph stored under the "FOODS" key
model.cv_forecast_graph["FOODS"]