Skip to content

How can I accelerate time series forecasting with cuML for the LF model? #46

Open
@clevilll

Description

@clevilll

Hi,

I was trying to explore whether the LF (Linear Forest) model could be accelerated with cuML, but I was not successful. Is there any chance of adapting the LF model to GPU-based computation resources (as was done successfully for RF here) in order to reduce the runtime?

#%%time
# !pip install skforecast==0.14.0
# !pip install session_info
# !pip install linear-tree
# !pip install cuml-cu12 --extra-index-url=https://pypi.nvidia.com
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skforecast.direct import ForecasterDirect
from sklearn.linear_model import LinearRegression
from lineartree import LinearForestRegressor
import cuml

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Parameters
n_records = 100000  # number of hourly observations to generate
drift_rate = 0.001  # constant per-step increment accumulated into the trend
seasonality_period = 24  # samples per seasonal cycle (24 hourly steps)
start_date = '2010-01-01'  # timestamp of the first observation

# Create synthetic dataset with positive drift
date_rng = pd.date_range(start=start_date, periods=n_records, freq='h')
np.random.seed(42)  # fixed seed so the noise component is reproducible
noise = np.random.randn(n_records)
drift = np.cumsum(np.ones(n_records) * drift_rate)  # linear ramp: drift_rate, 2*drift_rate, ...
seasonality = np.sin(np.linspace(0, 2 * np.pi, n_records) * (n_records / seasonality_period))  # sine with roughly one cycle per seasonality_period samples

data = noise + drift + seasonality
df = pd.DataFrame(data, index=date_rng, columns=['y'])

# Store runtimes
runtimes = {}  # maps a backend label to its elapsed wall-clock seconds

# Run with GPU (the timer below spans forecaster construction, the float32 cast and fit())
print("Running with cuML (GPU)...")
start_time = time.time()
forecaster = ForecasterDirect(
    regressor=cuml.ensemble.LinearForestRegressor(   #<===== HERE===================
    base_estimator=LinearRegression(),

    # NOTE(review): the attribute lookup on the `regressor=` line above is what
    # raises the AttributeError reported below this script -- `cuml.ensemble`
    # has no `LinearForestRegressor`. The class imported at the top of the cell
    # comes from `lineartree` and is never used in this script.
    # NOTE(review): `base_estimator` here is scikit-learn's LinearRegression;
    # presumably it would run on the CPU even if a GPU forest existed -- confirm.
    # Explicitly set max_features to a valid value, e.g., 'sqrt'
    max_features='sqrt',
    n_estimators=10,  # alternative value left by the author: 200
    max_depth=13,
    n_jobs=-1  # Parallelize Random Forest to use all CPU cores; NOTE(review): a CPU-oriented setting -- confirm whether a GPU estimator would accept it
    ),
    # NOTE(review): `steps`, `lags` and `n_jobs` are ForecasterDirect arguments;
    # presumably 100 forecast steps, 100 lagged predictors and a single worker
    # for fitting -- verify against the skforecast documentation.
    steps=100,
    lags=100,
    n_jobs=1,
)

# Convert target variable to float32 *before* fitting
# (presumably to match the precision cuML estimators prefer -- TODO confirm)
df['y'] = df['y'].astype(np.float32)
forecaster.fit(y=df['y'])
runtimes["cuML (GPU)"] = time.time() - start_time
print(f"cuML (GPU) runtime: {runtimes['cuML (GPU)']:.2f} seconds\n")


# Print summary
# Only the "cuML (GPU)" entry is ever stored by this script, so the loop below
# prints a single line; it is written generically over every recorded backend.
print("=" * 50)
print("Runtime Comparison Summary:")
for backend, time_taken in runtimes.items():
    print(f"{backend}: {time_taken:.2f} seconds")
print("=" * 50)
Running with cuML (GPU)...
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
[<ipython-input-17-94c3823248ef>](https://localhost:8080/#) in <cell line: 0>()
     39 start_time = time.time()
     40 forecaster = ForecasterDirect(
---> 41     regressor=cuml.ensemble.LinearForestRegressor(   #<===== HERE===================
     42     base_estimator=LinearRegression(),
     43 

AttributeError: module 'cuml.ensemble' has no attribute 'LinearForestRegressor'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions