Open
Description
Hi,
I was trying to explore whether we could accelerate using the LF model using cuML, but I was not successful. Is there any chance of successfully adapting the LF model (like they did for RF here successfully) using GPU-based computation resources to reduce the runtime?
#%%time
# !pip install skforecast==0.14.0
# !pip install session_info
# !pip install linear-tree
# !pip install cuml-cu12 --extra-index-url=https://pypi.nvidia.com
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skforecast.direct import ForecasterDirect
from sklearn.linear_model import LinearRegression
from lineartree import LinearForestRegressor
import cuml
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Parameters
n_records = 100000
drift_rate = 0.001
seasonality_period = 24
start_date = '2010-01-01'
# Create synthetic dataset with positive drift
date_rng = pd.date_range(start=start_date, periods=n_records, freq='h')
np.random.seed(42)
noise = np.random.randn(n_records)
drift = np.cumsum(np.ones(n_records) * drift_rate)
seasonality = np.sin(np.linspace(0, 2 * np.pi, n_records) * (n_records / seasonality_period))
data = noise + drift + seasonality
df = pd.DataFrame(data, index=date_rng, columns=['y'])
# Store runtimes
runtimes = {}
# Run with GPU
print("Running with cuML (GPU)...")
start_time = time.time()
forecaster = ForecasterDirect(
regressor=cuml.ensemble.LinearForestRegressor( #<===== HERE===================
base_estimator=LinearRegression(),
# Explicitly set max_features to a valid value, e.g., 'sqrt'
max_features='sqrt',
n_estimators=10,#200,
max_depth=13,
n_jobs=-1 # Parallelize Random Forest to use all CPU cores
),
steps=100,
lags=100,
n_jobs=1,
)
# Convert target variable to float32 *before* fitting
df['y'] = df['y'].astype(np.float32)
forecaster.fit(y=df['y'])
runtimes["cuML (GPU)"] = time.time() - start_time
print(f"cuML (GPU) runtime: {runtimes['cuML (GPU)']:.2f} seconds\n")
# Print summary
print("=" * 50)
print("Runtime Comparison Summary:")
for backend, time_taken in runtimes.items():
print(f"{backend}: {time_taken:.2f} seconds")
print("=" * 50)
Running with cuML (GPU)...
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
[<ipython-input-17-94c3823248ef>](https://localhost:8080/#) in <cell line: 0>()
39 start_time = time.time()
40 forecaster = ForecasterDirect(
---> 41 regressor=cuml.ensemble.LinearForestRegressor( #<===== HERE===================
42 base_estimator=LinearRegression(),
43
AttributeError: module 'cuml.ensemble' has no attribute 'LinearForestRegressor'
Metadata
Metadata
Assignees
Labels
No labels