# Auto-Sklearn API: Regression Documentation

This notebook documents the API for `AutoSklearnRegressor`.

**Goal:** Demonstrate the standard usage of the tool on a generic dataset (California Housing), in the order the cells below run:
1.  `fit()`: Training a regression pipeline.
2.  `sprint_statistics()`: Viewing a summary of the optimization run.
3.  `leaderboard()`: Inspecting the top regression models found.
4.  `show_models()`: Inspecting the structure of the final ensemble.
5.  `predict()`: Generating continuous predictions.

In [None]:
import autosklearn.regression
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import pandas as pd

# 1. Fetch the California Housing regression dataset.
#    as_frame=True together with return_X_y=True yields the features and the
#    target as pandas objects rather than a Bunch.
X, y = sklearn.datasets.fetch_california_housing(as_frame=True, return_X_y=True)

# 2. Hold out 20% of the rows for testing; the fixed random_state keeps the
#    split reproducible across runs.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the size of both partitions
print(
    f"Training Shape: {X_train.shape}",
    f"Testing Shape:  {X_test.shape}",
    sep="\n",
)

In [None]:
# Configure the AutoML regressor.
# n_jobs=1 is chosen for compatibility with Mac M-Series chips, and the time
# budget is kept short because this notebook is only an API demo.
automl = autosklearn.regression.AutoSklearnRegressor(
    seed=1,
    n_jobs=1,
    per_run_time_limit=30,
    time_left_for_this_task=120,
)

# Run the search on the training split
print("Starting AutoML Regression Search...")
automl.fit(X=X_train, y=y_train)
print("Search Complete.")

In [None]:
# 1. Sprint Statistics: high-level summary of the finished run.
#    Header and report are emitted by one print call, separated by a newline.
print("--- Sprint Statistics ---", automl.sprint_statistics(), sep="\n")

# 2. Leaderboard: the regression models found by the search.
#    The header keeps its leading newline to leave a blank line between the
#    two reports.
print("\n--- Leaderboard ---", automl.leaderboard(), sep="\n")

In [None]:
# 3. Show Models: View the internal structure of the final ensemble
# This shows which algorithms (Random Forest, SVM, etc.) were chosen
ensemble_structure = automl.show_models()

# Print only a bounded snippet of the model definitions, since the full text
# can be very long. The "..." marker is appended only when text was actually
# cut off, so a short description is not misreported as truncated.
SNIPPET_CHARS = 1000
ensemble_text = str(ensemble_structure)
truncation_marker = "..." if len(ensemble_text) > SNIPPET_CHARS else ""
print(ensemble_text[:SNIPPET_CHARS] + truncation_marker)

In [None]:
# 4. Predict: score the held-out test features with the fitted ensemble
y_pred = automl.predict(X=X_test)

# Compare predictions with the true targets using standard regression metrics:
# R2 (coefficient of determination) and MAE (mean absolute error).
r2 = sklearn.metrics.r2_score(y_true=y_test, y_pred=y_pred)
mae = sklearn.metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)

print(
    f"R2 Score: {r2:.4f}",
    f"MAE:      {mae:.4f}",
    sep="\n",
)