🧠 Objectif :
Lancer un entraînement AutoML Forecasting sur Vertex AI à partir de la table BQ préparée

In [None]:
import os
from google.cloud import aiplatform
import yaml
from datetime import datetime


# === 1. Initialisation Vertex AI ===

In [None]:
# Project-wide settings reused by the cells below.
PROJECT_ID = "avisia-certification-ml-yde"
REGION = "us-central1"

# Staging bucket name is derived from the project id; adapt if needed.
BUCKET_URI = f"gs://{PROJECT_ID}-vertex-bucket"

# BigQuery table used as the data source for the dataset.
BQ_SOURCE_URI = "bq://avisia-certification-ml-yde.chicago_taxis.demand_by_hour"

# Register project, region and staging bucket as defaults for the SDK.
aiplatform.init(
    staging_bucket=BUCKET_URI,
    location=REGION,
    project=PROJECT_ID,
)

# === 2. Chargement de la configuration ===

In [None]:
# Load the pipeline configuration. The encoding is pinned to UTF-8 so the file
# is decoded the same way regardless of the platform's locale default.
with open("config/pipeline_config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

forecast_config = config["forecasting"]

# Check every required key up front and report all missing ones at once,
# instead of failing on the first absent key. "available_at_forecast_columns"
# is included because the training step reads it with [] indexing; checking it
# here makes the failure happen before any cloud resource is created.
required_keys = (
    "time_column",
    "target_column",
    "context_column",
    "forecast_horizon",
    "window_size",
    "available_at_forecast_columns",
)
missing_keys = [key for key in required_keys if key not in forecast_config]
if missing_keys:
    raise KeyError(
        "Missing keys in the 'forecasting' section of "
        f"config/pipeline_config.yaml: {missing_keys}"
    )

time_column = forecast_config["time_column"]
target_column = forecast_config["target_column"]
context_column = forecast_config["context_column"]
forecast_horizon = forecast_config["forecast_horizon"]
window_size = forecast_config["window_size"]

# === 3. Création du dataset time series sur Vertex AI ===


In [None]:
# Create a managed time-series dataset backed by the BigQuery table.
# TimeSeriesDataset.create() has no time-column parameter (passing
# `time_column_spec_column_name` raises TypeError); the SDK expects the time
# column on AutoMLForecastingTrainingJob.run() instead.
dataset = aiplatform.TimeSeriesDataset.create(
    display_name="chicago_taxi_demand",
    bq_source=BQ_SOURCE_URI,
)

print("✅ Dataset créé :", dataset.resource_name)

# === 4. Lancement du job d'entraînement AutoML Forecast ===


In [None]:
# Columns known at forecast time; also used to build the column specs below.
available_columns = forecast_config["available_at_forecast_columns"]
unavailable_columns = forecast_config.get("unavailable_at_forecast_columns", [])

# Map each input column to the "auto" transformation. A dict is used (rather
# than a list of transformations) so that a column listed twice -- e.g. the
# time column also appearing in the available-at-forecast list -- yields a
# single entry.
column_specs = dict.fromkeys(
    [target_column, context_column, time_column, *available_columns],
    "auto",
)

# The constructor only takes job-level settings (name, objective, column
# specs). All forecasting-specific arguments belong to run(); passing them
# here raises TypeError.
training_job = aiplatform.AutoMLForecastingTrainingJob(
    display_name="taxi_demand_forecast_job",
    optimization_objective="minimize-rmse",
    column_specs=column_specs,
)

# Launch training and block until it finishes (sync=True).
model = training_job.run(
    dataset=dataset,
    target_column=target_column,
    time_column=time_column,
    time_series_identifier_column=context_column,
    unavailable_at_forecast_columns=unavailable_columns,
    available_at_forecast_columns=available_columns,
    forecast_horizon=forecast_horizon,
    context_window=window_size,
    data_granularity_unit="hour",
    data_granularity_count=1,
    training_fraction_split=0.8,
    validation_fraction_split=0.1,
    test_fraction_split=0.1,
    export_evaluated_data_items=True,
    # Timestamp suffix keeps successive model display names distinct.
    model_display_name=f"taxi_demand_model_{datetime.now().strftime('%Y%m%d_%H%M')}",
    sync=True,
)

print("✅ Modèle entraîné :", model.resource_name)