In [7]:
# 04-pipeline-orchestration.ipynb
# Customer Lifetime Value Prediction - Vertex AI Pipeline
# End-to-end ML pipeline with Kubeflow components

# Imports
!pip install kfp google-cloud-pipeline-components -q

from google.cloud import aiplatform
from google.cloud import storage
from kfp import dsl
from kfp import compiler
from kfp.dsl import component, Input, Output, Dataset, Model, Metrics
import os

# Configuration: project, region and bucket used by every later cell.
PROJECT_ID = "clv-predictions-mlops"
REGION = "us-central1"
BUCKET_NAME = "clv-prediction-data"
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline_root"

# Initialize the Vertex AI SDK; the bucket root doubles as the staging bucket.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=f"gs://{BUCKET_NAME}",
)

# Echo the effective settings, one per line.
print(
    f"Project: {PROJECT_ID}",
    f"Region: {REGION}",
    f"Pipeline Root: {PIPELINE_ROOT}",
    sep="\n",
)

Project: clv-predictions-mlops
Region: us-central1
Pipeline Root: gs://clv-prediction-data/pipeline_root


In [13]:
# Component 1: Load and prepare data
@component(
    base_image="python:3.10",
    packages_to_install=["pandas", "pyarrow", "google-cloud-storage", "scikit-learn"],
)
def load_data(
    bucket_name: str,
    output_data: Output[Dataset],
):
    """Fetch the CLV feature table from GCS and write a pickled train/test split.

    Args:
        bucket_name: GCS bucket that holds ``features/clv_features.parquet``.
        output_data: Dataset artifact; receives a pickled dict with the keys
            ``X_train``, ``X_test``, ``y_train`` and ``y_test``.
    """
    import pickle

    import pandas as pd
    from google.cloud import storage
    from sklearn.model_selection import train_test_split

    # Pull the parquet file down to local disk, then read it.
    local_parquet = "/tmp/data.parquet"
    source_blob = storage.Client().bucket(bucket_name).blob("features/clv_features.parquet")
    source_blob.download_to_filename(local_parquet)
    df = pd.read_parquet(local_parquet)

    # Everything except the id and the label is treated as a feature.
    excluded = ("customer_id", "target_clv")
    feature_cols = [col for col in df.columns if col not in excluded]

    # train_test_split returns (X_train, X_test, y_train, y_test) in that order;
    # 80/20 split with a fixed random_state.
    split_names = ("X_train", "X_test", "y_train", "y_test")
    split_parts = train_test_split(
        df[feature_cols],
        df["target_clv"],
        test_size=0.2,
        random_state=42,
    )
    splits = dict(zip(split_names, split_parts))

    # Persist all four pieces together as one pickle at the artifact path.
    with open(output_data.path, "wb") as sink:
        pickle.dump(splits, sink)

    print(f"Loaded {len(splits['X_train'])} train, {len(splits['X_test'])} test samples")


# Component 2: Train model
@component(
    base_image="python:3.10",
    packages_to_install=["pandas", "scikit-learn", "tensorflow", "google-cloud-storage", "joblib"]
)
def train_model(
    input_data: Input[Dataset],
    bucket_name: str,
    units_1: int,
    units_2: int,
    dropout: float,
    learning_rate: float,
    output_model: Output[Model]
):
    """Train the CLV regression network and persist the model and its scaler.

    Args:
        input_data: Dataset artifact containing a pickled dict; the
            ``X_train`` and ``y_train`` entries are used here.
        bucket_name: GCS bucket that receives the fitted scaler at
            ``models/pipeline_scaler.pkl``.
        units_1: Width of the first hidden Dense layer.
        units_2: Width of the second hidden Dense layer.
        dropout: Dropout rate after the first hidden layer; the second
            dropout layer uses ``dropout * 0.7``.
        learning_rate: Learning rate for the Adam optimizer.
        output_model: Model artifact; receives the saved Keras model file.
    """
    import pickle
    import shutil

    import joblib
    from google.cloud import storage
    from sklearn.preprocessing import StandardScaler
    from tensorflow import keras
    from tensorflow.keras import layers

    # Seed the Python, NumPy and TensorFlow RNGs so weight initialisation,
    # dropout masks and batch shuffling repeat from run to run. 42 matches the
    # random_state used for the train/test split in load_data.
    # Bit-identical results are still not guaranteed on every hardware backend.
    SEED = 42
    keras.utils.set_random_seed(SEED)

    # Load data
    # NOTE: pickle.load executes arbitrary code from the file; this is only
    # acceptable because the artifact is produced by this pipeline itself.
    with open(input_data.path, 'rb') as f:
        data = pickle.load(f)

    X_train = data['X_train']
    y_train = data['y_train']

    # Scale features; the fitted scaler is uploaded below for inference.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Build model: two L2-regularised hidden layers with dropout, a fixed
    # 32-unit layer, and a single linear output for regression.
    model = keras.Sequential([
        layers.Input(shape=(X_train.shape[1],)),
        layers.Dense(units_1, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
        layers.Dropout(dropout),
        layers.Dense(units_2, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
        layers.Dropout(dropout * 0.7),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae']
    )

    # Train with 20% of the training rows held out for validation; stop early
    # after 10 epochs without improvement and keep the best weights.
    model.fit(
        X_train_scaled, y_train,
        validation_split=0.2,
        epochs=50,
        batch_size=32,
        callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)],
        verbose=0
    )

    # Save model to temp file with .keras extension, then copy: the artifact
    # path carries no extension, so the save goes through a .keras file first.
    model.save('/tmp/model.keras')
    shutil.copy('/tmp/model.keras', output_model.path)

    # Save scaler to GCS for inference
    joblib.dump(scaler, '/tmp/scaler.pkl')
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    bucket.blob('models/pipeline_scaler.pkl').upload_from_filename('/tmp/scaler.pkl')

    print(f"Model trained with {units_1}/{units_2} units, {dropout:.2f} dropout, {learning_rate:.4f} lr")


# Component 3: Evaluate model
@component(
    base_image="python:3.10",
    packages_to_install=["pandas", "scikit-learn", "tensorflow"],
)
def evaluate_model(
    input_data: Input[Dataset],
    input_model: Input[Model],
    output_metrics: Output[Metrics],
) -> float:
    """Score the trained model on the held-out split and report MAE and R².

    Args:
        input_data: Dataset artifact containing a pickled dict; the
            ``X_train``, ``X_test`` and ``y_test`` entries are used here.
        input_model: Model artifact holding the saved Keras model file.
        output_metrics: Metrics artifact; receives ``mae`` and ``r2``.

    Returns:
        The mean absolute error on the test split.
    """
    import pickle
    import shutil

    import numpy as np
    import tensorflow as tf
    from sklearn.metrics import mean_absolute_error, r2_score
    from sklearn.preprocessing import StandardScaler

    with open(input_data.path, 'rb') as source:
        splits = pickle.load(source)

    # Fit the scaler on the training features only, then apply it to the test
    # features so no test statistics leak into the scaling.
    fitted_scaler = StandardScaler().fit(splits['X_train'])
    X_test_scaled = fitted_scaler.transform(splits['X_test'])
    y_true = splits['y_test']

    # The artifact path is copied to a .keras filename before loading.
    keras_file = '/tmp/model.keras'
    shutil.copy(input_model.path, keras_file)
    net = tf.keras.models.load_model(keras_file)

    # Flatten the (n, 1) output and floor predictions at zero.
    raw_predictions = net.predict(X_test_scaled, verbose=0).flatten()
    y_pred = np.clip(raw_predictions, 0, None)

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    for metric_name, metric_value in (("mae", mae), ("r2", r2)):
        output_metrics.log_metric(metric_name, float(metric_value))

    print(f"Evaluation - MAE: ${mae:,.0f}, R²: {r2:.3f}")

    return float(mae)


# Component 4: Conditional model registration
@component(
    base_image="python:3.10",
    packages_to_install=["google-cloud-aiplatform", "tensorflow"]
)
def register_model(
    input_model: Input[Model],
    project_id: str,
    region: str,
    bucket_name: str,
    mae: float,
    mae_threshold: float
) -> str:
    """Register the model in Vertex AI Model Registry if MAE < threshold.

    Args:
        input_model: Model artifact holding the saved Keras model file.
        project_id: Google Cloud project used to initialise the Vertex AI SDK.
        region: Vertex AI location used to initialise the SDK.
        bucket_name: GCS bucket that receives the exported model under
            ``models/pipeline_model/``.
        mae: Evaluation MAE of the candidate model.
        mae_threshold: Registration happens only when ``mae`` is strictly
            below this value.

    Returns:
        The Vertex AI model resource name, or ``"not_registered"`` when the
        MAE gate is not passed.
    """
    import os
    import shutil

    import tensorflow as tf
    from google.cloud import aiplatform
    from google.cloud import storage

    export_dir = '/tmp/model_export'
    gcs_prefix = 'models/pipeline_model'

    # Written as `not (mae < threshold)` rather than `mae >= threshold`:
    # every comparison with NaN is False, so this form also rejects a NaN MAE
    # instead of letting it through to upload and then failing on int(mae).
    if not (mae < mae_threshold):
        print(f"Model MAE (${mae:,.0f}) exceeds threshold (${mae_threshold:,.0f}). Skipping registration.")
        return "not_registered"

    print(f"Model MAE (${mae:,.0f}) meets threshold. Registering...")

    # Initialize Vertex AI
    aiplatform.init(project=project_id, location=region)

    # Copy model to .keras extension and export to SavedModel
    shutil.copy(input_model.path, '/tmp/model.keras')
    model = tf.keras.models.load_model('/tmp/model.keras')
    model.export(export_dir)

    # Upload every exported file to GCS, mirroring the directory layout.
    # NOTE(review): objects left under this prefix by an earlier run are not
    # removed first - confirm stale files cannot affect the served model.
    bucket = storage.Client().bucket(bucket_name)
    for root, _dirs, files in os.walk(export_dir):
        for file_name in files:
            local_path = os.path.join(root, file_name)
            # Derive the object key from the path relative to the export
            # root; GCS keys always use forward slashes.
            relative_path = os.path.relpath(local_path, export_dir).replace(os.sep, '/')
            bucket.blob(f"{gcs_prefix}/{relative_path}").upload_from_filename(local_path)

    # Upload model to registry
    # NOTE(review): the serving image is TF 2.12 while `tensorflow` is
    # unpinned in packages_to_install - confirm the exported SavedModel loads
    # in that runtime.
    vertex_model = aiplatform.Model.upload(
        display_name="clv-prediction-model",
        artifact_uri=f"gs://{bucket_name}/{gcs_prefix}/",
        serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-12:latest",
        labels={"mae": str(int(mae)), "type": "hybrid_nn"}
    )

    print(f"Model registered: {vertex_model.resource_name}")
    return vertex_model.resource_name


# Marker that all four component definitions in this cell were evaluated.
print("Pipeline components defined (fixed Keras save issue)")

Pipeline components defined (fixed Keras save issue)


In [14]:
# Define the pipeline
@dsl.pipeline(
    name="clv-prediction-pipeline",
    description="End-to-end CLV prediction with training, evaluation, and conditional registration",
)
def clv_pipeline(
    bucket_name: str = "clv-prediction-data",
    project_id: str = "clv-predictions-mlops",
    region: str = "us-central1",
    units_1: int = 201,
    units_2: int = 74,
    dropout: float = 0.2478,
    learning_rate: float = 0.0027,
    mae_threshold: float = 2000.0,
):
    """Wire the four components: load -> train -> evaluate -> register.

    Args:
        bucket_name: GCS bucket passed to the load, train and register steps.
        project_id: Google Cloud project passed to the register step.
        region: Vertex AI location passed to the register step.
        units_1: Width of the first hidden layer.
        units_2: Width of the second hidden layer.
        dropout: Dropout rate for the network.
        learning_rate: Optimizer learning rate.
        mae_threshold: MAE gate applied inside the register step.
    """
    # Step 1: produce the train/test split artifact.
    data_step = load_data(bucket_name=bucket_name)
    split_artifact = data_step.outputs["output_data"]

    # Step 2: fit the network on the training portion.
    training_step = train_model(
        input_data=split_artifact,
        bucket_name=bucket_name,
        units_1=units_1,
        units_2=units_2,
        dropout=dropout,
        learning_rate=learning_rate,
    )
    trained_model = training_step.outputs["output_model"]

    # Step 3: score the model on the test portion.
    evaluation_step = evaluate_model(
        input_data=split_artifact,
        input_model=trained_model,
    )

    # Step 4: this task always runs; the MAE-vs-threshold decision is made
    # inside register_model. evaluate_model has two outputs (the metrics
    # artifact and the returned float), so the float is referenced by name.
    register_model(
        input_model=trained_model,
        project_id=project_id,
        region=region,
        bucket_name=bucket_name,
        mae=evaluation_step.outputs["Output"],
        mae_threshold=mae_threshold,
    )

# Compile the pipeline function into a JSON spec in the working directory.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=clv_pipeline,
    package_path="clv_pipeline.json",
)

# Report the output file and the step order.
print(
    "Pipeline compiled to: clv_pipeline.json",
    "\nPipeline flow:",
    "  load_data → train_model → evaluate_model → register_model (conditional)",
    sep="\n",
)

Pipeline compiled to: clv_pipeline.json

Pipeline flow:
  load_data → train_model → evaluate_model → register_model (conditional)


In [15]:
# Add IAM bindings on the pipeline bucket for the service account below
# (presumably the project's default Compute Engine account, which the pipeline
# steps run as - TODO confirm). The bucket and account are hard-coded here,
# not taken from BUCKET_NAME / PROJECT_ID.
!gsutil iam ch serviceAccount:674754622820-compute@developer.gserviceaccount.com:objectViewer gs://clv-prediction-data
!gsutil iam ch serviceAccount:674754622820-compute@developer.gserviceaccount.com:objectCreator gs://clv-prediction-data
# NOTE(review): roles/storage.admin looks broader than the two object-level
# roles above and may make them redundant - confirm whether it is required.
!gsutil iam ch serviceAccount:674754622820-compute@developer.gserviceaccount.com:roles/storage.admin gs://clv-prediction-data

# This message is printed unconditionally; it does not check whether the
# gsutil commands above succeeded.
print("Permissions granted. Re-submit the pipeline.")

No changes made to gs://clv-prediction-data/
No changes made to gs://clv-prediction-data/
No changes made to gs://clv-prediction-data/
Permissions granted. Re-submit the pipeline.


In [16]:
from datetime import datetime

# Second-resolution timestamp used as a suffix for this run's display name.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
run_display_name = f"clv-pipeline-{timestamp}"

# Runtime parameters for the compiled pipeline; project, region and bucket come
# from the configuration cell, the rest are the tuned hyperparameters and the
# MAE gate.
run_parameters = {
    "bucket_name": BUCKET_NAME,
    "project_id": PROJECT_ID,
    "region": REGION,
    "units_1": 201,
    "units_2": 74,
    "dropout": 0.2478,
    "learning_rate": 0.0027,
    "mae_threshold": 2000.0,
}

pipeline_job = aiplatform.PipelineJob(
    display_name=run_display_name,
    template_path="clv_pipeline.json",
    pipeline_root=PIPELINE_ROOT,
    parameter_values=run_parameters,
)

# Hand the job to Vertex AI; this cell does not wait for the run or read back
# its result.
pipeline_job.submit()
print("Pipeline submitted!")

Creating PipelineJob
PipelineJob created. Resource name: projects/674754622820/locations/us-central1/pipelineJobs/clv-prediction-pipeline-20251211182327
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/674754622820/locations/us-central1/pipelineJobs/clv-prediction-pipeline-20251211182327')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/clv-prediction-pipeline-20251211182327?project=674754622820
Pipeline submitted!


In [21]:
# Closing summary. Every value below is a literal typed into this cell; nothing
# is read back from the pipeline run or the Model Registry.
# NOTE(review): register_model in this notebook sets only the labels `mae` and
# `type`; "vizier_tuned" is not attached there - confirm the Labels line.
banner = "=" * 60
summary_lines = [
    banner,
    "Notebook 04 Complete!",
    banner,
    "\nPipeline: clv-prediction-pipeline",
    "  Steps: load_data → train_model → evaluate_model → register_model",
    "  Status: 4/4 completed",
    "\nModel registered to Vertex AI Model Registry:",
    "  Name: clv-prediction-model",
    "  MAE: $1,449",
    "  Labels: hybrid_nn, vizier_tuned",
    "\nNext: 05-monitoring-deployment.ipynb",
    "  - GKE deployment (for screenshots)",
    "  - Cloud Run demo app",
    "  - Evidently AI monitoring",
    "  - Cloud Functions retraining trigger",
]
print("\n".join(summary_lines))

Notebook 04 Complete!

Pipeline: clv-prediction-pipeline
  Steps: load_data → train_model → evaluate_model → register_model
  Status: 4/4 completed

Model registered to Vertex AI Model Registry:
  Name: clv-prediction-model
  MAE: $1,449
  Labels: hybrid_nn, vizier_tuned

Next: 05-monitoring-deployment.ipynb
  - GKE deployment (for screenshots)
  - Cloud Run demo app
  - Evidently AI monitoring
  - Cloud Functions retraining trigger
