In [None]:
import os
# Notebook-wide configuration. Everything is published through os.environ so
# that the %%bash cells further down can read the same values.

# --- Projects and network ---
consumer_project = "PLACEHOLDER_YOUR_CLIENT_PROJECT"
producer_project = "PLACEHOLDER_YOUR_PRODUCER_PROJECT"

os.environ.update({
    "CONSUMER_PROJECT_ID": consumer_project,
    "NETWORK": "default",  # Assuming your VPC network name
    "PRODUCER_PROJECT_ID": producer_project,
    # Both TARGET_* names carry the producer project ID.
    "TARGET_PROJECT": producer_project,
    "TARGET_PROJECT_ID": producer_project,
})

# --- Regional endpoints ---
# One row per region: (variable suffix, region, endpoint ID, service attachment).
# --- REPLACE THE PLACEHOLDERS IN THE SERVICE ATTACHMENT PATHS ---
regional_endpoints = (
    # Endpoint 1 (us-central1)
    (
        "1",
        "us-central1",
        "PLACEHOLDER_0000000000000000000",
        "projects/PLACEHOLDER_XXXXXXXXXXXXXXXXXX-tp/regions/{region}/serviceAttachments/gkedpm-PLACEHOLDER_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
    ),
    # Endpoint 2 (us-east4)
    (
        "2",
        "us-east4",
        "PLACEHOLDER_111111111111111111",
        "projects/PLACEHOLDER_YYYYYYYYYYYYYYYYY-tp/regions/{region}/serviceAttachments/gkedpm-PLACEHOLDER_YYYYYYYYYYYYYYYYYYYYYYYYYYYYYY",
    ),
)

for suffix, region, endpoint_id, attachment_template in regional_endpoints:
    os.environ.update({
        f"REGION_{suffix}": region,
        f"ENDPOINT_ID_{suffix}": endpoint_id,
        f"SERVICE_ATTACHMENT_{suffix}": attachment_template.format(region=region),
        f"EXPECTED_HOST_{suffix}": f"{region}-aiplatform.googleapis.com",
        # Internal PSC infrastructure (IPs and forwarding rules used by Cloud Run)
        f"PSC_IP_NAME_{suffix}": f"ip-psc-vertex-{region}",
        f"PSC_RULE_NAME_{suffix}": f"rule-psc-vertex-{region}",
        # Cloud Run service name ("smart router")
        f"ROUTER_SERVICE_{suffix}": f"vertex-router-{region}",
        # Serverless NEG (connects the load balancer to Cloud Run)
        f"SNEG_{suffix}": f"sneg-router-{region}",
    })

# --- Global load balancer components ---
os.environ.update({
    "LB_NAME": "vertex-global-failover-lb",
    "LB_IP_NAME": "vertex-galb-ip",
    "LB_CERT_NAME": "vertex-galb-self-signed",
    "LB_MAP_NAME": "vertex-global-map",
    "LB_BACKEND_NAME": "vertex-global-backend",
    "LB_PROXY_NAME": "vertex-galb-https-proxy",
    "LB_FWD_RULE": "vertex-galb-forwarding-rule",
})

print("Environment variables set.")
print(f"Project: {os.environ['CONSUMER_PROJECT_ID']}")
print(f"Regions: {os.environ['REGION_1']} & {os.environ['REGION_2']}")

## Create networking infrastructure

In [None]:
%%bash
echo "--- CREATING PSC INFRASTRUCTURE ---"

# `--subnet` takes a subnetwork name, not a VPC network name. Falling back to
# ${NETWORK} keeps the previous behaviour, which only works when the subnet is
# named after the network (as in an auto-mode VPC). Export SUBNET to override.
PSC_SUBNET="${SUBNET:-${NETWORK}}"

# Reserve an internal address and point a PSC forwarding rule at a service
# attachment.
#   $1 = address name, $2 = forwarding-rule name, $3 = region,
#   $4 = service attachment URI
create_psc_endpoint() {
    local ip_name="$1" rule_name="$2" region="$3" attachment="$4"

    gcloud compute addresses create "${ip_name}" \
        --region="${region}" --subnet="${PSC_SUBNET}" --purpose=GCE_ENDPOINT --quiet

    # Global access lets clients in another region use this endpoint, which the
    # cross-region backup path relies on.
    gcloud compute forwarding-rules create "${rule_name}" \
        --region="${region}" --network="${NETWORK}" --address="${ip_name}" \
        --target-service-attachment="${attachment}" \
        --allow-psc-global-access --quiet
}

# Region 1
create_psc_endpoint "${PSC_IP_NAME_1}" "${PSC_RULE_NAME_1}" "${REGION_1}" "${SERVICE_ATTACHMENT_1}"

# Region 2
create_psc_endpoint "${PSC_IP_NAME_2}" "${PSC_RULE_NAME_2}" "${REGION_2}" "${SERVICE_ATTACHMENT_2}"

# Output IPs for verification
IP_1=$(gcloud compute addresses describe "${PSC_IP_NAME_1}" --region="${REGION_1}" --format="value(address)")
IP_2=$(gcloud compute addresses describe "${PSC_IP_NAME_2}" --region="${REGION_2}" --format="value(address)")

echo "PSC IPs Created: ${IP_1} (R1), ${IP_2} (R2)"

## Build and deploy the Cloud Run "smart" router

In [None]:
import os

# Directory handed to `gcloud run deploy --source` in the deployment cell.
# NOTE(review): a source deploy presumably also needs a requirements.txt (or a
# Dockerfile) in this directory listing flask, requests and google-auth; this
# cell only writes main.py. Confirm one is provided elsewhere.
workdir = "smart_router"
os.makedirs(workdir, exist_ok=True)

# Source of the Cloud Run "smart router": it forwards each POST body to the
# primary Vertex AI endpoint and falls back to the backup endpoint on failure.
app_code = """
import os
import requests
import google.auth
from google.auth.transport.requests import Request
from flask import Flask, request, jsonify
import urllib3

# Requests below are sent with verify=False, so silence the per-request warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
app = Flask(__name__)

# Application Default Credentials are loaded once and refreshed only when the
# cached token is missing or expired, instead of on every request.
_credentials = None


def get_auth_token():
    '''Return (token, None) on success or (None, error_message) on failure.'''
    global _credentials
    try:
        if _credentials is None:
            _credentials, _ = google.auth.default()
        if not _credentials.valid:
            _credentials.refresh(Request())
        return _credentials.token, None
    except Exception as e:
        return None, str(e)


def call_vertex(ip, region, endpoint_id, payload):
    '''POST payload to the endpoint's :predict URL through the given IP.

    Returns (response, None) when an HTTP response was received (any status
    code) or (None, error_message) on an auth or connection failure.
    '''
    project_id = os.environ.get("VERTEX_PROJECT_ID")
    url = f"https://{ip}/v1/projects/{project_id}/locations/{region}/endpoints/{endpoint_id}:predict"

    token, err = get_auth_token()
    if err:
        return None, f"AUTH_ERROR: {err}"

    headers = {
        "Content-Type": "application/json",
        "Host": f"{region}-aiplatform.googleapis.com",
        "Authorization": f"Bearer {token}"
    }

    try:
        # SECURITY: verify=False is required because the URL uses a bare IP,
        # which the server certificate cannot match. TLS is unauthenticated.
        return requests.post(url, json=payload, headers=headers, timeout=10, verify=False), None
    except Exception as e:
        return None, f"CONN_FAIL: {str(e)}"


def _target(prefix):
    '''Read (ip, region, endpoint_id) from the <prefix>_IP/_REGION/_ID env vars.'''
    return (
        os.environ.get(f"{prefix}_IP"),
        os.environ.get(f"{prefix}_REGION"),
        os.environ.get(f"{prefix}_ID"),
    )


@app.route('/', defaults={'path': ''}, methods=['POST'])
@app.route('/<path:path>', methods=['POST'])
def router(path):
    '''Try the primary endpoint, then the backup; return the first 200 reply.

    If neither target yields a usable 200 response, reply with HTTP 500 and a
    trace of what each attempt returned.
    '''
    payload = request.get_json()
    debug_log = []

    for label, prefix in (("Primary", "PRIMARY"), ("Backup", "BACKUP")):
        ip, region, endpoint_id = _target(prefix)
        resp, err = call_vertex(ip, region, endpoint_id, payload)

        # Compare with None explicitly: a Response is falsy for 4xx/5xx codes.
        if resp is None:
            debug_log.append(f"{label} ({region}) Connection Failed: {err}")
            continue

        if resp.status_code == 200:
            try:
                return jsonify(resp.json()), 200
            except ValueError:
                # A 200 whose body is not JSON is unusable; try the next target.
                debug_log.append(f"{label} ({region}) HTTP 200 with non-JSON body: {resp.text}")
                continue

        # Keep the error body so the caller can see why this target failed.
        debug_log.append(f"{label} ({region}) HTTP {resp.status_code}: {resp.text}")

    return jsonify({
        "error": "All regions failed",
        "debug_trace": debug_log,
        "config": {"target_project": os.environ.get("VERTEX_PROJECT_ID")}
    }), 500


if __name__ == "__main__":
    # The Werkzeug debugger allows code execution from the browser, so it must
    # never be on by default for a server bound to 0.0.0.0. Set FLASK_DEBUG=1
    # to enable it for local development.
    app.run(
        debug=os.environ.get("FLASK_DEBUG") == "1",
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 8080)),
    )
"""

main_path = os.path.join(workdir, "main.py")
with open(main_path, "w") as f:
    f.write(app_code)
print("Verbose Debug Code Generated.")

In [None]:
%%bash
# Resolve the internal PSC endpoint IPs that the routers will call.
IP_1=$(gcloud compute addresses describe "${PSC_IP_NAME_1}" --region="${REGION_1}" --format="value(address)")
IP_2=$(gcloud compute addresses describe "${PSC_IP_NAME_2}" --region="${REGION_2}" --format="value(address)")

# Deploying with an empty IP would produce a router that can never connect.
if [[ -z "${IP_1}" || -z "${IP_2}" ]]; then
    echo "ERROR: could not resolve PSC IPs (IP_1='${IP_1}', IP_2='${IP_2}'). Create the PSC infrastructure first." >&2
    exit 1
fi

echo "--- DEPLOYING CLOUD RUN (With VPC Access) ---"

# Deploy one router service from ./smart_router.
#   $1 = service name, $2 = region (also the primary region),
#   $3 = primary IP,   $4 = primary endpoint ID,
#   $5 = backup IP,    $6 = backup region, $7 = backup endpoint ID
deploy_router() {
    local service="$1" region="$2"
    local primary_ip="$3" primary_id="$4"
    local backup_ip="$5" backup_region="$6" backup_id="$7"

    # The PSC IPs are internal addresses, so the service needs VPC egress to
    # reach them. SUBNET defaults to ${NETWORK}, which matches an auto-mode VPC
    # where each regional subnet is named after the network.
    #
    # VERTEX_PROJECT_ID is the variable the router reads, so it is set here
    # rather than patched in by a follow-up `services update`.
    # CONSUMER_PROJECT_ID is kept so the deployed environment is unchanged.
    gcloud run deploy "${service}" \
        --source=./smart_router \
        --region="${region}" \
        --network="${NETWORK}" \
        --subnet="${SUBNET:-${NETWORK}}" \
        --vpc-egress=private-ranges-only \
        --set-env-vars="CONSUMER_PROJECT_ID=${TARGET_PROJECT},VERTEX_PROJECT_ID=${TARGET_PROJECT_ID},PRIMARY_IP=${primary_ip},PRIMARY_REGION=${region},PRIMARY_ID=${primary_id},BACKUP_IP=${backup_ip},BACKUP_REGION=${backup_region},BACKUP_ID=${backup_id}" \
        --quiet
}

# Region 1: primary = region 1, backup = region 2
deploy_router "${ROUTER_SERVICE_1}" "${REGION_1}" \
    "${IP_1}" "${ENDPOINT_ID_1}" \
    "${IP_2}" "${REGION_2}" "${ENDPOINT_ID_2}"

# Region 2: primary = region 2, backup = region 1
deploy_router "${ROUTER_SERVICE_2}" "${REGION_2}" \
    "${IP_2}" "${ENDPOINT_ID_2}" \
    "${IP_1}" "${REGION_1}" "${ENDPOINT_ID_1}"

echo "Deployment Complete."

## Global Load Balancer Setup

In [None]:
%%bash
echo "--- BUILDING GLOBAL LOAD BALANCER ---"

# 1. Serverless NEGs: one per region, each pointing at that region's router.
gcloud compute network-endpoint-groups create "${SNEG_1}" \
    --region="${REGION_1}" --network-endpoint-type=SERVERLESS --cloud-run-service="${ROUTER_SERVICE_1}" --quiet
gcloud compute network-endpoint-groups create "${SNEG_2}" \
    --region="${REGION_2}" --network-endpoint-type=SERVERLESS --cloud-run-service="${ROUTER_SERVICE_2}" --quiet

# 2. Backend service (deliberately created without --protocol / --port-name)
gcloud compute backend-services create "${LB_BACKEND_NAME}" \
    --global --load-balancing-scheme=EXTERNAL_MANAGED --quiet

# 3. Attach both regional NEGs to the backend service
gcloud compute backend-services add-backend "${LB_BACKEND_NAME}" --global \
    --network-endpoint-group="${SNEG_1}" --network-endpoint-group-region="${REGION_1}" --quiet
gcloud compute backend-services add-backend "${LB_BACKEND_NAME}" --global \
    --network-endpoint-group="${SNEG_2}" --network-endpoint-group-region="${REGION_2}" --quiet

# 4. URL map and self-signed certificate
gcloud compute url-maps create "${LB_MAP_NAME}" --default-service="${LB_BACKEND_NAME}" --global --quiet

# Generate the key pair in a private temporary directory so that an existing
# key.pem / cert.pem in the working directory is never overwritten or deleted.
# The trap removes the key material when the cell exits.
CERT_DIR=$(mktemp -d)
trap 'rm -rf "${CERT_DIR}"' EXIT

openssl req -x509 -newkey rsa:2048 -keyout "${CERT_DIR}/key.pem" -out "${CERT_DIR}/cert.pem" -days 365 -nodes -subj '/CN=vertex-failover.com'
# `|| true`: best-effort, so a failure here (for example when the certificate
# already exists) does not affect the exit status.
gcloud compute ssl-certificates create "${LB_CERT_NAME}" \
    --certificate="${CERT_DIR}/cert.pem" --private-key="${CERT_DIR}/key.pem" --global --quiet || true

# 5. Frontend: static IP, HTTPS proxy and forwarding rule
gcloud compute addresses create "${LB_IP_NAME}" --global --ip-version=IPV4 --quiet
LB_IP=$(gcloud compute addresses describe "${LB_IP_NAME}" --global --format="value(address)")

gcloud compute target-https-proxies create "${LB_PROXY_NAME}" \
    --url-map="${LB_MAP_NAME}" --ssl-certificates="${LB_CERT_NAME}" --global --quiet
gcloud compute forwarding-rules create "${LB_FWD_RULE}" \
    --load-balancing-scheme=EXTERNAL_MANAGED --network-tier=PREMIUM \
    --address="${LB_IP}" --target-https-proxy="${LB_PROXY_NAME}" --ports=443 --global --quiet

echo "Load Balancer Created at ${LB_IP}"

## Test

In [None]:
import subprocess
import json
import requests
import urllib3
import os

# The request below uses verify=False, so silence the resulting TLS warnings.
urllib3.disable_warnings()


def _gcloud(*args):
    """Run `gcloud <args>` and return its stdout, stripped.

    Only stdout is captured, so warnings that gcloud writes to stderr cannot
    end up inside the returned value. Raises subprocess.CalledProcessError if
    gcloud exits with a non-zero status.
    """
    completed = subprocess.run(
        ["gcloud", *args], capture_output=True, text=True, check=True
    )
    return completed.stdout.strip()


# 1. Get LB IP and an identity token for the caller
lb_ip = _gcloud(
    "compute", "addresses", "describe", os.environ["LB_IP_NAME"],
    "--global", "--format=value(address)",
)
token = _gcloud("auth", "print-identity-token")

print(f"Targeting LB: https://{lb_ip}/predict")

# 2. Payload
payload = {"instances": [[1.0], [2.0], [3.0]]}

# 3. Send Request
try:
    resp = requests.post(
        f"https://{lb_ip}/predict",
        json=payload,
        headers={"Authorization": f"Bearer {token}"},
        verify=False,  # the cell sends the request to the bare IP without certificate verification
        timeout=10,
    )
except requests.exceptions.RequestException as e:
    print(f"Connection Error: {e}")
else:
    print(f"Status Code: {resp.status_code}")

    # Pretty-print the body when it is JSON; otherwise show it verbatim.
    try:
        body = json.dumps(resp.json(), indent=2)
    except ValueError:
        body = resp.text

    if resp.status_code == 200:
        print("\n✅ SUCCESS! Prediction Received:")
    else:
        print("\n❌ FAILED. Debug Info:")
    print(body)