# Inference notebook

In [1]:
import time

import numpy as np
import requests

In [2]:
# Base URL of the prediction API. The active value targets a local server;
# to hit the deployed service instead, replace it with the Cloud Run URL
# printed by `make get-api-url` (example kept below).
API_URL = "http://localhost:8080"
# API_URL = "https://rnn-api-ubrff53eoq-ew.a.run.app"

# Use config from model training: number of timesteps in one input sequence.
WINDOW_SIZE = 168

# Seed the global NumPy RNG so Restart & Run All produces the same payloads.
SEED = 42
np.random.seed(SEED)

# Sample input data: baseline values the synthetic sequence is centred on.
PRICE = 6.4
LOAD = 83900.0
PRODUCTION = 78502.0

# Generate noisy sequence data: baseline plus zero-mean Gaussian noise per timestep.
prices = [PRICE + np.random.normal(0, 0.5) for _ in range(WINDOW_SIZE)]
loads = [LOAD + np.random.normal(0, 1000) for _ in range(WINDOW_SIZE)]
productions = [PRODUCTION + np.random.normal(0, 1000) for _ in range(WINDOW_SIZE)]

# One (price, load, production) triple per timestep.
payload = {"features": list(zip(prices, loads, productions))}

print(f"Generated payload with {len(payload['features'])} timesteps")

# requests has no default timeout; without one an unresponsive server would
# block this cell (and the kernel) indefinitely.
response = requests.post(f"{API_URL}/predict", json=payload, timeout=30)
response.raise_for_status()
prediction = response.json()["predicted_price"]

print(f"Predicted Price: {prediction}")

Generated payload with 168 timesteps
Predicted Price: 25.39238166809082


# Monitoring and Metrics

Check the system metrics (Prometheus) and data drift reports using the following endpoints:

- **Metrics**: `GET /metrics` — Prometheus metrics as plain text
- **Monitoring**: `GET /monitoring` — data drift report (HTML)

In [6]:
# Fetch the /metrics endpoint and print a short preview of the returned samples.
MAX_METRICS_SHOWN = 20  # cap on printed lines so the cell output stays readable

try:
    # Bounded wait: fail fast instead of hanging the kernel if the API is down.
    response = requests.get(f"{API_URL}/metrics", timeout=10)
    response.raise_for_status()
except requests.RequestException as e:
    # Covers connection errors, timeouts and non-2xx statuses from raise_for_status().
    print(f"Could not fetch metrics: {e}")
else:
    print("Prometheus Metrics found!")
    print("-" * 20)
    # Remove comments and empty lines. splitlines() also handles "\r\n" endings,
    # where split("\n") would leave stray "\r" lines that pass the emptiness check.
    metrics = [
        line
        for line in response.text.splitlines()
        if line and not line.startswith("#")
    ]

    for line in metrics[:MAX_METRICS_SHOWN]:
        print(line)

    if len(metrics) > MAX_METRICS_SHOWN:
        print(f"\n... and {len(metrics) - MAX_METRICS_SHOWN} more metrics")

Prometheus Metrics found!
--------------------
python_gc_objects_collected_total{generation="0"} 9767.0
python_gc_objects_collected_total{generation="1"} 1320.0
python_gc_objects_collected_total{generation="2"} 271.0
python_gc_objects_uncollectable_total{generation="0"} 0.0
python_gc_objects_uncollectable_total{generation="1"} 0.0
python_gc_objects_uncollectable_total{generation="2"} 0.0
python_gc_collections_total{generation="0"} 478.0
python_gc_collections_total{generation="1"} 43.0
python_gc_collections_total{generation="2"} 3.0
python_info{implementation="CPython",major="3",minor="13",patchlevel="11",version="3.13.11"} 1.0
process_virtual_memory_bytes 4.03267584e+09
process_resident_memory_bytes 6.71592448e+08
process_start_time_seconds 1.76916327955e+09
process_cpu_seconds_total 13.65
process_open_fds 29.0
process_max_fds 1.048576e+06
http_requests_total{handler="/predict",method="POST",status="2xx"} 1.0
http_requests_total{handler="/monitoring",method="GET",status="2xx"} 2.0
http

In [7]:
# Send a batch of synthetic /predict requests to populate the API's logs.
# Progress is printed inline: "." for a successful request, "x" for a failed one.
N_REQUESTS = 15
failed_requests = 0

print(f"Generating {N_REQUESTS} random predictions to populate logs...")
for _ in range(N_REQUESTS):
    # Generate random variations around the baseline values for each request
    prices = [PRICE + np.random.normal(0, 0.5) for _ in range(WINDOW_SIZE)]
    loads = [LOAD + np.random.normal(0, 1000) for _ in range(WINDOW_SIZE)]
    productions = [PRODUCTION + np.random.normal(0, 1000) for _ in range(WINDOW_SIZE)]

    payload = {"features": list(zip(prices, loads, productions))}

    try:
        # Bounded wait so one stalled request cannot block the whole loop forever.
        r = requests.post(f"{API_URL}/predict", json=payload, timeout=30)
        r.raise_for_status()
        print(".", end="", flush=True)
    except requests.RequestException:
        # Best effort: keep going, but remember the failure for the summary below.
        failed_requests += 1
        print("x", end="", flush=True)

    # Small delay to be nice to the server
    time.sleep(0.1)

print("\nDone logging data!")
if failed_requests:
    print(f"{failed_requests} of {N_REQUESTS} requests failed.")

Generating 15 random predictions to populate logs...
...............
Done logging data!


In [8]:
# Check Data Drift Monitor
# Note: You need to make several predictions first to generate enough log data
try:
    # Generous but finite timeout: the response body can be large (see output below),
    # yet the cell should still fail rather than hang if the server never answers.
    response = requests.get(f"{API_URL}/monitoring", timeout=60)
except requests.RequestException as e:
    print(f"Error accessing monitoring: {e}")
else:
    if response.status_code == 200:
        # response.text decodes the body on every access; decode once and reuse.
        report_html = response.text
        print("Monitoring Dashboard is accessible.")
        print(f"Response (HTML) length: {len(report_html)}")
        # The page content is matched against the placeholder messages to tell
        # whether a report was actually produced.
        if "No data logged yet" in report_html:
            print("Status: No data logged yet.")
        elif "Not enough data" in report_html:
            print("Status: Not enough data for report.")
        else:
            print("Status: Report generated.")
    else:
        print(f"Failed to fetch monitoring: Status {response.status_code}")

Monitoring Dashboard is accessible.
Response (HTML) length: 13930758
Status: Report generated.
