<a href="https://colab.research.google.com/github/brandonmoss124/mgmt467-analytics-portfolio/blob/main/Lab7_Batch_Pipeline_(3).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab 7: Automated Batch Pipeline (Cloud Functions + Scheduler)

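This notebook builds a small batch pipeline: it fetches the current weather for Lafayette, IN from the OpenWeatherMap API, loads it into a BigQuery table, and wraps both steps in an HTTP Cloud Function that Cloud Scheduler can trigger.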

## 1. Setup & Authentication

Install required libraries and authenticate to Google Cloud.

In [1]:
# Install libraries (if needed in Colab)
!pip install -q google-cloud-bigquery requests functions-framework

from google.colab import auth  # comment out if not using Colab
auth.authenticate_user()  # this opens a browser window in Colab

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/41.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/55.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.8/55.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h

## 2. Configuration



In [2]:
import os
from google.cloud import bigquery

# TODO: update these values for your environment
PROJECT_ID = "proven-agility-477721-q9"  # GCP project ID, e.g. 'mgmt-467-xxxx'
DATASET_ID = "superstore_data"  # BigQuery dataset that holds the weather table
# Fully qualified destination table in "project.dataset.table" form.
TABLE_ID = f"{PROJECT_ID}.{DATASET_ID}.realtime_weather"

# Export the project ID to the process environment.
# NOTE(review): presumably so Google client libraries default to this project - confirm.
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID

# Shared BigQuery client; load_weather_to_bq uses it for its inserts.
bq_client = bigquery.Client(project=PROJECT_ID)

## 3. Extract step – call the OpenWeatherMap API



In [3]:
# === API KEY SETUP (Colab only, DO NOT commit real key to GitHub) ===
import os
from getpass import getpass

# This cell will PROMPT you for your OpenWeather API key at runtime,
# so you don't have to hard-code it in the notebook.
# getpass() is used instead of input() because input() echoes what you type
# into the cell output, and cell outputs are saved with the notebook.

OPENWEATHER_API_KEY = getpass("Enter your OpenWeather API key (will not be saved in the file): ").strip()
os.environ["OPENWEATHER_API_KEY"] = OPENWEATHER_API_KEY

# Report whether a non-empty key was entered; never print the key itself.
print("OPENWEATHER_API_KEY is set?", bool(os.environ.get("OPENWEATHER_API_KEY")))


Enter your OpenWeather API key (will not be saved in the file): [REDACTED]
OPENWEATHER_API_KEY is set? True


In [4]:
import requests
import logging
import os

# Fallback only: setdefault leaves an already-set OPENWEATHER_API_KEY untouched
# and otherwise stores a placeholder, so the os.environ lookup inside
# fetch_weather_lafayette does not raise KeyError.
# NOTE(review): with the placeholder the request is still sent; presumably the
# API rejects it and fetch_weather_lafayette returns None - confirm.
os.environ.setdefault("OPENWEATHER_API_KEY", "REPLACE_WITH_YOUR_KEY_OR_USE_SECRETS")

# Ask for INFO-level logging on the root logger (basicConfig does nothing if the
# root logger already has handlers) and create the logger used by this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def fetch_weather_lafayette():
    """Fetch current weather for Lafayette, IN from OpenWeatherMap.

    Reads the API key from the OPENWEATHER_API_KEY environment variable and
    requests metric units.

    Returns:
        The parsed JSON body of the response on success, or None when the
        request fails (any requests.exceptions.RequestException, including a
        non-2xx status), so the notebook does not crash if the API key is
        invalid or the request fails.

    Raises:
        KeyError: If OPENWEATHER_API_KEY is not present in the environment.
    """
    api_key = os.environ["OPENWEATHER_API_KEY"]
    city = "Lafayette,IN,US"
    url = "https://api.openweathermap.org/data/2.5/weather"

    params = {
        "q": city,
        "appid": api_key,
        "units": "metric",
    }

    try:
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()  # will raise if HTTP error
        logger.info("Weather API call succeeded")
        return response.json()
    except requests.exceptions.RequestException as e:
        # The exception text from requests contains the request URL, and the
        # URL carries the key as the `appid` query parameter. Mask it so the
        # secret never reaches the logs or the saved cell output.
        detail = str(e)
        if api_key:  # str.replace("", ...) would mangle the whole message
            detail = detail.replace(api_key, "***")
        logger.error("Weather API call failed: %s", detail)
        return None

# Quick manual test (optional in Colab)
sample_weather = fetch_weather_lafayette()
if sample_weather is None:
    print("No weather data returned. Check your OPENWEATHER_API_KEY or network connectivity.")

# A notebook only renders the LAST top-level expression of a cell; an expression
# nested inside an if-body is silently discarded. Keeping this as the final line
# shows the payload on success and renders nothing when the value is None.
sample_weather


## 4. Load step



In [5]:
from datetime import datetime, timezone

def transform_weather_for_bq(weather_json):
    """Flatten a raw OpenWeatherMap payload into BigQuery-ready rows.

    Args:
        weather_json: Mapping with a "main" section (providing "temp" and
            "humidity"), a non-empty "weather" list whose first entry
            provides "main" and "description", a "dt" timestamp, and
            optionally "name".

    Returns:
        A one-element list holding a dict with the keys city, temp_c,
        humidity, weather_main, weather_desc and dt_utc (ISO-8601, UTC).

    TODO: Ensure this matches the schema you created in BigQuery.
    """
    # Fields are read in the same order as the output columns.
    city_name = weather_json.get("name")

    readings = weather_json["main"]
    temperature = readings["temp"]
    humidity_pct = readings["humidity"]

    primary_condition = weather_json["weather"][0]
    condition_label = primary_condition["main"]
    condition_text = primary_condition["description"]

    observed_at = datetime.fromtimestamp(weather_json["dt"], tz=timezone.utc)

    row = {
        "city": city_name,
        "temp_c": temperature,
        "humidity": humidity_pct,
        "weather_main": condition_label,
        "weather_desc": condition_text,
        "dt_utc": observed_at.isoformat(),
    }
    return [row]

def load_weather_to_bq(weather_json):
    """Transform a weather payload and insert the resulting rows into TABLE_ID.

    Args:
        weather_json: Raw OpenWeatherMap payload, passed straight to
            transform_weather_for_bq.

    Raises:
        RuntimeError: If insert_rows_json reports any errors.
    """
    insert_errors = bq_client.insert_rows_json(
        TABLE_ID,
        transform_weather_for_bq(weather_json),
    )
    if not insert_errors:
        return
    raise RuntimeError(f"BigQuery insert errors: {insert_errors}")

# Optional manual test
# load_weather_to_bq(sample_weather)

## 5. Cloud Function entrypoint (HTTP)



In [6]:
import functions_framework
import logging

# Repeats the logging setup from the extract cell. getLogger(__name__) returns
# the same logger object for the same name, so rebinding `logger` is harmless.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@functions_framework.http
def ingest_weather(request):
    """HTTP Cloud Function that fetches weather and loads it into BigQuery.

    Args:
        request: The incoming HTTP request; it is not inspected.

    Returns:
        A (body, status) tuple: ("OK: data loaded", 200) when the row was
        inserted, or an error message with status 500 when either the weather
        fetch or the BigQuery load fails.
    """
    logger.info("Starting weather ingest.")

    weather_json = fetch_weather_lafayette()
    if weather_json is None:
        # fetch_weather_lafayette reports request failures by returning None.
        # Passing None on to the load step would fail with an unrelated
        # AttributeError, so stop here with an explicit 500.
        logger.error("Ingest aborted: weather fetch returned no data.")
        return ("ERROR: weather fetch failed", 500)

    try:
        load_weather_to_bq(weather_json)
    except Exception:
        # Top-level request boundary: record the traceback and turn any
        # transform/insert failure into a controlled 500 response.
        logger.exception("Ingest failed while loading data into BigQuery.")
        return ("ERROR: BigQuery load failed", 500)

    logger.info("Ingest complete.")
    return ("OK: data loaded", 200)

## 6. Challenge: Error handling prompt


Ask Gemini:

> "Please show me how to add a simple try/except block with `requests.exceptions.RequestException` and logging to my `ingest_weather` Cloud Function. I want it to log an error if the OpenWeatherMap API call fails and return an HTTP 500 status, and otherwise log success and return 200 when the data is loaded into BigQuery."


