In [34]:
import google.auth
from google.cloud import bigquery
import requests
import pandas as pd

## Get the project_id
CREDENTIALS, PROJECT_ID = google.auth.default()
print(f"Detected Project ID: {PROJECT_ID}")

## Set various names
DATASET_ID = "bootcamp_challenge5"
TABLE_ID = "airports_data"
FORECAST_TABLE = "airport_weather_forecasts"
REPORT_TABLE = "airport_weather_alerts"

GCS_URI = "gs://labs.roitraining.com/data-to-ai-workshop/airports.csv"

## Connect BigQuery Client and create dataset

client = bigquery.Client(project=PROJECT_ID)

dataset_ref = bigquery.Dataset(f"{PROJECT_ID}.{DATASET_ID}")
dataset_ref.location = "US"

try:
    client.create_dataset(dataset_ref)
    print("Dataset created.")
except Exception:
    print("Dataset already exists.")

## Ingeset data
table_id = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
)

load_job = client.load_table_from_uri(
    GCS_URI,
    table_id,
    job_config=job_config,
)

load_job.result()
print("Raw data loaded into BigQuery.")

Detected Project ID: qwiklabs-gcp-00-c2e92c8fc9eb
Dataset already exists.
Raw data loaded into BigQuery.


In [35]:
## Load data from BigQuery

query = f"""
SELECT *
FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`
"""

df = client.query(query).to_dataframe()
df.head(100)

Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,icao_code,iata_code,gps_code,local_code,home_link,wikipedia_link,keywords
0,6523,00A,heliport,Total RF Heliport,40.070985,-74.933689,11,,US,US-PA,Bensalem,False,,,K00A,00A,https://www.penndot.pa.gov/TravelInPA/airports...,,
1,323361,00AA,small_airport,Aero B Ranch Airport,38.704022,-101.473911,3435,,US,US-KS,Leoti,False,,,00AA,00AA,,,
2,6524,00AK,small_airport,Lowell Field,59.947733,-151.692524,450,,US,US-AK,Anchor Point,False,,,00AK,00AK,,,
3,6525,00AL,small_airport,Epps Airpark,34.864799,-86.770302,820,,US,US-AL,Harvest,False,,,00AL,00AL,,,
4,506791,00AN,small_airport,Katmai Lodge Airport,59.093287,-156.456699,80,,US,US-AK,King Salmon,False,,,00AN,00AN,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,506122,01NR,heliport,McGee 01 Heliport,34.196264,-77.919917,40,,US,US-NC,Wilmington,False,,,01NR,01NR,,,
96,6615,01NV,small_airport,Lantana Ranch Airport,38.763901,-119.027000,4600,,US,US-NV,Yerington,False,,,01NV,01NV,,,
97,6616,01NY,heliport,Vassar Hospital Heliport,41.692415,-73.936830,100,,US,US-NY,Poughkeepsie,False,,,01NY,01NY,,,
98,347920,01OH,heliport,Atrium Medical Center Heliport,39.497455,-84.313851,775,,US,US-OH,Middletown,False,,,01OH,01OH,,,


In [36]:
## Prep the subset of large airports in US

query = f"""
SELECT
  ident,
  name as airport_name,
  latitude_deg,
  longitude_deg
FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`
WHERE type = 'large_airport'
  AND latitude_deg IS NOT NULL
  AND longitude_deg IS NOT NULL
  AND iso_country = 'US'
"""

df_airports = client.query(query).to_dataframe()
df_airports.head()

Unnamed: 0,ident,airport_name,latitude_deg,longitude_deg
0,KABQ,Albuquerque International Sunport,35.039976,-106.608925
1,KADW,Joint Base Andrews,38.810799,-76.866997
2,KATL,Hartsfield Jackson Atlanta International Airport,33.6367,-84.428101
3,KAUS,Austin Bergstrom International Airport,30.197535,-97.662015
4,KBDL,Bradley International Airport,41.93851,-72.688066


In [37]:
## API call to pull weather forecast

HEADERS = {
    "User-Agent": "airport-weather-poc"
}

def get_weather_forecast(lat, lon):
    point_url = f"https://api.weather.gov/points/{lat},{lon}"
    point_resp = requests.get(point_url, headers=HEADERS)

    if point_resp.status_code != 200:
        return None

    forecast_url = point_resp.json()["properties"]["forecast"]
    forecast_resp = requests.get(forecast_url, headers=HEADERS)

    if forecast_resp.status_code != 200:
        return None

    # Use first forecast period (current / near-term)
    period = forecast_resp.json()["properties"]["periods"][0]

    return {
        "forecast_name": period["name"],
        "short_forecast": period["shortForecast"],
        "temperature": period["temperature"],
        "wind_speed": period["windSpeed"],
        "detailed_forecast": period["detailedForecast"]
    }

import time

weather_results = []

for _, row in df_airports.iterrows():
    print(f"Fetching forecast for {row.ident}...")
    forecast = get_weather_forecast(row.latitude_deg, row.longitude_deg)

    if forecast:
        weather_results.append({
            "airport_id": row.ident,
            "airport_name": row.airport_name,
            "latitude": row.latitude_deg,
            "longitude": row.longitude_deg,
            **forecast
        })

    time.sleep(0.2)

Fetching forecast for KABQ...
Fetching forecast for KADW...
Fetching forecast for KATL...
Fetching forecast for KAUS...
Fetching forecast for KBDL...
Fetching forecast for KBNA...
Fetching forecast for KBOS...
Fetching forecast for KBUF...
Fetching forecast for KBWI...
Fetching forecast for KCLE...
Fetching forecast for KCLT...
Fetching forecast for KCMH...
Fetching forecast for KCVG...
Fetching forecast for KDCA...
Fetching forecast for KDEN...
Fetching forecast for KDFW...
Fetching forecast for KDTW...
Fetching forecast for KEWR...
Fetching forecast for KFLL...
Fetching forecast for KIAD...
Fetching forecast for KIAH...
Fetching forecast for KIND...
Fetching forecast for KJAX...
Fetching forecast for KJFK...
Fetching forecast for KLAS...
Fetching forecast for KLAX...
Fetching forecast for KLGA...
Fetching forecast for KMCI...
Fetching forecast for KMCO...
Fetching forecast for KMDW...
Fetching forecast for KMEM...
Fetching forecast for KMIA...
Fetching forecast for KMKE...
Fetching f

In [38]:
## Load data into Big Query

df_forecasts = pd.DataFrame(weather_results)

table_id = f"{PROJECT_ID}.{DATASET_ID}.{FORECAST_TABLE}"

job = client.load_table_from_dataframe(
    df_forecasts,
    table_id
)

job.result()
print("Weather forecasts loaded into BigQuery.")

Weather forecasts loaded into BigQuery.


In [39]:
df_forecasts.head()

Unnamed: 0,airport_id,airport_name,latitude,longitude,forecast_name,short_forecast,temperature,wind_speed,detailed_forecast
0,KABQ,Albuquerque International Sunport,35.039976,-106.608925,Today,Sunny,56,5 to 15 mph,"Sunny, with a high near 56. Northwest wind 5 t..."
1,KADW,Joint Base Andrews,38.810799,-76.866997,This Afternoon,Partly Sunny,36,10 mph,"Partly sunny, with a high near 36. Southwest w..."
2,KATL,Hartsfield Jackson Atlanta International Airport,33.6367,-84.428101,This Afternoon,Sunny,52,10 mph,"Sunny, with a high near 52. South wind around ..."
3,KAUS,Austin Bergstrom International Airport,30.197535,-97.662015,Today,Sunny,73,15 mph,"Sunny. High near 73, with temperatures falling..."
4,KBDL,Bradley International Airport,41.93851,-72.688066,Today,Sunny,32,12 to 17 mph,"Sunny, with a high near 32. West wind 12 to 17..."


In [40]:
## Set up connection to Gemini

MODEL_NAME = 'flash_model'

## load LLM From Remote Endpoint
## Basic Model Data EDA
query = f"""
CREATE OR REPLACE MODEL `{DATASET_ID}.{MODEL_NAME}`
  REMOTE WITH CONNECTION DEFAULT
  OPTIONS(ENDPOINT = 'gemini-2.5-flash');
"""

print(query)
client.query(query).result()


CREATE OR REPLACE MODEL `bootcamp_challenge5.flash_model`
  REMOTE WITH CONNECTION DEFAULT
  OPTIONS(ENDPOINT = 'gemini-2.5-flash');



<google.cloud.bigquery.table._EmptyRowIterator at 0x7a8348545640>

In [41]:
## Create the weather report and store the alerts in BigQuery

REPORT_TABLE

query = f"""
     CREATE OR REPLACE TABLE `{PROJECT_ID}.{DATASET_ID}.{REPORT_TABLE}` AS
     SELECT
         *
       , JSON_VALUE(ml_generate_text_result, '$.candidates[0].content.parts[0].text') AS alert
     FROM ML.GENERATE_TEXT(
       MODEL `{PROJECT_ID}.{DATASET_ID}.{MODEL_NAME}`,
      (
        SELECT CONCAT("Analyze the weather data and create an alert: airport_id, airport_name, forecast_name, short_forecast, temperature, wind_speed, detailed_forecast: ",
               airport_id, airport_name, forecast_name, short_forecast, temperature, wind_speed, detailed_forecast) as prompt,
               *
        FROM `{PROJECT_ID}.{DATASET_ID}.{FORECAST_TABLE}`),
        STRUCT (0.2 AS temperature,
                1024 AS max_output_tokens,
                0.8 AS top_p,
                40 AS top_k))
"""

results = client.query(query).result()