In [1]:
from google.cloud import bigquery
import requests

In [2]:
## Specify Project Key Metadata

PROJECT_ID = "qwiklabs-gcp-00-d269cced691e"
DATASET_ID = "airport_data"
TABLE_ID = "airports"
FORECAST_TABLE = "airport_weather_forecasts"
REPORT_TABLE = "airport_weather_alert"

GCS_URI = "gs://labs.roitraining.com/data-to-ai-workshop/airports.csv"

client = bigquery.Client(project=PROJECT_ID)


In [3]:
## Step 1: Create Airport Data

dataset_ref = bigquery.Dataset(f"{PROJECT_ID}.{DATASET_ID}")
dataset_ref.location = "US"

try:
    client.create_dataset(dataset_ref)
    print("Dataset created.")
except Exception:
    print("Dataset already exists.")

Dataset already exists.


In [4]:
table_ref = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
)

load_job = client.load_table_from_uri(
    GCS_URI,
    table_ref,
    job_config=job_config
)

load_job.result()  # Wait for job to complete
print("CSV loaded into BigQuery.")


CSV loaded into BigQuery.


In [5]:
## Check Availability / Successful Ingestion
query = f"""
SELECT *
FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`
LIMIT 10
"""

df = client.query(query).to_dataframe()
df


Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,icao_code,iata_code,gps_code,local_code,home_link,wikipedia_link,keywords
0,6523,00A,heliport,Total RF Heliport,40.070985,-74.933689,11,,US,US-PA,Bensalem,False,,,K00A,00A,https://www.penndot.pa.gov/TravelInPA/airports...,,
1,323361,00AA,small_airport,Aero B Ranch Airport,38.704022,-101.473911,3435,,US,US-KS,Leoti,False,,,00AA,00AA,,,
2,6524,00AK,small_airport,Lowell Field,59.947733,-151.692524,450,,US,US-AK,Anchor Point,False,,,00AK,00AK,,,
3,6525,00AL,small_airport,Epps Airpark,34.864799,-86.770302,820,,US,US-AL,Harvest,False,,,00AL,00AL,,,
4,506791,00AN,small_airport,Katmai Lodge Airport,59.093287,-156.456699,80,,US,US-AK,King Salmon,False,,,00AN,00AN,,,
5,322127,00AS,small_airport,Fulton Airport,34.942803,-97.818019,1100,,US,US-OK,Alex,False,,,00AS,00AS,,,
6,6527,00AZ,small_airport,Cordes Airport,34.305599,-112.165001,3810,,US,US-AZ,Cordes,False,,,00AZ,00AZ,,,
7,6528,00CA,small_airport,Goldstone (GTS) Airport,35.35474,-116.885329,3038,,US,US-CA,Barstow,False,,,00CA,00CA,,https://en.wikipedia.org/wiki/Goldstone_Gts_Ai...,
8,324424,00CL,small_airport,Williams Ag Airport,39.427188,-121.763427,87,,US,US-CA,Biggs,False,,,00CL,00CL,,,
9,322658,00CN,heliport,Kitchen Creek Helibase Heliport,32.727374,-116.459742,3350,,US,US-CA,Pine Valley,False,,,00CN,00CN,,,


In [6]:
## Step 2: Call API to pull data

In [7]:
query = f"""
SELECT
  ident,
  name as airport_name,
  latitude_deg,
  longitude_deg
FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`
WHERE type = 'large_airport'
  AND latitude_deg IS NOT NULL
  AND longitude_deg IS NOT NULL
  AND iso_country = 'US'
"""

df_airports = client.query(query).to_dataframe()
df_airports.head()

Unnamed: 0,ident,airport_name,latitude_deg,longitude_deg
0,KABQ,Albuquerque International Sunport,35.039976,-106.608925
1,KADW,Joint Base Andrews,38.810799,-76.866997
2,KATL,Hartsfield Jackson Atlanta International Airport,33.6367,-84.428101
3,KAUS,Austin Bergstrom International Airport,30.197535,-97.662015
4,KBDL,Bradley International Airport,41.93851,-72.688066


In [8]:
HEADERS = {
    "User-Agent": "airport-weather-poc"
}


In [9]:
def get_weather_forecast(lat, lon):
    point_url = f"https://api.weather.gov/points/{lat},{lon}"
    point_resp = requests.get(point_url, headers=HEADERS)

    if point_resp.status_code != 200:
        return None

    forecast_url = point_resp.json()["properties"]["forecast"]
    forecast_resp = requests.get(forecast_url, headers=HEADERS)

    if forecast_resp.status_code != 200:
        return None

    # Use first forecast period (current / near-term)
    period = forecast_resp.json()["properties"]["periods"][0]

    return {
        "forecast_name": period["name"],
        "short_forecast": period["shortForecast"],
        "temperature": period["temperature"],
        "wind_speed": period["windSpeed"],
        "detailed_forecast": period["detailedForecast"]
    }


In [10]:
import time

weather_results = []

for _, row in df_airports.iterrows():
    print(f"Fetching forecast for {row.ident}...")
    forecast = get_weather_forecast(row.latitude_deg, row.longitude_deg)

    if forecast:
        weather_results.append({
            "airport_id": row.ident,
            "airport_name": row.airport_name,
            "latitude": row.latitude_deg,
            "longitude": row.longitude_deg,
            **forecast
        })

    time.sleep(0.2)  # Be polite to the NWS API


Fetching forecast for KABQ...
Fetching forecast for KADW...
Fetching forecast for KATL...
Fetching forecast for KAUS...
Fetching forecast for KBDL...
Fetching forecast for KBNA...
Fetching forecast for KBOS...
Fetching forecast for KBUF...
Fetching forecast for KBWI...
Fetching forecast for KCLE...
Fetching forecast for KCLT...
Fetching forecast for KCMH...
Fetching forecast for KCVG...
Fetching forecast for KDCA...
Fetching forecast for KDEN...
Fetching forecast for KDFW...
Fetching forecast for KDTW...
Fetching forecast for KEWR...
Fetching forecast for KFLL...
Fetching forecast for KIAD...
Fetching forecast for KIAH...
Fetching forecast for KIND...
Fetching forecast for KJAX...
Fetching forecast for KJFK...
Fetching forecast for KLAS...
Fetching forecast for KLAX...
Fetching forecast for KLGA...
Fetching forecast for KMCI...
Fetching forecast for KMCO...
Fetching forecast for KMDW...
Fetching forecast for KMEM...
Fetching forecast for KMIA...
Fetching forecast for KMKE...
Fetching f

In [11]:
## Create Data Frame -

import pandas as pd

df_forecasts = pd.DataFrame(weather_results)
df_forecasts.head()

table_id = f"{PROJECT_ID}.{DATASET_ID}.{FORECAST_TABLE}"

job = client.load_table_from_dataframe(
    df_forecasts,
    table_id
)

job.result()
print("Weather forecasts loaded into BigQuery.")

Weather forecasts loaded into BigQuery.


In [12]:
## Step 3, Use Gemini to Create an alert for each airport and store the alerts in BQ

MODEL_NAME = 'flash_model'

## load LLM From Remote Endpoint
## Basic Model Data EDA
query = f"""
CREATE OR REPLACE MODEL `{DATASET_ID}.{MODEL_NAME}`
  REMOTE WITH CONNECTION DEFAULT
  OPTIONS(ENDPOINT = 'gemini-2.0-flash-001');
"""

print(query)
client.query(query).result()


CREATE OR REPLACE MODEL `airport_data.flash_model`
  REMOTE WITH CONNECTION DEFAULT
  OPTIONS(ENDPOINT = 'gemini-2.0-flash-001');



<google.cloud.bigquery.table._EmptyRowIterator at 0x7c2c8e6bae70>

In [13]:
## Create the weather Report

REPORT_TABLE

query = f"""
     CREATE OR REPLACE TABLE `{PROJECT_ID}.{DATASET_ID}.{REPORT_TABLE}` AS
     SELECT
         *
       , JSON_VALUE(ml_generate_text_result, '$.candidates[0].content.parts[0].text') AS alert
     FROM ML.GENERATE_TEXT(
       MODEL `{PROJECT_ID}.{DATASET_ID}.{MODEL_NAME}`,
      (
        SELECT CONCAT("Analyze the weather data and create an alert: airport_id, airport_name, forecast_name, short_forecast, temperature, wind_speed, detailed_forecast: ",
               airport_id, airport_name, forecast_name, short_forecast, temperature, wind_speed, detailed_forecast) as prompt,
               *
        FROM `{PROJECT_ID}.{DATASET_ID}.{FORECAST_TABLE}`),
        STRUCT (0.2 AS temperature,
                1024 AS max_output_tokens,
                0.8 AS top_p,
                40 AS top_k))
"""

results = client.query(query).result()