In [38]:
from google.cloud import bigquery
import pandas as pd

In [39]:
## Project configuration: identifiers shared by every cell below

# Source CSV in Cloud Storage that the load job ingests.
GCS_URI = "gs://labs.roitraining.com/data-to-ai-workshop/weather_data.csv"

# BigQuery project and dataset that hold every object this notebook creates.
PROJECT_ID = "qwiklabs-gcp-00-d269cced691e"
DATASET_ID = "weather"

# Object names inside the dataset: raw load target, generated-report table, model.
RAW_TABLE, REPORT_TABLE = "weather_data_raw", "weather_data_report"
MODEL_NAME = "flash_model"

# Single client bound to the project; reused for all load jobs and queries.
client = bigquery.Client(project=PROJECT_ID)


In [40]:
## Create the BigQuery dataset used by the rest of the notebook.
## The exercise asks for BigQuery's LLM capability, so the work is done through the
## BigQuery client library rather than in pandas.

dataset_ref = bigquery.Dataset(f"{PROJECT_ID}.{DATASET_ID}")
dataset_ref.location = "US"

# exists_ok=True keeps the cell re-runnable: an already-existing dataset is not an
# error. Any other failure (permissions, wrong project, network) now raises instead
# of being silently reported as "already exists".
client.create_dataset(dataset_ref, exists_ok=True)
print("Dataset is ready.")


Dataset created.


In [41]:
# Data ingestion: load the CSV directly from its Cloud Storage location into BigQuery.

table_id = f"{PROJECT_ID}.{DATASET_ID}.{RAW_TABLE}"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,  # skip the first row of the file (presumably the column header)
    autodetect=True,  # let BigQuery infer the schema
    # Overwrite the table on every run. Without this the load appends, so re-running
    # the cell would store a second copy of every row.
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

load_job = client.load_table_from_uri(GCS_URI, table_id, job_config=job_config)

# Block until the load job finishes; raises if the job failed.
load_job.result()
print("Weather data loaded.")


Emergency call data loaded.


In [42]:
## Quick look at the raw table: fetch ten rows into a DataFrame for display
query = f"""
SELECT *
FROM `{PROJECT_ID}.{DATASET_ID}.{RAW_TABLE}`
LIMIT 10
"""
raw_preview_job = client.query(query)
raw_preview_job.to_dataframe()


Unnamed: 0,date,city,state,temperature_f,wind_speed_mph,precipitation_in,barometric_pressure_inHg,humidity_percent,weather_condition
0,2025-02-21,Atlanta,GA,55.7,5.0,0.12,29.8,50.4,Cloudy
1,2025-02-26,Atlanta,GA,75.2,10.4,0.03,29.58,49.9,Cloudy
2,2025-03-01,Atlanta,GA,51.7,4.7,0.08,29.74,49.9,Cloudy
3,2025-03-05,Atlanta,GA,74.4,5.1,0.02,29.92,50.4,Cloudy
4,2025-03-10,Atlanta,GA,59.5,9.6,0.09,29.67,57.2,Cloudy
5,2025-03-14,Atlanta,GA,71.7,7.2,0.18,29.92,55.3,Cloudy
6,2025-02-19,Boston,MA,61.7,3.9,0.11,29.62,54.1,Cloudy
7,2025-03-09,Boston,MA,76.7,4.3,0.09,29.52,40.9,Cloudy
8,2025-03-13,Boston,MA,71.9,9.8,0.16,29.99,42.3,Cloudy
9,2025-03-19,Boston,MA,60.7,6.4,0.04,29.83,49.4,Cloudy


In [43]:
## Register the remote Gemini endpoint as a BigQuery ML model

# The model path is fully qualified with the project so that it matches the
# `project.dataset.model` reference used when the model is called, and does not
# depend on the client's default project.
query = f"""
CREATE OR REPLACE MODEL `{PROJECT_ID}.{DATASET_ID}.{MODEL_NAME}`
  REMOTE WITH CONNECTION DEFAULT
  OPTIONS(ENDPOINT = 'gemini-2.0-flash-001');
"""

print(query)
# Wait for the DDL statement to finish; the trailing semicolon keeps the notebook
# from displaying the empty row-iterator repr.
client.query(query).result();


CREATE OR REPLACE MODEL `weather.flash_model`
  REMOTE WITH CONNECTION DEFAULT
  OPTIONS(ENDPOINT = 'gemini-2.0-flash-001');



<google.cloud.bigquery.table._EmptyRowIterator at 0x7e0ff56c6f90>

In [44]:
## Create the weather report table: one generated report per row of the raw table

# Columns of the raw table that are passed to the model, in prompt order.
report_fields = [
    "date",
    "city",
    "state",
    "temperature_f",
    "wind_speed_mph",
    "precipitation_in",
    "barometric_pressure_inHg",
    "humidity_percent",
    "weather_condition",
]

# Every value is labelled with its column name and separated from the next one.
# Concatenating the raw values back to back runs them together, so the model cannot
# tell where one number ends and the next begins. FORMAT's %t specifier renders any
# type as text and prints NULL for missing values instead of nulling the whole prompt.
prompt_format = (
    "Analyze the weather and create a report based on the following data. "
    + "; ".join(f"{field}: %t" for field in report_fields)
)
prompt_args = ", ".join(report_fields)

# ML.GENERATE_TEXT returns the model response as JSON in ml_generate_text_result;
# JSON_VALUE pulls the generated text of the first candidate into weather_report.
query = f"""
     CREATE OR REPLACE TABLE `{PROJECT_ID}.{DATASET_ID}.{REPORT_TABLE}` AS
     SELECT
         *
       , JSON_VALUE(ml_generate_text_result, '$.candidates[0].content.parts[0].text') AS weather_report
     FROM ML.GENERATE_TEXT(
       MODEL `{PROJECT_ID}.{DATASET_ID}.{MODEL_NAME}`,
      (
        SELECT FORMAT("{prompt_format}", {prompt_args}) AS prompt
        FROM `{PROJECT_ID}.{DATASET_ID}.{RAW_TABLE}`),
        STRUCT (0.2 AS temperature,
                1024 AS max_output_tokens,
                0.8 AS top_p,
                40 AS top_k))
"""

# Block until the table has been (re)built; raises if the query failed.
results = client.query(query).result()

In [45]:
## Show final results: fetch ten rows of the report table into a DataFrame for display
query = f"""
SELECT *
FROM `{PROJECT_ID}.{DATASET_ID}.{REPORT_TABLE}`
LIMIT 10
"""
report_preview_job = client.query(query)
report_preview_job.to_dataframe()


Unnamed: 0,ml_generate_text_result,ml_generate_text_status,prompt,weather_report
0,"{""candidates"":[{""avg_logprobs"":-0.176171815030...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
1,"{""candidates"":[{""avg_logprobs"":-0.174204832429...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
2,"{""candidates"":[{""avg_logprobs"":-0.169341433250...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
3,"{""candidates"":[{""avg_logprobs"":-0.219321654723...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
4,"{""candidates"":[{""avg_logprobs"":-0.153981772457...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
5,"{""candidates"":[{""avg_logprobs"":-0.201877470050...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
6,"{""candidates"":[{""avg_logprobs"":-0.144523405692...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
7,"{""candidates"":[{""avg_logprobs"":-0.192333414236...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
8,"{""candidates"":[{""avg_logprobs"":-0.128207424230...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
9,"{""candidates"":[{""avg_logprobs"":-0.178764515094...",,Analyze the weather and create a report based ...,"Okay, here's a weather report based on the pro..."
