In [None]:


import pandas as pd

# Load the workshop weather CSV from Cloud Storage into a DataFrame.
df = pd.read_csv(
    'gs://labs.roitraining.com/data-to-ai-workshop/weather_data.csv'
)

# Show the dtype pandas inferred for every column; these guide the BigQuery
# table schema that is declared when the file is loaded into BigQuery.
column_dtypes = df.dtypes
print(column_dtypes)

date                         object
city                         object
state                        object
temperature_f               float64
wind_speed_mph              float64
precipitation_in            float64
barometric_pressure_inHg    float64
humidity_percent            float64
weather_condition            object
dtype: object


In [None]:
# Import the file into a BigQuery table

from google.cloud import bigquery

client = bigquery.Client()

# Set dataset reference vars.
project_id = "qwiklabs-gcp-02-949c0486d822"
dataset_id = "dani_data_to_ai_workshop"
table_id = "weather_data"

# Create the dataset if it doesn't exist. The location is pinned explicitly so it
# matches the location passed to the load job below; it only takes effect when the
# dataset is first created (an existing dataset is left untouched by exists_ok=True).
dataset_ref = bigquery.DatasetReference(project_id, dataset_id)
dataset = bigquery.Dataset(dataset_ref)
dataset.location = "US"
client.create_dataset(dataset, exists_ok=True)

# Define the schema (mirrors the dtypes pandas inferred for the CSV columns).
schema = [
    bigquery.SchemaField("date", "STRING"),
    bigquery.SchemaField("city", "STRING"),
    bigquery.SchemaField("state", "STRING"),
    bigquery.SchemaField("temperature_f", "FLOAT"),
    bigquery.SchemaField("wind_speed_mph", "FLOAT"),
    bigquery.SchemaField("precipitation_in", "FLOAT"),
    bigquery.SchemaField("barometric_pressure_inHg", "FLOAT"),
    bigquery.SchemaField("humidity_percent", "FLOAT"),
    bigquery.SchemaField("weather_condition", "STRING"),
]

# Create the table if it doesn't exist.
table_ref = bigquery.TableReference(dataset_ref, table_id)
table = bigquery.Table(table_ref, schema=schema)
client.create_table(table, exists_ok=True)

# Load the data into the table.
# WRITE_TRUNCATE replaces the table contents on every run; without it the load job
# appends, so re-running this cell would duplicate every row.
job_config = bigquery.LoadJobConfig(
    schema=schema,
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,  # Skip the header row
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

job = client.load_table_from_uri(
    'gs://labs.roitraining.com/data-to-ai-workshop/weather_data.csv',
    table,
    location='US',
    job_config=job_config
)

try:
    job.result()  # Wait for the job to complete
    table = client.get_table(table_ref)
    print(f"Loaded {table.num_rows} rows and {len(table.schema)} columns to {table_id}")
except Exception as e:
    # Report the failure, then stop the cell: the validation below and every later
    # cell depend on this load having succeeded.
    print(f"Error loading data: {e}")
    raise


# Validate the table was created.
table = client.get_table(table_ref)
print(f"Table {table_id} created with {table.num_rows} rows and {len(table.schema)} columns")




Loaded 600 rows and 9 columns to weather_data
Table weather_data created with 600 rows and 9 columns


In [None]:
# Create a model using BigQuery ML with REMOTE WITH CONNECTION

project_id = "qwiklabs-gcp-02-949c0486d822"
dataset_id = "dani_data_to_ai_workshop"
connection_name = "us-central1.vertex-ai"

# Full connection path for reference
# projects/qwiklabs-gcp-02-949c0486d822/locations/us-central1/connections/vertex-ai

# SQL query to create a Gemini model using REMOTE WITH CONNECTION
# For remote models without fine-tuning, we don't need to provide training data
create_model_query = f"""
CREATE OR REPLACE MODEL `{project_id}.{dataset_id}.weather_gemini_model`
REMOTE WITH CONNECTION `{project_id}.{connection_name}`
OPTIONS (
  endpoint = 'gemini-2.5-flash'
)
"""

# Execute the CREATE MODEL query with location specified - use 'US' to match where dataset was created.
# Both the submission and the wait happen inside the try block so that any failure is
# reported with the message below; the error is then re-raised because later cells
# cannot work without the model.
try:
    job = client.query(create_model_query, location="US")
    job.result()  # Wait for the model to be created
    print(f"Successfully created model: {project_id}.{dataset_id}.weather_gemini_model")
except Exception as e:
    print(f"Error creating Gemini model: {e}")
    raise


Successfully created model: qwiklabs-gcp-02-949c0486d822.dani_data_to_ai_workshop.weather_gemini_model


In [7]:
# Example of using the Gemini 2.5 model to add weather forecasts and alerts to the weather_data table

# First, let's create a query that uses the weather_data table to generate a context-rich prompt.
# - recent_weather picks the 20 most recent rows; city/state break ties between rows that
#   share a date so the same 20 rows are selected on every run.
# - weather_context aggregates those rows into one text summary exactly once, in a fixed
#   order, so both prompts are built from the same context string.
# The {project_id}/{dataset_id} placeholders are filled in with str.format() below.
weather_data_query = """
WITH recent_weather AS (
  SELECT
    date,
    city,
    state,
    temperature_f,
    wind_speed_mph,
    precipitation_in,
    barometric_pressure_inHg,
    humidity_percent,
    weather_condition
  FROM
    `{project_id}.{dataset_id}.weather_data`
  ORDER BY
    date DESC, city, state
  LIMIT 20
),
weather_context AS (
  SELECT
    CONCAT(
      'Based on the following recent weather data: ',
      STRING_AGG(
        CONCAT(
          'Date: ', date,
          ', City: ', city,
          ', State: ', state,
          ', Temperature: ', CAST(temperature_f AS STRING), ' F',
          ', Wind: ', CAST(wind_speed_mph AS STRING), ' mph',
          ', Precipitation: ', CAST(precipitation_in AS STRING), ' in',
          ', Pressure: ', CAST(barometric_pressure_inHg AS STRING), ' inHg',
          ', Humidity: ', CAST(humidity_percent AS STRING), '%',
          ', Condition: ', weather_condition
        ),
        '. '
        ORDER BY date DESC, city, state
      )
    ) AS weather_summary
  FROM
    recent_weather
)
SELECT
  CONCAT(
    weather_summary,
    ' Generate a detailed 7-day weather forecast for each city in the dataset. Include temperature ranges, precipitation chances, and wind conditions for each day.'
  ) AS forecast_prompt,
  CONCAT(
    weather_summary,
    ' Identify any potential weather alerts, warnings, or hazardous conditions that might occur in any of these cities in the next 7 days. Include severity levels and recommended precautions.'
  ) AS alerts_prompt
FROM
  weather_context
"""

# Format the query with the project and dataset IDs
formatted_weather_query = weather_data_query.format(
    project_id=project_id,
    dataset_id=dataset_id
)

# Execute the query to get data-based prompts
weather_job = client.query(formatted_weather_query, location="US")
weather_results = weather_job.result()

# Get the prompts from the results (the query returns a single aggregated row).
# If the table is empty the aggregated prompts come back as NULL/None, so fall back
# to an empty string to keep the length report below from failing.
forecast_prompt = ""
alerts_prompt = ""
for row in weather_results:
    forecast_prompt = row.forecast_prompt or ""
    alerts_prompt = row.alerts_prompt or ""

print("=== GENERATED PROMPTS ===")
print("Forecast prompt length:", len(forecast_prompt))
print("Alerts prompt length:", len(alerts_prompt))
print("\n")

# Build one forecast prompt and one alerts prompt for each weather row.
# The query is limited to 5 rows because it is only used to inspect the prompt text.
row_based_query = f"""
WITH weather_data_with_prompts AS (
  SELECT
    *,
    CONCAT(
      'Generate a 7-day weather forecast for ', city, ', ', state,
      ' starting from ', date,
      '. Current conditions: Temperature: ', CAST(temperature_f AS STRING), '°F',
      ', Wind speed: ', CAST(wind_speed_mph AS STRING), ' mph',
      ', Precipitation: ', CAST(precipitation_in AS STRING), ' inches',
      ', Barometric pressure: ', CAST(barometric_pressure_inHg AS STRING), ' inHg',
      ', Humidity: ', CAST(humidity_percent AS STRING), '%',
      ', Weather condition: ', weather_condition,
      '. Include temperature ranges, precipitation chances, and wind conditions for each day.'
    ) AS forecast_prompt,
    CONCAT(
      'Based on the following weather data for ', city, ', ', state,
      ' on ', date,
      ': Temperature: ', CAST(temperature_f AS STRING), '°F',
      ', Wind speed: ', CAST(wind_speed_mph AS STRING), ' mph',
      ', Precipitation: ', CAST(precipitation_in AS STRING), ' inches',
      ', Barometric pressure: ', CAST(barometric_pressure_inHg AS STRING), ' inHg',
      ', Humidity: ', CAST(humidity_percent AS STRING), '%',
      ', Weather condition: ', weather_condition,
      '. Identify any potential weather alerts, warnings, or hazardous conditions that might occur in the next 7 days. Include severity levels and recommended precautions.'
    ) AS alerts_prompt
  FROM
    `{project_id}.{dataset_id}.weather_data`
  LIMIT 5  -- Limiting to 5 rows for testing
)
SELECT
  date, city, state, temperature_f, wind_speed_mph, precipitation_in,
  barometric_pressure_inHg, humidity_percent, weather_condition,
  forecast_prompt, alerts_prompt
FROM
  weather_data_with_prompts
"""

# Run the query and materialise the result rows so they can be counted and indexed.
row_job = client.query(row_based_query, location="US")
row_results = row_job.result()
rows_with_prompts = [result_row for result_row in row_results]

print(f"Generated prompts for {len(rows_with_prompts)} rows")

# Print the first row's prompts as a sanity check of the generated text.
if len(rows_with_prompts) > 0:
    sample_row = rows_with_prompts[0]
    print("\n=== SAMPLE ROW-SPECIFIC PROMPT ===")
    print(f"City: {sample_row.city}, State: {sample_row.state}, Date: {sample_row.date}")
    print("\nForecast prompt:", sample_row.forecast_prompt, sep="\n")
    print("\nAlerts prompt:", sample_row.alerts_prompt, sep="\n")

print("Creating enhanced weather data table with forecasts and alerts...")

# First, check if the enhanced table already exists and delete it if it does.
# This is best-effort housekeeping: the CREATE OR REPLACE TABLE statement below
# replaces an existing table as well.
enhanced_table_id = "weather_data_enhanced"
enhanced_table_ref = bigquery.TableReference(dataset_ref, enhanced_table_id)

try:
    client.get_table(enhanced_table_ref)
    client.delete_table(enhanced_table_ref)
    print(f"Deleted existing {enhanced_table_id} table")
except Exception:
    print(f"Table {enhanced_table_id} does not exist yet, will create it")

# Create the enhanced table directly with ML.GENERATE_TEXT in the query.
# The model is called exactly once per prompt per row (in weather_data_with_responses);
# the *_text columns are then extracted from those stored JSON responses. Calling
# ML.GENERATE_TEXT again for the text columns would double the number of model calls
# and could produce text that does not match the stored JSON response.
enhanced_query = f"""
CREATE OR REPLACE TABLE `{project_id}.{dataset_id}.{enhanced_table_id}` AS
WITH weather_data_with_prompts AS (
  SELECT
    date,
    city,
    state,
    temperature_f,
    wind_speed_mph,
    precipitation_in,
    barometric_pressure_inHg,
    humidity_percent,
    weather_condition,
    CONCAT(
      'Generate a 7-day weather forecast for ', city, ', ', state,
      ' starting from ', date,
      '. Current conditions: Temperature: ', CAST(temperature_f AS STRING), '°F',
      ', Wind speed: ', CAST(wind_speed_mph AS STRING), ' mph',
      ', Precipitation: ', CAST(precipitation_in AS STRING), ' inches',
      ', Barometric pressure: ', CAST(barometric_pressure_inHg AS STRING), ' inHg',
      ', Humidity: ', CAST(humidity_percent AS STRING), '%',
      ', Weather condition: ', weather_condition,
      '. Include temperature ranges, precipitation chances, and wind conditions for each day.'
    ) AS forecast_prompt,
    CONCAT(
      'Based on the following weather data for ', city, ', ', state,
      ' on ', date,
      ': Temperature: ', CAST(temperature_f AS STRING), '°F',
      ', Wind speed: ', CAST(wind_speed_mph AS STRING), ' mph',
      ', Precipitation: ', CAST(precipitation_in AS STRING), ' inches',
      ', Barometric pressure: ', CAST(barometric_pressure_inHg AS STRING), ' inHg',
      ', Humidity: ', CAST(humidity_percent AS STRING), '%',
      ', Weather condition: ', weather_condition,
      '. Identify any potential weather alerts, warnings, or hazardous conditions that might occur in the next 7 days. Include severity levels and recommended precautions.'
    ) AS alerts_prompt
  FROM
    `{project_id}.{dataset_id}.weather_data`
),
-- One model call per prompt per row; the raw JSON responses are kept as columns
weather_data_with_responses AS (
  SELECT
    w.date,
    w.city,
    w.state,
    w.temperature_f,
    w.wind_speed_mph,
    w.precipitation_in,
    w.barometric_pressure_inHg,
    w.humidity_percent,
    w.weather_condition,
    (SELECT ml_generate_text_result FROM ML.GENERATE_TEXT(
      MODEL `{project_id}.{dataset_id}.weather_gemini_model`,
      (SELECT w.forecast_prompt AS prompt),
      STRUCT(0.1 AS temperature, 8192 AS max_output_tokens)
    )) AS weather_forecast,
    (SELECT ml_generate_text_result FROM ML.GENERATE_TEXT(
      MODEL `{project_id}.{dataset_id}.weather_gemini_model`,
      (SELECT w.alerts_prompt AS prompt),
      STRUCT(0.1 AS temperature, 8192 AS max_output_tokens)
    )) AS weather_alerts
  FROM
    weather_data_with_prompts w
)
SELECT
  date,
  city,
  state,
  temperature_f,
  wind_speed_mph,
  precipitation_in,
  barometric_pressure_inHg,
  humidity_percent,
  weather_condition,
  weather_forecast,
  weather_alerts,
  -- Extract text from the stored model responses (no additional model calls)
  JSON_VALUE(weather_forecast, '$.candidates[0].content.parts[0].text') AS weather_forecast_text,
  JSON_VALUE(weather_alerts, '$.candidates[0].content.parts[0].text') AS weather_alerts_text
FROM
  weather_data_with_responses
"""

# Execute the query to create the enhanced table.
# Failures are reported rather than raised; the code that reads the table afterwards
# checks for the table's existence before querying it.
try:
    create_job = client.query(enhanced_query, location="US")
    create_job.result()
    print("Successfully created enhanced weather data table")
except Exception as e:
    print(f"Error creating enhanced table: {str(e)}")

# Define a function to extract text from JSON responses
import json

def extract_text_from_response(response):
    """Return the text of the first part of the first candidate in a Gemini response.

    Args:
        response: The model response as a dict, a JSON string, a plain-text
            string, or None.

    Returns:
        str: The extracted text on success. Otherwise a descriptive string:
        a fixed message for None, the first 1000 characters of a string that
        does not start with '{', an "Invalid JSON" / "Unknown response type"
        message for unusable input, a "Could not extract" message when the
        expected candidates/content/parts/text path is missing, or an
        "Error extracting text" message if anything unexpected is raised.
        This function never raises.
    """
    not_found_message = "Could not extract text from the response. Please check the model output format."

    if response is None:
        return "No content available"

    try:
        # Normalise the input into a dict, bailing out early for anything else.
        if isinstance(response, str):
            if not response.strip().startswith('{'):
                # Not JSON-looking: treat it as plain text and cap its length.
                return response[:1000]
            try:
                payload = json.loads(response)
            except json.JSONDecodeError:
                return f"Invalid JSON: {response[:100]}..."
        elif isinstance(response, dict):
            payload = response
        else:
            return f"Unknown response type: {type(response)}"

        # Walk candidates[0].content.parts[0].text, giving up at the first missing step.
        if 'candidates' not in payload or not payload['candidates']:
            return not_found_message

        first_candidate = payload['candidates'][0]
        if 'content' not in first_candidate or not isinstance(first_candidate['content'], dict):
            return not_found_message

        content = first_candidate['content']
        if 'parts' not in content or not isinstance(content['parts'], list) or len(content['parts']) == 0:
            return not_found_message

        first_part = content['parts'][0]
        if 'text' not in first_part:
            return not_found_message

        return first_part['text']

    except Exception as e:
        return f"Error extracting text: {str(e)}"

# Now check the enhanced table with extracted text columns
print("Checking enhanced table with extracted text columns...")


def _print_text_preview(text, empty_message, continuation_message, max_lines=10):
    """Print the first ``max_lines`` lines of ``text``.

    Prints ``empty_message`` instead when ``text`` is empty or None, and appends
    ``continuation_message`` when ``text`` has more than ``max_lines`` lines.
    """
    if not text:
        print(empty_message)
        return
    all_lines = text.split('\n')
    print('\n'.join(all_lines[:max_lines]))
    if len(all_lines) > max_lines:
        print(continuation_message)


# Record whether the enhanced table exists; any lookup failure counts as "missing".
try:
    client.get_table(f"{project_id}.{dataset_id}.{enhanced_table_id}")
except Exception:
    table_exists = False
    print(f"Table {enhanced_table_id} does not exist yet")
else:
    table_exists = True
    print(f"Table {enhanced_table_id} exists, proceeding with query")

if table_exists:
    # Query the enhanced table to get the text columns
    query = f"""
    SELECT
      date, city, state, temperature_f,
      weather_forecast_text, weather_alerts_text
    FROM
      `{project_id}.{dataset_id}.{enhanced_table_id}`
    LIMIT 5
    """
    print(f"Querying {enhanced_table_id} table for text columns...")

    try:
        query_job = client.query(query, location="US")
        results = query_job.result()

        # Show a sample of the rows together with the first lines of each extracted text
        print("\n=== SAMPLE OF WEATHER DATA WITH EXTRACTED TEXT ===\n")
        for row in results:
            print(f"Date: {row.date}, City: {row.city}, State: {row.state}")
            print(f"Temperature: {row.temperature_f}°F")

            print("\n--- WEATHER FORECAST ---")
            _print_text_preview(
                row.weather_forecast_text,
                "No forecast text available",
                "... (forecast continues)",
            )

            print("\n--- WEATHER ALERTS ---")
            _print_text_preview(
                row.weather_alerts_text,
                "No alerts text available",
                "... (alerts continue)",
            )
            print("\n" + "-"*80)

        # Report how many rows the enhanced table holds
        count_query = f"""
        SELECT COUNT(*) as row_count
        FROM `{project_id}.{dataset_id}.{enhanced_table_id}`
        """

        count_job = client.query(count_query, location="US")
        count_results = count_job.result()

        for row in count_results:
            print(f"Enhanced table has {row.row_count} rows with extracted text columns")

    except Exception as e:
        print(f"Error processing results: {str(e)}")
else:
    print("Skipping query since enhanced table doesn't exist")
    print("Please run the previous cell successfully before proceeding.")

# If the enhanced table was created successfully, let's display a sample of it
if table_exists:
    try:
        # Maximum number of characters of each JSON response shown in the preview lines.
        preview_chars = 200

        # Verify the new table was created and show a sample
        sample_query = f"""
        SELECT
          date, city, state, temperature_f,
          TO_JSON_STRING(weather_forecast) AS forecast_preview,
          TO_JSON_STRING(weather_alerts) AS alerts_preview
        FROM
          `{project_id}.{dataset_id}.{enhanced_table_id}`
        LIMIT 10
        """

        sample_job = client.query(sample_query, location="US")
        sample_results = sample_job.result()

        print("=== SAMPLE OF ENHANCED WEATHER DATA TABLE ===")
        for row in sample_results:
            # Truncate so the trailing "..." really marks a shortened preview;
            # a missing value is shown as an empty preview.
            forecast_preview = (row.forecast_preview or "")[:preview_chars]
            alerts_preview = (row.alerts_preview or "")[:preview_chars]
            print(f"Date: {row.date}, City: {row.city}, State: {row.state}")
            print(f"Temperature: {row.temperature_f}°F")
            print(f"Forecast Preview: {forecast_preview}...")
            print(f"Alerts Preview: {alerts_preview}...")
            print("---")

        # Get the row count of the new table
        count_query = f"""
        SELECT COUNT(*) as row_count
        FROM `{project_id}.{dataset_id}.{enhanced_table_id}`
        """

        count_job = client.query(count_query, location="US")
        count_results = count_job.result()

        for row in count_results:
            print(f"Successfully created enhanced table with {row.row_count} rows")

        # Check the table head. Query the table and print the first 5 rows with
        # the full, untruncated model responses.
        print(f"\n=== TABLE {enhanced_table_id} WITH MODEL RESPONSES ===")
        head_query = f"""
        SELECT
          date, city, state, temperature_f,
          weather_forecast,
          weather_alerts
        FROM `{project_id}.{dataset_id}.{enhanced_table_id}`
        LIMIT 5
        """
        head_job = client.query(head_query, location="US")
        head_results = head_job.result()

        for row in head_results:
            print(f"\nDate: {row.date}, City: {row.city}, State: {row.state}, Temp: {row.temperature_f}°F")
            print("\n--- WEATHER FORECAST ---")
            print(row.weather_forecast)
            print("\n--- WEATHER ALERTS ---")
            print(row.weather_alerts)
            print("\n" + "-"*80)

    except Exception as e:
        # Display-only step: report the problem instead of failing the notebook.
        print(f"Error displaying enhanced table: {str(e)}")

=== GENERATED PROMPTS ===
Forecast prompt length: 3430
Alerts prompt length: 3458


Generated prompts for 5 rows

=== SAMPLE ROW-SPECIFIC PROMPT ===
City: Atlanta, State: GA, Date: 2025-02-21

Forecast prompt:
Generate a 7-day weather forecast for Atlanta, GA starting from 2025-02-21. Current conditions: Temperature: 55.7°F, Wind speed: 5 mph, Precipitation: 0.12 inches, Barometric pressure: 29.8 inHg, Humidity: 50.4%, Weather condition: Cloudy. Include temperature ranges, precipitation chances, and wind conditions for each day.

Alerts prompt:
Creating enhanced weather data table with forecasts and alerts...
Deleted existing weather_data_enhanced table
Successfully created enhanced weather data table
Checking enhanced table with extracted text columns...
Table weather_data_enhanced exists, proceeding with query
Querying weather_data_enhanced table for text columns...

=== SAMPLE OF WEATHER DATA WITH EXTRACTED TEXT ===

Date: 2025-03-01, City: Boston, State: MA
Temperature: 75.6°F

---