In [None]:
# Import python packages (pandas was imported twice; the duplicate is removed)

# Standard library
import random
from datetime import datetime, timedelta

# Third-party packages
import numpy as np
import pandas as pd
import streamlit as st
from faker import Faker
from snowflake.snowpark.context import get_active_session

# Project-local data generator (generate_data.py)
from generate_data import generate_energy_provider_data

# Snowpark session handle; every Snowpark call in the cells below goes through it.
session = get_active_session()

## Data & Metadata Generation

In [None]:
# Generate the demo data for 1,000 customers covering 2025-07-01 .. 2025-09-22.
generate_energy_provider_data(
    session,
    num_customers=1000,
    start_date=datetime(2025, 7, 1),
    end_date=datetime(2025, 9, 22),
)

# Execute the statements stored in metadata.sql one at a time.
# NOTE: splitting on ';' is naive - a semicolon inside a string literal or a
# comment in metadata.sql would cut that statement in two.
with open('metadata.sql', 'r') as f:
    sql_stmts = f.read()

for sql_stmt in sql_stmts.split(';'):
    sql_stmt = sql_stmt.strip()
    # Skip whitespace-only fragments (e.g. the newline after the final ';');
    # the previous len() check let them through as blank statements.
    if sql_stmt:
        session.sql(sql_stmt).collect()

## View Data

In [None]:
# One Snowpark DataFrame per demo table.
customers_df = session.table('DIM_CUSTOMERS')
contracts_df = session.table('FACT_CONTRACTS')
readings_df = session.table('FACT_SMART_METER_READINGS')
billing_df = session.table('FACT_BILLINGS')
support_cases_df = session.table('FACT_SUPPORT_CASES')

# Print the first three rows of each table, in the same order as above.
for preview_df in (
    customers_df,
    contracts_df,
    readings_df,
    billing_df,
    support_cases_df,
):
    preview_df.show(n=3)

## Search Services for High Cardinality Columns

In [None]:
-- Cortex Search service over the distinct TARIFF_PLAN values of FACT_CONTRACTS.
-- It is attached to the TARIFF_PLAN dimension of the CUSTOMER_DATA semantic view.
CREATE OR REPLACE CORTEX SEARCH SERVICE _CA_TARIFF_PLAN
    ON TARIFF_PLAN
    WAREHOUSE = AI_WH
    TARGET_LAG = '1 minute'
    EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
    SELECT DISTINCT TARIFF_PLAN
    FROM FACT_CONTRACTS
);

In [None]:
-- Cortex Search service over the distinct CUSTOMER_NAME values of DIM_CUSTOMERS.
-- It is attached to the CUSTOMER_NAME dimension of the CUSTOMER_DATA semantic view.
CREATE OR REPLACE CORTEX SEARCH SERVICE _CA_CUSTOMER_NAME
    ON CUSTOMER_NAME
    WAREHOUSE = AI_WH
    TARGET_LAG = '1 minute'
    EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
    SELECT DISTINCT CUSTOMER_NAME
    FROM DIM_CUSTOMERS
);

## Create Semantic View

In [None]:
-- Semantic view CUSTOMER_DATA over the five demo tables (DIM_CUSTOMERS plus four
-- FACT_* tables). It declares primary keys, the CUSTOMER_ID joins to DIM_CUSTOMERS,
-- three numeric facts, the descriptive dimensions with their synonyms, and a CA
-- extension holding sample values and verified queries as JSON.
-- The view name is fully qualified while the tables are not, so the tables are
-- resolved against the current database/schema of the session.
-- NOTE(review): the first verified query's question says "subtracting total
-- generation from total consumption", but its SQL computes
-- SUM(KW_GENERATION) - SUM(KWH_CONSUMPTION) - confirm which direction is intended.
-- NOTE(review): the CA JSON literal appears to contain line breaks inside string
-- values ("pending renewal", the second question text) - confirm this is intended.
create or replace semantic view AI_DEVELOPMENT.SI_ENERGY_COMPANY.CUSTOMER_DATA tables (
    -- Logical tables and the column that identifies a row in each.
    DIM_CUSTOMERS primary key (CUSTOMER_ID),
    FACT_BILLINGS primary key (INVOICE_ID),
    FACT_CONTRACTS primary key (CONTRACT_ID),
    FACT_SMART_METER_READINGS primary key (READING_ID),
    FACT_SUPPORT_CASES primary key (CASE_ID)
) relationships (
    -- Every fact table references DIM_CUSTOMERS through CUSTOMER_ID.
    CUSTOMERS_X_BILLINGS as FACT_BILLINGS(CUSTOMER_ID) references DIM_CUSTOMERS(CUSTOMER_ID),
    CUSTOMERS_X_CONTRACTS as FACT_CONTRACTS(CUSTOMER_ID) references DIM_CUSTOMERS(CUSTOMER_ID),
    CUSTOMERS_X_SMART_METER_READINGS as FACT_SMART_METER_READINGS(CUSTOMER_ID) references DIM_CUSTOMERS(CUSTOMER_ID),
    CUSTOMERS_X_SUPPORT_CASES as FACT_SUPPORT_CASES(CUSTOMER_ID) references DIM_CUSTOMERS(CUSTOMER_ID)
) facts (
    -- Numeric measures: invoice amount plus the two smart-meter readings.
    FACT_BILLINGS.AMOUNT_DUE as AMOUNT_DUE with synonyms =(
        'amount_payable',
        'total_due',
        'outstanding_balance',
        'payment_amount',
        'invoice_total',
        'amount_owing',
        'balance_due',
        'payable_amount'
    ) comment = 'The total amount payable in EUR for the billing period.',
    FACT_SMART_METER_READINGS.KWH_CONSUMPTION as KWH_CONSUMPTION with synonyms =(
        'energy_consumed',
        'total_energy_used',
        'kilowatt_hours_used',
        'power_consumption',
        'energy_usage',
        'total_kwh_used',
        'consumption_in_kwh'
    ) comment = 'The total energy consumed in kilowatt-hours (kWh) during the interval leading up to the timestamp.',
    FACT_SMART_METER_READINGS.KW_GENERATION as KW_GENERATION with synonyms =(
        'solar_power_generation',
        'renewable_energy_output',
        'kilowatt_production',
        'energy_generated',
        'power_produced',
        'solar_panel_output',
        'kw_produced',
        'green_energy_generation'
    ) comment = 'The total energy generated in kilowatts (kW) for customers with solar panels.'
) dimensions (
    -- Descriptive attributes, grouped by source table. CUSTOMER_NAME and
    -- TARIFF_PLAN are additionally backed by the _CA_* Cortex Search services.
    DIM_CUSTOMERS.ACCOUNT_STATUS as ACCOUNT_STATUS with synonyms =(
        'account_state',
        'customer_status',
        'subscription_status',
        'account_condition',
        'membership_status',
        'client_status'
    ) comment = 'The current status of the customer''s account (e.g., ''active'', ''suspended'').',
    DIM_CUSTOMERS.ADDRESS as ADDRESS with synonyms =(
        'location',
        'street_address',
        'service_location',
        'customer_location',
        'physical_address',
        'mailing_address',
        'residence',
        'customer_residence'
    ) comment = 'The full street address of the customer''s service location.',
    DIM_CUSTOMERS.CITY as CITY with synonyms =(
        'town',
        'municipality',
        'metropolis',
        'urban_area',
        'locality',
        'settlement',
        'borough'
    ) comment = 'The city of the customer''s service location.',
    DIM_CUSTOMERS.CUSTOMER_ID as CUSTOMER_ID with synonyms =(
        'customer_key',
        'unique_customer_identifier',
        'client_id',
        'account_number',
        'customer_reference',
        'user_id'
    ) comment = 'The primary key and unique string identifier for each customer (e.g., CID_001001).',
    DIM_CUSTOMERS.CUSTOMER_NAME as CUSTOMER_NAME with synonyms =(
        'customer_full_name',
        'client_name',
        'account_holder_name',
        'account_name',
        'customer_title',
        'full_name',
        'account_owner'
    ) comment = 'The name of the customer.' with cortex search service _CA_CUSTOMER_NAME,
    DIM_CUSTOMERS.CUSTOMER_TYPE as CUSTOMER_TYPE with synonyms =(
        'customer_category',
        'account_type',
        'client_classification',
        'customer_segment',
        'account_classification',
        'client_type'
    ) comment = 'Categorizes the customer as either ''residential'' or ''commercial''.',
    DIM_CUSTOMERS.EMAIL as EMAIL with synonyms =(
        'email_address',
        'contact_email',
        'customer_email',
        'billing_email',
        'communication_email'
    ) comment = 'The customer''s email address for communication and billing.',
    DIM_CUSTOMERS.JOIN_DATE as JOIN_DATE comment = 'The date when the customer first signed up for service with E.ON.',
    DIM_CUSTOMERS.POSTAL_CODE as POSTAL_CODE with synonyms =(
        'zip_code',
        'postcode',
        'zip',
        'postal',
        'mailing_code',
        'geographic_code'
    ) comment = 'The postal code for the customer''s address.',
    FACT_BILLINGS.CONSUMPTION_PERIOD_END as CONSUMPTION_PERIOD_END with synonyms =(
        'billing_period_end',
        'consumption_end_date',
        'end_of_billing_cycle',
        'invoice_period_end',
        'period_end_date',
        'service_end_date'
    ) comment = 'The end date of the billing period for which consumption is being charged.',
    FACT_BILLINGS.CONSUMPTION_PERIOD_START as CONSUMPTION_PERIOD_START with synonyms =(
        'billing_cycle_start',
        'start_date',
        'consumption_start',
        'period_begin',
        'charge_start_date',
        'invoice_start_date'
    ) comment = 'The start date of the billing period for which consumption is being charged.',
    FACT_BILLINGS.CUSTOMER_ID as CUSTOMER_ID with synonyms =(
        'client_id',
        'account_id',
        'customer_account',
        'client_reference',
        'account_number',
        'customer_reference_id'
    ) comment = 'Foreign key linking to DIM_CUSTOMERS, identifying the customer who received the invoice.',
    FACT_BILLINGS.DUE_DATE as DUE_DATE with synonyms =(
        'payment_deadline',
        'due_by_date',
        'invoice_due_date',
        'payment_due_date',
        'settlement_date',
        'maturity_date'
    ) comment = 'The date by which the payment for the invoice is due.',
    FACT_BILLINGS.INVOICE_DATE as INVOICE_DATE with synonyms =(
        'invoice_generation_date',
        'invoice_issue_date',
        'billing_date',
        'invoice_creation_date',
        'date_invoiced'
    ) comment = 'The date the invoice was generated and issued to the customer.',
    FACT_BILLINGS.INVOICE_ID as INVOICE_ID with synonyms =(
        'invoice_number',
        'invoice_key',
        'billing_id',
        'invoice_identifier',
        'invoice_code',
        'document_id',
        'invoice_reference'
    ) comment = 'The primary key and unique string identifier for each invoice (e.g., INV_0080001).',
    FACT_BILLINGS.PAYMENT_STATUS as PAYMENT_STATUS with synonyms =(
        'payment_state',
        'invoice_status',
        'billing_status',
        'payment_condition',
        'invoice_condition',
        'billing_condition',
        'payment_situation',
        'invoice_situation',
        'billing_situation'
    ) comment = 'The current status of the invoice payment (e.g., ''paid'', ''overdue'', ''pending'').',
    FACT_CONTRACTS.CONTRACT_ID as CONTRACT_ID with synonyms =(
        'contract_key',
        'service_contract_id',
        'agreement_id',
        'contract_identifier',
        'service_agreement_number',
        'unique_contract_code'
    ) comment = 'The primary key and unique string identifier for each service contract (e.g., CON_0005001).',
    FACT_CONTRACTS.CUSTOMER_ID as CUSTOMER_ID with synonyms =(
        'client_id',
        'account_holder',
        'customer_account',
        'account_number',
        'client_reference',
        'customer_reference'
    ) comment = 'Foreign key linking to DIM_CUSTOMERS, specifying which customer holds the contract.',
    FACT_CONTRACTS.END_DATE as END_DATE with synonyms =(
        'expiration_date',
        'contract_end',
        'termination_date',
        'end_of_contract',
        'contract_expiration',
        'expiry_date'
    ) comment = 'The date on which the service contract is set to expire. NULL indicates an ongoing contract.',
    FACT_CONTRACTS.SERVICE_TYPE as SERVICE_TYPE with synonyms =(
        'energy_service',
        'service_category',
        'contract_type',
        'utility_service',
        'energy_contract_type',
        'service_offering'
    ) comment = 'The type of energy service provided under the contract (e.g., ''electricity'', ''gas'', ''solar panel lease'').',
    FACT_CONTRACTS.START_DATE as START_DATE with synonyms =(
        'effective_date',
        'contract_start',
        'commencement_date',
        'initiation_date',
        'activation_date',
        'begin_date'
    ) comment = 'The date on which the service contract became effective.',
    FACT_CONTRACTS.STATUS as STATUS with synonyms =(
        'contract_state',
        'current_state',
        'state',
        'condition',
        'situation',
        'position',
        'standing',
        'state_of_affairs'
    ) comment = 'The current status of the contract (e.g., ''active'', ''pending renewal'').',
    FACT_CONTRACTS.TARIFF_PLAN as TARIFF_PLAN with synonyms =(
        'pricing_plan',
        'rate_plan',
        'tariff_scheme',
        'rate_schedule',
        'pricing_scheme',
        'energy_plan',
        'service_rate',
        'contract_rate'
    ) comment = 'The specific pricing plan associated with the contract' with cortex search service _CA_TARIFF_PLAN,
    FACT_SMART_METER_READINGS.CUSTOMER_ID as CUSTOMER_ID with synonyms =(
        'client_id',
        'account_id',
        'user_id',
        'account_number',
        'customer_account',
        'client_account',
        'subscriber_id'
    ) comment = 'Foreign key linking to DIM_CUSTOMERS, identifying the customer associated with the meter reading.',
    FACT_SMART_METER_READINGS.METER_ID as METER_ID with synonyms =(
        'meter_identifier',
        'smart_meter_id',
        'physical_meter_id',
        'meter_serial_number',
        'device_id',
        'meter_reference',
        'meter_number'
    ) comment = 'The unique identifier for the physical smart meter installed at the customer''s location.',
    FACT_SMART_METER_READINGS.READING_ID as READING_ID with synonyms =(
        'meter_reading_id',
        'reading_identifier',
        'smart_meter_id',
        'unique_reading_key',
        'meter_read_key',
        'reading_unique_id'
    ) comment = 'The primary key and unique string identifier for each individual meter reading (e.g., RID_00100001).',
    FACT_SMART_METER_READINGS.TIMESTAMP as TIMESTAMP with synonyms =(
        'date',
        'datetime',
        'record_time',
        'reading_time',
        'log_time',
        'event_time',
        'creation_time',
        'capture_time'
    ) comment = 'The exact date and time when the meter reading was recorded.',
    FACT_SUPPORT_CASES.CASE_ID as CASE_ID with synonyms =(
        'case_number',
        'support_case_id',
        'customer_case_id',
        'issue_id',
        'service_request_id',
        'ticket_id',
        'case_reference',
        'support_ticket_number'
    ) comment = 'The primary key and unique string identifier for each customer support case (e.g., CAS_000101).',
    FACT_SUPPORT_CASES.CASE_DATE as CASE_DATE with synonyms =(
        'case_creation_date',
        'support_case_date',
        'issue_date',
        'case_opening_date',
        'case_initiation_date',
        'support_request_date'
    ) comment = 'The date the support case was created.',
    FACT_SUPPORT_CASES.CUSTOMER_ID as CUSTOMER_ID with synonyms =(
        'client_id',
        'account_id',
        'user_id',
        'customer_account_number',
        'client_identifier',
        'patron_id',
        'subscriber_id'
    ) comment = 'Foreign key linking to DIM_CUSTOMERS, identifying the customer who initiated the support case.',
    FACT_SUPPORT_CASES.DESCRIPTION as DESCRIPTION with synonyms =(
        'summary',
        'brief',
        'issue_summary',
        'case_summary',
        'case_description',
        'issue_description',
        'brief_summary',
        'case_overview'
    ) comment = 'A brief summary of the customer''s issue or inquiry.',
    FACT_SUPPORT_CASES.ISSUE_TYPE as ISSUE_TYPE with synonyms =(
        'issue_category',
        'problem_type',
        'case_reason',
        'support_request_type',
        'complaint_type',
        'inquiry_category'
    ) comment = 'A category describing the nature of the customer''s issue (e.g., ''billing inquiry'', ''service outage'').',
    FACT_SUPPORT_CASES.RESOLUTION_STATUS as RESOLUTION_STATUS with synonyms =(
        'case_status',
        'issue_status',
        'support_case_status',
        'ticket_status',
        'resolution_stage',
        'case_outcome',
        'issue_resolution',
        'support_resolution',
        'case_progress'
    ) comment = 'The current status of the support case (e.g., ''open'', ''closed'', ''escalated'').'
) with extension (
    -- CA (presumably Cortex Analyst) metadata as one JSON literal: per-table sample
    -- values and is_enum flags, relationship names, and two verified queries.
    CA = '{"tables":[{"name":"DIM_CUSTOMERS","dimensions":[{"name":"ACCOUNT_STATUS","sample_values":["canceled","suspended","active"],"is_enum":true},{"name":"ADDRESS","sample_values":["Anatoli-Herrmann-Straße 555","Pruschkestr. 5/9","Bonbachgasse 7/0"]},{"name":"CITY","sample_values":["Viersen","Hohenmölsen","Paderborn"]},{"name":"CUSTOMER_ID","sample_values":["CID_001123","CID_001162","CID_001174"]},{"name":"CUSTOMER_NAME","sample_values":["Arnfried Faust","Bogdan Hesse","Mira Hettner"]},{"name":"CUSTOMER_TYPE","sample_values":["residential","commercial"],"is_enum":true},{"name":"EMAIL","sample_values":["giovanna.linke@gmx.de","ria.ruppert@gmail.com","alicja.ziegert@web.de"]},{"name":"POSTAL_CODE","sample_values":["10011","18563","88279"]}],"time_dimensions":[{"name":"JOIN_DATE","sample_values":["2024-03-25","2018-11-28","2021-12-18"]}]},{"name":"FACT_BILLINGS","dimensions":[{"name":"CUSTOMER_ID","sample_values":["CID_001044","CID_001060","CID_001001"]},{"name":"INVOICE_ID","sample_values":["INV_0080001","INV_0080190","INV_0080020"]},{"name":"PAYMENT_STATUS","sample_values":["pending","overdue","paid"],"is_enum":true}],"facts":[{"name":"AMOUNT_DUE","sample_values":["114.3","110.82","81.89"]}],"time_dimensions":[{"name":"CONSUMPTION_PERIOD_END","sample_values":["2025-08-31","2025-07-31","2025-09-30"]},{"name":"CONSUMPTION_PERIOD_START","sample_values":["2025-08-01","2025-09-01","2025-07-01"]},{"name":"DUE_DATE","sample_values":["2025-08-19","2025-10-19","2025-09-19"]},{"name":"INVOICE_DATE","sample_values":["2025-08-05","2025-09-05","2025-10-05"]}]},{"name":"FACT_CONTRACTS","dimensions":[{"name":"CONTRACT_ID","sample_values":["CON_0005001","CON_0005002","CON_0005055"]},{"name":"CUSTOMER_ID","sample_values":["CID_001123","CID_001162","CID_001174"]},{"name":"SERVICE_TYPE","sample_values":["electricity","gas","solar panel lease"],"is_enum":true},{"name":"STATUS","sample_values":["expired","pending 
renewal","active"],"is_enum":true},{"name":"TARIFF_PLAN","sample_values":["Energy Plus Variable","Basic Home","Green Fix","Green Energy Ultra"]}],"time_dimensions":[{"name":"END_DATE","sample_values":["2027-11-24","2027-06-05"]},{"name":"START_DATE","sample_values":["2025-08-15","2025-05-30","2022-04-26"]}]},{"name":"FACT_SMART_METER_READINGS","dimensions":[{"name":"CUSTOMER_ID","sample_values":["CID_001001","CID_001002","CID_001003"]},{"name":"METER_ID","sample_values":["MTR-13349","MTR-13346","MTR-13348"]},{"name":"READING_ID","sample_values":["RID_00100017","RID_00100256","RID_00100186"]}],"facts":[{"name":"KW_GENERATION","sample_values":["0","2.18","4.08"]},{"name":"KWH_CONSUMPTION","sample_values":["13.38","64.4","8.61"]}],"time_dimensions":[{"name":"TIMESTAMP","sample_values":["2025-09-18T19:39:00.000+0000","2025-07-04T01:07:00.000+0000","2025-07-01T06:24:00.000+0000"]}]},{"name":"FACT_SUPPORT_CASES","dimensions":[{"name":"CASE_ID","sample_values":["CAS_000345","CAS_000101","CAS_000189"]},{"name":"CUSTOMER_ID","sample_values":["CID_001208","CID_001063","CID_001741"]},{"name":"DESCRIPTION","sample_values":["Customer inquired about unexpectedly high bill for August.","Customer called with a general query about their service."]},{"name":"ISSUE_TYPE","sample_values":["meter reading issue","service outage","tariff plan query"]},{"name":"RESOLUTION_STATUS","sample_values":["open","closed","escalated"],"is_enum":true}],"time_dimensions":[{"name":"CASE_DATE","sample_values":["2025-08-10","2025-07-19","2025-09-09"]}]}],"relationships":[{"name":"CUSTOMERS_X_BILLINGS"},{"name":"CUSTOMERS_X_CONTRACTS"},{"name":"CUSTOMERS_X_SMART_METER_READINGS"},{"name":"CUSTOMERS_X_SUPPORT_CASES"}],"verified_queries":[{"name":"Which tariff plan generates the most net energy? Calculate this by subtracting total generation from total consumption for all customers on solar panel leases.","question":"Which tariff plan generates the most net energy? 
Calculate this by subtracting total generation from total consumption for all customers on solar panel leases.","sql":"SELECT\\n    c.TARIff_PLAN,\\n    -- Calculate total generation, consumption, and the resulting net energy\\n    SUM(r.KW_GENERATION) AS TOTAL_GENERATION,\\n    SUM(r.KWH_CONSUMPTION) AS TOTAL_CONSUMPTION,\\n    (SUM(r.KW_GENERATION) - SUM(r.KWH_CONSUMPTION)) AS NET_ENERGY\\nFROM\\n    FACT_SMART_METER_READINGS AS r\\nJOIN\\n    -- Join with contracts to filter by service type and group by tariff plan\\n    FACT_CONTRACTS AS c\\n    ON r.CUSTOMER_ID = c.CUSTOMER_ID\\nWHERE\\n    -- Filter for only solar panel lease contracts\\n    c.SERVICE_TYPE = ''solar panel lease''\\nGROUP BY\\n    c.TARIFF_PLAN\\nORDER BY\\n    -- Order by net energy to find the plan that generates the most\\n    NET_ENERGY DESC;","use_as_onboarding_question":false,"verified_by":"Michael Gorkow","verified_at":1758583097},{"name":"What are my top 10 customers based on revenue?","question":"What are my top 10 customers based on revenue?","sql":"SELECT\\n  c.customer_name,\\n  MIN(b.invoice_date) AS start_date,\\n  MAX(b.invoice_date) AS end_date,\\n  SUM(b.amount_due) AS total_revenue\\nFROM\\n  fact_billings AS b\\n  LEFT OUTER JOIN dim_customers AS c ON b.customer_id = c.customer_id\\nGROUP BY\\n  c.customer_name\\nORDER BY\\n  total_revenue DESC NULLS LAST\\nLIMIT\\n  10","use_as_onboarding_question":false,"verified_by":"Michael Gorkow","verified_at":1758594395}]}'
);

## Scenario: New customers with new Tariffs and a customer complaint

__Cortex Search__ will automatically add the new tariff name to its search service, and it will become queryable via Cortex Analyst / Agents.

In [None]:
-- Insert 5 new customers.
-- Columns are listed explicitly so the statement does not depend on the
-- physical column order of DIM_CUSTOMERS.
INSERT INTO AI_DEVELOPMENT.SI_ENERGY_COMPANY.DIM_CUSTOMERS
    (CUSTOMER_ID, CUSTOMER_NAME, EMAIL, ADDRESS, CITY, POSTAL_CODE, CUSTOMER_TYPE, ACCOUNT_STATUS, JOIN_DATE)
VALUES
    ('CID_009999', 'Markus Weber', 'markus.weber@email.de', 'Hauptstraße 15', 'Hamburg', '20095', 'residential', 'active', '2025-08-15'),
    ('CID_009998', 'Julia Schmidt', 'j.schmidt@web.de', 'Gartenweg 5', 'München', '80331', 'residential', 'active', '2025-09-01'),
    ('CID_009997', 'Stefan Bauer', 'stefan.b@mailservice.com', 'Schillerstraße 22', 'Frankfurt', '60313', 'commercial', 'active', '2025-07-20'),
    ('CID_009996', 'Anna Keller', 'anna.keller88@email.de', 'Amselweg 1', 'Stuttgart', '70173', 'residential', 'active', '2025-09-10'),
    ('CID_009995', 'David Lehmann', 'david.lehmann@provider.net', 'Birkenallee 45', 'Düsseldorf', '40213', 'residential', 'active', '2025-08-22');

-- Insert 5 new contracts, all on the new tariff 'Green Energy Ultra'.
-- Columns are listed explicitly so the statement does not depend on the
-- physical column order of FACT_CONTRACTS.
INSERT INTO AI_DEVELOPMENT.SI_ENERGY_COMPANY.FACT_CONTRACTS
    (CONTRACT_ID, CUSTOMER_ID, SERVICE_TYPE, TARIFF_PLAN, START_DATE, END_DATE, STATUS)
VALUES
    ('CON_0009999', 'CID_009999', 'electricity', 'Green Energy Ultra', '2025-08-15', '2026-08-14', 'active'),
    ('CON_0009998', 'CID_009998', 'electricity', 'Green Energy Ultra', '2025-09-01', '2026-08-31', 'active'),
    ('CON_0009997', 'CID_009997', 'electricity', 'Green Energy Ultra', '2025-07-20', '2026-07-19', 'active'),
    ('CON_0009996', 'CID_009996', 'electricity', 'Green Energy Ultra', '2025-09-10', '2026-09-09', 'active'),
    ('CON_0009995', 'CID_009995', 'electricity', 'Green Energy Ultra', '2025-08-22', '2026-08-21', 'active');

-- Insert 4 new billings, one is overdue.
-- Columns are listed explicitly so the statement does not depend on the
-- physical column order of FACT_BILLINGS.
-- NOTE(review): both invoices of each customer carry the same consumption period
-- (and '2025-08-30' is not the last day of August) - confirm whether the later
-- invoices should cover the following month instead.
INSERT INTO AI_DEVELOPMENT.SI_ENERGY_COMPANY.FACT_BILLINGS
    (INVOICE_ID, CUSTOMER_ID, INVOICE_DATE, DUE_DATE, AMOUNT_DUE, PAYMENT_STATUS, CONSUMPTION_PERIOD_START, CONSUMPTION_PERIOD_END)
VALUES
    ('INV_0009999', 'CID_009999', '2025-08-05', '2025-08-19', '120.18', 'paid', '2025-07-01', '2025-07-31'),
    ('INV_0009998', 'CID_009999', '2025-09-05', '2025-09-19', '4852.64', 'overdue', '2025-07-01', '2025-07-31'),
    ('INV_0009997', 'CID_009998', '2025-08-05', '2025-08-19', '111.32', 'paid', '2025-08-01', '2025-08-30'),
    ('INV_0009996', 'CID_009998', '2025-09-05', '2025-09-19', '134.34', 'paid', '2025-08-01', '2025-08-30');

-- Insert 1 new customer complaint.
-- Columns are listed explicitly so the statement does not depend on the
-- physical column order of FACT_SUPPORT_CASES.
INSERT INTO AI_DEVELOPMENT.SI_ENERGY_COMPANY.FACT_SUPPORT_CASES
    (CASE_ID, CUSTOMER_ID, CASE_DATE, ISSUE_TYPE, RESOLUTION_STATUS, DESCRIPTION)
VALUES
    ('CAS_0009999', 'CID_009999', '2025-09-05', 'billing inquiry', 'escalated', 'Customer complained about unusual high bill.');

## Cortex Search for Unstructured Data

In [None]:
-- List every file registered in the directory table of the CONTRACTS stage.
SELECT * FROM DIRECTORY('@CONTRACTS');

In [None]:
-- Run AI_PARSE_DOCUMENT (layout mode) on every entry of DIRECTORY('@CONTRACTS')
-- and keep the extracted text and page count next to the file path and a
-- presigned URL.
-- NOTE(review): presigned URLs are time-limited, so the stored DOCUMENT_URL values
-- presumably stop working once they expire - confirm the intended lifetime.
CREATE OR REPLACE TABLE CONTRACTS_PDF_EXTRACTIONS AS (
    WITH PARSED_FILES AS (
        SELECT
            RELATIVE_PATH,
            GET_PRESIGNED_URL('@CONTRACTS', RELATIVE_PATH) AS DOCUMENT_URL,
            AI_PARSE_DOCUMENT(TO_FILE(FILE_URL), {'mode': 'layout'}) AS PARSE_DOCUMENT_OUTPUT
        FROM DIRECTORY('@CONTRACTS')
    )
    SELECT
        RELATIVE_PATH,
        DOCUMENT_URL,
        PARSE_DOCUMENT_OUTPUT:content::TEXT AS DOCUMENT_CONTENT,
        PARSE_DOCUMENT_OUTPUT:metadata.pageCount::INT AS NUM_DOCUMENT_PAGES
    FROM PARSED_FILES
);

-- Show the extraction result.
SELECT * FROM CONTRACTS_PDF_EXTRACTIONS;

In [None]:
-- Cortex Search service over the parsed contract text (DOCUMENT_CONTENT), with
-- RELATIVE_PATH and DOCUMENT_URL exposed as attributes. The agent's
-- CUSTOMER_CONTRACTS tool points at this service.
CREATE OR REPLACE CORTEX SEARCH SERVICE CUSTOMER_CONTRACTS
    ON DOCUMENT_CONTENT
    ATTRIBUTES RELATIVE_PATH, DOCUMENT_URL
    WAREHOUSE = AI_WH
    TARGET_LAG = '12 hour'
    EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
    SELECT *
    FROM CONTRACTS_PDF_EXTRACTIONS
);

## Create the Agent

In [None]:
-- Agent ENERGY_COMPANY_AGENT, defined by the JSON specification between the $$ markers.
-- Tools and the resources they are bound to:
--   * CUSTOMER_DATA_HUB  (cortex_analyst_text_to_sql) -> semantic view AI_DEVELOPMENT.SI_ENERGY_COMPANY.CUSTOMER_DATA
--   * CUSTOMER_CONTRACTS (cortex_search)              -> AI_DEVELOPMENT.SI_ENERGY_COMPANY.CUSTOMER_CONTRACTS, max_results 4
--   * send_email         (generic)                    -> procedure AI_DEVELOPMENT.PUBLIC.SEND_MAIL(VARCHAR, VARCHAR, VARCHAR)
-- NOTE(review): both "warehouse" entries are empty strings - confirm which warehouse is then used.
-- NOTE(review): SEND_MAIL is not created in the visible cells - confirm it exists before running.
CREATE OR REPLACE AGENT SNOWFLAKE_INTELLIGENCE.AGENTS.ENERGY_COMPANY_AGENT
profile='{"display_name":"Energy Company Agent","avatar":"DiamondIcon","color":"var(--chartDim_3-x11sbcwy)"}'
comment='This agent has access to customer data from an Energy Company.'
FROM SPECIFICATION 
$$
{
  "models": {
    "orchestration": "claude-4-sonnet"
  },
  "instructions": {
    "orchestration": "If you are asked about contracts for specific persons, look up the contract id in the CUSTOMER_DATA_HUB tool before searching the contract using CUSTOMER_CONTRACTS tools.\nWhen sending emails, make sure to provide well formatted content using html.",
    "sample_questions": [
      {
        "question": "What is the percentage of overdue payments per tariff plan? visualize results in a bar chart."
      },
      {
        "question": "Which customers are in our new tariff \"green energy ultra\" and have overdue payments in September? For these customers, list their support cases and the affected revenue as well as their payment terms according to their contract. Also check the customers invoice data for August and compare it against the open invoice. Finally send an executive summary mail to my email address."
      }
    ]
  },
  "tools": [
    {
      "tool_spec": {
        "type": "cortex_analyst_text_to_sql",
        "name": "CUSTOMER_DATA_HUB",
        "description": "This tool provides access to structured information such as \n* customers (name, email, address, account_status)\n* contracts (service type, tariff plan, start date, end date)\n* smart meter readings (KWH_CONSUMPTION & KW_GENERATION)\n* billings (invoices, amounts, due dates)\n* support cases (case data, issue type, resolution)"
      }
    },
    {
      "tool_spec": {
        "type": "cortex_search",
        "name": "CUSTOMER_CONTRACTS",
        "description": "Use this tool to get customer contracts that include detailed information."
      }
    },
    {
      "tool_spec": {
        "type": "generic",
        "name": "send_email",
        "description": "Use this tool to send emails.",
        "input_schema": {
          "type": "object",
          "properties": {
            "recipient": {
              "description": "email address of the recipient.",
              "type": "string"
            },
            "subject": {
              "description": "subject of the email.",
              "type": "string"
            },
            "text": {
              "description": "The text of the email. Supports html code for formatted emails.",
              "type": "string"
            }
          },
          "required": [
            "recipient",
            "subject",
            "text"
          ]
        }
      }
    }
  ],
  "tool_resources": {
    "CUSTOMER_CONTRACTS": {
      "max_results": 4,
      "name": "AI_DEVELOPMENT.SI_ENERGY_COMPANY.CUSTOMER_CONTRACTS"
    },
    "CUSTOMER_DATA_HUB": {
      "execution_environment": {
        "query_timeout": 300,
        "type": "warehouse",
        "warehouse": ""
      },
      "semantic_view": "AI_DEVELOPMENT.SI_ENERGY_COMPANY.CUSTOMER_DATA"
    },
    "send_email": {
      "execution_environment": {
        "type": "warehouse",
        "warehouse": ""
      },
      "identifier": "AI_DEVELOPMENT.PUBLIC.SEND_MAIL",
      "name": "SEND_MAIL(VARCHAR, VARCHAR, VARCHAR)",
      "type": "procedure"
    }
  }
}
$$