In [1]:
import os
import sys
from pathlib import Path
# Add project root to sys.path
project_root = Path().resolve().parent  # one level up from notebooks/
sys.path.append(str(project_root))

import time
from datetime import datetime
import pandas as pd
from google.cloud import bigquery

# Initialize BigQuery client
client = bigquery.Client(project='trilink-472019')
# Load the extension
%load_ext google.cloud.bigquery
%load_ext bigquery_magics

from src.calltranscripts_simulation import EnhancedCallCenterDataGenerator



In [4]:
# Pull the four source tables into pandas with one `select *` each.
# Note the customer table is named `customer_df` while the frame is `customers_df`.
customers_df, internet_df, mobile_df, security_df = (
    client.query(f"select * from `trilink-472019.database.{table_name}`").to_dataframe()
    for table_name in ("customer_df", "internet_df", "mobile_df", "security_df")
)

##### Running the simulation for 2500 call transcripts

- The call scenarios are the ones defined in the `src.calltranscripts_simulation` module
- Calls from 2025-01-01 to 2025-09-01
- Actual customer datapoints added in the simulation

In [6]:
# Build the generator from the four customer/service frames, simulate the
# 2,500 historical calls, and persist them so later cells can reload the
# result from disk.
generator = EnhancedCallCenterDataGenerator(
    customers_df,
    internet_df,
    mobile_df,
    security_df,
)
calls_df = generator.generate_call_dataset(
    num_calls=2500,
    start_date="2025-01-01",
    end_date="2025-09-01",
)
calls_df.to_csv("..//data//call_transcripts_v1.csv", index=False)

# Kept for reference only (not executed): an earlier entry point that took
# custom scenario weights.
# calls_df = CallCenterDataGenerator.create_trilink_call_dataset(
#     customers_df, internet_df, mobile_df, security_df,
#     start_date="2025-07-01",
#     end_date="2025-09-01",
#     num_calls=1000,
#     scenario_weights={'complaint_resolution': 0.12, 'cross_sell_mobile': 0.15}  # Custom weights
# )


🎯 ENHANCED CALL GENERATION STARTING
📊 Target: 2500 calls from 2025-01-01 to 2025-09-01
🤖 Using Two-Stage Approach:
   Stage 1: Natural transcripts (Gemini 2.5 Flash)
   Stage 2: Performance evaluation (Gemini 2.5 Flash Lite)
🔍 Fast analysis of 100000 customers...
✅ Found 93622 customers with services
   📡 Internet: 72756 customers
   📱 Mobile: 74395 customers
   🏠 Security: 43819 customers
   Building context 0/5000...
   Building context 100/5000...
   Building context 200/5000...
   Building context 300/5000...
   Building context 400/5000...
   Building context 500/5000...
   Building context 600/5000...
   Building context 700/5000...
   Building context 800/5000...
   Building context 900/5000...
   Building context 1000/5000...
   Building context 1100/5000...
   Building context 1200/5000...
   Building context 1300/5000...
   Building context 1400/5000...
   Building context 1500/5000...
   Building context 1600/5000...
   Building context 1700/5000...
   Building context 1800

In [28]:
# Reload the simulated calls written by the generation cell
calls_df = pd.read_csv(filepath_or_buffer="..//data//call_transcripts_v1.csv")

In [6]:
# Preview the first five simulated calls (five rows is the default for head())
calls_df.head()

Unnamed: 0,call_id,customer_id,call_date,call_time,agent_id,agent_name,primary_scenario,call_transcript,overall_rating,call_successful,customer_monthly_spend,customer_service_count,customer_issue_history
0,CALL_000001,C00077940,2025-04-26,12:08,agent_005,Lisa Wang,payment_assistance,**Call Transcript**\n\n**Date:** 2025-04-26\n*...,5,True,173,2,18
1,CALL_000002,C00050897,2025-01-31,14:51,agent_007,Jennifer Davis,billing_inquiry,**Call Transcript**\n\n**Date:** 2025-01-31\n*...,5,True,182,2,0
2,CALL_000003,C00062906,2025-08-26,16:46,agent_008,Robert Kim,contract_renewal,**TriLink Telecom Customer Service Call Transc...,5,False,176,2,0
3,CALL_000004,C00077227,2025-06-13,17:59,agent_006,Michael Brown,technical_support,**Call Transcript: TriLink Telecom Customer Se...,7,True,100,1,3
4,CALL_000005,C00012668,2025-08-13,11:40,agent_004,James Thompson,cross_sell_security,**TriLink Telecom Customer Service Call Transc...,4,False,36,1,3


##### Displaying some of the calls that were simulated

In [7]:
# Row 28: metadata first, then the full transcript text
print(calls_df.iloc[28])
print(calls_df.loc[28, 'call_transcript'])

call_id                                                         CALL_000029
customer_id                                                       C00033257
call_date                                                        2025-02-07
call_time                                                             09:11
agent_id                                                          agent_002
agent_name                                                       David Chen
primary_scenario                                         payment_assistance
call_transcript           **Call Transcript - TriLink Telecom**\n\n**Dat...
overall_rating                                                            6
call_successful                                                        True
customer_monthly_spend                                                  182
customer_service_count                                                    2
customer_issue_history                                                    2
Name: 28, dt

In [8]:
# Row 354: metadata first, then the full transcript text
print(calls_df.iloc[354])
print(calls_df.loc[354, 'call_transcript'])

call_id                                                         CALL_000355
customer_id                                                       C00000029
call_date                                                        2025-03-29
call_time                                                             14:05
agent_id                                                          agent_010
agent_name                                                  Carlos Martinez
primary_scenario                                       complaint_resolution
call_transcript           **TriLink Telecom Customer Service Call Transc...
overall_rating                                                            7
call_successful                                                        True
customer_monthly_spend                                                  117
customer_service_count                                                    2
customer_issue_history                                                    4
Name: 354, d

In [9]:
# Row 1785: metadata first, then the full transcript text
print(calls_df.iloc[1785])
print(calls_df.loc[1785, 'call_transcript'])

call_id                                                         CALL_001786
customer_id                                                       C00043315
call_date                                                        2025-03-09
call_time                                                             15:49
agent_id                                                          agent_010
agent_name                                                  Carlos Martinez
primary_scenario                                       complaint_resolution
call_transcript           **TriLink Telecom Customer Service Call Transc...
overall_rating                                                            5
call_successful                                                       False
customer_monthly_spend                                                  170
customer_service_count                                                    2
customer_issue_history                                                    5
Name: 1785, 

##### Create the BigQuery Table for CallLogs

In [29]:
# (Re)create the raw call-transcript table from the simulated calls.
# A load job appends by default, so re-running this cell used to duplicate
# every row. WRITE_TRUNCATE makes the cell idempotent: the table always ends
# up holding exactly the rows of `calls_df`.
# NOTE: this also wipes anything appended to the table by later cells, so
# re-run those cells afterwards if needed.
job1 = client.load_table_from_dataframe(
    calls_df,
    "trilink-472019.database.call_transcripts_raw",
    job_config=bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    ),
)
job1.result()  # block until the load job has finished; raises if it failed
print(f"✅ Uploaded {len(calls_df)} rows to BigQuery!")

✅ Uploaded 2500 rows to BigQuery!


- Some EDA on the call transcripts data 

In [38]:
%%bigquery
-- Frequency of each overall_rating value across all calls.
-- The table path is back-quoted like every other query in this notebook, and
-- the explicit ORDER BY makes the row order deterministic (GROUP BY alone
-- guarantees none).
SELECT
  COUNT(*) AS call_cnt,
  overall_rating
FROM `trilink-472019.database.call_transcripts_raw`
GROUP BY overall_rating
ORDER BY overall_rating;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,call_cnt,overall_rating
0,11,3
1,247,4
2,548,5
3,1083,6
4,562,7
5,49,8


In [40]:
%%bigquery
--Get the number of calls per month
SELECT 
  COUNT(*) as call_cnt,
  EXTRACT(YEAR FROM DATE(call_date)) as yr,
  EXTRACT(MONTH FROM DATE(call_date)) as mnth 
FROM `trilink-472019.database.call_transcripts_raw` 
GROUP BY 
  EXTRACT(YEAR FROM DATE(call_date)),
  EXTRACT(MONTH FROM DATE(call_date))
ORDER BY yr, mnth;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,call_cnt,yr,mnth
0,329,2025,1
1,295,2025,2
2,303,2025,3
3,316,2025,4
4,319,2025,5
5,307,2025,6
6,297,2025,7
7,327,2025,8
8,7,2025,9


- We first need to create an embedding endpoint connection for our project to use
- For text embeddings we will create the `text_embedding_005_model` remote model, which points at the `text-embedding-005` endpoint
- Before that, we need to create a connection in BigQuery for the Vertex AI models; let's name it `us.vertex-ai-connection`

In [21]:
# Create the BigQuery connection `vertex-ai-connection` (type CLOUD_RESOURCE)
# in location `us` of project trilink-472019, via the `bq` CLI.
!bq mk --connection --location=us --project_id=trilink-472019 --connection_type=CLOUD_RESOURCE vertex-ai-connection

Connection 259783098753.us.vertex-ai-connection successfully created


- Why you need to grant access to the service account
        When you create a BigQuery connection, Google automatically creates a service account for that connection. This service account, not your personal account, is what actually makes the API calls to Vertex AI.
        The flow:

        You → Submit a BigQuery query that uses AI functions
        BigQuery → Uses the connection's service account to call Vertex AI
        Service Account → Needs permission to access the Vertex AI models

In [24]:
# Grant the Vertex AI User role (roles/aiplatform.user) on project trilink-472019
# to the connection's service account.
# NOTE(review): the service-account e-mail is hard-coded; presumably it is the one
# generated for `vertex-ai-connection` — confirm before re-running in another project.
!gcloud projects add-iam-policy-binding trilink-472019 --member="serviceAccount:bqcx-259783098753-8wxm@gcp-sa-bigquery-condel.iam.gserviceaccount.com" --role="roles/aiplatform.user"


bindings:
- members:
  - user:hazardscarn10@gmail.com
  role: roles/aiplatform.admin
- members:
  - serviceAccount:bqcx-259783098753-8wxm@gcp-sa-bigquery-condel.iam.gserviceaccount.com
  role: roles/aiplatform.user
- members:
  - user:hazardscarn10@gmail.com
  role: roles/bigquery.admin
- members:
  - user:hazardscarn10@gmail.com
  role: roles/bigquery.connectionUser
- members:
  - user:hazardscarn10@gmail.com
  role: roles/bigquery.dataViewer
- members:
  - user:hazardscarn10@gmail.com
  role: roles/bigquery.user
- members:
  - user:hazardscarn10@gmail.com
  role: roles/owner
etag: BwY-tmm2MPg=
version: 1


Updated IAM policy for project [trilink-472019].


- Now let's create the connection model for embedding-005

In [26]:
%%bigquery
-- Register a remote model that reaches the text-embedding-005 endpoint
-- through the us.vertex-ai-connection connection.
CREATE OR REPLACE MODEL `trilink-472019.database.text_embedding_005_model`
  REMOTE WITH CONNECTION `us.vertex-ai-connection`
  OPTIONS (endpoint = 'text-embedding-005');

Query is running:   0%|          |

- Using the `trilink-472019.database.text_embedding_005_model` remote model created above, we will build an embeddings table for the calls (adding a vector index is optional, see below)

In [30]:
%%bigquery
-- Build the embeddings table: every column of the raw call table plus the
-- transcript embedding in `call_transcript_embedding`.
--
-- ML.GENERATE_EMBEDDING is a table function that embeds the input column named
-- `content`. It is called once over the whole table here, instead of once per
-- row through a correlated scalar subquery. The helper columns it adds
-- (`content` and the ml_generate_embedding_* bookkeeping columns) are dropped
-- so the table schema stays "raw columns + call_transcript_embedding".
CREATE OR REPLACE TABLE `trilink-472019.database.call_transcripts_raw_embeddings` AS
SELECT
  * EXCEPT (
    content,
    ml_generate_embedding_result,
    ml_generate_embedding_statistics,
    ml_generate_embedding_status
  ),
  ml_generate_embedding_result AS call_transcript_embedding
FROM ML.GENERATE_EMBEDDING(
  MODEL `trilink-472019.database.text_embedding_005_model`,
  (
    SELECT
      *,
      call_transcript AS content
    FROM `trilink-472019.database.call_transcripts_raw`
  ),
  STRUCT(TRUE AS flatten_json_output)
);

Query is running:   0%|          |

- Skipping the indexing since we have 2.5K calls for sim only
    - Indexing is for huge datasets and we are good with vector search here
    - We will use indexing for the product recommendation section later

In [None]:
# %%bigquery
# CREATE OR REPLACE VECTOR INDEX call_transcripts_vector_index
# ON `trilink-472019.database.call_transcripts_raw_embeddings`(call_transcript_embedding)
# OPTIONS(
#   index_type = 'IVF',
#   distance_type = 'COSINE',
#   ivf_options = '{"num_lists": 100}'
# );

- Finding the top 5 most similar calls with a rating above 6 for a sample call/issue:

    - "Customer experienced a complete service outage for 3 hours yesterday during peak business hours. They are requesting compensation and want to know what caused the outage. This has happened twice this month already."

In [31]:
%%bigquery similar_calls_df
-- Embed a free-text issue description, then rank calls rated above 6 by
-- cosine distance to it. Despite its name, `similarity_score` is a DISTANCE:
-- smaller means more similar, hence the ascending sort.
WITH query_embedding AS (
  -- Exactly one row: the embedding of the sample issue text
  SELECT ml_generate_embedding_result AS embedding
  FROM ML.GENERATE_EMBEDDING(
    MODEL `trilink-472019.database.text_embedding_005_model`,
    (SELECT 'Customer experienced a complete service outage for 3 hours yesterday during peak business hours.'
     ' They are requesting compensation and want to know what caused the outage. This has happened twice this month already.' AS content),
    STRUCT(TRUE AS flatten_json_output)
  )
)
SELECT
  calls.call_id,
  calls.call_transcript,
  calls.overall_rating,
  ML.DISTANCE(calls.call_transcript_embedding, q.embedding, 'COSINE') AS similarity_score
FROM `trilink-472019.database.call_transcripts_raw_embeddings` AS calls
CROSS JOIN query_embedding AS q
WHERE calls.overall_rating > 6
ORDER BY similarity_score ASC
LIMIT 5;

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
# Display the result frame that the `%%bigquery similar_calls_df` cell stored:
# at most five calls rated above 6, ordered by ascending cosine distance.
similar_calls_df

Unnamed: 0,call_id,call_transcript,overall_rating,similarity_score
0,CALL_001445,**TriLink Telecom Customer Service Call Transc...,7,0.240723
1,CALL_000900,**Call Transcript: TriLink Telecom Customer Se...,7,0.250516
2,CALL_002016,**Call Transcript**\n\n**Date:** 2025-05-20\n*...,7,0.252246
3,CALL_001003,**Call Transcript - TriLink Telecom**\n\n**Dat...,7,0.254536
4,CALL_002054,**Call Transcript**\n\n**Date:** 2025-07-19\n*...,7,0.254872


In [33]:
# Full transcript of the closest match (first row)
print(similar_calls_df.loc[0, 'call_transcript'])

**TriLink Telecom Customer Service Call Transcript**

**Date:** 2025-05-26
**Time:** 10:17 AM EST
**Agent:** Carlos Martinez (agent_010), Escalation Specialist
**Customer:** C00097646

**(Call begins with standard IVR prompts and hold music, followed by a transfer to Carlos.)**

**Carlos:** Thank you for holding, you've reached TriLink Telecom, my name is Carlos. I understand you've been transferred to our escalation department. How may I assist you today?

**Customer:** (Sighs) Finally. Yes, Carlos, I need to speak to someone who can actually do something. My name is Jessica Thompson, Customer ID C00097646. I'm calling to lodge a formal complaint, which frankly, is a follow-up to an issue that should have been resolved already.

**Carlos:** Alright, Ms. Thompson. Thank you for providing your account details. I'm pulling up your file now. I see here you have our Premium_Gig internet plan, four lines on Unlimited_Premium mobile, and two security devices. Your total monthly bill is $466.

In [34]:
# Full transcript of the second-closest match (second row)
print(similar_calls_df.loc[1, 'call_transcript'])

**Call Transcript: TriLink Telecom Customer Service**

**Date:** 2025-01-06
**Time:** 10:17 AM EST
**Agent:** Carlos Martinez (agent_010) - Escalation Specialist
**Customer ID:** C00017091
**Customer Name:** Eleanor Vance (71 years old)
**Call Reason:** Formal complaints about service or experience

---

**(Call begins with standard IVR and transfer)**

**Carlos Martinez:** Thank you for calling TriLink Telecom. My name is Carlos Martinez, and I'm an escalation specialist. I understand you've been transferred to me to discuss a formal complaint. May I please have your account number or the phone number associated with your service?

**Eleanor Vance:** Yes, finally, an actual person. My account number is C00017091. It's about my internet service, and honestly, the whole experience lately. This is just ridiculous.

**Carlos Martinez:** Thank you, Ms. Vance. Please bear with me for just a moment while I pull up your details.
**(Typing sounds)**
Alright, Ms. Vance, I see your account here.

In [None]:

# -- STEP 0: Create remote model first (run this once)
# CREATE OR REPLACE MODEL `trilink-472019.database.text_embedding_model`
# REMOTE WITH CONNECTION `us.vertex-ai-connection`
# OPTIONS (endpoint = 'text-embedding-005');

#### Create the Call Transcripts for the Test Period

- Simulate 2025 September Call Transcripts

In [2]:
## Simulate the calls for the test month (September 2025)
customers_df = client.query("select * from `trilink-472019.database.customer_df`").to_dataframe()
internet_df = client.query("select * from `trilink-472019.database.internet_df`").to_dataframe()
mobile_df = client.query("select * from `trilink-472019.database.mobile_df`").to_dataframe()
security_df = client.query("select * from `trilink-472019.database.security_df`").to_dataframe()
generator = EnhancedCallCenterDataGenerator(customers_df, internet_df, mobile_df, security_df)
calls_df = generator.generate_call_dataset(start_date="2025-09-01", end_date="2025-09-30", num_calls=350)

## Make call_id unique from the earlier batch by adding a test suffix.
## The result has to be assigned back to the column: `Series + str` returns a
## new Series, so the bare expression used before never changed the frame and
## the suffix was silently dropped.
calls_df['call_id'] = calls_df['call_id'].astype(str) + '_tsep'
calls_df.to_csv("..//data//call_transcripts_test.csv", index=False)



🎯 ENHANCED CALL GENERATION STARTING
📊 Target: 350 calls from 2025-09-01 to 2025-09-30
🤖 Using Two-Stage Approach:
   Stage 1: Natural transcripts (Gemini 2.5 Flash)
   Stage 2: Performance evaluation (Gemini 2.5 Flash Lite)
🔍 Fast analysis of 100000 customers...
✅ Found 93622 customers with services
   📡 Internet: 72756 customers
   📱 Mobile: 74395 customers
   🏠 Security: 43819 customers
   Building context 0/700...
   Building context 100/700...
   Building context 200/700...
   Building context 300/700...
   Building context 400/700...
   Building context 500/700...
   Building context 600/700...
🔍 Found 93622 customers with services

🔄 Starting call 1/350...
   👤 Selected customer: C00090141
   📋 Scenario: cross_sell_security | Agent: agent_009
   🎤 Generating transcript... ✅
   📊 Evaluating call... ✅ (Rating: 7/10, Success: True)

🔄 Starting call 2/350...
   👤 Selected customer: C00007300
   📋 Scenario: complaint_resolution | Agent: agent_010
   🎤 Generating transcript... ✅
   📊 

##### Append the calls to bigquery call and add embeddings

In [3]:
## Append the newly simulated calls to the raw call table in BigQuery.
## NOTE(review): every run appends again, so re-running this cell duplicates rows.
## NOTE(review): DataFrame.to_gbq is reported as deprecated in recent pandas
## releases in favour of pandas_gbq.to_gbq — confirm against the installed version.
calls_df.to_gbq(
    'trilink-472019.database.call_transcripts_raw',
    project_id='trilink-472019',
    credentials=None,  # None -> fall back to the default credentials
    if_exists='append',
)

100%|██████████| 1/1 [00:00<?, ?it/s]


- We can also add the embeddings for the calls
    - We will add the embeddings for new calls and append to existing embedding table at the end of each day after the followup email is processed

In [4]:
%%bigquery
-- Monthly call volume after the append: call_date is converted with DATE() and split into year and month
SELECT
  COUNT(*) AS call_cnt,
  EXTRACT(YEAR FROM DATE(call_date)) AS yr,
  EXTRACT(MONTH FROM DATE(call_date)) AS mnth
FROM `trilink-472019.database.call_transcripts_raw`
GROUP BY yr, mnth
ORDER BY yr, mnth;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,call_cnt,yr,mnth
0,329,2025,1
1,295,2025,2
2,303,2025,3
3,316,2025,4
4,319,2025,5
5,307,2025,6
6,297,2025,7
7,327,2025,8
8,357,2025,9


Test code – to be moved to a different notebook later

In [None]:
%%bigquery
-- Follow-up email pipeline for one day of calls:
--   1. target_calls                 : ask Gemini whether each call needs an email follow-up
--   2. email_calls_with_embeddings  : embed the transcripts that do
--   3. similar_email_added          : nearest calls from the embeddings table, rated above 6
--   4. similar_calls_added          : collapse those examples into one text blob per call
--   5. security_aggregated / churned_* / base : attach customer profile and current services
--   6. final SELECT                 : have Gemini draft the personalised email
--
-- Changes from the previous version:
--   * a call is no longer returned as its own "similar" example
--   * the 'Churned ...' statuses are reachable (they were shadowed by the churn = 0 join filter)
--   * the email prompt uses real newlines ('\n') like the rest of the query, instead of the
--     escaped '\\n' that put a literal backslash-n into the prompt text
--   * missing separators between adjacent prompt literals were added so instructions
--     no longer run together
WITH target_calls AS (
  SELECT 
    call_id,
    customer_id,
    call_transcript,
    call_date,
    AI.GENERATE_BOOL(
      CONCAT(
        'Analyze this customer service call transcript and determine if it requires email follow-up. ',
        'We want to err on the side of caution - SEND EMAIL FOLLOW-UP for ANY call that is NOT purely routine. ',
        '',
        'SEND EMAIL FOLLOW-UP if the call involves ANY of these: ',
        '1. Customer expressed ANY dissatisfaction, frustration, confusion, or unhappiness ',
        '2. ANY technical issues, problems, complaints, or concerns were mentioned ',
        '3. ANY solutions, explanations, or troubleshooting steps were provided ',
        '4. Customer asked questions about products, services, or account details ',
        '5. ANY account changes, modifications, or requests were discussed ',
        '6. Customer seemed uncertain or needed clarification on anything ',
        '7. ANY follow-up actions, callbacks, or next steps were mentioned ',
        '8. Opportunities for additional services or products were identified ',
        '9. Customer tone suggested they were not completely satisfied ',
        '10. Call involved anything beyond the most basic routine transactions. ',
        '',
        'ONLY skip email follow-up for these very limited routine cases: ',
        '- Simple payment made with confirmed success and happy customer ',
        '- Quick balance inquiry with no issues or questions ',
        '- Brief, successful IVR navigation with satisfied customer ',
        '- Standard account verification completed smoothly with no other discussion. ',
        '',
        'When in doubt, choose TRUE for email follow-up. Call transcript: "', call_transcript, '"'
      ),
      connection_id => 'us.vertex-ai-connection',
      endpoint => 'gemini-2.5-flash'
    ).result as email_needed
    
  FROM `trilink-472019.database.call_transcripts_raw`
  -- call_date is parsed from a 'YYYY-MM-DD' string; both bounds are the same day,
  -- so this selects the calls of 2025-06-19 only
  WHERE PARSE_DATE('%Y-%m-%d', call_date) >= DATE('2025-06-19')
    AND PARSE_DATE('%Y-%m-%d', call_date) <= DATE('2025-06-19')
),

-- Embed only the calls flagged for follow-up; the extra columns are carried through
email_calls_with_embeddings AS (
  SELECT *
  FROM ML.GENERATE_EMBEDDING(
    MODEL `trilink-472019.database.text_embedding_005_model`,
    (SELECT 
       call_transcript AS content,
       call_id,
       customer_id,
       call_date,
       email_needed
     FROM target_calls 
     WHERE email_needed = TRUE),
    STRUCT(TRUE AS flatten_json_output, 'RETRIEVAL_DOCUMENT' AS task_type)
  )
),

-- Up to 10 nearest stored calls per flagged call, keeping only those rated above 6.
-- NOTE(review): the rating filter runs AFTER top_k, so a flagged call whose neighbours are
-- all rated 6 or lower drops out of the pipeline entirely; the COALESCE fallback in the
-- prompt below is then never reached.
similar_email_added AS (
  SELECT
    vs.query.call_id,
    vs.query.customer_id,
    vs.query.call_date,
    vs.query.content,
    vs.base.call_transcript,
    vs.base.primary_scenario,
    vs.base.overall_rating,
    vs.distance
  FROM
    VECTOR_SEARCH(
      TABLE `trilink-472019.database.call_transcripts_raw_embeddings`,
      'call_transcript_embedding',
      TABLE email_calls_with_embeddings,
      'ml_generate_embedding_result',
      top_k => 10,
      distance_type => 'COSINE') vs
  WHERE CAST(vs.base.overall_rating AS INT64) > 6
    -- the flagged call may itself be stored in the embeddings table; never offer a call
    -- as an example of how "other agents" handled it
    AND vs.base.call_id != vs.query.call_id
),

similar_calls_added AS (
  SELECT 
    call_id,
    customer_id,
    call_date,
    content as current_call_transcript,
    
    -- Concatenate all similar call transcripts and ratings, best-rated and closest first
    STRING_AGG(
      CONCAT(
        'Rating: ', overall_rating, '/10\n',
        'Scenario: ', primary_scenario, '\n', 
        'Call: ', call_transcript, '\n\n---\n\n'
      )
      ORDER BY CAST(overall_rating AS INT64) DESC, distance ASC
    ) as all_similar_calls
    
  FROM similar_email_added
  GROUP BY call_id, customer_id, call_date, content
),

-- Aggregate security devices per customer
security_aggregated AS (
  SELECT 
    customer_id,
    STRING_AGG(device_type, ', ' ORDER BY device_type) AS device_types_concat,
    COUNT(device_type) AS total_security_devices
  FROM `trilink-472019.database.security_df`
  GROUP BY customer_id
),

-- Customers with at least one churned internet subscription (one row per customer)
churned_internet AS (
  SELECT DISTINCT customer_id
  FROM `trilink-472019.database.internet_df`
  WHERE internet_churn = 1
),

-- Customers with at least one churned mobile subscription (one row per customer)
churned_mobile AS (
  SELECT DISTINCT customer_id
  FROM `trilink-472019.database.mobile_df`
  WHERE mobile_churn = 1
),

-- Get the current customer services and products and the possibility for cross-sell or upsell
base AS (
  SELECT 
    a.*,
    e.age,
    e.city,
    e.family_size,
    e.fiber_availability,
    e.home_ownership,
    e.home_type,
    e.life_stage,
    e.neighborhood_crime_rate,
    e.work_from_home_flag,
    b.device_types_concat,
    b.total_security_devices,
    c.speed_mbps AS internet_speed,
    c.plan_tier AS internet_plan,
    c.contract_type AS internet_contract_type,
    c.internet_tenure_days,
    d.plan_type AS mobile_plan,
    d.contract_type AS mobile_contract_type,
    d.data_overage_frequency AS mobile_data_overage_frequency,
    d.monthly_cost AS mobile_monthly_cost,
    
    -- Customer type flags
    CASE WHEN b.device_types_concat IS NULL THEN 0 ELSE 1 END AS security_customer,
    -- c / d only ever match ACTIVE rows (churn = 0 is part of the join condition), so the
    -- churned case has to come from the churned_* CTEs; checking c.internet_churn = 1 here
    -- could never be true
    CASE
      WHEN c.customer_id IS NOT NULL THEN 'Active internet customer'
      WHEN ci.customer_id IS NOT NULL THEN 'Churned internet customer'
      ELSE 'Never been our internet customer'
    END AS internet_customer_status,
    CASE
      WHEN d.customer_id IS NOT NULL THEN 'Active mobile customer'
      WHEN cm.customer_id IS NOT NULL THEN 'Churned mobile customer'
      ELSE 'Never been our mobile customer'
    END AS mobile_customer_status
    
  FROM similar_calls_added a
  LEFT JOIN security_aggregated b ON a.customer_id = b.customer_id
  LEFT JOIN `trilink-472019.database.customer_df` e ON a.customer_id = e.customer_id
  LEFT JOIN `trilink-472019.database.internet_df` c ON a.customer_id = c.customer_id AND c.internet_churn = 0
  LEFT JOIN `trilink-472019.database.mobile_df` d ON a.customer_id = d.customer_id AND d.mobile_churn = 0
  LEFT JOIN churned_internet ci ON a.customer_id = ci.customer_id
  LEFT JOIN churned_mobile cm ON a.customer_id = cm.customer_id
)

SELECT 
  call_id,
  customer_id,
  call_date,
  current_call_transcript,
  all_similar_calls,
  
  AI.GENERATE(
    CONCAT(
      'Create a personalized follow-up/next best action for solving customers issues email based on this customer call and examples of successful agent approaches.\n\n',
      
      '=== CUSTOMER INFO ===\n',
      'Customer ID: ', customer_id, '\n',
      'Customer Age: ', age, '\n',
      'Customer City: ', city, '\n',
      'Customer Family Size: ', family_size, '\n',
      'Customer Fiber Availability: ', CASE WHEN fiber_availability THEN 'Yes' ELSE 'No' END, '\n',
      'Customer Home Ownership: ', home_ownership, '\n',
      'Customer Home Type: ', home_type, '\n',
      'Customer Life Stage: ', life_stage, '\n',
      'Customer Neighborhood Crime Rate: ', neighborhood_crime_rate, '\n',
      'Customer Work From Home Flag: ', CASE WHEN work_from_home_flag THEN 'Yes' ELSE 'No' END, '\n\n',
          
      '=== CUSTOMER CURRENT SERVICES ===\n',
      'Internet Status: ', internet_customer_status, 
      CASE WHEN internet_customer_status = 'Active internet customer' 
        THEN CONCAT(' (Plan: ', COALESCE(internet_plan, 'Unknown'), ', Speed: ', COALESCE(CAST(internet_speed AS STRING), 'Unknown'), ' Mbps)')
        ELSE '' END, '\n',
      'Mobile Status: ', mobile_customer_status,
      CASE WHEN mobile_customer_status = 'Active mobile customer'
        THEN CONCAT(' (Plan: ', COALESCE(mobile_plan, 'Unknown'), ', Monthly Cost: $', COALESCE(CAST(mobile_monthly_cost AS STRING), 'Unknown'), ')')
        ELSE '' END, '\n',
      'Security Services: ', 
      CASE WHEN security_customer = 1 
        THEN CONCAT('Yes (Devices: ', COALESCE(device_types_concat, 'Unknown'), ', Total: ', COALESCE(CAST(total_security_devices AS STRING), '0'), ')')
        ELSE 'No security services' END, '\n\n',

      '=== CURRENT CALL DETAILS ===\n',
      'Date: ', call_date, '\n',
      'Call ID: ', call_id, '\n',
      'Call Transcript: ', current_call_transcript, '\n\n',

      
      '=== AVAILABLE UPGRADE/CROSS-SELL OPTIONS ===\n',
      'INTERNET PLANS AVAILABLE:\n',
      '• Premium Gig: 1000 Mbps at $100/month\n',
      '• Standard: 100 Mbps at $70/month\n', 
      '• Basic: 25 Mbps at $40/month\n\n',
      
      'MOBILE PLANS AVAILABLE:\n',
      '• Limited 2GB: $30/month (Canada only, international $1/min)\n',
      '• Limited 5GB: $40/month (Canada only, international $1/min)\n',
      '• Limited 10GB: $50/month (Canada only, international $1/min)\n',
      '• Unlimited Standard: $70/month (Free North America calls, 7-day international roaming)\n',
      '• Unlimited Premium: $90/month (Free worldwide calls, 7-day international roaming, Free Disney Plus for 1 year)\n\n',
      
      'SECURITY DEVICES AVAILABLE:\n',
      '• Motion Sensor: $30 each\n',
      '• Window Sensor: $30 each\n',
      '• Smart Doorbell: $80 (optional $10/month monitoring)\n',
      '• Smart Lock: $60\n',
      '• Indoor Camera: $120 (audio communication +$30)\n',
      '• Outdoor Camera: $220 (optional $10/month monitoring, includes 1-year free Spotlight Pro)\n',
      '• Security Panel: $250 (requires $30/month monitoring for alarm setup)\n\n',
      
      '=== SIMILAR SUCCESSFUL APPROACHES ===\n',
      'If the current call transcript shows a pending solution, you may reference how other agents handled similar cases:\n',
      COALESCE(all_similar_calls, 'No similar high-rated examples found'), '\n\n',
      
      '=== EMAIL INSTRUCTIONS ===\n',
      'PRIMARY GOAL: Address the call transcript and provide appropriate follow-up/next best action for solving customers issues\n',
      'SECONDARY GOAL (if appropriate): Suggest relevant upgrades/cross-sells that adds business value\n\n',
      
      'CROSS-SELL/UPSELL GUIDELINES:\n',
      '• ONLY suggest services they dont currently have or clear upgrades\n',
      '• Focus on how it could solve problems mentioned in the call or improve the customers current or future needs\n',
      '• Keep suggestions brief - maximum 2-4 sentences\n',
      '• Make it sound helpful, not sales-y. Add some small offers (none greater than 15$ for upsells/cross sells)\n',
      -- newline + comma added: this line used to run straight into the next bullet
      'Mention what the upsell/cross sell product is, what it offers, what plan it is(if its a mobile or internet one) and how it would be great fit for the customer\n',
      '• If customer was upset/angry, DO NOT include any cross or upsell sales suggestions\n\n',
      
      'EMAIL REQUIREMENTS:\n',
      '• Start with warm, professional greeting\n',
      '• Reference the specific conversation and any issues discussed\n',
      '• Provide clear next steps or resolution\n',
      '• If appropriate, briefly mention how additional services could help\n',
      '• End with contact information for further assistance\n',
      '• Sign as "TriLink Customer Success Team"\n',
      '• Keep tone personal and human, not automated\n',
      '• Total length should be concise but thorough\n',
      -- newlines + commas added: these two sentences and the NOTE block used to be glued together
      'Add the cross-sell and upsell options as long as it follows the guidelines\n',
      'For upsell/cross sell cases customer could reach out to 1800-TRILINK\n\n',
        
        
      '''****NOTE****: 
      Write the email with natural, personalized language. 
      Avoid placeholder text like [Customer Name],[Customers Full Name], [Company], 
      or other bracketed variables that make emails appear automated. If the customer's name is unknown, use 'Dear Valued Customer' as the greeting.
      
      If the call transcript have a placeholder name for the customer, igonore that name. Use Dear Valued Customer.
      
      IMPORTANT: The email should read as if it was written specifically for this recipient by a human, not generated from a template or an AI.'''  

    ),
    connection_id => 'us.vertex-ai-connection',
    endpoint => 'gemini-2.5-flash'
  ).result AS personalized_email,
  age,city,family_size,fiber_availability,home_ownership,home_type,life_stage,neighborhood_crime_rate,work_from_home_flag,device_types_concat,total_security_devices,internet_speed,internet_plan,internet_contract_type,internet_tenure_days,mobile_plan,mobile_contract_type,mobile_data_overage_frequency,mobile_monthly_cost,security_customer

FROM base;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,call_id,customer_id,call_date,current_call_transcript,all_similar_calls,personalized_email
0,CALL_001195,C00083036,2025-06-19,**TriLink Telecom Customer Service Call Transc...,Rating: 7/10\nScenario: equipment_replacement\...,"Dear Mr. Arthur Jenkins,\n\nThis email is a fo..."
1,CALL_001512,C00023666,2025-06-19,**Call Transcript**\n\n**Date:** 2025-06-19\n*...,Rating: 7/10\nScenario: technical_support\nCal...,Subject: Following Up on Your TriLink Telecom ...
2,CALL_002289,C00007779,2025-06-19,**TriLink Telecom Customer Service Call Transc...,Rating: 7/10\nScenario: service_upgrade\nCall:...,"Dear John Miller,\n\nThank you for speaking wi..."
3,CALL_001242,C00008539,2025-06-19,**TriLink Telecom Customer Service Call Transc...,Rating: 7/10\nScenario: service_cancellation\n...,"Dear Mr. Henderson,\n\nThank you for reaching ..."
4,CALL_002264,C00062906,2025-06-19,**Call Transcript**\n\n**Date:** 2025-06-19\n*...,Rating: 7/10\nScenario: service_downgrade\nCal...,"Dear Robert Davies,\n\nThank you for speaking ..."
5,CALL_000631,C00011114,2025-06-19,**Call Transcript: TriLink Telecom Customer Se...,Rating: 7/10\nScenario: contract_renewal\nCall...,Subject: Following Up on Your TriLink Telecom ...
6,CALL_000758,C00093156,2025-06-19,## TriLink Telecom Customer Service Call Trans...,Rating: 7/10\nScenario: service_cancellation\n...,Subject: Following Up On Your TriLink Security...
7,CALL_000037,C00064759,2025-06-19,**TriLink Telecom Customer Service Call Transc...,Rating: 7/10\nScenario: billing_inquiry\nCall:...,"Dear Mr. Miller,\n\nThank you for speaking wit..."
8,CALL_000565,C00036196,2025-06-19,**Call Transcript: TriLink Telecom Customer Se...,Rating: 8/10\nScenario: upsell_security_device...,"Dear Valued Customer,\n\nThank you for reachin..."
9,CALL_000454,C00089431,2025-06-19,**TriLink Telecom Customer Service Call Transc...,Rating: 7/10\nScenario: payment_assistance\nCa...,Subject: Following Up on Your TriLink Payment ...
