In [1]:
import google.cloud.bigquery
import tqdm
import pandas as pd
import numpy as np
# Load the extension
%load_ext google.cloud.bigquery
%load_ext bigquery_magics



In [None]:
! gcloud config set project "trilink-472019"


To update your Application Default Credentials quota project, use the `gcloud auth application-default set-quota-project` command.
Updated property [core/project].


#### Idea

- Business often uses Machine Learning to create models that predicts events of customers/prospects like churn, upsell etc.
- However these models are usually used to create a list of targets and all the targets are treated in the same way or with A/B testing similar cookie cutter content
- This is not efficient as we are losing a lot infomration that the models possess and not using it in the campaigns
- Hence the idea here is to create Xgboost models in bigquery ML for predicting churn for internet and mobile customers of TriLink
- We will use the BigQuery ML.EXPLAIN_PREDICT which not only predicts the probability of event, but also gives the individual contribution of features towards churn
- This helps us creating/identifying exactly what's leading to this customers churn and can be added in as one of the personalization parameter in the automated hyperpersonalized retention email
- Inorder to this let's first create the internet and mobile churn models and the views for the model datasets

#### Mobile Churn Model Creation

##### Create the view for the churn model for mobile


In [2]:
%%bigquery

CREATE OR REPLACE VIEW `trilink-472019.database.mobile_churn_data` AS
SELECT 
  c.customer_id,
  
  -- Customer demographics
  c.age,
  c.household_income,
  c.family_size,
  c.home_ownership,
  c.work_from_home_flag,
  c.education_level,
  c.life_stage,
  c.home_square_footage,
  c.property_value,
  c.neighborhood_crime_rate,
  c.neighborhood_income_median,
  c.fiber_availability,
  c.income_bracket,
  
  -- Mobile service features
  m.service_id,
  m.plan_type,
  m.line_count,
  m.monthly_cost,
  m.data_overage_frequency,
  m.contract_type,
  m.family_plan_flag,
  m.device_upgrade_cycle,
  m.mobile_tenure_days,
  m.mobile_churn,
  
  -- Essential calculated features only
  ROUND(c.household_income / c.family_size, 2) AS income_per_person,
  ROUND(m.monthly_cost / m.line_count, 2) AS cost_per_line,
  ROUND(m.mobile_tenure_days / 365.25, 2) AS tenure_years
  
FROM `trilink-472019.database.customer_df` c
INNER JOIN `trilink-472019.database.mobile_df` m
ON c.customer_id = m.customer_id;

Query is running:   0%|          |

In [3]:
%%bigquery

select mobile_churn,count(*) as cnt
FROM `trilink-472019.database.mobile_churn_data`
group by mobile_churn;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,mobile_churn,cnt
0,0,56134
1,1,18261


##### Create Mobile Churn Prediction Model (Boosting Model)

In [4]:
%%bigquery

CREATE OR REPLACE MODEL `trilink-472019.database.mobile_churn_predictor`
OPTIONS(
  model_type='BOOSTED_TREE_CLASSIFIER',
  input_label_cols=['mobile_churn'],
  enable_global_explain=TRUE
) AS
SELECT 
  mobile_churn,
  
  -- Demographic features
  age,
  household_income,
  family_size,
  home_square_footage,
  property_value,
  neighborhood_crime_rate,
  neighborhood_income_median,
  
  -- Categorical features
  income_bracket,
  home_ownership,
  education_level,
  life_stage,
  plan_type,
  contract_type,
  
  -- Boolean features
  work_from_home_flag,
  fiber_availability,
  family_plan_flag,
  
  -- Mobile behavior features
  line_count,
  monthly_cost,
  data_overage_frequency,
  device_upgrade_cycle,
  tenure_years,
  income_per_person,
  cost_per_line

FROM `trilink-472019.database.mobile_churn_data`
WHERE mobile_churn IS NOT NULL;

Query is running:   0%|          |

##### Score the mobile churn model with feature contribution for a customer to demo the scoring

In [5]:
%%bigquery
SELECT 
  customer_id,
  predicted_mobile_churn,
  ROUND(probability * 100, 1) AS churn_probability_percent,
  
  -- Risk category
  CASE 
    WHEN probability >= 0.8 THEN 'CRITICAL RISK'
    WHEN probability >= 0.6 THEN 'HIGH RISK'
    WHEN probability >= 0.4 THEN 'MEDIUM RISK'
    WHEN probability >= 0.2 THEN 'LOW RISK'
    ELSE 'VERY LOW RISK'
  END AS risk_category,
  
  -- Customer details
  plan_type,
  monthly_cost,
  contract_type,
  tenure_years,
  data_overage_frequency,
  family_plan_flag,
  
  -- Feature contributions (raw string from BigQuery)
  top_feature_attributions,
  
  -- Prediction details
  ROUND(baseline_prediction_value, 4) AS baseline_prediction,
  ROUND(prediction_value, 4) AS final_prediction,
  approximation_error

FROM ML.EXPLAIN_PREDICT(
  MODEL `trilink-472019.database.mobile_churn_predictor`,
  (SELECT * FROM `trilink-472019.database.mobile_churn_data` 
   WHERE customer_id = 'C00004244')  -- CHANGE THIS CUSTOMER ID
);

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,customer_id,predicted_mobile_churn,churn_probability_percent,risk_category,plan_type,monthly_cost,contract_type,tenure_years,data_overage_frequency,family_plan_flag,top_feature_attributions,baseline_prediction,final_prediction,approximation_error
0,C00004244,0,64.6,HIGH RISK,Limited_2GB,30,Month_to_Month,1.23,4,False,"[{'feature': 'household_income', 'attribution'...",1.091,0.8002,0.0


#### Internet Churn Model

##### Create the view for the churn model for Internet


In [6]:
%%bigquery
-- Create Internet Churn Data View
CREATE OR REPLACE VIEW `trilink-472019.database.internet_churn_data` AS 
SELECT 
   c.customer_id,
   
   -- Customer demographics
   c.age,
   c.household_income,
   c.family_size,
   c.home_ownership,
   c.work_from_home_flag,
   c.education_level,
   c.life_stage,
   c.home_square_footage,
   c.property_value,
   c.neighborhood_crime_rate,
   c.neighborhood_income_median,
   c.fiber_availability,
   c.income_bracket,
   
   -- Internet service features
   i.service_id,
   i.plan_tier,
   i.speed_mbps,
   i.monthly_cost,
   i.data_usage_gb,
   i.connected_devices,
   i.contract_type,
   i.speed_complaints,
   i.outage_count,
   i.internet_tenure_days,
   i.internet_churn,
   i.early_termination,
   i.contract_completed_percent,
   
   -- Essential calculated features
   ROUND(c.household_income / c.family_size, 2) AS income_per_person,
   ROUND(i.monthly_cost / i.speed_mbps, 2) AS cost_per_mbps,
   ROUND(i.internet_tenure_days / 365.25, 2) AS tenure_years,
   ROUND(i.data_usage_gb / i.connected_devices, 2) AS data_per_device
   
FROM `trilink-472019.database.customer_df` c 
INNER JOIN `trilink-472019.database.internet_df` i ON c.customer_id = i.customer_id;


Query is running:   0%|          |

In [7]:
%%bigquery

select internet_churn,count(*) as cnt
FROM `trilink-472019.database.internet_churn_data`
group by internet_churn;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,internet_churn,cnt
0,0,59009
1,1,13747


##### Create Internet Churn Model - Boosting Model

In [8]:
%%bigquery

-- Create Internet Churn Prediction Model
CREATE OR REPLACE MODEL `trilink-472019.database.internet_churn_predictor`
OPTIONS(
   model_type='BOOSTED_TREE_CLASSIFIER',
   input_label_cols=['internet_churn'],
   enable_global_explain=TRUE
) AS 
SELECT 
   internet_churn,
   
   -- Demographic features
   age,
   household_income,
   family_size,
   home_square_footage,
   property_value,
   neighborhood_crime_rate,
   neighborhood_income_median,
   
   -- Categorical features
   income_bracket,
   home_ownership,
   education_level,
   life_stage,
   plan_tier,
   contract_type,
   
   -- Boolean features
   work_from_home_flag,
   fiber_availability,
   early_termination,
   
   -- Internet behavior features
   speed_mbps,
   monthly_cost,
   data_usage_gb,
   connected_devices,
   speed_complaints,
   outage_count,
   tenure_years,
   contract_completed_percent,
   
   -- Calculated features
   income_per_person,
   cost_per_mbps,
   data_per_device
   
FROM `trilink-472019.database.internet_churn_data` 
WHERE internet_churn IS NOT NULL;

Query is running:   0%|          |

##### Score Internet customer with model with feature contribution

In [10]:
%%bigquery
SELECT 
  *,
  ROUND(probability * 100, 1) AS churn_probability_percent,
  
  -- Risk category
  CASE 
    WHEN probability >= 0.8 THEN 'CRITICAL RISK'
    WHEN probability >= 0.6 THEN 'HIGH RISK'
    WHEN probability >= 0.4 THEN 'MEDIUM RISK'
    WHEN probability >= 0.2 THEN 'LOW RISK'
    ELSE 'VERY LOW RISK'
  END AS risk_category,
  
  -- Feature contributions (raw string from BigQuery)
  top_feature_attributions,
  
  -- Prediction details
  ROUND(baseline_prediction_value, 4) AS baseline_prediction,
  ROUND(prediction_value, 4) AS final_prediction,
  approximation_error

FROM ML.EXPLAIN_PREDICT(
  MODEL `trilink-472019.database.internet_churn_predictor`,
  (SELECT * FROM `trilink-472019.database.internet_churn_data` 
   WHERE customer_id = 'C00000005')
);

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,predicted_internet_churn,probability,top_feature_attributions,baseline_prediction_value,prediction_value,approximation_error,customer_id,age,household_income,family_size,...,income_per_person,cost_per_mbps,tenure_years,data_per_device,churn_probability_percent,risk_category,top_feature_attributions_1,baseline_prediction,final_prediction,approximation_error_1
0,0,0.939258,"[{'feature': 'outage_count', 'attribution': 0....",1.601866,1.86923,0.0,C00000005,49,111316,4,...,27829.0,1.72,2.0,3.85,93.9,CRITICAL RISK,"[{'feature': 'outage_count', 'attribution': 0....",1.6019,1.8692,0.0


- Using the prediction and feature contribution to generate personalized retention messages using AI.GENERATE

In [None]:
# ##Same idea done with bigquery connection
# import pandas as pd
# from google.cloud import bigquery

# # Initialize BigQuery client
# client = bigquery.Client(project='trilink-471315')

# # Your SQL query (replace with your actual query)
# query = """
# WITH customer_analysis AS (
#   SELECT
#      customer_id,
#     predicted_mobile_churn,
#     ROUND(probability * 100, 1) AS churn_probability_percent,
    
#     -- Customer profile
#     age,
#     plan_type,
#     monthly_cost,
#     contract_type,
#     tenure_years,
#     data_overage_frequency,
#     family_plan_flag,
#     household_income,
    
#     -- Additional features for problem analysis
#     family_size,
#     home_ownership,
#     work_from_home_flag,
#     education_level,
#     life_stage,
#     income_per_person,
#     cost_per_line,
#     fiber_availability,
    
#     -- Risk category
#     CASE
#        WHEN probability >= 0.8 THEN 'CRITICAL RISK'
#       WHEN probability >= 0.6 THEN 'HIGH RISK'
#       WHEN probability >= 0.4 THEN 'MEDIUM RISK'
#       ELSE 'LOW RISK'
#     END AS risk_category,
    
#     -- Feature contributions
#     top_feature_attributions
#    FROM ML.EXPLAIN_PREDICT(
#     MODEL `trilink-471315.database.mobile_churn_predictor`,
#     (SELECT * FROM `trilink-471315.database.mobile_churn_data`
#       WHERE customer_id = 'C00004244')  -- CHANGE CUSTOMER ID HERE
#   )
# )

# SELECT
#   customer_id,
#   churn_probability_percent,
#   risk_category,
#   plan_type,
#   monthly_cost,
#   top_feature_attributions,
  
#   -- Generate comprehensive problem description for vector search
#   AI.GENERATE(
#     CONCAT(
#       'Analyze this mobile customer situation and identify their key problems/pain points in under 150 words .Start with Customer is facing....:\\n\\n',
#       'Customer Profile:\\n',
#       '- Age: ', CAST(age AS STRING), ' years (', life_stage, ')\\n',
#       '- Income: $', CAST(household_income AS STRING), ' household, $', CAST(CAST(income_per_person AS INT64) AS STRING), ' per person\\n',
#       '- Family: ', CAST(family_size AS STRING), ' people, ', home_ownership, '\\n',
#       '- Education: ', education_level, '\\n',
#       '- Works from home: ', CAST(work_from_home_flag AS STRING), '\\n\\n',
#       'Mobile Service:\\n',
#       '- Plan: ', plan_type, '\\n',
#       '- Cost: $', CAST(monthly_cost AS STRING), '/month ($', CAST(CAST(cost_per_line AS INT64) AS STRING), ' per line)\\n',
#       '- Contract: ', contract_type, '\\n',
#       '- Tenure: ', CAST(tenure_years AS STRING), ' years\\n',
#       '- Data overages: ', CAST(data_overage_frequency AS STRING), ' per month\\n',
#       '- Family plan: ', CAST(family_plan_flag AS STRING), '\\n',
#       '- Fiber available: ', CAST(fiber_availability AS STRING), '\\n\\n',
#       'Churn Analysis:\\n',
#       '- Risk: ', CAST(churn_probability_percent AS STRING), '% (', risk_category, ')\\n',
#       '- Key factors: ', ARRAY_TO_STRING(
#         ARRAY(
#           SELECT CONCAT(feature, ': ', CAST(ROUND(attribution, 3) AS STRING))
#           FROM UNNEST(top_feature_attributions)
#           ORDER BY ABS(attribution) DESC
#           LIMIT 5
#         ), ', '
#       ), '\\n\\n',
#       'Task: Write a detailed problem analysis identifying:\\n',
#       '1. Main pain points and frustrations\\n',
#       '2. Why they are at risk of churning\\n',
#       '3. Service usage concerns\\n',
#       '4. Financial constraints\\n',
#       '5. Demographic factors affecting needs\\n\\n',
#       'Format as a comprehensive problem statement for semantic matching with solutions.'
#     ),
#     connection_id => 'us.vertex-ai-connection',
#     endpoint => 'gemini-2.5-flash'
#   ).result AS comprehensive_problem_description,

#   -- Your existing working personalized retention message  
#   AI.GENERATE(
#     CONCAT(
#       'Create a personalized retention message for this mobile customer:\\n\\n',
#       'Customer Profile:\\n',
#       '- Age: ', CAST(age AS STRING), '\\n',
#       '- Current Plan: ', plan_type, '\\n', 
#       '- Monthly Cost: $', CAST(monthly_cost AS STRING), '\\n',
#       '- Contract: ', contract_type, '\\n',
#       '- Tenure: ', CAST(tenure_years AS STRING), ' years\\n',
#       '- Data Overages: ', CAST(data_overage_frequency AS STRING), ' per month\\n',
#       '- Family Plan: ', CAST(family_plan_flag AS STRING), '\\n\\n',
#       'Churn Risk Analysis:\\n',
#       '- Churn Probability: ', CAST(churn_probability_percent AS STRING), '%\\n',
#       '- Risk Level: ', risk_category, '\\n\\n',
#       'Top Factors Affecting Churn Risk. Higher the positive magnitude, more its leading to churn:\\n',
#       ARRAY_TO_STRING(
#         ARRAY(
#           SELECT CONCAT('- ', feature, ': ', CAST(ROUND(attribution, 3) AS STRING))
#           FROM UNNEST(top_feature_attributions)
#           ORDER BY ABS(attribution) DESC
#         ), '\\n'
#       ), '\\n\\n',
#       'Instructions:\\n',
#       '1. Write a personalized email message addressing their specific concerns\\n',
#       '2. Understand what features are contributing the most towards churn and preventing churn and analyze those to create the personalized email\\n',
#       '3. Offer solutions based on the risk factors identified\\n',
#       '4. Include a compelling retention offer\\n',
#       '5. Keep the tone friendly and helpful\\n',
#       '6. Include a clear call-to-action\\n\\n',
#       'Generate the email message:'
#     ),
#     connection_id => 'us.vertex-ai-connection',
#     endpoint => 'gemini-2.5-flash'
#   ).result AS personalized_retention_message

# FROM customer_analysis;
# """

# # Execute query and get results
# df = client.query(query).to_dataframe()

# # Display basic info
# print("="*80)
# print("CUSTOMER RETENTION ANALYSIS RESULTS")
# print("="*80)

# # Display customer summary
# for index, row in df.iterrows():
#     print(f"Customer ID: {row['customer_id']}")
#     print(f"Churn Probability: {row['churn_probability_percent']}%")
#     print(f"Risk Category: {row['risk_category']}")
#     print(f"Current Plan: {row['plan_type']}")
#     print(f"Monthly Cost: ${row['monthly_cost']}")
#     print()
    
#     # Display feature attributions in a readable format
#     print("TOP FEATURE ATTRIBUTIONS:")
#     print("-" * 40)
#     for feature in row['top_feature_attributions']:
#         feature_name = feature['feature']
#         attribution = feature['attribution']
#         impact = "📈 INCREASES" if attribution > 0 else "📉 DECREASES"
#         print(f"{feature_name:25} | {attribution:+7.3f} | {impact} churn risk")
#     print()

#     # Display the customer issue
#     print("CUSTOMER ISSUE IDENTIFIED:")
#     print("=" * 80)
#     message = row['comprehensive_problem_description']
#     formatted_message = message.replace('\\n', '\n')
#     print(formatted_message)
#     print("=" * 80)
    
#     # Display the personalized message with proper formatting
#     print("PERSONALIZED RETENTION MESSAGE:")
#     print("=" * 80)
#     message = row['personalized_retention_message']
#     # Replace \\n with actual newlines for better readability
#     formatted_message = message.replace('\\n', '\n')
#     print(formatted_message)
#     print("=" * 80)





CUSTOMER RETENTION ANALYSIS RESULTS
Customer ID: C00004244
Churn Probability: 64.6%
Risk Category: HIGH RISK
Current Plan: Limited_2GB
Monthly Cost: $30

TOP FEATURE ATTRIBUTIONS:
----------------------------------------
household_income          |  -0.336 | 📉 DECREASES churn risk
age                       |  +0.120 | 📈 INCREASES churn risk
data_overage_frequency    |  +0.107 | 📈 INCREASES churn risk
family_plan_flag          |  -0.087 | 📉 DECREASES churn risk
monthly_cost              |  -0.045 | 📉 DECREASES churn risk

CUSTOMER ISSUE IDENTIFIED:
Customer is facing significant dissatisfaction and a high risk of churn (64.6%) due to a severe mismatch between their mobile service plan and actual usage needs, primarily driven by persistent data insufficiency and its associated financial and experiential frustrations.

**Comprehensive Problem Statement for Semantic Matching:**

The customer, a 16-year-old with an individual mobile plan, is critically underserved by their Limited_2GB mobil

In [24]:
df['top_feature_attributions'].values

array([array([{'feature': 'household_income', 'attribution': -0.33624252676963806},
              {'feature': 'age', 'attribution': 0.12005558609962463},
              {'feature': 'data_overage_frequency', 'attribution': 0.10666532814502716},
              {'feature': 'family_plan_flag', 'attribution': -0.0872725397348404},
              {'feature': 'monthly_cost', 'attribution': -0.04487797990441322}],
             dtype=object)                                                              ],
      dtype=object)

In [None]:
##Batch processing python code

# import pandas as pd
# from google.cloud import bigquery
# import time
# from datetime import datetime

# def process_retention_campaigns_in_batches():
#     """
#     Process customer retention campaigns in manageable batches
#     to handle large datasets and API rate limits
#     """
#     client = bigquery.Client(project='trilink-471315')
    
#     # Define batch configurations
#     batch_configs = [
#         {
#             'name': 'critical_risk',
#             'where_clause': 'probability >= 0.8',
#             'limit': 20,
#             'priority': 1
#         },
#         {
#             'name': 'high_risk', 
#             'where_clause': 'probability >= 0.6 AND probability < 0.8',
#             'limit': 50,
#             'priority': 2
#         },
#         {
#             'name': 'medium_risk',
#             'where_clause': 'probability >= 0.4 AND probability < 0.6', 
#             'limit': 100,
#             'priority': 3
#         }
#     ]
    
#     all_results = []
    
#     for config in batch_configs:
#         print(f"🔄 Processing {config['name']} customers...")
#         print(f"   Target: {config['limit']} customers with {config['where_clause']}")
        
#         query = f"""
#         WITH customer_analysis AS (
#           SELECT
#              customer_id,
#             predicted_mobile_churn,
#             ROUND(probability * 100, 1) AS churn_probability_percent,
#             age, plan_type, monthly_cost, contract_type, tenure_years,
#             data_overage_frequency, family_plan_flag, household_income,
            
#             CASE
#                WHEN probability >= 0.8 THEN 'CRITICAL RISK'
#               WHEN probability >= 0.6 THEN 'HIGH RISK'
#               WHEN probability >= 0.4 THEN 'MEDIUM RISK'
#               ELSE 'LOW RISK'
#             END AS risk_category,
            
#             top_feature_attributions
#            FROM ML.EXPLAIN_PREDICT(
#             MODEL `trilink-471315.database.mobile_churn_predictor`,
#             (SELECT * FROM `trilink-471315.database.mobile_churn_data`
#              WHERE {config['where_clause']}
#              LIMIT {config['limit']})
#           )
#         )

#         SELECT
#           '{config['name']}' as campaign_batch,
#           {config['priority']} as priority,
#           customer_id,
#           churn_probability_percent,
#           risk_category,
#           plan_type,
#           monthly_cost,
#           tenure_years,
#           data_overage_frequency,
#           top_feature_attributions,
          
#           AI.GENERATE(
#             CONCAT(
#               'Create a personalized retention message for this mobile customer:\\n\\n',
#               'Customer Profile:\\n',
#               '- Age: ', CAST(age AS STRING), '\\n',
#               '- Current Plan: ', plan_type, '\\n', 
#               '- Monthly Cost: $', CAST(monthly_cost AS STRING), '\\n',
#               '- Contract: ', contract_type, '\\n',
#               '- Tenure: ', CAST(tenure_years AS STRING), ' years\\n',
#               '- Data Overages: ', CAST(data_overage_frequency AS STRING), ' per month\\n',
#               '- Family Plan: ', CAST(family_plan_flag AS STRING), '\\n\\n',
#               'Churn Risk Analysis:\\n',
#               '- Churn Probability: ', CAST(churn_probability_percent AS STRING), '%\\n',
#               '- Risk Level: ', risk_category, '\\n\\n',
#               'Key Factors Affecting Churn Risk:\\n',
#               ARRAY_TO_STRING(
#                 ARRAY(
#                   SELECT CONCAT('- ', feature, ': ', CAST(ROUND(attribution, 3) AS STRING))
#                   FROM UNNEST(top_feature_attributions)
#                   ORDER BY ABS(attribution) DESC
#                   LIMIT 6
#                 ), '\\n'
#               ), '\\n\\n',
#               'Instructions:\\n',
#               '1. Write a personalized email message addressing their specific concerns\\n',
#               '2. Offer solutions based on the risk factors identified\\n',
#               '3. Include retention offer appropriate for risk level\\n',
#               '4. Keep tone friendly and helpful\\n',
#               '5. Include clear call-to-action\\n\\n',
#               'Generate the email message:'
#             ),
#             connection_id => 'us.vertex-ai-connection',
#             endpoint => 'gemini-2.5-flash'
#           ).result AS personalized_retention_message,
          
#           CURRENT_TIMESTAMP() AS message_generated_at

#         FROM customer_analysis
#         ORDER BY churn_probability_percent DESC
#         """
        
#         try:
#             # Execute query
#             start_time = time.time()
#             df_batch = client.query(query).to_dataframe()
#             end_time = time.time()
            
#             print(f"   ✅ Processed {len(df_batch)} customers in {end_time-start_time:.1f} seconds")
            
#             # Add to results
#             all_results.append(df_batch)
            
#             # Add delay between batches to respect API limits
#             if config != batch_configs[-1]:  # Don't wait after last batch
#                 print("   ⏳ Waiting 30 seconds before next batch...")
#                 time.sleep(30)
                
#         except Exception as e:
#             print(f"   ❌ Error processing {config['name']}: {str(e)}")
#             continue
    
#     # Combine all results
#     if all_results:
#         final_df = pd.concat(all_results, ignore_index=True)
        
#         # Save results
#         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
#         csv_filename = f'retention_campaigns_{timestamp}.csv'
#         final_df.to_csv(csv_filename, index=False)
        
#         print(f"\n🎯 CAMPAIGN GENERATION COMPLETE!")
#         print(f"📊 Total customers processed: {len(final_df)}")
#         print(f"💾 Results saved to: {csv_filename}")
        
#         # Display summary by batch
#         print(f"\n📈 BATCH SUMMARY:")
#         summary = final_df.groupby(['campaign_batch', 'risk_category']).size().unstack(fill_value=0)
#         print(summary)
        
#         # Save to BigQuery table for campaign management
#         table_id = f"trilink-471315.database.retention_campaigns_{timestamp.replace('_', '')}"
#         final_df.to_gbq(table_id, project_id='trilink-471315', if_exists='replace')
#         print(f"📋 Campaign data uploaded to BigQuery: {table_id}")
        
#         return final_df
#     else:
#         print("❌ No results generated")
#         return None

# # Run the batch processing
# if __name__ == "__main__":
#     results = process_retention_campaigns_in_batches()
    
#     # Optional: Display sample messages
#     if results is not None and len(results) > 0:
#         print(f"\n📧 SAMPLE RETENTION MESSAGE:")
#         print("="*80)
#         sample_message = results['personalized_retention_message'].iloc[0]
#         print(sample_message.replace('\\n', '\n'))
#         print("="*80)