# Using the Vertex AI PaLM API to explain BQML Clustering - Our Very Own Persona Builder

## Here's what we're going to build:

<p align="center">
  <img alt="Conceptual Flow" src="slides/process0.png" width="100%">
</p>

# Core code
Let's define some variables that will be used throughout this notebook.

These are the GCP Project ID `project_id`, the Model name `model_name` which is any name you prefer, and finally the Dataset name `dataset_name`.
The dataset needs to exist in the same Project as `project_id` and you'll need appropriate access to create and delete.

## Setup

In [1]:
from typing import Union
import sys

import os
import io
import json
import base64
import requests
import concurrent.futures
import time

import numpy as np
import pandas as pd

import vertexai
from vertexai.language_models import TextGenerationModel, TextEmbeddingModel
from vertexai.preview.language_models import TextGenerationModel as TextGenerationModel_preview
from vertexai.preview.generative_models import GenerativeModel, Part, Image

from google import genai # new unified SDK
from google.genai import types
from google.genai.types import GenerateContentConfig

from google.cloud import aiplatform
#from google.cloud import documentai
#from google.cloud.documentai_v1 import Document
from google.cloud import storage
from google.cloud import bigquery

from IPython.display import display, Markdown, Latex
import markdown

print("Vertex AI version: " + str(aiplatform.__version__))

Vertex AI version: 1.84.0


In [2]:
PROJECT_ID = 'mg-ce-demos'
REGION = 'us-central1'
DATASET = "bqml_demos" 
BQML_MODEL = "ecommerce_customer_segment_cluster5" 
EVAL = BQML_MODEL + "_eval"


In [3]:
# vertex ai clients
vertexai.init(project = PROJECT_ID, location = REGION)
aiplatform.init(project = PROJECT_ID, location = REGION)

# bigquery client
bq = bigquery.Client(project = PROJECT_ID)

In [4]:
# Use this if using GCP - Vertex
from google.oauth2 import service_account
import os

credentials = service_account.Credentials.from_service_account_file(
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'],
    scopes=['https://www.googleapis.com/auth/cloud-platform']
)

In [5]:
google_genai_client = genai.Client(vertexai=True, project=PROJECT_ID, location=REGION, credentials=credentials) 

In [6]:
gemini_2_pro = "gemini-2.0-pro-exp-02-05"
gemini_2_flash = "gemini-2.0-flash-001"
gemini_2_flash_thinking = 'gemini-2.0-flash-thinking-exp-01-21'

In [7]:
generate_content_config = types.GenerateContentConfig(
    temperature = 0.5,
    top_p = 0.95,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    safety_settings = [types.SafetySetting(
        category="HARM_CATEGORY_HATE_SPEECH",
        threshold="OFF"
    ),types.SafetySetting(
        category="HARM_CATEGORY_DANGEROUS_CONTENT",
        threshold="OFF"
    ),types.SafetySetting(
        category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
        threshold="OFF"
    ),types.SafetySetting(
        category="HARM_CATEGORY_HARASSMENT",
        threshold="OFF"
    )]
)

tools = [
    types.Tool(google_search=types.GoogleSearch())
  ]

generate_content_config_w_search = types.GenerateContentConfig(
    temperature = 0.5,
    top_p = 0.95,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    tools=tools,
    safety_settings = [types.SafetySetting(
        category="HARM_CATEGORY_HATE_SPEECH",
        threshold="OFF"
    ),types.SafetySetting(
        category="HARM_CATEGORY_DANGEROUS_CONTENT",
        threshold="OFF"
    ),types.SafetySetting(
        category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
        threshold="OFF"
    ),types.SafetySetting(
        category="HARM_CATEGORY_HARASSMENT",
        threshold="OFF"
    )]
)

## Create a K-means model to cluster ecommerce data

### Let's look at the data first

<p align="center">
  <img alt="Conceptual Flow" src="slides/process1.png" width="100%">
</p>

In [8]:
query = """
SELECT
  user_id,
  order_id,
  sale_price,
  created_at as order_created_date
FROM `mg-ce-demos.thelook_ecommerce.order_items`
WHERE created_at BETWEEN CAST('2022-01-01 00:00:00' AS TIMESTAMP)
AND CAST('2024-01-01 00:00:00' AS TIMESTAMP)
"""
df = bq.query(query).to_dataframe()
df.head()


I0000 00:00:1741867926.114506 71425698 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Unnamed: 0,user_id,order_id,sale_price,order_created_date
0,40085,50021,2.5,2023-04-07 08:09:34+00:00
1,90363,112961,2.5,2022-12-25 23:23:29+00:00
2,3922,4887,2.5,2022-09-29 13:47:16+00:00
3,56076,70152,2.5,2022-03-27 03:01:23+00:00
4,90828,113540,2.5,2023-03-18 08:30:11+00:00


### `CREATE MODEL` using `KMEANS`

Create a query then start the model creation job, using a python loop to wait for the job to complete

<p align="center">
  <img alt="Conceptual Flow" src="slides/process2.png" width="100%">
</p>

#### model code

In [9]:
query = """
CREATE MODEL IF NOT EXISTS `{0}.{1}`
OPTIONS (
  MODEL_TYPE = "KMEANS",
  NUM_CLUSTERS = 5,
  KMEANS_INIT_METHOD = "KMEANS++",
  STANDARDIZE_FEATURES = TRUE )
AS (
SELECT * EXCEPT (user_id)
FROM (
  SELECT user_id,
    DATE_DIFF(CURRENT_DATE(), CAST(MAX(order_created_date) as DATE), day) AS days_since_order, -- RECENCY
    COUNT(order_id) AS count_orders, -- FREQUENCY
    AVG(sale_price) AS avg_spend -- MONETARY
  FROM (
    SELECT user_id,
      order_id,
      sale_price,
      created_at as order_created_date
    FROM `mg-ce-demos.thelook_ecommerce.order_items`
    WHERE created_at BETWEEN CAST('2022-01-01 00:00:00' AS TIMESTAMP)
    AND CAST('2024-01-01 00:00:00' AS TIMESTAMP)
  )
  GROUP BY user_id, order_id
 )
)
""".format(DATASET, BQML_MODEL)


In [10]:
# Wrapper to use BigQuery client to run query/job, return job ID or result as DF
def run_bq_query(sql: str) -> Union[str, pd.DataFrame]:
    
    # Try dry run before executing query to catch any errors
    #job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
    #bq.query(sql, job_config=job_config)

    # If dry run succeeds without errors, proceed to run query
    job_config = bigquery.QueryJobConfig()
    client_result = bq.query(sql, job_config=job_config)

    job_id = client_result.job_id

    # Wait for query/job to finish running. then get & return data frame
    df = client_result.result().to_arrow().to_pandas()
    print(f"Finished job_id: {job_id}")
    return df

In [11]:
%%timeit

run_bq_query(query)

Finished job_id: b8bbe9bc-1169-4415-b895-2eac61640b16
Finished job_id: d84d2e51-b425-4bd7-94ca-97f38645cd17
Finished job_id: b7340303-de5b-4619-a042-320176ecb119
Finished job_id: e50a70b1-77b0-4410-8b2c-da745afd4f96
Finished job_id: 1a7cfcb3-01a2-4d6d-b3f0-aadcf61a1972
Finished job_id: 8db258fa-1273-4cb0-9b8f-75470b480590
Finished job_id: 24019acd-01f3-4e7a-9d4d-3f9e10a14203
Finished job_id: 34e3674b-2d53-416b-9303-41fe656b42ed
The slowest run took 9.27 times longer than the fastest. This could mean that an intermediate result is being cached.
1.65 s ± 1.92 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


Let's take a look at the model's clustering performance, using these metrics - Davies Bouldin Index and Mean Squared Distance

In [12]:
query = """
SELECT *
FROM ML.EVALUATE(MODEL `{0}.{1}`)
""".format(DATASET, BQML_MODEL)

run_bq_query(query)

I0000 00:00:1741867941.414611 71425698 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Finished job_id: a141a0d7-f831-44e0-9908-e4b6a7ce75e1


Unnamed: 0,davies_bouldin_index,mean_squared_distance
0,1.023378,1.051785


### Now let's get the cluster (centroid) information

<p align="center">
  <img alt="Conceptual Flow" src="slides/process3.png" width="100%">
</p>

In [13]:
query = """
SELECT
  CONCAT('cluster ', CAST(centroid_id as STRING)) as centroid,
  avg_spend as average_spend,
  count_orders as count_of_orders,
  days_since_order
FROM (
  SELECT centroid_id, feature, ROUND(numerical_value, 2) as value
  FROM ML.CENTROIDS(MODEL `{0}.{1}`)
)
PIVOT (
  SUM(value)
  FOR feature IN ('avg_spend',  'count_orders', 'days_since_order')
)
ORDER BY centroid_id
""".format(DATASET, BQML_MODEL)

run_bq_query(query)

I0000 00:00:1741867943.610888 71425698 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Finished job_id: ff857555-755c-44e5-b44b-eac4dbc90428


Unnamed: 0,centroid,average_spend,count_of_orders,days_since_order
0,cluster 1,49.44,1.23,102.87
1,cluster 2,59.56,3.51,87.9
2,cluster 3,251.34,1.14,205.85
3,cluster 4,57.47,3.49,354.32
4,cluster 5,48.87,1.22,376.5


Whew! That's a lot of metrics and cluster info. How about we explain this to our colleagues using the magic of LLMs.

In [14]:
df = bq.query(query).to_dataframe()
df.to_string(header=False, index=False)

cluster_info = []
for i, row in df.iterrows():
  cluster_info.append("{0}, average spend ${2}, count of orders per person {1}, days since last order {3}"
    .format(row["centroid"], row["count_of_orders"], row["average_spend"], row["days_since_order"]) )

print(str.join("\n", cluster_info))

I0000 00:00:1741867945.483045 71425698 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


cluster 1, average spend $49.44, count of orders per person 1.23, days since last order 102.87
cluster 2, average spend $59.56, count of orders per person 3.51, days since last order 87.9
cluster 3, average spend $251.34, count of orders per person 1.14, days since last order 205.85
cluster 4, average spend $57.47, count of orders per person 3.49, days since last order 354.32
cluster 5, average spend $48.87, count of orders per person 1.22, days since last order 376.5


## Explain with Vertex AI PaLM API

### First, we want to instantiate the large language model and create the prompt

<p align="center">
  <img alt="Conceptual Flow" src="slides/process4.png" width="100%">
</p>

In [16]:
def gemini_generate(prompt, print_stream=True):
    result = []
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=prompt)]
        )
    ]
    for chunk in google_genai_client.models.generate_content_stream(
        model = gemini_2_flash, 
        contents = contents, 
        config = generate_content_config,
    ):
        result.append(chunk.text)
        if print_stream == True:
            print(chunk.text, end="")

    result = ''.join(result)
    return result

In [17]:
preamble = """You are a creative strategist, given the following clusters come up with creative brand persona and title labels for each of these clusters, and explain step by step; what would be the next marketing step for these clusters:

"""
prompt = preamble + "\n" + str.join("\n", cluster_info)
display(Markdown('## Prompt:'))
print(prompt)
#print(preamble)
#print(str.join("\n", cluster_info))

## Prompt:

You are a creative strategist, given the following clusters come up with creative brand persona and title labels for each of these clusters, and explain step by step; what would be the next marketing step for these clusters:


cluster 1, average spend $49.44, count of orders per person 1.23, days since last order 102.87
cluster 2, average spend $59.56, count of orders per person 3.51, days since last order 87.9
cluster 3, average spend $251.34, count of orders per person 1.14, days since last order 205.85
cluster 4, average spend $57.47, count of orders per person 3.49, days since last order 354.32
cluster 5, average spend $48.87, count of orders per person 1.22, days since last order 376.5


### Now, we send our prompt to Google GenAI API for some LLM magic

<p align="center">
  <img alt="Conceptual Flow" src="slides/process5.png" width="100%">
</p>

In [19]:
response = gemini_generate(prompt)

Okay, here's a breakdown of the clusters, creative persona development, title labels, and the next marketing steps, designed to be actionable and strategically sound.

**Step 1: Data Analysis & Initial Impressions**

Before diving into personas, let's quickly recap what the data is telling us:

 *per order* on average.ow much each customer spends
*   **Count of Orders per Person:**  How frequently they're buying.
A measure of recency – how recently they've interacted with the brand.

**Step 2: Creative Brand Persona & Title Label Development**

, and a title label that summarizes their behavior. This helps us humanize the data and craft targeted messaging.

*   **Cluster 1: The "Casual Browser"**
. *Description:* Brenda is a busy individual who occasionally stumbles upon the brand, makes a small purchase, and then doesn't think about it again for a while. She's not particularly loyal or engaged. She's likely price-sensitive and driven by immediate needs.
asional Spender"Label:** "Occ
 

In [20]:
display(Markdown('## Markdown Response:'))
display(Markdown(response))

## Markdown Response:

Okay, here's a breakdown of the clusters, creative persona development, title labels, and the next marketing steps, designed to be actionable and strategically sound.

**Step 1: Data Analysis & Initial Impressions**

Before diving into personas, let's quickly recap what the data is telling us:

*   **Average Spend:** How much each customer spends *per order* on average.
*   **Count of Orders per Person:**  How frequently they're buying.
*   **Days Since Last Order:**  A measure of recency – how recently they've interacted with the brand.

**Step 2: Creative Brand Persona & Title Label Development**

We'll create a persona for each cluster, giving them a name, relatable characteristics, and a title label that summarizes their behavior. This helps us humanize the data and craft targeted messaging.

*   **Cluster 1: The "Casual Browser"**
    *   **Persona:**  *Name:* Brenda. *Description:* Brenda is a busy individual who occasionally stumbles upon the brand, makes a small purchase, and then doesn't think about it again for a while. She's not particularly loyal or engaged. She's likely price-sensitive and driven by immediate needs.
    *   **Title Label:** "Occasional Spender"
    *   **Rationale:** Low average spend, low order frequency, and long time since last order indicate infrequent, low-commitment engagement.

*   **Cluster 2: The "Value Seeker"**
    *   **Persona:** *Name:* Kevin. *Description:* Kevin likes the brand and buys relatively frequently, but he's price-conscious. He's likely looking for good deals and value for his money. He may be a regular user of the product/service but isn't necessarily a high-roller.
    *   **Title Label:** "Frequent Value Shopper"
    *   **Rationale:** Moderate average spend, high order frequency, and relatively short time since last order suggest regular purchases driven by value.

*   **Cluster 3: The "Big Ticket Buyer"**
    *   **Persona:** *Name:*  Olivia. *Description:* Olivia makes infrequent but significant purchases. She might be buying high-end items, gifts, or products/services with a long purchase cycle. She's not a frequent shopper, but when she does buy, she spends a lot.
    *   **Title Label:** "High-Value, Infrequent Purchaser"
    *   **Rationale:** High average spend, low order frequency, and long time since last order point to infrequent, high-value transactions.

*   **Cluster 4: The "Dormant Regular"**
    *   **Persona:** *Name:*  David. *Description:* David used to be a regular customer, buying frequently and spending a moderate amount. However, he hasn't purchased anything in a long time. He may have forgotten about the brand, found an alternative, or experienced a change in circumstances.
    *   **Title Label:** "Lapsed Loyal Customer"
    *   **Rationale:** Moderate average spend, high order frequency, but *very* long time since last order indicates a previously engaged customer who has become inactive.

*   **Cluster 5: The "Forgotten Visitor"**
    *   **Persona:** *Name:*  Sarah. *Description:* Sarah visited the store once, made a small purchase, and hasn't been back since. She may have been a one-time visitor driven by a specific need or promotion. She likely doesn't have a strong connection with the brand.
    *   **Title Label:** "One-Time Buyer"
    *   **Rationale:** Low average spend, low order frequency, and *extremely* long time since last order suggest a single, isolated transaction.

**Step 3: Next Marketing Steps - Tailored Strategies for Each Cluster**

Now, let's outline specific marketing actions for each cluster, focusing on maximizing ROI and customer lifetime value.

*   **Cluster 1: The "Casual Browser" (Occasional Spender)**
    *   **Goal:** Increase frequency and average spend.
    *   **Marketing Actions:**
        *   **Targeted Email Campaigns:**  Send personalized emails highlighting relevant products based on their past purchase.  Include a small discount or free shipping to incentivize a second purchase.
        *   **Retargeting Ads:**  Use retargeting ads on social media and other websites to remind them of the brand and showcase new arrivals or popular items.
        *   **"Welcome Back" Offers:**  If they haven't purchased in a while (e.g., 60 days), trigger a special "We Miss You!" email with a compelling offer.
        *   **Content Marketing:**  Create blog posts or social media content that addresses their potential needs and interests.
    *   **Metrics:** Track email open rates, click-through rates, conversion rates, and average order value.

*   **Cluster 2: The "Value Seeker" (Frequent Value Shopper)**
    *   **Goal:** Increase average spend and build loyalty.
    *   **Marketing Actions:**
        *   **Loyalty Program:**  Implement a loyalty program that rewards repeat purchases with points, discounts, or exclusive perks.
        *   **Bundling & Upselling:**  Offer product bundles or suggest higher-priced alternatives that provide better value.
        *   **Early Access to Sales:**  Give them early access to sales and promotions to reward their loyalty.
        *   **Personalized Recommendations:**  Use their purchase history to recommend relevant products and create a personalized shopping experience.
    *   **Metrics:** Track loyalty program enrollment, repeat purchase rate, average order value, and customer lifetime value.

*   **Cluster 3: The "Big Ticket Buyer" (High-Value, Infrequent Purchaser)**
    *   **Goal:** Increase purchase frequency and build a stronger relationship.
    *   **Marketing Actions:**
        *   **Personalized Outreach:**  Send personalized emails or even handwritten notes to thank them for their purchase and offer exclusive assistance.
        *   **VIP Treatment:**  Offer them access to exclusive events, product previews, or personalized styling advice.
        *   **Relationship Building:**  Focus on building a genuine relationship through personalized communication and exceptional customer service.
        *   **Product Education:**  Provide them with detailed information about new products or services that align with their interests.
    *   **Metrics:** Track response rates to personalized outreach, customer satisfaction scores, and repeat purchase rate.

*   **Cluster 4: The "Dormant Regular" (Lapsed Loyal Customer)**
    *   **Goal:** Reactivate them and win them back.
    *   **Marketing Actions:**
        *   **"We Miss You" Campaign:**  Send a series of emails with increasingly enticing offers, such as a significant discount, free shipping, or a free gift.
        *   **Personalized Apology:**  Acknowledge their past loyalty and apologize for any potential issues that may have caused them to leave.
        *   **Highlight New Features/Products:**  Showcase any new features, products, or services that have been introduced since their last purchase.
        *   **Survey for Feedback:**  Ask them for feedback on their past experience to understand why they stopped buying.
    *   **Metrics:** Track email open rates, click-through rates, reactivation rate, and customer satisfaction scores.

*   **Cluster 5: The "Forgotten Visitor" (One-Time Buyer)**
    *   **Goal:**  Introduce them to the brand and encourage a second purchase.
    *   **Marketing Actions:**
        *   **Welcome Series:**  Send a series of welcome emails that introduce the brand, its values, and its key products/services.
        *   **Educational Content:**  Provide them with valuable content that addresses their potential needs and interests.
        *   **Special Offer:**  Offer them a special discount or free gift to incentivize a second purchase.
        *   **Social Media Engagement:**  Encourage them to follow the brand on social media to stay updated on new products, promotions, and events.
    *   **Metrics:** Track email open rates, click-through rates, conversion rates, and social media engagement.

**Step 4: Testing, Iteration, and Refinement**

*   **A/B Testing:**  Continuously test different subject lines, email content, offers, and ad creatives to optimize campaign performance.
*   **Segmentation Refinement:**  Monitor the performance of each cluster and refine the segmentation criteria as needed.
*   **Data-Driven Decisions:**  Make all marketing decisions based on data and analytics.

**Key Considerations:**

*   **Personalization is Key:**  The more personalized the messaging, the more effective it will be.
*   **Value Proposition:**  Clearly communicate the value proposition of the brand and its products/services.
*   **Customer Journey:**  Map out the customer journey for each cluster and identify opportunities to improve the experience.
*   **Automation:**  Automate as much of the marketing process as possible to save time and resources.

By following these steps, you can transform your customer data into actionable insights and create targeted marketing campaigns that drive results. Remember to continuously monitor, analyze, and optimize your efforts to maximize your return on investment.


Voila! We've now used k-means clustering to create groups of spenders and explain their profiles.

Sometimes, though, you want a little bit [extra](https://cloud.google.com/blog/transform/prompt-debunking-five-generative-ai-misconceptions).

In [22]:
preamble = """Pretend you're a creative strategist, analyse the following clusters and come up with creative brand persona for each that includes the detail of their favorite movie, a summary of how this relates to their purchasing behavior, and a witty e-mail headline for marketing campaign targeted to their group:

"""
prompt = preamble + "\n" + str.join("\n", cluster_info)

display(Markdown('## Prompt:'))
print(prompt)

response = gemini_generate(prompt, print_stream=False)

display(Markdown('## Markdown Response:'))
display(Markdown(response))

## Prompt:

Pretend you're a creative strategist, analyse the following clusters and come up with creative brand persona for each that includes the detail of their favorite movie, a summary of how this relates to their purchasing behavior, and a witty e-mail headline for marketing campaign targeted to their group:


cluster 1, average spend $49.44, count of orders per person 1.23, days since last order 102.87
cluster 2, average spend $59.56, count of orders per person 3.51, days since last order 87.9
cluster 3, average spend $251.34, count of orders per person 1.14, days since last order 205.85
cluster 4, average spend $57.47, count of orders per person 3.49, days since last order 354.32
cluster 5, average spend $48.87, count of orders per person 1.22, days since last order 376.5


## Markdown Response:

Okay, here's a breakdown of the clusters, complete with creative brand personas, movie tie-ins, purchasing behavior analysis, and witty email headlines. I'm aiming for memorable and insightful, not just generic.

**Cluster 1: The "Occasional Browsers"**

*   **Description:** Low average spend, low order frequency, and a long time since their last purchase. These are likely casual customers who aren't particularly engaged.

*   **Brand Persona:** **"Serene Sally"** - A busy professional (maybe a teacher or librarian) who enjoys quality but doesn't have a lot of time for shopping. She appreciates simple, reliable products.

*   **Favorite Movie:** *Amelie* (2001) - A whimsical, feel-good movie about finding joy in small things and helping others. Sally appreciates the film's understated beauty and focus on everyday moments.

*   **Purchasing Behavior:** Sally's purchases are infrequent and often driven by specific needs or recommendations. She's likely to be influenced by positive reviews and clear, concise product descriptions. She values authenticity and ethical sourcing. She's not easily swayed by trends or aggressive marketing.

*   **Email Headline:** "Psst... Remember Us? A Little Something to Brighten Your Day." (Gentle reminder, personalized, and hints at value.)

**Cluster 2: The "Reliable Regulars"**

*   **Description:** Moderate average spend, high order frequency, and a relatively short time since their last purchase. These are loyal customers who appreciate value and convenience.

*   **Brand Persona:** **"Practical Pete"** - A pragmatic and efficient individual (perhaps an engineer or project manager) who values functionality and good deals. He's a planner and likes to stock up on essentials.

*   **Favorite Movie:** *Apollo 13* (1995) - A suspenseful and inspiring film about problem-solving under pressure. Pete admires the ingenuity, teamwork, and resourcefulness displayed in the face of adversity.

*   **Purchasing Behavior:** Pete is driven by value and efficiency. He's likely to respond to promotions, discounts, and subscription options. He appreciates clear product information and reliable shipping. He's a good candidate for loyalty programs and bundled offers.

*   **Email Headline:** "Your Favorites Are Back in Stock (Plus a Little Something Extra)." (Highlights availability and offers an incentive.)

**Cluster 3: The "Luxury Lifestylers"**

*   **Description:** High average spend, low order frequency, and a long time since their last purchase. These are high-value customers who likely make infrequent but significant purchases.

*   **Brand Persona:** **"Elegant Eleanor"** - A discerning and sophisticated individual (perhaps a consultant or entrepreneur) who appreciates luxury and exclusivity. She values quality, craftsmanship, and personalized service.

*   **Favorite Movie:** *Casablanca* (1942) - A classic film known for its timeless elegance, romance, and intrigue. Eleanor appreciates the film's sophisticated dialogue, iconic style, and sense of timelessness.

*   **Purchasing Behavior:** Eleanor is motivated by quality, exclusivity, and personalized experiences. She's willing to pay a premium for products that align with her refined taste. She's likely to be influenced by high-end marketing, celebrity endorsements, and personalized recommendations.

*   **Email Headline:** "A Curated Collection Just For You, Eleanor." (Personalized, emphasizes exclusivity and curation.)

**Cluster 4: The "Forgotten Fans"**

*   **Description:** Moderate average spend, high order frequency, but a *very* long time since their last purchase. These were once loyal customers who have drifted away.

*   **Brand Persona:** **"Nostalgic Nate"** - A sentimental and loyal individual (perhaps a retiree or long-time community member) who appreciates familiar brands and reliable service. He may have switched due to a bad experience or simply forgotten about the brand.

*   **Favorite Movie:** *Back to the Future* (1985) - A nostalgic and feel-good film about time travel and rediscovering the past. Nate appreciates the film's sense of adventure, humor, and heartwarming message about the importance of family and friendship.

*   **Purchasing Behavior:** Nate needs to be reminded of the positive experiences he had with the brand. He's likely to respond to personalized offers, heartfelt messages, and reminders of the brand's history and values. He may be hesitant to return without an incentive or reassurance.

*   **Email Headline:** "We Miss You, Nate! Here's a Little Something to Welcome You Back." (Acknowledges absence, offers a welcome-back gift.)

**Cluster 5: The "Bargain Hunters"**

*   **Description:** Low average spend, low order frequency, and a *very* long time since their last purchase. These customers are likely price-sensitive and only purchase when there's a significant discount.

*   **Brand Persona:** **"Savvy Sarah"** - A resourceful and budget-conscious individual (perhaps a student or young professional) who loves a good deal and is always on the lookout for discounts and promotions.

*   **Favorite Movie:** *Catch Me If You Can* (2002) - A clever and entertaining film about a con artist who outsmarts the authorities. Sarah appreciates the film's ingenuity, humor, and fast-paced plot.

*   **Purchasing Behavior:** Sarah is primarily motivated by price. She's likely to respond to flash sales, coupon codes, and clearance events. She's not particularly loyal to any one brand and will switch to a competitor if they offer a better deal.

*   **Email Headline:** "Major Savings Inside! Don't Miss Out on These Limited-Time Deals." (Highlights the urgency and potential savings.)

**Key Takeaways:**

*   **Personalization is Key:** Tailor your marketing messages to resonate with each cluster's unique motivations and preferences.
*   **Value Proposition:** Clearly communicate the value you offer to each customer segment, whether it's quality, convenience, exclusivity, or affordability.
*   **Re-engagement Strategies:** Focus on re-engaging lapsed customers (Clusters 4 and 5) with personalized offers and reminders of the positive experiences they had with your brand.
*   **Data-Driven Decisions:** Continuously analyze your customer data to refine your targeting and optimize your marketing campaigns.

By understanding your customers on a deeper level, you can create more effective marketing campaigns that drive engagement, loyalty, and ultimately, sales. Good luck!


In [27]:
def persona_builder(preamble):
    pre_text = """Pretend you're a creative strategist, given the following clusters """
    prompt = pre_text + preamble + "\n\n" + str.join("\n", cluster_info)
    #result = textgen_model.predict(
    #    prompt,
    #    max_output_tokens=1024,
    #    temperature=0.4,
    #    top_p=0.8,
    #    top_k=40,
    #)

    response = gemini_generate(prompt, print_stream=False)
    
    return prompt, response

## Now we can stick it behind a UI

<p align="center">
  <img alt="Conceptual Flow" src="slides/process6.png" width="100%">
</p>

In [28]:
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown(
    """
    ## Persona Builder and Marketing Bot
    """)
    with gr.Row():
        input_text = gr.Textbox(label="Pretend you're a creative strategist, given the following clusters: ", 
                                value=" analyse the following clusters and come up with creative brand persona for each that includes the detail of their favorite kind of sunglasses and a detailed description of an image including an animal wearing their favorite type of sunglasses:") 
    with gr.Row():
        generate = gr.Button("Generate Response")
    with gr.Row():
        label2 = gr.Textbox(label="Prompt")
    with gr.Row():
        label3 = gr.Markdown(label="Response generated by LLM")

    generate.click(persona_builder,input_text, [label2, label3])
    
demo.launch(share=False, debug=False)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


