In [22]:
import json
import base64
import vertexai
from google.cloud import bigquery
from vertexai.generative_models import GenerativeModel, Part, SafetySetting, GenerationConfig

In [61]:
# Google Cloud project and region passed to the BigQuery client and vertexai.init().
project_id, location = "vtxdemos", "us-central1"

# Dataset and table that the listing rows are queried from.
dataset_id, table_id = "demos_us", "etsy-embeddings-full-version1-title"

In [62]:
# JSON schema the chat model's reply is constrained to:
#   answer            - free-text answer
#   questions_to_ask  - three categories, each a list of suggested follow-up questions
#   category_picked   - one boolean flag per possible answer source
response_schema = {
    "type": "object",
    "properties": {
        "answer": {"type": "string"},
        "questions_to_ask": {
            "type": "object",
            "properties": {
                # A fresh sub-schema dict is built for every category (no shared objects).
                category: {"type": "array", "items": {"type": "string"}}
                for category in ("category_1", "category_2", "category_3")
            },
        },
        "category_picked": {
            "type": "object",
            "properties": {
                source: {"type": "boolean"}
                for source in (
                    "local_context_rag",
                    "google_search_ground",
                    "similar_products_rag",
                )
            },
        },
    },
}

In [63]:
# BigQuery client bound to the configured project; used below to load the listings table.
bq_client = bigquery.Client(project=project_id)
# Initialise the Vertex AI SDK with the same project and the configured region.
vertexai.init(project=project_id, location=location)

In [64]:
# Load every column of the listings table except the two embedding columns.
# The query has no ORDER BY or LIMIT: the whole table is fetched and row order is
# whatever BigQuery returns.
listings_sql = f"SELECT * except (text_embedding, ml_generate_embedding_result) FROM `{project_id}.{dataset_id}.{table_id}`"
query_job = bq_client.query(listings_sql)
df = query_job.to_dataframe()

In [65]:
# Inspect the `content` text stored under index 0; the same value is used as the
# listing context in the prompts further down.
df["content"][0]

'listing id: 256400053 title of the listing: DOG CROSSING Sign novelty gift animals pets the price in US dollars: 8.98 the description of the listing: This is a 12&quot; tall and 12&quot; wide diamond shape sign made from weatherproof plastic with premium grade vinyl. The sign is perfect for indoor or outdoor use, made to last at least 3-4 years outside. The sign has rounded corners and 2 holes pre-drilled for ea tags used for filter: novelty sign, funny sign, tin sign, street sign, parking sign, gag gift the materials item was made of: pvc plastic, the attributes: itemdimensionsunit: in; source: web; isnipsa: 0; itemweight: 3; whenmade: made_to_order; whomade: i_did; itemweightunit: oz; listingtype: physical; shouldautorenew: 1; isnotcustomizable: 1; listing_source_detail: web; issupply: 0, the price in usd: 8.98'

In [66]:
# System prompt for the "Chatsy" assistant. It asks the model to (1) categorize each
# question as local_context_rag / google_search_ground / similar_products_rag,
# (2) answer it, and (3) suggest 2 follow-up questions per category.
system_instruction = """
**You are Chatsy, a friendly and helpful assistant for Etsy customers.** Your primary goal is to provide satisfying answers based on the specific context of their questions.

**Tasks:**

1. **Categorize:** Analyze each user question and determine if it's best answered using:
    * **local_context_rag:** Information from the current listing or Etsy's internal data (title, description, materials, tags, etc.).
    * **google_search_ground:**  Broader knowledge found on the internet, related to the product but beyond the explicit listing details.
      Think about potential applications, material properties, comparisons to similar items, usage scenarios, care instructions, or historical/cultural context.
      These questions should pique the customer's interest and encourage them to explore the product further.
    * **similar_products_rag:** Information from similar products or Etsy's internal data (title, description, materials, tags, etc.).

2. **Respond:**
    * **Provide the answer:** Use ONLY the appropriate source (local_context_rag or google_search_ground, similar_products_rag) to give a concise, accurate response.
    * **Suggest further questions:**  Offer 2 additional questions per each category (local_context_rag and google_search_ground, similar_products_rag) that the **user might want to ask** related to the topic or listing.
    * **Provide the category you picked:** Indicate whether you used "local_context_rag", "google_search_ground" or "similar_products_rag" to answer the question.

**Rules:**
* **Be friendly and casual:** Write like you're chatting with a friend, no need for formal explanations.
* **Honesty is key:** If you don't know the answer based on the available information, say so politely and suggest potentially relevant questions the user might want to ask.
* **Question management:**
    * Start with the preloaded questions (if any).
    * After answering a question, remove it from the list.
    * Generate 2 NEW questions (per each category) that the user might find helpful.
    * Base new questions on the context of the conversation and the product information.

**Extra reasoning thoughts:**
* **Category Selection Accuracy:** If you recommend questions under category 2 and your next iteration you get that question you should follow categorization as it is; in this case google_search_ground.
The same applies for the other categories.
"""
# Gemini model configured with the system prompt defined in this cell.
model = GenerativeModel(
    "gemini-1.5-flash-001",
    system_instruction=system_instruction
    )
# Chat session object; the next cell sends the user question through it.
chat = model.start_chat()

In [67]:
# Prompt = the listing text (labelled as local context) followed by the customer's question.
query = f"""
local_context_rag:
{df["content"][0]}

user-question: what is the best way to get started?
"""

# Ask for a JSON reply that conforms to `response_schema`.
json_generation_config = GenerationConfig(
    response_mime_type="application/json",
    response_schema=response_schema,
)

# NOTE(review): `re` is also the name of the stdlib regex module; the name is kept
# because the following cell reads `re.text`.
re = chat.send_message(query, generation_config=json_generation_config)

In [68]:
# Parse the model's JSON reply into a dict.
# NOTE(review): `_` is also the name IPython uses for the previous cell's output;
# a later cell reads this dict through `_`, so a descriptive name would be safer.
_ = json.loads(re.text)
# Show the suggested follow-up questions, grouped by category.
_["questions_to_ask"]

{'category_1': ['Where do you think the sign would look best in your home? ',
  'Do you have any other pets that might enjoy their own personalized signs?'],
 'category_2': ["What are some other funny or creative ways you can use this sign to brighten someone's day? ",
  "Are there any other dog-themed decorations you'd like to add to your space?"],
 'category_3': ['Do they have any other types of signs with different messages or themes? ',
  'What other funny signs do they offer?']}

In [69]:
# Separate Gemini model instance (no system instruction) used to condense the listing
# plus one suggested question into a single text that is embedded in a later cell.
llm_model = GenerativeModel("gemini-1.5-flash-001")

# The f-string fields already hold strings, so no explicit str() conversion is needed.
# The prompt's leading whitespace is part of the text sent to the model and is kept as is.
_re = llm_model.generate_content(
    [
        f"""
        By using both the next Context and the Question do as follows.

        Tasks:
        * **summary_text:** Summarize the context and the question into a concise summary.
        * **concise_text:** From the summary, create a new text perfect to match with other summary listings.

        Rules:
        Only 1 text as output.

        Context:
        {df["content"][0]}

        Question:
        {_["questions_to_ask"]["category_3"][0]}

        Response (concise_text only) as plain text:

     """
     ]
)

print(_re.text)

This listing is for a 12" x 12" diamond-shaped "DOG CROSSING" sign made of weatherproof plastic. The question asks if the seller offers other types of signs with different messages or themes. 



In [96]:
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
# Load the "text-embedding-004" model; the next cell uses it to embed the generated text.
text_emb_model = TextEmbeddingModel.from_pretrained("text-embedding-004")

In [97]:
# Embed the LLM-written text. Only one text is submitted, so the first (and only)
# result carries the vector used for the similarity search.
texts = [_re.text]
inputs = list(
    map(lambda text: TextEmbeddingInput(text, "RETRIEVAL_DOCUMENT"), texts)
)
embedding_results = text_emb_model.get_embeddings(inputs)
embeddings = embedding_results[0].values

In [98]:
from vertexai.resources.preview import feature_store
# Handle to the feature view addressed by its full resource name.
fv_text = feature_store.FeatureView(
    name="projects/254356041555/locations/us-central1/featureOnlineStores/fs_etsy/featureViews/fs_etsy_view_text_emb_version1"
)

# Look up the 5 nearest neighbours of the query embedding; return_full_entity=True
# makes each neighbour carry its feature values in addition to the distance.
search_response = fv_text.search(
    embedding_value=embeddings,
    neighbor_count=5,
    return_full_entity=True,
)
r = search_response.to_dict()

INFO:vertexai.resources.preview.feature_store.feature_view:Public endpoint for the optimized online store fs_etsy is 1711658129470521344.us-central1-254356041555.featurestore.vertexai.goog


In [None]:
# Display the raw search-result dict (all neighbours with their full entities).
r

In [74]:
# Show the `public_cdn_link` value of the first row of `df`.
df["public_cdn_link"].iloc[0]

'https://gcpetsy.sonrobots.net/etsy-version1/il_570xN.869542867_u961.jpg'

In [85]:
# Drill into the result structure: first feature entry of the first neighbour.
# This is the nesting that response_process() walks.
r["neighbors"][0]["entity_key_values"]["key_values"]["features"][0]

{'name': 'llm_title', 'value': {'string_value': 'Buffalo Bills Canvas Art \n'}}

In [99]:
import pandas as pd

def response_process(result, multimodal: bool) -> pd.DataFrame:
    """Flatten a feature-view search result into one DataFrame row per neighbour.

    Args:
        result: Dict form of the search response. Each item of ``result["neighbors"]``
            must provide ``"distance"`` and
            ``["entity_key_values"]["key_values"]["features"]`` (a list of
            ``{"name": ..., "value": {...}}`` entries).
        multimodal: When True the distance is stored in an ``image_distance``
            column, otherwise in ``text_distance``.

    Returns:
        A DataFrame with the distance column followed by one column per feature
        name. The embedding features ``ml_generate_embedding_result`` and
        ``text_embedding`` are skipped. The frame is empty when ``result`` has no
        neighbours.
    """
    skipped_features = {"ml_generate_embedding_result", "text_embedding"}
    distance_column = "image_distance" if multimodal else "text_distance"

    rows = []
    # Tolerate a result dict without a "neighbors" key: treat it as zero matches
    # instead of raising KeyError.
    for neighbor in result.get("neighbors", []):
        row = {distance_column: neighbor["distance"]}

        for feature in neighbor["entity_key_values"]["key_values"]["features"]:
            name = feature["name"]
            if name in skipped_features:
                continue
            # A value is a {type_tag: payload} mapping such as {"string_value": "..."}.
            # Keep the payload (the last one if there are several) and use the same
            # placeholder whether the mapping is missing or empty.
            payloads = list(feature.get("value", {}).values())
            row[name] = payloads[-1] if payloads else "no values"

        rows.append(row)

    return pd.DataFrame(rows)

In [100]:
# Flatten the text-embedding search result into a DataFrame.
# NOTE(review): this rebinds `df`, which until now held the BigQuery listings frame;
# re-running earlier cells that read `df["content"]` after this one will see the
# neighbour table instead.
df = response_process(r, False)

In [102]:
# Show the `public_cdn_link` value of the second row (position 1) of `df`.
df["public_cdn_link"].iloc[1]

'https://gcpetsy.sonrobots.net/etsy-version1/il_570xN.863823355_r6sk.jpg'