<a href="https://colab.research.google.com/github/abdurrahmanrussel/gmc-product-highlights/blob/main/02_generate_highlights_using%20_gpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# =============================
# 1. Install Gemini Client
# =============================
!pip install -q -U google-genai

import os
from google import genai
from google.genai import types
import pandas as pd
from google.colab import files
import re

# =============================
# 2. Set your Gemini API Key (Secure Input)
# =============================
import getpass

# Prompt without echoing the key, so it is not left behind in the notebook output.
api_key = getpass.getpass("🔐 Enter your Gemini API Key: ")
# The key is exported before the client is built. Client() is called with no
# arguments, so it presumably reads the key from this environment variable —
# NOTE(review): confirm against the google-genai SDK documentation.
os.environ["GEMINI_API_KEY"] = api_key
# Module-level client used by generate_highlights_batch() below.
client = genai.Client()


# =============================
# 3. Upload CSV File
# =============================
# files.upload() opens the Colab file picker; the result is keyed by file name.
uploaded = files.upload()
if not uploaded:
    # Cancelling the picker yields an empty result; fail with a clear message
    # instead of an opaque IndexError on the lookup below.
    raise RuntimeError("No file was uploaded; re-run the cell and choose a CSV file.")

# Only the first uploaded file is used; any additional uploads are ignored.
input_file = next(iter(uploaded))
print(f"Uploaded file: {input_file}")

# Input product table; later steps read an 'id' column from it.
df = pd.read_csv(input_file)

# =============================
# 4. Helper Functions
# =============================
def extract_all_product_info(row):
    """Return the row's non-missing cells as a {column: text} dict.

    Cells for which pandas reports a missing value (NaN/None) are left out;
    every remaining value is converted with str().
    """
    info = {}
    for column in row.index:
        cell = row[column]
        if pd.isna(cell):
            continue  # missing cell: omit the key entirely
        info[column] = str(cell)
    return info

# Matches "language: <token>" anywhere in a cell and captures the whole
# alphabetic token, so a full language name can be told apart from a code.
_LANGUAGE_PATTERN = re.compile(r'language\s*:\s*([a-z]+)', re.IGNORECASE)

# English language names whose first two letters are NOT their ISO 639-1 code.
# Names that already truncate correctly (e.g. "arabic" -> "ar") need no entry.
_LANGUAGE_NAME_TO_CODE = {
    "bulgarian": "bg",
    "chinese": "zh",
    "croatian": "hr",
    "czech": "cs",
    "dutch": "nl",
    "estonian": "et",
    "german": "de",
    "greek": "el",
    "icelandic": "is",
    "indonesian": "id",
    "latvian": "lv",
    "lithuanian": "lt",
    "malay": "ms",
    "persian": "fa",
    "polish": "pl",
    "portuguese": "pt",
    "serbian": "sr",
    "slovak": "sk",
    "spanish": "es",
    "swedish": "sv",
    "turkish": "tr",
}


def detect_language_from_row(row):
    """
    Find the product language from a 'language: xx' marker in any cell.

    Columns are scanned in index order and the first cell containing the
    marker wins (case-insensitive). A two-letter token is returned as-is,
    lower-cased; anything after it such as '-US' is ignored because the
    token stops at the first non-letter. A longer token is treated as a
    language name: known names are mapped to their ISO 639-1 code (so
    'language: Swedish' gives 'sv', not 'sw'); unknown names fall back to
    their first two letters.

    Returns the 2-letter language code if found, else 'en'.
    """
    for col in row.index:
        match = _LANGUAGE_PATTERN.search(str(row[col]))
        if not match:
            continue
        token = match.group(1).lower()
        if len(token) == 2:
            return token  # already a code, e.g. 'en', 'ar', 'sv'
        return _LANGUAGE_NAME_TO_CODE.get(token, token[:2])
    return "en"  # default if no marker is present in any cell


# Number of highlight slots returned for every product.
_HIGHLIGHTS_PER_PRODUCT = 10

# Leading list markers the model sometimes adds despite the prompt: "-", "*",
# "•", or an ordinal such as "1." / "2)". An ordinal must be followed by
# whitespace so real content like "1.5 mm ..." or "250 g ..." is left intact.
_LIST_MARKER = re.compile(r"^(?:[-*•]+\s*|\d{1,2}[.)]\s+)+")


def _parse_highlights_response(text, expected_products):
    """Split the model's reply into one 10-item highlight list per product.

    Lines starting with '===' are product headers. When a header carries a
    number ('=== Product 2 ==='), its lines are stored at that product's
    position, so a skipped or empty product cannot shift later products onto
    the wrong row. Headers without a number advance to the next position.
    Text before the first header is ignored when headers exist; a reply with
    no headers at all is treated as belonging to the first product.

    Always returns exactly `expected_products` lists, each padded with ""
    or truncated to 10 entries.
    """
    lines = [ln.strip() for ln in (text or "").split("\n")]
    lines = [ln for ln in lines if ln]

    groups = [[] for _ in range(expected_products)]
    has_headers = any(ln.startswith("===") for ln in lines)

    position = -1
    # Without any header the whole reply is attributed to the first product.
    current = groups[0] if (groups and not has_headers) else None

    for ln in lines:
        if ln.startswith("==="):
            numbered = re.search(r"\d+", ln)
            position = int(numbered.group()) - 1 if numbered else position + 1
            # Out-of-range headers (e.g. an echoed "Product N") collect nothing.
            current = groups[position] if 0 <= position < expected_products else None
            continue
        if current is None:
            continue  # preamble or content under an out-of-range header
        cleaned = _LIST_MARKER.sub("", ln).strip()
        if cleaned:
            current.append(cleaned)

    blanks = [""] * _HIGHLIGHTS_PER_PRODUCT
    return [(group + blanks)[:_HIGHLIGHTS_PER_PRODUCT] for group in groups]


def generate_highlights_batch(product_infos, languages):
    """
    Generate highlights for a batch of products, preserving their language.

    product_infos: list of {column: value} dicts, one per product.
    languages: list of language codes corresponding to each product, in the
        same order as product_infos.

    Returns a list with exactly one entry per product, each a list of 10
    strings (missing highlights are ""). An empty batch returns [] without
    calling the API. Errors raised by the Gemini client are not caught here.
    """
    if not product_infos:
        return []

    products_text = "\n\n".join([
        "\n".join([f"{k}: {v}" for k, v in info.items()])
        for info in product_infos
    ])

    # Build the prompt with language info for each product
    prompt = f"""
    You are an e-commerce product data specialist.
For each product below, generate exactly 10 product highlights.
Each product has a language code given (e.g., 'en', 'ar', 'sv'), and all highlights must be in that language.

STRICT RULES:
- Each highlight must be factual, clean, meaningful, and variant-specific.
- Always use proper grammar and natural phrasing.
- Do not use vague words; expand vague phrases into specific, verifiable details.
- Do not mention stock, availability, or adult content.
- Always capitalize material names (e.g., "Calfskin Leather", "Stainless Steel").
- Each highlight must be one clean sentence fragment (no full sentences).
- No numbers, bullets, or markdown.
- Avoid repeating words across highlights.
- Skip measure entirely if missing or zero. Do not output "0" or any placeholder.

ADDITIONAL RULES TO FIX PREVIOUS ISSUES:
- Do NOT include brand names or company references. Focus only on product materials, design, and features.
- Avoid vague adjectives like "superior," "premium," "durable," "protective"; describe verifiable features like leather type, stitching, coating.
- Do NOT use marketing words like "boasts," "features," "offers," or "includes"; use functional/action phrasing (e.g., "Stitched with bonded nylon thread to resist fraying").
- Each highlight must contain only one product feature; split multi-feature highlights into separate highlights.
- Use natural phrasing for device compatibility: "Fits iPhone 14 Plus" or "Compatible with Galaxy S24 Ultra," avoid repeating product title.
- Weight highlights must connect to context, e.g., "Slim design, just 250 g (8.8 oz) for pocket-friendly carry."
- Color highlights must focus on a single aspect with undertone, patina, gloss, gradient, or perception.

ADDITIONAL ENHANCEMENTS FOR 10/10:
- Expand short highlights up to ~150 characters while remaining factual.
- Split multi-feature highlights into single-feature fragments.
- Vary phrasing across similar products to avoid templated repetition.
- Ensure language correctness for each highlight based on the product language code.
- Cover device fit, material, craftsmanship, protection, color, and weight when available.

=== SPECIAL RULES ===
Weight:
- Include both grams and ounces (1 g ≈ 0.035 oz) when present.
- Skip entirely if missing, zero, or empty.
- Connect weight naturally to product-specific usage, comfort, or portability.

Color:
- Focus strictly on color qualities.
- Each highlight must describe one color aspect with undertone, gloss, gradient, patina, or perception.
- Keep color highlights concise, 50–110 characters.

Products info:
{products_text}

Product languages (in order): {', '.join(languages)}

Output format:
=== Product 1 ===
highlight 1
...
highlight 10
=== Product 2 ===
highlight 1
...
=== Product N ===
    """

    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
        config=types.GenerateContentConfig(
            # A thinking budget of 0 is passed with every request.
            thinking_config=types.ThinkingConfig(thinking_budget=0)
        )
    )

    # response.text can be None when no text part is returned; the parser
    # treats that as an empty reply and yields blank highlights.
    return _parse_highlights_response(response.text, len(product_infos))

# =============================
# 5. Process Products in Batches
# =============================
batch_size = 3      # products sent to the model per request
max_products = 30   # only the first rows of the upload are processed
new_rows = []

# Slice from the capped frame so that a batch_size which does not divide
# max_products cannot pull in rows beyond the cap.
subset = df.head(max_products)

for start in range(0, len(subset), batch_size):
    batch_df = subset.iloc[start:start + batch_size]
    # Materialize the rows once; they are reused for info, language and id.
    batch_rows = [row for _, row in batch_df.iterrows()]

    product_infos = [extract_all_product_info(row) for row in batch_rows]
    # detect_language_from_row already defaults to 'en' when nothing is found.
    languages = [detect_language_from_row(row) for row in batch_rows]
    batch_highlights = list(generate_highlights_batch(product_infos, languages))

    # zip() below stops at the shorter sequence, so a short reply would
    # silently drop products from the output. Pad with blank highlights so
    # every id still gets a row, and make the gap visible.
    missing = len(batch_rows) - len(batch_highlights)
    if missing > 0:
        print(f"⚠️ Batch starting at row {start}: {missing} product(s) returned no highlights")
        batch_highlights += [[""] * 10 for _ in range(missing)]

    for row, highlights in zip(batch_rows, batch_highlights):
        new_rows.append([row['id']] + highlights)

# =============================
# 6. Save Output CSV
# =============================
# One 'id' column followed by product_highlight_1 .. product_highlight_10.
output_path = "products_highlights_30.csv"
columns = ["id", *(f"product_highlight_{n}" for n in range(1, 11))]

output_df = pd.DataFrame(new_rows, columns=columns)
# Written as UTF-8 with a BOM (utf-8-sig), without the DataFrame index.
output_df.to_csv(output_path, index=False, encoding="utf-8-sig")
print("✅ Saved products_highlights_30.csv")

# =============================
# 7. Download File
# =============================
# Hand the file to the browser, then read it back from disk for a preview.
files.download(output_path)

# Note: this rebinds `df` from the uploaded input to the saved output.
df = pd.read_csv(output_path)
print("🔍 Preview of output data:")
display(df.head(5))




🔐 Enter your Gemini API Key: ··········


Saving products_part_1.csv to products_part_1.csv
Uploaded file: products_part_1.csv
✅ Saved products_highlights_30.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

🔍 Preview of output data:


Unnamed: 0,id,product_highlight_1,product_highlight_2,product_highlight_3,product_highlight_4,product_highlight_5,product_highlight_6,product_highlight_7,product_highlight_8,product_highlight_9,product_highlight_10
0,shopify_US_6964827127963_40737345863835,مصنوع من الجلد من مصادر مستدامة لتقليل التأثير...,يتميز بمعدن غير قابل للصدأ مصمم للمتانة ومقاوم...,يتضمن علامة زودياك مضغوطة بعمق لضمان عدم تلاشي...,مصمم بحرفية دقيقة لتقديم مظهر أنيق ومتميز.,مناسب للارتداء اليومي أو للمناسبات الخاصة.,يزن 50 جرامًا (1.76 أونصة)، مما يضمن خفة الوزن...,يجمع بين الأناقة العصرية والتصميم الكلاسيكي.,نسيج ناعم من الجلد يوفر شعورًا مريحًا على البشرة.,يتميز بلون برج الدلو، مما يمنح إطلالة فريدة وش...,سوار مصمم ليناسب معصم البالغين بشكل مثالي.
1,shopify_US_7043384770715_41052252536987,Fremstillet af vandafvisende Waxed Canvas for ...,"Har detaljer i Full-Grain Leather, der tilføje...",Udstyret med et justerbart polstret skulderrem...,Designet med dedikerede kamera-indsatser til s...,"Indeholder et rummeligt hovedrum, der passer t...",Integreret bomuldslærred sidelomme giver nem a...,Velegnet til DSLR- og spejlløse kameraer samt ...,Åbningsmetoden med et dæksel sikrer indholdet ...,"Tasken vejer 1120 g (39.5 oz), en robust konst...","Kaki-farven præsenterer en alsidig, diskret jo..."
2,shopify_US_6964739539099_40736949797019,"Valmistettu monikerroksisesta laadukkaasta, ke...","Sisältää lasikoristeen, joka lisää hienostunee...",Suunniteltu Unisex-käyttöön sopimaan monipuoli...,Rannekoru on käsintehty huolellisesti jokaisen...,Horoskooppimerkki Jousimies on integroitu rann...,"Kevyt muotoilu, painaa vain 13 g (0.46 oz) muk...",Sopii aikuisille tuomaan tyyliä ja merkityksel...,Yhdistettävissä moniin asukokonaisuuksiin casu...,Materiaalien yhdistelmä tarjoaa kestävyyttä ja...,Jousimies-värinen sävy ilmentää astrologista t...
3,shopify_US_7663237366017_43531638767873,Passar iPhone 14-modeller med exakt form och å...,"Tillverkat av italienskt Kalvskinnsläder, nogg...",Har ett unikt krokodilmönster präglat i lädret...,Designat med en glansig finish som ger en sofi...,Fodrad med mikrofibermocka på insidan för ett ...,Handgjord konstruktion som säkerställer precis...,"Kompatibelt med Qi trådlös laddning, vilket mö...",Inkluderar något upphöjda kanter runt skärmen ...,Har en slank profil som bidrar till en bekväm ...,"Väger endast 220 g (7.8 oz), vilket gör det lä..."
4,shopify_US_5741825032347_36530108399771,"Fits iPhone 12 models, ensuring precise cutout...","Constructed from Alcantara material, known for...","Finished in a deep, vibrant Blue, offering a r...","Features a slim profile, minimizing bulk for a...","Designed for precise fit, enhancing the phone’...",Offers tactile button covers for responsive us...,"Provides a comfortable, non-slip grip due to t...","Crafted to maintain a lightweight feel, adding...",Engineered for easy installation and removal w...,Complements the phone's aesthetics with its sl...
