In [8]:
from openai import OpenAI
import os
import base64
import requests

from dotenv import load_dotenv
# Load API key
_ = load_dotenv()

In [9]:
# Single OpenAI client shared by the GPT helper functions in this notebook.
# NOTE(review): presumably authenticates with the API key that load_dotenv()
# placed in the environment in the first cell — confirm.
client = OpenAI()

In [10]:
from pathlib import Path
                               
# Output directory for generated CSV files: the notebook's working directory.
BASE = Path.cwd()
# No-op when the directory already exists; kept so BASE can later be pointed
# at a directory that has not been created yet.
BASE.mkdir(parents=True, exist_ok=True)


def save_df(df, name):
    """Write ``df`` to ``BASE / name`` as CSV without the index and print the path.

    Args:
        df: Object exposing a pandas-style ``to_csv(path, index=...)`` method.
        name: File name (relative to ``BASE``) to write to.

    Returns:
        None. The destination path is reported on stdout.
    """
    target = BASE.joinpath(name)
    df.to_csv(target, index=False)
    print("✅  Saved →", target)

In [14]:
import json
import random
import pandas as pd

# Reference data used to build the synthetic users.

# One industry per user (25 entries, all distinct).
industries = [
    "FinTech",
    "Healthcare",
    "E‑commerce",
    "Aerospace",
    "Gaming",
    "EdTech",
    "Agriculture",
    "Automotive",
    "Fashion",
    "Sports Analytics",
    "Digital Art",
    "Cloud Security",
    "Hospitality",
    "Real Estate",
    "Media",
    "Food Tech",
    "Robotics",
    "Pharma",
    "Non‑profit",
    "Energy Trading",
    "AI Research",
    "Legal Tech",
    "Logistics",
    "Insurance",
    "Telecom",
]

# Product categories the model may choose from for each user.
catalog = [
    "Electronics",
    "Fashion",
    "Home & Garden",
    "Sports",
    "Books",
    "Beauty",
    "Toys",
    "Food & Grocery",
    "Health",
    "Automotive",
]

# Offer IDs OFFER01 .. OFFER10 (zero-padded to two digits).
offer_library = [f"OFFER{number:02d}" for number in range(1, 11)]


def gpt_user_capsule(role, industry, max_attempts=3):
    """Ask GPT-4o for one synthetic user profile and return it as a dict.

    The prompt requests a LinkedIn headline, liked tweets, a three-step
    journey log, dwell times, SKU views, two ground-truth categories drawn
    from ``catalog`` and one offer ID drawn from ``offer_library``.

    A reply is accepted only if it parses as a JSON object containing every
    requested key; otherwise the request is retried.

    Args:
        role: Job title to embed in the headline.
        industry: Industry to embed in the headline.
        max_attempts: Maximum number of API calls before giving up.

    Returns:
        dict with at least the keys linkedin_headline, liked_tweets, event_1,
        event_2, event_3, dwell_times, sku_views, cat_gt_1, cat_gt_2 and
        best_offer_id.

    Raises:
        ValueError: if no attempt produced a JSON object with all keys.
    """
    required_keys = (
        "linkedin_headline", "liked_tweets",
        "event_1", "event_2", "event_3",
        "dwell_times", "sku_views",
        "cat_gt_1", "cat_gt_2", "best_offer_id",
    )
    prompt = (
        f"Create one fictitious LinkedIn headline that *must* read like "
        f"\"{role} | {industry}\". Generate three liked tweets *not* about sustainability "
        f"unless the industry logically implies it. Also invent a 3‑step e‑commerce "
        f"journey log (event_1..3), dwell_times (3 floats), sku_views (3 ints). "
        f"Finally, pick TWO product categories that would interest this user "
        f"from the catalog {catalog} and the single best offer ID from {offer_library}. "
        f"Return ONLY valid JSON with keys: linkedin_headline, liked_tweets, "
        f"event_1, event_2, event_3, dwell_times, sku_views, cat_gt_1, cat_gt_2, best_offer_id."
    )

    for attempt in range(1, max_attempts + 1):
        resp = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.8,
            response_format={"type": "json_object"},
        )

        try:
            # content may be None (e.g. a refusal), which makes json.loads
            # raise TypeError; malformed JSON raises a ValueError subclass.
            data = json.loads(resp.choices[0].message.content)
        except (TypeError, ValueError, AttributeError, IndexError) as err:
            print(f"Attempt {attempt} failed: {err}")
            continue

        if isinstance(data, dict):
            missing = [key for key in required_keys if key not in data]
            if not missing:
                return data
            problem = f"missing keys {missing}"
        else:
            problem = "reply is not a JSON object"
        print(f"Attempt {attempt} failed: {problem}")

    raise ValueError(
        f"Failed to generate a valid user capsule for {role} | {industry} "
        f"after {max_attempts} attempts"
    )

# Accumulator for the generated user records (one dict per user).
rows = []

# Job titles sampled at random for each synthetic user, grouped by function.
roles = [
    # Tech & Engineering
    "Chief Technology Officer",
    "Software Engineer",
    "DevOps Architect",
    "AI Research Scientist",
    "Cybersecurity Analyst",
    "Blockchain Developer",
    # Product & Design
    "Product Manager",
    "UX Lead",
    "Interaction Designer",
    "Design Systems Advocate",
    "Game Producer",
    "Hardware Product Owner",
    # Data & Analytics
    "Data Scientist",
    "Machine Learning Engineer",
    "Business Intelligence Lead",
    "Quantitative Analyst",
    "Customer Insights Manager",
    # Marketing & Growth
    "Growth Marketer",
    "Digital Campaign Strategist",
    "SEO Specialist",
    "Content Marketing Director",
    "Social Media Manager",
    # Sales & Customer
    "VP of Sales",
    "Account Executive",
    "Customer Success Lead",
    "Partnerships Manager",
    "Solutions Consultant",
    # Operations & Finance
    "Chief Operating Officer",
    "Supply‑Chain Analyst",
    "Finance Controller",
    "Revenue Operations Manager",
    "Procurement Specialist",
    # HR & People Ops
    "Chief People Officer",
    "Recruiting Coordinator",
    "L&D Program Manager",
    # Industry-specific / misc
    "Biotech Lab Manager",
    "Pharmaceutical Regulatory Lead",
    "Renewable Energy Project Engineer",
    "Aerospace Systems Analyst",
    "Esports Team Coach",
    "Legal Tech Counsel",
]

# Give every user a distinct industry: shuffle the list in place, then walk it
# in order. Iterating over the list itself (instead of a hard-coded
# range(1, 26)) keeps the user count equal to len(industries), so editing the
# list can neither raise IndexError nor silently skip entries.
random.shuffle(industries)
for uid, industry in enumerate(industries, start=1):
    role = random.choice(roles)  # roles are drawn with replacement, so they may repeat
    data = gpt_user_capsule(role, industry)
    data["user_id"] = uid
    rows.append(data)

# Flatten the records into one row per user and persist them.
personalization_df = pd.json_normalize(rows)
save_df(personalization_df, "personalization_data.csv")

✅  Saved → /Commjhub/jupyterhub/home/jaasminew/comm4190_S25/comm4190_S25_Research_Paper/personalization_data.csv


In [17]:
# Prompt template for one campaign brief. The literal "{CATEGORY}" token is
# filled in with str.replace() inside gpt_campaign_brief (not str.format).
# The key names and the list sizes requested here (five spec_bullets, six
# seo_keywords) are the same ones gpt_campaign_brief validates in the reply.
prompt_tmpl = """
Create ONE campaign brief for a product in the category: {CATEGORY}

Return as JSON with these keys:
- product (a specific product in the {CATEGORY} category - be very specific and creative with the name)
- theme (unique selling proposition)
- value_prop (≤12 words describing key benefit)
- target_audience (≤8 words describing ideal customer)
- spec_bullets (EXACTLY five technical specifications as a JSON array)
- seo_keywords (EXACTLY six SEO keywords as a JSON array)

Make the product specific, not generic. For example, instead of "Headphones", use "NightOwl Noise-Cancelling DJ Headphones".
"""

# Product categories to generate campaigns for: one brief is requested per
# entry, and the entry's 1-based position becomes its campaign_id.
product_categories = [
    "Smart Home Technology",
    "Fitness Equipment",
    "Fashion Accessories",
    "Gourmet Food",
    "Personal Care",
    "Office Supplies",
    "Pet Products",
    "Outdoor Gear",
    "Educational Toys",
    "Home Décor"
]

def _campaign_brief_problem(data):
    """Return why ``data`` is not a usable campaign brief, or None if it is valid."""
    if not isinstance(data, dict):
        return "reply is not a JSON object"

    required_fields = ("product", "theme", "value_prop", "target_audience",
                       "spec_bullets", "seo_keywords")
    missing = [field for field in required_fields if field not in data]
    if missing:
        return f"missing fields {missing}"

    for field, expected_len in (("spec_bullets", 5), ("seo_keywords", 6)):
        items = data[field]
        if not isinstance(items, list) or len(items) != expected_len:
            return f"{field} must be a list of exactly {expected_len} items"
        # The caller flattens these lists with ", ".join(...), which raises
        # TypeError on non-string elements, so reject them here.
        if not all(isinstance(item, str) for item in items):
            return f"{field} must contain only strings"
    return None


def gpt_campaign_brief(category, max_attempts=3):
    """Ask GPT-4o for one campaign brief in ``category`` and return it as a dict.

    The reply must be a JSON object with the keys product, theme, value_prop,
    target_audience, spec_bullets (list of five strings) and seo_keywords
    (list of six strings). Invalid replies are reported on stdout and the
    request is retried.

    Args:
        category: Product category substituted into ``prompt_tmpl``.
        max_attempts: Maximum number of API calls before giving up.

    Returns:
        The validated brief as a dict.

    Raises:
        ValueError: if no attempt produced a valid brief.
    """
    prompt = prompt_tmpl.replace("{CATEGORY}", category)

    for attempt in range(1, max_attempts + 1):
        resp = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            response_format={"type": "json_object"},
        )

        try:
            # content may be None (TypeError); malformed JSON raises a
            # ValueError subclass.
            data = json.loads(resp.choices[0].message.content)
        except (TypeError, ValueError, AttributeError, IndexError) as e:
            print(f"Attempt {attempt} failed: {e}")
            continue

        problem = _campaign_brief_problem(data)
        if problem is None:
            return data
        # Report schema failures too, so a silent retry loop cannot hide them.
        print(f"Attempt {attempt} failed: {problem}")

    # Every attempt was rejected.
    raise ValueError(
        f"Failed to generate valid campaign brief for {category} after {max_attempts} attempts"
    )

# Request one campaign brief per product category.
rows = []
for cid, category in enumerate(product_categories, start=1):
    print(f"Generating campaign for category: {category}")
    data = gpt_campaign_brief(category)

    # campaign_id is the category's 1-based position in product_categories.
    data["campaign_id"] = cid

    # Collapse each list field into one comma-separated string so it occupies
    # a single CSV cell.
    for list_field in ("spec_bullets", "seo_keywords"):
        data[list_field] = ", ".join(data[list_field])

    rows.append(data)

# One row per campaign, written to the working directory.
content_df = pd.DataFrame(rows)
content_df.to_csv("content_data.csv", index=False)
print("CSV generation complete!")

Generating campaign for category: Smart Home Technology
Generating campaign for category: Fitness Equipment
Generating campaign for category: Fashion Accessories
Generating campaign for category: Gourmet Food
Generating campaign for category: Personal Care
Generating campaign for category: Office Supplies
Generating campaign for category: Pet Products
Generating campaign for category: Outdoor Gear
Generating campaign for category: Educational Toys
Generating campaign for category: Home Décor
CSV generation complete!
