In [1]:
import pandas as pd

In [4]:
import pandas as pd
import re

# Load the CSV of ads (including the `openai_summary` column) into a DataFrame
SUMMARY_CSV_PATH = 'facebook_ads_electric_vehicles_with_openai_summaries.csv'
df = pd.read_csv(SUMMARY_CSV_PATH)

def extract_themes(summary):
    """List the bold ``**Theme:**`` headers that occur in a summary.

    Args:
        summary: Summary text, or a missing value (None / NaN).

    Returns:
        Header names in order of appearance, with surrounding whitespace
        trimmed. A missing summary gives an empty list.
    """
    if pd.isna(summary):
        return []

    # A header is bold text whose last character before the closing ** is a colon.
    header_re = re.compile(r'\*\*([^*]+):\*\*')
    return [found.group(1).strip() for found in header_re.finditer(summary)]

def extract_specific_theme_content(summary, theme_name):
    """Return the text that follows a ``**<theme_name>:**`` header in a summary.

    The section runs from the header up to the next ``**Header:**`` marker or
    the end of the text. Inline bold inside the section (for example
    ``**CUPRA Born**``) is kept as part of the content rather than ending it.

    Args:
        summary: Summary text. Any non-string value (None, NaN, ...) yields "".
        theme_name: Header text without the asterisks or trailing colon. It is
            matched literally (regex metacharacters are escaped).

    Returns:
        The section text with every run of whitespace collapsed to a single
        space and both ends trimmed, or "" when the header is not present.
    """
    if not isinstance(summary, str):
        return ""

    pattern = (
        rf'\*\*{re.escape(theme_name)}:\*\*\s*'
        r'(.*?)'
        # Stop only at the next header (bold text ending in a colon), using the
        # same header shape as extract_themes, not at any bold marker.
        r'(?=\*\*[^*]+:\*\*|$)'
    )
    match = re.search(pattern, summary, re.DOTALL)
    if match is None:
        return ""

    # \s+ already covers newlines, so one substitution is enough.
    return re.sub(r'\s+', ' ', match.group(1)).strip()

# Every theme header of each summary, as a list per row
df['all_themes'] = df['openai_summary'].apply(extract_themes)

# Output column -> theme header to pull out of the summary text.
# You can add more specific themes as needed.
theme_columns = {
    'brand_product_focus': 'Brand & Product Focus',
    'exterior_design': 'Exterior Design',
    'key_message_slogan': 'Key Message/Slogan',
    'performance': 'Performance',
    'range_charging': 'Range/Charging',
    'interior_comfort': 'Interior/Comfort',
    'safety_assistance': 'Safety/Assistance',
    'connectivity_digital': 'Connectivity/Digital',
    'infotainment_audio': 'Infotainment/Audio',
}

# One new column per theme, created in the order listed above
for column_name, theme_name in theme_columns.items():
    df[column_name] = df['openai_summary'].apply(
        extract_specific_theme_content, args=(theme_name,)
    )

# Preview: the first five rows that actually have a summary
print("Sample of extracted themes:")
print("\nAll themes found:")
has_summary = df['openai_summary'].notna()
sample_df = df[has_summary].head(5)
for idx, row in sample_df.iterrows():
    brand_text = row['brand_product_focus']
    exterior_text = row['exterior_design']
    print(f"\nRecord {idx}:")
    print(f"All themes: {row['all_themes']}")
    # Empty strings mean the theme was not found, so those lines are skipped
    if brand_text:
        print(f"Brand & Product Focus: {brand_text[:100]}...")
    if exterior_text:
        print(f"Exterior Design: {exterior_text[:100]}...")

# Union of the theme names seen in any row
all_unique_themes = set().union(*df['all_themes'].dropna())

print("\n\nAll unique themes found in the dataset:")
for theme in sorted(all_unique_themes):
    print(f"- {theme}")

# Tally how many times each theme appears (keys stay in first-seen order)
theme_counts = {}
for themes_list in df['all_themes'].dropna():
    for theme in themes_list:
        theme_counts.setdefault(theme, 0)
        theme_counts[theme] += 1

print("\n\nTheme frequency:")
# Most frequent first; the sort is stable, so ties keep first-seen order
for theme, count in sorted(theme_counts.items(), key=lambda pair: -pair[1]):
    print(f"{theme}: {count}")


Sample of extracted themes:

All themes found:

Record 0:
All themes: ['Brand & Product Focus', 'Key Message/Slogan', 'Dealership/Advertiser', 'Exterior Design']
Brand & Product Focus: The advertisement promotes the Cupra Born, an all-electric vehicle, targeting consumers interested i...
Exterior Design: The ad highlights the vehicle's sleek, modern, and sporty design set against an urban backdrop, emph...

Record 1:
All themes: ['Brand & Product Focus', 'Key Message/Slogan', 'Dealership/Advertiser', 'Performance', 'Exterior Design', 'Overall Theme']
Brand & Product Focus: The advertisement promotes the CUPRA Born, an all-electric vehicle, targeting a contemporary, urban ...
Exterior Design: The visual theme emphasizes a sleek, modern aesthetic with a stylish, dynamic presentation of the ca...

Record 2:
All themes: ['Brand & Product Focus', 'Key Message/Slogan', 'Dealership/Advertiser']
Brand & Product Focus: The advertisement promotes the CUPRA Born, an all-electric vehicle, targetin

In [7]:
# Display the DataFrame, which now includes `all_themes` and the per-theme columns.
# NOTE(review): consider `df.head()` to keep the rendered output small.
df

Unnamed: 0,ad_archive_id,page_id,page_name,advertiser_name,advertiser_id,ad_title,ad_text,ad_caption,cta_text,cta_type,...,all_themes,brand_product_focus,exterior_design,key_message_slogan,performance,range_charging,interior_comfort,safety_assistance,connectivity_digital,infotainment_audio
0,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,"[Brand & Product Focus, Key Message/Slogan, De...","The advertisement promotes the Cupra Born, an ...","The ad highlights the vehicle's sleek, modern,...","""Ετοιμοπαράδοτο Born"" (Ready-to-deliver Born) ...",,,,,,
1,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,"[Brand & Product Focus, Key Message/Slogan, De...","The advertisement promotes the CUPRA Born, an ...","The visual theme emphasizes a sleek, modern ae...","""Ετοιμοπαράδοτο Born"" (Ready-to-deliver Born) ...",The ad highlights that the CUPRA Born is desig...,,,,,
2,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,"[Brand & Product Focus, Key Message/Slogan, De...","The advertisement promotes the CUPRA Born, an ...",,"""Ετοιμοπαράδοτο Born"" (Ready-to-deliver Born) ...",,,,,,
3,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,"[Brand & Product Focus, Key Message/Slogan, De...","The advertisement promotes the Cupra Born, an ...","The ad highlights the vehicle's sleek, modern,...","""Ετοιμοπαράδοτο Born"" (Ready-to-deliver Born) ...",,,,,,
4,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,"[Brand & Product Focus, Key Message/Slogan, De...","The advertisement promotes the CUPRA Born, an ...","The visual theme emphasizes a sleek, modern ae...","""Ετοιμοπαράδοτο Born"" (Ready-to-deliver Born) ...",The ad highlights that the CUPRA Born is desig...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7142,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,"[Brand & Product Focus, Key Message/Slogan, De...",The advertisement promotes the Hyundai Ioniq 5...,"The ad features a vibrant, beach-themed backgr...","""¡Descuento Ardiente en el Hyundai IONIQ 5 SE ...",Not mentioned in the ad.,,,,,
7143,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,"[Brand & Product Focus, Key Message/Slogan, De...",The advertisement promotes the Hyundai Ioniq 5...,"The ad features a vibrant, beach-themed backgr...","""¡Descuento Ardiente en el Hyundai IONIQ 5 SE ...",Not mentioned in the ad.,,,,,
7144,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,"[Brand & Product Focus, Key Message/Slogan, De...",The advertisement promotes the Hyundai Ioniq 5...,The ad highlights the new 2025 Hyundai Ioniq 5...,"""¡Descuento Ardiente en el Hyundai IONIQ 5 SE ...",Not mentioned in the ad.,,,,,
7145,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,"[Brand & Product Focus, Key Message/Slogan, De...",The advertisement promotes the Hyundai Ioniq 5...,"The ad features a vibrant, beach-themed visual...","""¡Descuento Ardiente en el Hyundai IONIQ 5 SE ...",Not mentioned in the ad.,,,,,


In [6]:
# Display the `matched_car_models` column as a Series.
df["matched_car_models"]

0            Cupra Born
1            Cupra Born
2            Cupra Born
3            Cupra Born
4            Cupra Born
             ...       
7142    Hyundai Ioniq 5
7143    Hyundai Ioniq 5
7144    Hyundai Ioniq 5
7145    Hyundai Ioniq 5
7146    Hyundai Ioniq 5
Name: matched_car_models, Length: 7147, dtype: object

In [39]:
import os

# Proxy credentials must not be stored in the notebook. Export the full proxy
# URL (scheme://user:password@host:port) as SCRAPER_PROXY_URL before starting
# the kernel. The credential that was previously committed here should be
# treated as leaked and rotated.
_proxy_url = os.environ.get('SCRAPER_PROXY_URL', '')

# Same proxy for both schemes; an empty mapping means no proxy is configured.
proxies = {'http': _proxy_url, 'https': _proxy_url} if _proxy_url else {}

In [50]:
import requests

import json
import os

# Browser session cookies (Cloudflare clearance tokens, session ids) are
# secrets and should not be pasted into the notebook. Export them as a JSON
# object, e.g. ZAP_COOKIES_JSON='{"cf_clearance": "...", "JSESSIONID": "..."}'.
# When the variable is unset, no cookies are sent.
cookies = json.loads(os.environ.get('ZAP_COOKIES_JSON', '{}'))

# Request headers imitating a Chrome 137 mobile browser (Android 6.0 / Nexus 5).
# NOTE(review): 'referer' points at imovelweb.com.br while the request in this
# cell targets zapimoveis.com.br and 'sec-fetch-site' says 'same-origin' -
# confirm this combination is intended.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'en-US,en;q=0.9',
    'cache-control': 'max-age=0',
    'priority': 'u=0, i',
    'referer': 'https://www.imovelweb.com.br/',
    'sec-ch-ua': '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
    'sec-ch-ua-arch': '""',
    'sec-ch-ua-bitness': '"64"',
    'sec-ch-ua-full-version': '"137.0.7151.120"',
    'sec-ch-ua-full-version-list': '"Google Chrome";v="137.0.7151.120", "Chromium";v="137.0.7151.120", "Not/A)Brand";v="24.0.0.0"',
    'sec-ch-ua-mobile': '?1',
    'sec-ch-ua-model': '"Nexus 5"',
    'sec-ch-ua-platform': '"Android"',
    'sec-ch-ua-platform-version': '"6.0"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Mobile Safari/537.36',
    # No 'cookie' header here: cookies are passed separately through the
    # `cookies=` argument of the request.
}

# Fetch the listing page. An explicit timeout is required because requests
# otherwise waits indefinitely for a server that never answers.
response = requests.get(
    'https://www.zapimoveis.com.br/imovel/venda-apartamento-2-quartos-mobiliado-praia-da-cal-torres-rs-177m2-id-2809463489/?source=showcase%2Crp',
    cookies=cookies,
    headers=headers,
    timeout=30,  # seconds
)

In [51]:
# Look for the word "Avenida" in the response body; str.find returns the index
# of the first occurrence, or -1 when the text is absent.
response.text.find("Avenida")

-1

In [53]:
# Show the raw response body for inspection.
# NOTE(review): this prints the whole HTML document; consider slicing it
# (e.g. the first few hundred characters) to keep the notebook output small.
response.text

'<!DOCTYPE html>\n<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->\n<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->\n<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->\n<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->\n<head>\n<title>Attention Required! | Cloudflare</title>\n<meta charset="UTF-8" />\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n<meta http-equiv="X-UA-Compatible" content="IE=Edge" />\n<meta name="robots" content="noindex, nofollow" />\n<meta name="viewport" content="width=device-width,initial-scale=1" />\n<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/cf.errors.css" />\n<!--[if lt IE 9]><link rel="stylesheet" id=\'cf_styles-ie-css\' href="/cdn-cgi/styles/cf.errors.ie.css" /><![endif]-->\n<style>body{margin:0;padding:0}</style>\n\n\n<!--[if gte IE 10]><!-->\n<script>\n  if (!navigator.cookieEnabled) {\n    window.addEventListene

In [2]:
import pandas as pd

In [1]:
# Read the CSV and display the resulting DataFrame without binding it to a name.
pd.read_csv("facebook_ads_electric_vehicles_with_openai_summaries.csv")

Unnamed: 0,ad_archive_id,page_id,page_name,advertiser_name,advertiser_id,ad_title,ad_text,ad_caption,cta_text,cta_type,...,is_finserv_regulated,is_limited_delivery,anti_scam_limited,matched_car_models,page_classification,classification_reason,classification_confidence,openai_summary,has_openai_summary,summary_date
0,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,,,,Cupra Born,unknown,No clear indicators,0.3,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
1,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,,,,Cupra Born,unknown,No clear indicators,0.3,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
2,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,,,,Cupra Born,unknown,No clear indicators,0.3,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
3,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,,,,Cupra Born,unknown,No clear indicators,0.3,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
4,329729626383284,103393094402445,SEAT Βελμάρ,,,Ετοιμοπαράδοτο Born,Το αμιγώς ηλεκτρικό CUPRA Born. Σχεδιασμένο με...,seat-velmar.gr,Get quote,GET_QUOTE,...,,,,Cupra Born,unknown,No clear indicators,0.3,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7142,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,,,,Hyundai Ioniq 5,dealer,Matches dealer pattern: \b[A-Z][a-z]+\s+(bmw|a...,0.9,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
7143,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,,,,Hyundai Ioniq 5,dealer,Matches dealer pattern: \b[A-Z][a-z]+\s+(bmw|a...,0.9,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
7144,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,,,,Hyundai Ioniq 5,dealer,Matches dealer pattern: \b[A-Z][a-z]+\s+(bmw|a...,0.9,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
7145,729019106294690,710391195480124,Universal Hyundai Orlando,,,2025 IONIQ 5 $199/mes,⚡ ¡Descuento Ardiente en el Hyundai IONIQ 5 SE...,universal-hyundai.com,Shop now,SHOP_NOW,...,,,,Hyundai Ioniq 5,dealer,Matches dealer pattern: \b[A-Z][a-z]+\s+(bmw|a...,0.9,**Brand & Product Focus:** \nThe advertisemen...,True,2025-06-28
