<a href="https://colab.research.google.com/github/glombardo/Research/blob/main/Visual_Intelligence_for_Ad_Creative_Strategy_Using_Millions_of_Creative_Data_Points.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Step 1: Upload File (press play button)**

In [12]:
# @title
from google.colab import files
import pandas as pd

# Open the Colab file picker. The result maps each uploaded filename to the
# raw bytes of that file.
uploaded = files.upload()

if not uploaded:
  # Without this guard the cell would either fail further down with a
  # NameError on `df`, or silently show a stale `df` left over from an
  # earlier run of the notebook.
  raise ValueError('No file was uploaded. Re-run this cell and choose a CSV file.')

for fn in uploaded:
  # Assuming only one file is uploaded; if several are, the last one read
  # is the one that ends up in `df`.
  df = pd.read_csv(fn)
  print(f'User uploaded file "{fn}" with length {len(uploaded[fn])} bytes')

# Quick preview so the user can confirm the right file was loaded.
print(df.head())

              Advertiser   Brand Root     Brand (Major)  \
0  Bayer HealthCare LLC.    One A Day  One A Day Multi+   
1       Pharmavite, LLC.  Nature Made       Nature Made   
2       Pharmavite, LLC.  Nature Made       Nature Made   
3       Pharmavite, LLC.  Nature Made       Nature Made   
4       Pharmavite, LLC.  Nature Made       Nature Made   

                          Brand (Minor)                          Brand (Leaf)  \
0  One A Day Multi+ Hair, Skin, & Nails  One A Day Multi+ Hair, Skin, & Nails   
1                           Nature Made                           Nature Made   
2                           Nature Made                           Nature Made   
3                           Nature Made                           Nature Made   
4                           Nature Made                           Nature Made   

  Publisher     Date    Device Direct/Indirect        Type First Seen  \
0  Facebook  10/1/23  Facebook          Direct  Video Post     8/8/22   
1  Facebook 

**Step 2: Run this cell (press play button <-)**

In [11]:
# @title
# Install required libraries
!pip install -q pillow tqdm matplotlib ipywidgets scikit-learn opencv-python

# Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import requests
from tqdm import tqdm
from PIL import Image
from io import BytesIO
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
import ipywidgets as widgets
from IPython.display import display, clear_output

# Mount Google Drive if needed
# from google.colab import drive
# drive.mount('/content/drive')

# Load your Pathmatics data
# Replace with your actual file path
#csv_path = 'vitamins_ads_data.csv'  # <--- change to your filename
#df = pd.read_csv(csv_path)

# Keep only rows that carry a usable creative link. Rows are dropped solely
# on the link column: a blanket dropna() would also throw away every row that
# has a gap in any unrelated column of the export.
df = df.dropna(subset=['Link to Creative'])
# na=False makes non-string cells count as "not a link" instead of yielding
# NaN in the mask, which cannot be used for boolean indexing.
df = df[df['Link to Creative'].str.startswith('https', na=False)]


def _make_selector(column, label):
    """Return a multi-select widget listing the distinct non-null values of `column`."""
    return widgets.SelectMultiple(
        options=df[column].dropna().unique(),
        description=label,
        layout=widgets.Layout(width='50%'),
    )


# Create widgets: one multi-select per filterable column. An empty selection
# means "do not filter on this column" in the click handler.
adv_dropdown = _make_selector('Advertiser', 'Advertiser')
brand_dropdown = _make_selector('Brand Root', 'Brand')
major_dropdown = _make_selector('Brand (Major)', 'Sub-Brand')
pub_dropdown = _make_selector('Publisher', 'Publisher')

button = widgets.Button(description="Generate Visual Summary", button_style='success')
# Everything the click handler prints or plots is captured in this widget.
output = widgets.Output()

display(widgets.VBox([adv_dropdown, brand_dropdown, major_dropdown, pub_dropdown, button, output]))

# Utility: Download and resize image
def download_image(url, size=(224, 224)):
    """Download an image and return it as a resized RGB NumPy array.

    Args:
        url: Address of the image to fetch.
        size: Target ``(width, height)`` handed to ``PIL.Image.resize``.

    Returns:
        The resized RGB image as a NumPy array, or ``None`` when the request
        fails, the server answers with an HTTP error status, or the payload
        cannot be decoded as an image.
    """
    try:
        response = requests.get(url, timeout=10)
        # Reject 4xx/5xx answers up front instead of attempting to decode an
        # error page as an image.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert("RGB")
        img = img.resize(size)
        return np.array(img)
    except Exception:
        # Deliberately best-effort: any single bad creative is skipped by the
        # caller. `Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt / SystemExit propagate so a long download loop can
        # still be interrupted.
        return None

# On button click
def on_button_click(b):
    """Build and show the visual summary for the current dropdown selection.

    Filters ``df`` by whatever is selected in the four dropdowns, downloads
    the distinct creatives, embeds them with VGG16, reduces the embeddings
    with PCA and displays the image closest to the single KMeans centroid.
    All printed text and the plot go to the ``output`` widget.

    Args:
        b: The clicked button (unused).
    """
    output.clear_output()
    with output:
        clear_output(wait=True)
        print("Processing...")

        # Each dropdown narrows the data only when something is selected in it.
        selections = (
            ('Advertiser', adv_dropdown),
            ('Brand Root', brand_dropdown),
            ('Brand (Major)', major_dropdown),
            ('Publisher', pub_dropdown),
        )
        subset = df.copy()
        for column, picker in selections:
            chosen = picker.value
            if chosen:
                subset = subset[subset[column].isin(chosen)]

        # Distinct creative links; no cap is applied (the `[:100]` limit is
        # currently disabled).
        creative_links = subset['Link to Creative'].drop_duplicates().tolist()

        if not creative_links:
            print("No matching creatives found.")
            return

        print(f"Found {len(creative_links)} creatives. Downloading...")

        # Failed downloads come back as None and are skipped.
        fetched = (download_image(link) for link in tqdm(creative_links))
        pictures = [pic for pic in fetched if pic is not None]

        if len(pictures) < 5:
            print("Not enough valid images to analyze.")
            return

        print("Extracting visual features...")

        # VGG16 without its classifier head; features are read from the
        # 'block5_pool' layer.
        backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
        extractor = Model(inputs=backbone.input, outputs=backbone.get_layer('block5_pool').output)

        batch = np.array([preprocess_input(pic.astype(np.float32)) for pic in pictures])
        activations = extractor.predict(batch, verbose=0)
        # One flat feature vector per image.
        flattened = activations.reshape(len(activations), -1)

        print("Reducing dimensionality...")
        sample_count, feature_count = flattened.shape
        reducer = PCA(n_components=min(30, sample_count, feature_count))
        embedded = reducer.fit_transform(flattened)

        print("Clustering...")
        clusterer = KMeans(n_clusters=1, random_state=42)
        clusterer.fit(embedded)
        # With one cluster, the representative is the image nearest its centre.
        center = clusterer.cluster_centers_[0]
        distances = np.linalg.norm(embedded - center, axis=1)
        nearest = np.argmin(distances)

        representative_img = pictures[nearest]

        print("Visual Summary Generated:")
        plt.figure(figsize=(6, 6))
        plt.imshow(representative_img)
        plt.axis('off')
        plt.title("Most Representative Creative Based on Visual Features")
        plt.show()

# Register the handler so that clicking the button runs on_button_click.
button.on_click(on_button_click)


VBox(children=(SelectMultiple(description='Advertiser', layout=Layout(width='50%'), options=('Bayer HealthCare…