In [100]:
import pickle
import pandas as pd
import shap
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [101]:
df = pd.read_csv('./data/sample_feature_engineered.csv')
other_targets = ['impressions', 'clicks', 'ctr', 'conversions', 'conversion_rate']
df = df.drop(columns=other_targets)

X = df.drop(columns=['roi'])
y = df['roi']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [102]:
df.columns

Index(['duration_seconds', 'brightness_score', 'text_to_image_ratio',
       'logo_size_ratio', 'has_human_face', 'face_count', 'sentiment_score',
       'word_count', 'music_tempo', 'speech_pace', 'roi',
       'color_palette_primary_R', 'color_palette_primary_G',
       'color_palette_primary_B', 'platform_encoded', 'ad_type_encoded',
       'industry_encoded', 'campaign_objective_encoded',
       'target_audience_gender_encoded', 'aspect_ratio_encoded',
       'cta_type_encoded'],
      dtype='object')

In [103]:
indices = np.random.choice(df.index, size=(1, df.shape[1]), replace=True)
ad_sample = pd.DataFrame(data=df.to_numpy()[indices, np.arange(len(df.columns))], 
                       columns=df.columns)

ad_sample.to_dict()

{'duration_seconds': {0: 0.0},
 'brightness_score': {0: 68.0},
 'text_to_image_ratio': {0: 0.25},
 'logo_size_ratio': {0: 0.06},
 'has_human_face': {0: 1.0},
 'face_count': {0: 0.0},
 'sentiment_score': {0: 0.15},
 'word_count': {0: 18.0},
 'music_tempo': {0: 95.0},
 'speech_pace': {0: 0.0},
 'roi': {0: 4.8},
 'color_palette_primary_R': {0: 255.0},
 'color_palette_primary_G': {0: 245.0},
 'color_palette_primary_B': {0: 220.0},
 'platform_encoded': {0: 3.0},
 'ad_type_encoded': {0: 1.0},
 'industry_encoded': {0: 1.0},
 'campaign_objective_encoded': {0: 2.0},
 'target_audience_gender_encoded': {0: 0.0},
 'aspect_ratio_encoded': {0: 0.0},
 'cta_type_encoded': {0: 2.0}}

In [104]:
import joblib

with open ("./data/label_encoders.pkl", "rb") as f:
    label_encoders = joblib.load(f)

In [105]:
for column, encoder in label_encoders.items():
    mapping = {class_: idx for idx, class_ in enumerate(encoder.classes_)}
    print((column, mapping))

('platform', {'LinkedIn': 0, 'Meta': 1, 'TikTok': 2, 'YouTube': 3})
('ad_type', {'Carousel': 0, 'Image': 1, 'Story': 2, 'Video': 3})
('industry', {'Automotive': 0, 'B2B Technology': 1, 'Beauty': 2, 'Cosmetics': 3, 'E-commerce': 4, 'Education': 5, 'Electronics': 6, 'Entertainment': 7, 'Fashion': 8, 'Finance': 9, 'Fitness': 10, 'Food & Beverage': 11, 'Gaming': 12, 'Health': 13, 'Home Goods': 14, 'Jewelry': 15, 'Professional Services': 16, 'Retail': 17, 'SaaS': 18, 'Travel': 19})
('campaign_objective', {'App Install': 0, 'Awareness': 1, 'Consideration': 2, 'Conversion': 3, 'Lead Generation': 4})
('target_audience_gender', {'all': 0, 'female': 1})
('aspect_ratio', {'16:9': 0, '1:1': 1, '4:3': 2, '4:5': 3, '9:16': 4})
('cta_type', {'Book Now': 0, 'Book Test Drive': 1, 'Browse Collection': 2, 'Contact Us': 3, 'Discover More': 4, 'Download Guide': 5, 'Download Now': 6, 'Download Report': 7, 'Get Demo': 8, 'Get Guide': 9, 'Join Now': 10, 'Learn More': 11, 'Order Now': 12, 'Register Now': 13, '

In [106]:
xgb_model = pickle.load(open('./xgb_model.pkl', 'rb'))

In [107]:
ad_sample_scaled = scaler.transform(ad_sample.drop(columns=['roi']))

In [108]:
xgb_model.predict(ad_sample_scaled)

array([0.60836613], dtype=float32)

In [109]:
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
explainer = shap.Explainer(xgb_model, X_train_scaled_df)

shap_values = explainer(ad_sample_scaled)

In [110]:
shap_df = pd.DataFrame({
    "feature": X.columns,
    "shap_value": shap_values.values[0],
})

worst_features = shap_df.sort_values(by="shap_value").head(10)

In [111]:
shap_df.sort_values(by="shap_value", ascending=False).head(5)

Unnamed: 0,feature,shap_value
13,platform_encoded,0.229561
12,color_palette_primary_B,0.08153
0,duration_seconds,0.026154
10,color_palette_primary_R,0.010468
3,logo_size_ratio,0.002517


In [112]:
worst_features

Unnamed: 0,feature,shap_value
19,cta_type_encoded,-1.793892
2,text_to_image_ratio,-0.526074
16,campaign_objective_encoded,-0.253091
11,color_palette_primary_G,-0.115412
1,brightness_score,-0.093463
6,sentiment_score,-0.074971
5,face_count,-0.072895
7,word_count,-0.041273
15,industry_encoded,-0.039834
9,speech_pace,-0.037777


In [113]:
ease_map = {
    "duration_seconds": 1.5,
    "brightness_score": 1,
    "text_to_image_ratio": 1,
    "logo_size_ratio": 1,
    "has_human_face": 2,
    "face_count": 2,
    "sentiment_score": 1,
    "word_count": 1,
    "music_tempo": 1.5,
    "speech_pace": 1.5,
    "color_palette_primary_R": 1,
    "color_palette_primary_G": 1,
    "color_palette_primary_B": 1,
    "platform_encoded": 3,
    "industry_encoded": 3,
    "campaign_objective_encoded": 3,
    "target_audience_gender_encoded": 2,
    "aspect_ratio_encoded": 1,
    "cta_type_encoded": 1,
    "default": 1
}

In [114]:
base_roi = xgb_model.predict(ad_sample_scaled)[0]

best_changes = {}
suggestions = []

for id, row in worst_features.iterrows():
    feature = row['feature']
    shap_value = row['shap_value']

    # Caso seja uma feature categórica (label encoded)
    if feature.replace('_encoded', '') in label_encoders:
        encoder_label = feature.replace('_encoded', '')
        current_feature_value = ad_sample[feature].values[0].astype(int)
        current_label = label_encoders[encoder_label].inverse_transform([current_feature_value])[0]
        best_roi_for_feature = base_roi
        best_change = None
        
        for label in label_encoders[encoder_label].classes_:
            if label == current_feature_value: continue

            new_sample = ad_sample.copy()
            new_sample[feature] = label_encoders[encoder_label].transform([label])[0]

            new_roi = xgb_model.predict(scaler.transform(new_sample.drop(columns=['roi'])))[0]
            delta_roi = new_roi - base_roi
            
            if delta_roi > 0 and (best_change is None or delta_roi > best_change["delta_roi"]):
                best_change = {
                    "feature": feature,
                    "from": current_label,
                    "to": label,
                    "roi": new_roi,
                    "delta_roi": delta_roi,
                    "ease_score": ease_map.get(feature, ease_map["default"]),
                }

        if best_change:
            suggestions.append(best_change)
    
    else:
        current_feature_value = ad_sample[feature].values[0]
        best_roi_for_feature = base_roi
        best_change = None

        # Gera 15 perturbações entre -30% e +30%
        for pct in np.linspace(-0.3, 0.3, 15):
            new_value = current_feature_value * (1 + pct)

            if new_value < 0:
                continue

            new_sample = ad_sample.copy()
            new_sample[feature] = new_value
            new_roi = xgb_model.predict(scaler.transform(new_sample.drop(columns=['roi'])))[0]
            delta_roi = new_roi - base_roi

            if delta_roi > 0 and (best_change is None or delta_roi > best_change["delta_roi"]):
                best_change = {
                    "feature": feature,
                    "from": round(current_feature_value, 2),
                    "to": round(new_value, 2),
                    "roi": new_roi,
                    "delta_roi": delta_roi,
                    "ease_score": ease_map.get(feature, ease_map["default"]),
                    "pct_change": f"{pct*100:.1f}%"
                }

        if best_change:
            suggestions.append(best_change)

In [115]:
suggestions_sorted = sorted(
    suggestions,
    key=lambda x: (-x['delta_roi'], x['ease_score'])
)

In [116]:
suggestions_sorted

[{'feature': 'cta_type_encoded',
  'from': 'Browse Collection',
  'to': 'Book Now',
  'roi': 11.008478,
  'delta_roi': 10.400112,
  'ease_score': 1},
 {'feature': 'brightness_score',
  'from': 68.0,
  'to': 47.6,
  'roi': 4.108036,
  'delta_roi': 3.49967,
  'ease_score': 1,
  'pct_change': '-30.0%'},
 {'feature': 'text_to_image_ratio',
  'from': 0.25,
  'to': 0.18,
  'roi': 2.3377774,
  'delta_roi': 1.7294112,
  'ease_score': 1,
  'pct_change': '-30.0%'},
 {'feature': 'campaign_objective_encoded',
  'from': 'Consideration',
  'to': 'Conversion',
  'roi': 1.9235252,
  'delta_roi': 1.3151591,
  'ease_score': 3},
 {'feature': 'industry_encoded',
  'from': 'B2B Technology',
  'to': 'Fitness',
  'roi': 0.70146775,
  'delta_roi': 0.09310162,
  'ease_score': 3},
 {'feature': 'word_count',
  'from': 18.0,
  'to': 14.14,
  'roi': 0.6445293,
  'delta_roi': 0.03616315,
  'ease_score': 1,
  'pct_change': '-21.4%'}]