In [277]:
import pickle
import pandas as pd
import shap
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [278]:
# Load the engineered feature table and discard the alternative target columns
# so that `roi` is the only target left in `df`.
other_targets = ['impressions', 'clicks', 'ctr', 'conversions', 'conversion_rate']
df = pd.read_csv('./data/sample_feature_engineered.csv').drop(columns=other_targets)

# Separate the target from the features, then hold out 20% of rows for testing.
y = df['roi']
X = df.drop(columns=['roi'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only and apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [279]:
# Display the column labels of `df` (features plus the `roi` target).
df.columns

Index(['duration_seconds', 'brightness_score', 'text_to_image_ratio',
       'logo_size_ratio', 'has_human_face', 'face_count', 'sentiment_score',
       'word_count', 'music_tempo', 'speech_pace', 'roi',
       'color_palette_primary_R', 'color_palette_primary_G',
       'color_palette_primary_B', 'platform_encoded', 'ad_type_encoded',
       'industry_encoded', 'campaign_objective_encoded',
       'target_audience_gender_encoded', 'aspect_ratio_encoded',
       'cta_type_encoded'],
      dtype='object')

In [280]:
# Build one synthetic ad: for every column, take the value found in an
# independently drawn random row of `df`. Because columns are sampled
# independently, the resulting row may combine values that never co-occur in
# the data (e.g. has_human_face and face_count can disagree).
#
# A dedicated seeded generator keeps the sample (and every downstream number)
# reproducible across "Restart & Run All". Row *positions* are drawn instead of
# index labels, because the values are looked up positionally in the NumPy
# array; this stays correct even if `df` does not have a default RangeIndex.
rng = np.random.default_rng(42)
indices = rng.integers(0, len(df), size=(1, df.shape[1]))
ad_sample = pd.DataFrame(
    data=df.to_numpy()[indices, np.arange(len(df.columns))],
    columns=df.columns,
)

ad_sample.to_dict()

{'duration_seconds': {0: 0.0},
 'brightness_score': {0: 48.0},
 'text_to_image_ratio': {0: 0.28},
 'logo_size_ratio': {0: 0.1},
 'has_human_face': {0: 0.0},
 'face_count': {0: 2.0},
 'sentiment_score': {0: 0.68},
 'word_count': {0: 45.0},
 'music_tempo': {0: 82.0},
 'speech_pace': {0: 135.0},
 'roi': {0: 1.9},
 'color_palette_primary_R': {0: 240.0},
 'color_palette_primary_G': {0: 200.0},
 'color_palette_primary_B': {0: 255.0},
 'platform_encoded': {0: 2.0},
 'ad_type_encoded': {0: 1.0},
 'industry_encoded': {0: 3.0},
 'campaign_objective_encoded': {0: 2.0},
 'target_audience_gender_encoded': {0: 0.0},
 'aspect_ratio_encoded': {0: 1.0},
 'cta_type_encoded': {0: 11.0}}

In [281]:
import joblib

# Read the mapping of column name -> fitted encoder that the later cells use
# to translate between category labels and their integer codes.
with open("./data/label_encoders.pkl", "rb") as encoders_file:
    label_encoders = joblib.load(encoders_file)

In [282]:
# Print, for every encoded column, the label -> integer code table implied by
# the position of each label in the encoder's `classes_`.
for column, encoder in label_encoders.items():
    mapping = dict(zip(encoder.classes_, range(len(encoder.classes_))))
    print((column, mapping))

('platform', {'LinkedIn': 0, 'Meta': 1, 'TikTok': 2, 'YouTube': 3})
('ad_type', {'Carousel': 0, 'Image': 1, 'Story': 2, 'Video': 3})
('industry', {'Automotive': 0, 'B2B Technology': 1, 'Beauty': 2, 'Cosmetics': 3, 'E-commerce': 4, 'Education': 5, 'Electronics': 6, 'Entertainment': 7, 'Fashion': 8, 'Finance': 9, 'Fitness': 10, 'Food & Beverage': 11, 'Gaming': 12, 'Health': 13, 'Home Goods': 14, 'Jewelry': 15, 'Professional Services': 16, 'Retail': 17, 'SaaS': 18, 'Travel': 19})
('campaign_objective', {'App Install': 0, 'Awareness': 1, 'Consideration': 2, 'Conversion': 3, 'Lead Generation': 4})
('target_audience_gender', {'all': 0, 'female': 1})
('aspect_ratio', {'16:9': 0, '1:1': 1, '4:3': 2, '4:5': 3, '9:16': 4})
('cta_type', {'Book Now': 0, 'Book Test Drive': 1, 'Browse Collection': 2, 'Contact Us': 3, 'Discover More': 4, 'Download Guide': 5, 'Download Now': 6, 'Download Report': 7, 'Get Demo': 8, 'Get Guide': 9, 'Join Now': 10, 'Learn More': 11, 'Order Now': 12, 'Register Now': 13, '

In [283]:
# Load the trained model. The context manager guarantees the file handle is
# closed (the previous bare `open(...)` was never closed).
# NOTE: pickle can execute arbitrary code while loading; only load model files
# that come from a trusted source.
with open('./xgb_model.pkl', 'rb') as model_file:
    xgb_model = pickle.load(model_file)

In [284]:
# Drop the target column and apply the already-fitted scaler to the sampled ad.
ad_sample_features = ad_sample.drop(columns=['roi'])
ad_sample_scaled = scaler.transform(ad_sample_features)

In [285]:
# Model prediction for the scaled sampled ad (the target column is `roi`).
xgb_model.predict(ad_sample_scaled)

array([5.7446885], dtype=float32)

In [286]:
# Re-attach the feature names to the scaled training matrix; this frame is
# handed to shap.Explainer together with the model.
X_train_scaled_df = pd.DataFrame(
    data=X_train_scaled,
    columns=X_train.columns,
)
explainer = shap.Explainer(xgb_model, X_train_scaled_df)

# Explain the prediction for the single scaled ad sample.
shap_values = explainer(ad_sample_scaled)

In [287]:
# Pair every feature name with its SHAP value for the (single) explained ad.
shap_df = pd.DataFrame(
    {"feature": X.columns, "shap_value": shap_values.values[0]}
)

# Keep the five features with the lowest (most negative) SHAP values.
shap_sorted = shap_df.sort_values(by="shap_value")
worst_features = shap_sorted.iloc[:5]

In [288]:
# Display the five features with the lowest SHAP values for the sampled ad.
worst_features

Unnamed: 0,feature,shap_value
2,text_to_image_ratio,-0.330985
19,cta_type_encoded,-0.243058
16,campaign_objective_encoded,-0.126573
6,sentiment_score,-0.068657
11,color_palette_primary_G,-0.045447


In [None]:
def _predict_roi(sample):
    """Return the model's prediction for a one-row ad DataFrame.

    The `roi` column is dropped, the remaining features are scaled with the
    already-fitted `scaler`, and the first element of the model's prediction
    array is returned.
    """
    return xgb_model.predict(scaler.transform(sample.drop(columns=['roi'])))[0]


# Prediction for the unmodified ad; every candidate change must beat this.
base_roi = xgb_model.predict(ad_sample_scaled)[0]

# feature name -> description of the single-feature change that gives the
# highest predicted ROI (only features where some change beats `base_roi`).
best_changes = {}

for feature in worst_features['feature']:
    best_roi_for_feature = base_roi
    best_change = None
    encoder_label = feature.replace('_encoded', '')

    if encoder_label in label_encoders:
        # Categorical (label-encoded) feature: try every other known label.
        encoder = label_encoders[encoder_label]
        current_value = int(ad_sample[feature].values[0])
        current_label = encoder.inverse_transform([current_value])[0]

        for label in encoder.classes_:
            # Skip the label the ad already has: it cannot improve on the
            # baseline. (The previous check compared the string label with the
            # integer code and then executed `pass`, so it never skipped.)
            if label == current_label:
                continue

            new_sample = ad_sample.copy()
            new_sample[feature] = encoder.transform([label])[0]
            new_roi = _predict_roi(new_sample)

            if new_roi > best_roi_for_feature:
                best_roi_for_feature = new_roi
                best_change = {
                    "from": current_label,
                    "to": label,
                    "roi": new_roi
                }

    else:
        # Numeric feature: try 15 evenly spaced relative perturbations between
        # -30% and +30% of the current value. Note that a current value of 0
        # stays 0 under a multiplicative change, so such features never yield
        # a suggestion.
        current_value = ad_sample[feature].values[0]

        for pct in np.linspace(-0.3, 0.3, 15):
            new_value = current_value * (1 + pct)

            # Negative candidate values are not evaluated.
            if new_value < 0:
                continue

            new_sample = ad_sample.copy()
            new_sample[feature] = new_value
            new_roi = _predict_roi(new_sample)

            if new_roi > best_roi_for_feature:
                best_roi_for_feature = new_roi
                best_change = {
                    "from": round(current_value, 2),
                    "to": round(new_value, 2),
                    "roi": new_roi,
                    "pct_change": f"{pct * 100:.1f}%"
                }

    # Record the feature only when some change improved on the baseline.
    if best_change:
        best_changes[feature] = best_change

In [290]:
# Display the best single-feature change found for each low-SHAP feature.
best_changes

{'text_to_image_ratio': {'from': 0.28,
  'to': 0.21,
  'roi': 6.158241,
  'pct_change': '-25.7%'},
 'cta_type_encoded': {'from': 'Learn More',
  'to': 'Book Now',
  'roi': 10.806419},
 'campaign_objective_encoded': {'from': 'Consideration',
  'to': 'Conversion',
  'roi': 6.3125815}}