# Causal Inference: Do Creative Features Affect CTR?

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from econml.dml import LinearDML

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [2]:
# Load dataset
df = pd.read_csv('/Users/camilojaureguiberry/Documents/Projects/Developments/NarrativeLens/src/analytics/tiktok_ads_us_toplikes.csv')

# Define outcome, treatment, and control features
outcome = 'ctr'
creative_features = [
    'creative_features_creative_theme',
    'creative_features_creative_concept',
    'creative_features_format_production_style',
    'creative_features_talent_type',
    'creative_features_demographic_representation',
    'creative_features_audience_focus',
    'creative_features_campaign_objective'
]
control_features = ['cost', 'industry_parent.value', 'objective_value']
df_model = df[[outcome] + creative_features + control_features].copy()
df_model = df_model[df_model[outcome].notna()]

In [3]:
# Preprocessing
categorical_features = creative_features + ['industry_parent.value', 'objective_value']
numerical_features = ['cost']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])
preprocessor = ColumnTransformer(transformers=[
    ('cat', categorical_transformer, categorical_features),
    ('num', numerical_transformer, numerical_features)
])

# Transform data
X = preprocessor.fit_transform(df_model.drop(columns=[outcome]))
y = df_model[outcome].values

# Extract treatment and control matrices
encoded_feature_names = preprocessor.named_transformers_['cat']['onehot'].get_feature_names_out(categorical_features)
num_treatments = sum([len(preprocessor.named_transformers_['cat']['onehot'].categories_[i]) for i in range(len(creative_features))])
T = X[:, :num_treatments]
W = X[:, num_treatments:]

In [4]:
# Fit Linear DML
model_y = RandomForestRegressor(n_estimators=100, min_samples_leaf=10)
model_t = RandomForestClassifier(n_estimators=100, min_samples_leaf=10)
dml = LinearDML(model_y=model_y, model_t=model_t, discrete_treatment=True, cv=3, random_state=0)
dml.fit(Y=y, T=T, X=W)

IndexError: tuple index out of range

In [None]:
# Summarize effects
import matplotlib.pyplot as plt
effects = pd.DataFrame({
    'treatment': encoded_feature_names[:num_treatments],
    'coef': dml.coef_,
    'stderr': dml.stderr_,
    'ci_lower': dml.coef_ - 1.96 * dml.stderr_,
    'ci_upper': dml.coef_ + 1.96 * dml.stderr_
})
effects.sort_values(by='coef', ascending=False).head(10)