In [1]:
import pandas as pd
import numpy as np

# Load the cleaned Meta Ads export from the Excel workbook.
df = pd.read_excel('../data/meta_ads_cleaned.xlsx', engine='openpyxl')

# Keep only the rows whose result indicator is post engagement.
# Boolean indexing followed by reset_index(drop=True) returns a new frame with
# a fresh 0-based index, so `df` is left untouched and no in-place mutation
# (or explicit .copy()) is required.
engagement_df = (
    df.loc[df['Result indicator'] == 'actions:post_engagement']
    .reset_index(drop=True)
)

# Preview the raw data. This has to be the last expression of the cell:
# a bare expression in the middle of a cell is evaluated but never rendered.
df.head()

In [2]:
# Columns whose values are categories and therefore need one-hot encoding.
categorical_cols = [
    'Platforme',
    'Placement',
    'Device',
    'Ad set budget type',
]

# Replace each categorical column with indicator columns; drop_first=True
# omits the first level of every encoded column.
engagement_df_encoded = pd.get_dummies(
    data=engagement_df,
    columns=categorical_cols,
    drop_first=True,
)


In [3]:
# Parse the 'Starts' column as datetimes so the .dt accessor can be used on it.
engagement_df_encoded['Starts'] = pd.to_datetime(engagement_df_encoded['Starts'])

# Derive one calendar column per datetime component; each new column is named
# after the .dt attribute it is read from.
for part in ('day', 'month', 'weekday'):
    engagement_df_encoded[part] = getattr(engagement_df_encoded['Starts'].dt, part)


In [4]:
import numpy as np

# New column name -> source column for every metric that gets a log1p
# transform. np.log1p computes log(1 + x), so a raw value of 0 maps to 0.
log_feature_sources = {
    'log_spend': 'Amount spent',
    'log_reach': 'Reach',
    'log_impressions': 'Impressions',
    'log_post_eng': 'Post engagements',
    'log_comments': 'Post comments',
    'log_reactions': 'Post reactions',
    'log_saves': 'Post saves',
    'log_shares': 'Post shares',
    'log_cost_per_eng': 'Cost per post engagement (USD)',
}

# Columns are added in the mapping's order.
for log_col, raw_col in log_feature_sources.items():
    engagement_df_encoded[log_col] = np.log1p(engagement_df_encoded[raw_col])


In [5]:
# Target variable
target_col = 'log_post_eng'

# Choose predictors based on correlation analysis
# NOTE(review): reactions, saves, shares and comments may themselves be
# components of 'Post engagements'; if so, these predictors leak the target.
# TODO confirm against the metric definitions before training.
base_features = [
    'log_spend', 'log_reactions', 'log_saves', 'log_shares', 'log_comments'
]

# Time features
time_features = ['day', 'month', 'weekday']

# One-hot encoded categorical columns.
# get_dummies names each indicator column '<source column>_<value>', so the
# columns are selected by *prefix*. A substring test would also pick up any
# unrelated column that merely contains one of these fragments in the middle
# of its name. str.startswith accepts a tuple and matches any of its entries.
encoded_prefixes = ('Platforme_', 'Placement_', 'Device_', 'Ad set budget type_')
encoded_features = [
    col
    for col in engagement_df_encoded.columns
    # Non-string labels cannot be dummy columns; skip them instead of failing.
    if isinstance(col, str) and col.startswith(encoded_prefixes)
]

# Final feature list
selected_features = base_features + time_features + encoded_features

# Create modeling DataFrame: target first, then the predictors. Rows with a
# missing value in any of these columns are dropped.
engagement_model_df = engagement_df_encoded[[target_col] + selected_features].dropna()

# Split X and y
X_engagement = engagement_model_df.drop(columns=target_col)
y_engagement = engagement_model_df[target_col]


## Feature Engineering – Engagement Campaigns

We prepared the dataset for modeling `log_post_eng` using:
- One-hot encoding of platform, placement, device, and budget type
- Log-transformation of key engagement metrics
- Extraction of time features: day, month, weekday

### Selected Features:
- `log_spend`, `log_reactions`, `log_saves`, `log_shares`, `log_comments`
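- One-hot encoded columns for platform, placement, device, and ad set budget type (first level of each dropped)
- Time features `day`, `month`, and `weekday`, derived from the `Starts` date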

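✅ Dataset is now ready for model training: rows with missing values have been dropped, the features are in `X_engagement`, and the target is in `y_engagement`.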
