In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("facebookadcampaigndataset.csv")

# Drop missing values for required fields
df = df.dropna(subset=['approved_conversion', 'spent'])
df = df[df['spent'] > 0].copy()

# Feature engineering
df['revenue'] = df['approved_conversion'] * 100  # Assume $100 per approved conversion
df['roas'] = df['revenue'] / df['spent']
df['ctr'] = df['clicks'] / df['impressions'].replace(0, 1)
df['cpm'] = (df['spent'] / df['impressions'].replace(0, 1)) * 1000

# Binary target: successful if ROAS > 1
df['is_successful'] = (df['roas'] > 1).astype(int)

# One-hot encoding of categorical variables
df_encoded = pd.get_dummies(df, columns=['age', 'gender'], drop_first=True)

# Features and target
X = df_encoded.drop(columns=[
    'ad_id', 'reporting_start', 'reporting_end',
    'campaign_id', 'fb_campaign_id',
    'revenue', 'roas', 'is_successful'
])
y = df_encoded['is_successful']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train base Random Forest
clf = RandomForestClassifier(random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)

# Evaluate base model
y_pred = clf.predict(X_test)
base_acc = accuracy_score(y_test, y_pred)
print(f"Base RandomForest Accuracy: {base_acc:.4f}")
print("\nClassification Report (Base Model):")
print(classification_report(y_test, y_pred))

# Optional: Grid search to improve accuracy
param_grid = {
    'n_estimators': [100, 200, 500],
    'max_depth': [None, 10, 20, 30],
    'min_samples_leaf': [1, 2, 5]
}
grid = GridSearchCV(RandomForestClassifier(random_state=42, n_jobs=-1),
                    param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid.fit(X_train, y_train)

# Evaluate optimized model
best_model = grid.best_estimator_
y_pred_opt = best_model.predict(X_test)
opt_acc = accuracy_score(y_test, y_pred_opt)
print(f"\nOptimized RandomForest Accuracy: {opt_acc:.4f}")
print("Best Parameters Found:", grid.best_params_)
print("\nClassification Report (Optimized Model):")
print(classification_report(y_test, y_pred_opt))


Base RandomForest Accuracy: 0.9821

Classification Report (Base Model):
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        58
           1       0.96      1.00      0.98        54

    accuracy                           0.98       112
   macro avg       0.98      0.98      0.98       112
weighted avg       0.98      0.98      0.98       112


Optimized RandomForest Accuracy: 0.9821
Best Parameters Found: {'max_depth': None, 'min_samples_leaf': 1, 'n_estimators': 200}

Classification Report (Optimized Model):
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        58
           1       0.96      1.00      0.98        54

    accuracy                           0.98       112
   macro avg       0.98      0.98      0.98       112
weighted avg       0.98      0.98      0.98       112

