In [1]:
import sys
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Make the project's helper module importable.
# The helpers are expected under <project>/src/utils. Rather than assuming the
# notebook is started exactly two levels below the project root, walk up from
# the current working directory and use the first src/utils folder that
# actually provides `load_data`.
def _find_utils_dir(start):
    """Return the closest ``src/utils`` directory that provides ``load_data``.

    Parameters
    ----------
    start : pathlib.Path
        Directory the upward search begins from.

    Returns
    -------
    pathlib.Path or None
        The first matching directory found in ``start`` or its parents,
        or ``None`` when no ancestor contains one.
    """
    for base in (start, *start.parents):
        candidate = base / "src" / "utils"
        if (candidate / "load_data.py").is_file() or (candidate / "load_data").is_dir():
            return candidate
    return None


_working_dir = Path().resolve()
_utils_dir = _find_utils_dir(_working_dir)
if _utils_dir is None:
    # Nothing found: fall back to the original fixed layout so the import
    # below fails in the same way it did before.
    _utils_dir = (_working_dir.parent.parent / "src" / "utils").resolve()

# Guard against piling up duplicate entries when the cell is re-run.
if str(_utils_dir) not in sys.path:
    sys.path.append(str(_utils_dir))

from load_data import load_csv, save_csv


# Load Raw Data
df = load_csv("campaigns.csv")
df.head()

ModuleNotFoundError: No module named 'load_data'

In [None]:


# -------------------------------
# 🔍 Initial Exploration
# -------------------------------
# df.info() prints its own report. The other two summaries are plain
# expressions, and a notebook only renders the *last* expression of a cell,
# so they must be displayed explicitly or their results are thrown away.
# `display` is the built-in provided by the IPython/Jupyter kernel.
df.info()
display(df.isna().sum())             # missing values per column
display(df.describe(include='all'))  # summary stats for every column

# -------------------------------
# 🧹 Data Cleaning
# -------------------------------
# Text labels (lower-cased) that count as a positive flag.
_TRUTHY_LABELS = {'true', 't', 'yes', 'y'}


def _to_flag(column):
    """Convert a flag column to a plain boolean Series.

    A bare ``astype(bool)`` maps NaN and every non-empty string (including
    "False" and "0") to True. This helper keeps the ``astype(bool)`` result
    for clean boolean/numeric data and handles the remaining cases:

    * boolean dtype  -> missing values become False
    * numeric dtype  -> missing values become False, non-zero is True
    * anything else  -> compared as trimmed, lower-cased text against
      ``_TRUTHY_LABELS``; numeric-looking text is True when non-zero

    Parameters
    ----------
    column : pandas.Series
        Raw flag values.

    Returns
    -------
    pandas.Series
        Boolean Series aligned with ``column``.
    """
    if pd.api.types.is_bool_dtype(column):
        return column.fillna(False).astype(bool)
    if pd.api.types.is_numeric_dtype(column):
        return column.fillna(0).astype(bool)
    text = column.astype(str).str.strip().str.lower()
    # Unparseable text (including the 'nan' produced for missing values)
    # becomes NaN here and is then treated as 0, i.e. False.
    as_number = pd.to_numeric(text, errors='coerce').fillna(0)
    return text.isin(_TRUTHY_LABELS) | as_number.ne(0)


# Unparseable dates become NaT and are removed by the dropna below.
df['date_sent'] = pd.to_datetime(df['date_sent'], errors='coerce')
# Normalise labels so e.g. " email" and "EMAIL" collapse to "Email".
df['campaign_type'] = df['campaign_type'].str.strip().str.title()
df['clicked'] = _to_flag(df['clicked'])
df['converted'] = _to_flag(df['converted'])

# Drop rows missing essential fields
df = df.dropna(subset=['customer_id', 'date_sent'])

# -------------------------------
# 🧠 Feature Engineering
# -------------------------------
# Weekday & month
df['day_sent'] = df['date_sent'].dt.day_name()
df['month_sent'] = df['date_sent'].dt.to_period("M").astype(str)

# Campaign success logic
# Built column-wise instead of with a row-wise apply: start every row at
# 'No Action', then overwrite in increasing priority so that 'Converted'
# wins over 'Clicked, No Conversion'. astype(bool) reproduces the truthiness
# test the per-row version applied. Unlike apply(axis=1), this also yields a
# proper (empty) column when the frame has no rows.
_was_clicked = df['clicked'].astype(bool)
_was_converted = df['converted'].astype(bool)
_outcome = pd.Series('No Action', index=df.index)
_outcome = _outcome.mask(_was_clicked, 'Clicked, No Conversion')
_outcome = _outcome.mask(_was_converted, 'Converted')
df['campaign_result'] = _outcome

# Join with customer metadata
customers_df = load_csv("customers_clean.csv")

# Keep one row per customer in the lookup. With a left join, any duplicated
# customer_id on the right side would silently duplicate campaign rows and
# inflate every count and rate computed afterwards.
_customer_lookup = (
    customers_df[['customer_id', 'region', 'gender']]
    .drop_duplicates(subset='customer_id')
)
_rows_before_join = len(df)
df = df.merge(_customer_lookup, on='customer_id', how='left')
assert len(df) == _rows_before_join, "customer join changed the number of campaign rows"

# -------------------------------
# 📊 Exploratory Data Analysis
# -------------------------------

# Overall campaign outcomes
# Bars are ordered by frequency (value_counts sorts descending).
outcome_order = df['campaign_result'].value_counts().index
outcome_ax = sns.countplot(data=df, y='campaign_result', order=outcome_order)
outcome_ax.set_title("Campaign Outcome Breakdown")
plt.show()

# By campaign type
# One group of bars per campaign type, split by outcome.
type_ax = sns.countplot(data=df, y='campaign_type', hue='campaign_result')
type_ax.set_title("Campaign Performance by Type")
type_ax.legend(loc='upper right')
plt.show()

# Click-through and conversion by region
# Mean of the clicked/converted flags per region, best-converting first.
rate_columns = ['clicked', 'converted']
region_summary = (
    df.groupby('region')[rate_columns]
    .mean()
    .sort_values(by='converted', ascending=False)
)
region_ax = region_summary.plot.bar(figsize=(10, 6), title="Campaign Metrics by Region")
region_ax.set_ylabel("Rate")
plt.xticks(rotation=45)
plt.show()

# Time trends
# Collapse the send timestamp to a calendar date and average the converted
# flag per day.
# NOTE(review): the helper 'date' column stays on df, which is later passed
# to save_csv — confirm it is meant to be part of the saved file.
df['date'] = df['date_sent'].dt.date
daily_conv = df.groupby('date')['converted'].mean()
trend_ax = daily_conv.plot(figsize=(10, 4), title="Daily Conversion Rate Trend")
trend_ax.set_ylabel("Conversion Rate")
plt.show()

# -------------------------------
# 💾 Save Cleaned File
# -------------------------------
# Hand the processed frame to the project's save_csv helper.
clean_filename = "campaigns_clean.csv"
save_csv(df, clean_filename)