# Marketing Campaign Effectiveness Analysis  
## Notebook 02: Feature Engineering with NumPy

**Purpose:**  
Create derived variables such as age groups and income bands to support segmentation-based marketing analysis.

In [1]:
import pandas as pd
import numpy as np


In [None]:
import os
os.listdir("dataset")
df = pd.read_csv("dataset/Dataset_Ads.csv")
df.head()

Unnamed: 0,Age,Gender,Income,Location,Ad Type,Ad Topic,Ad Placement,Clicks,Click Time,Conversion Rate,CTR
0,61,Male,35717.43,Urban,Banner,Travel,Social Media,3,2024-01-18 20:45:56.898459,0.0981,0.0737
1,41,Male,47453.25,Rural,Video,Travel,Search Engine,5,2023-04-24 20:45:56.898459,0.0937,0.0592
2,49,Female,68126.35,Rural,Text,Food,Social Media,4,2024-02-24 20:45:56.898459,0.1912,0.0563
3,68,Female,64585.73,Suburban,Text,Health,Website,6,2023-12-13 20:45:56.898459,0.1122,0.0232
4,63,Male,21109.4,Urban,Native,Fashion,Search Engine,5,2023-07-02 20:45:56.898459,0.1426,0.0539


In [15]:
age_bins = [0, 24, 34, 44, 54, 64, np.inf]
age_labels = ["18–24", "25–34", "35–44", "45–54", "55–64", "65+"]

df["Age_Group"] = pd.cut(
    df["Age"],
    bins=age_bins,
    labels=age_labels,
    right=True
)
df["Age_Group"].value_counts()


Age_Group
25–34    2539
35–44    2461
18–24    2460
45–54    1560
55–64     660
65+       212
Name: count, dtype: int64

In [17]:
income_bins = [0, 30000, 60000, 90000, np.inf]
income_labels = ["Low", "Middle", "Upper-Middle", "High"]

df["Income_Band"] = pd.cut(
    df["Income"],
    bins=income_bins,
    labels=income_labels
)
df["Income_Band"].value_counts()

Income_Band
Middle          5374
Upper-Middle    2845
Low             1492
High             219
Name: count, dtype: int64

In [19]:
# Are we getting clicks without conversions?
df["High_CTR_Low_Conversion"] = np.where(
    (df["CTR"] > df["CTR"].median()) &
    (df["Conversion Rate"] < df["Conversion Rate"].median()),
    1,
    0
)
df["High_CTR_Low_Conversion"].value_counts()


High_CTR_Low_Conversion
0    7485
1    2515
Name: count, dtype: int64

In [None]:
df["High_CTR_Low_Conversion"].value_counts()

High_CTR_Low_Conversion
0    7485
1    2515
Name: count, dtype: int64

In [21]:
df["High_CTR_High_Conversion"] = np.where(
    (df["CTR"] > df["CTR"].median()) &
    (df["Conversion Rate"] > df["Conversion Rate"].median()),
    1,
    0
)


In [24]:
df[
    ["Age", "Age_Group", "Income", "Income_Band",
     "CTR", "Conversion Rate",
     "High_CTR_Low_Conversion", "High_CTR_High_Conversion"]
].head(10)
df.to_csv("dataset/feature_engineered_ads_data.csv", index=False)


### Notebook Summary

- Created age-based customer segments
- Categorized users into income bands
- Engineered engagement quality flags
- Prepared dataset for exploratory and performance analysis
