In [6]:
import pandas as pd

# Enable pandas' copy-on-write mode for the whole session. With it on,
# assigning a column on a DataFrame derived from another one (e.g. a filtered
# subset) no longer emits SettingWithCopyWarning.
pd.options.mode.copy_on_write = True


You are a Product Analyst on the **Amazon** Sponsored Advertising team investigating sponsored product ad engagement across electronics categories. Your team wants to understand variations in click-through rate (CTR) in order to optimize targeted advertising strategies.



In [7]:
# Read the product dimension table and the ad-performance fact table
dim_product = pd.read_csv('dim_product.csv')
fct_ad_performance = pd.read_csv('fct_ad_performance.csv')

# Preview the first rows of each table, each under its own heading
print("Dimension Product DataFrame:", dim_product.head(), sep="\n")

print("\nFact Ad Performance DataFrame:", fct_ad_performance.head(), sep="\n")


Dimension Product DataFrame:
   product_id       product_name        product_category
0           1           Smart TV        Home Electronics
1           2   Wireless Earbuds   Electronics & Gadgets
2           3       Refrigerator  Electronics Appliances
3           4  Bestselling Novel                   Books
4           5     Designer Jeans                 Fashion

Fact Ad Performance DataFrame:
   ad_id  clicks  product_id  impressions recorded_date
0    101      10           1          200    2024-10-02
1    102      15           1          300    2024-10-12
2    103      20           2          250    2024-10-05
3    104      18           2          230    2024-10-20
4    105       5           3          150    2024-10-15


### Question 1 of 3

What is the average click-through rate (CTR) for sponsored product ads for each product category that contains the substring 'Electronics' in its name during October 2024? This analysis will help determine which electronics-related categories are performing optimally.

In [8]:
# Q1: average CTR per 'Electronics' category for October 2024.
# Uses the `dim_product` and `fct_ad_performance` frames loaded above.

# Keep products whose category name contains 'Electronics' (case-insensitive).
# na=False treats a missing category as "no match"; without it the mask would
# hold NaN and the boolean indexing below would raise.
electronics_products = dim_product[
    dim_product['product_category'].str.contains('Electronics', case=False, na=False)
]

# Attach the category to each ad row. The inner join drops ads that belong to
# non-electronics products.
merged = pd.merge(
    fct_ad_performance,
    electronics_products[['product_id', 'product_category']],
    on='product_id',
    how='inner'
)

# Keep October 2024 ads that were actually shown.
# - The upper bound is exclusive (< '2024-11-01') so a record stamped later on
#   31 October (e.g. '2024-10-31 18:00:00') is kept; '<= 2024-10-31' drops it.
# - CTR is undefined with zero impressions; excluding those rows keeps inf/NaN
#   out of the category means.
october_ads = merged[
    (merged['recorded_date'] >= '2024-10-01') &
    (merged['recorded_date'] < '2024-11-01') &
    (merged['impressions'] > 0)
]

# Per-ad CTR = clicks / impressions. .assign builds a new frame rather than
# writing a column into the filtered slice.
october_ads = october_ads.assign(
    ctr=october_ads['clicks'] / october_ads['impressions']
)

# Unweighted mean of the per-ad CTRs within each category
result = october_ads.groupby('product_category')['ctr'].mean().reset_index()

print("Average CTR for Electronics Categories in October 2024:")
print(result)


Average CTR for Electronics Categories in October 2024:
          product_category       ctr
0    Electronics & Gadgets  0.079130
1  Electronics Accessories  0.100000
2   Electronics Appliances  0.050000
3      Electronics Gadgets  0.072500
4         Home Electronics  0.066667


### Question 2 of 3

Which product categories have a CTR greater than the aggregated overall average CTR for sponsored product ads during October 2024? This analysis will identify high-performing categories for further optimization.

In [9]:
# Q2: categories whose average CTR beats the overall October 2024 average.
# Uses the `dim_product` and `fct_ad_performance` frames loaded above.

# Keep October 2024 ads that were actually shown.
# - The upper bound is exclusive (< '2024-11-01') so a record stamped later on
#   31 October (e.g. '2024-10-31 18:00:00') is kept; '<= 2024-10-31' drops it.
# - CTR is undefined with zero impressions; excluding those rows keeps inf/NaN
#   out of the averages below.
october_ads = fct_ad_performance[
    (fct_ad_performance['recorded_date'] >= '2024-10-01') &
    (fct_ad_performance['recorded_date'] < '2024-11-01') &
    (fct_ad_performance['impressions'] > 0)
]

# Attach the product category to each ad row. The inner join drops ads whose
# product_id has no entry in dim_product.
merged = pd.merge(
    october_ads,
    dim_product[['product_id', 'product_category']],
    on='product_id',
    how='inner'
)

# Per-ad CTR = clicks / impressions
merged['ctr'] = merged['clicks'] / merged['impressions']

# Overall benchmark: mean of the per-ad CTRs, so every ad counts equally
# regardless of how many impressions it received.
overall_avg_ctr = merged['ctr'].mean()

print(f"Overall Average CTR for October 2024: {overall_avg_ctr:.4f}")

# Mean per-ad CTR within each category
category_avg_ctr = merged.groupby('product_category')['ctr'].mean().reset_index()

# Categories strictly above the overall benchmark
high_performing = category_avg_ctr[category_avg_ctr['ctr'] > overall_avg_ctr]

print("\nHigh Performing Categories with CTR greater than Overall Average:")
print(high_performing)


Overall Average CTR for October 2024: 0.0680

High Performing Categories with CTR greater than Overall Average:
          product_category      ctr
1                    Books  0.10000
2    Electronics & Gadgets  0.07913
3  Electronics Accessories  0.10000
5      Electronics Gadgets  0.07250
8                  Kitchen  0.07000


### Question 3 of 3

For the product categories identified in the previous question, what is the percentage difference between their CTR and the overall average CTR for October 2024? This analysis will quantify the performance gap to recommend specific categories for targeted advertising optimization.

In [10]:
# Q3: for categories above the overall October 2024 average CTR, how far above
# it (in percent) they are. The benchmark and category averages are recomputed
# here from `dim_product` and `fct_ad_performance`, so the cell is self-contained.

# Keep October 2024 ads that were actually shown.
# - The upper bound is exclusive (< '2024-11-01') so a record stamped later on
#   31 October (e.g. '2024-10-31 18:00:00') is kept; '<= 2024-10-31' drops it.
# - CTR is undefined with zero impressions; excluding those rows keeps inf/NaN
#   out of the averages below.
october_ads = fct_ad_performance[
    (fct_ad_performance['recorded_date'] >= '2024-10-01') &
    (fct_ad_performance['recorded_date'] < '2024-11-01') &
    (fct_ad_performance['impressions'] > 0)
]

# Attach the product category to each ad row. The inner join drops ads whose
# product_id has no entry in dim_product.
merged = pd.merge(
    october_ads,
    dim_product[['product_id', 'product_category']],
    on='product_id',
    how='inner'
)

# Per-ad CTR = clicks / impressions
merged['ctr'] = merged['clicks'] / merged['impressions']

# Overall benchmark: mean of the per-ad CTRs, so every ad counts equally
# regardless of how many impressions it received.
overall_avg_ctr = merged['ctr'].mean()

# Mean per-ad CTR within each category
category_avg_ctr = merged.groupby('product_category')['ctr'].mean().reset_index()

# Categories strictly above the overall benchmark
high_performing = category_avg_ctr[category_avg_ctr['ctr'] > overall_avg_ctr]

# Relative gap to the benchmark, in percent: (category - overall) / overall * 100.
# .assign builds a new frame rather than writing a column into the filtered slice.
high_performing = high_performing.assign(
    pct_diff_vs_overall=(
        (high_performing['ctr'] - overall_avg_ctr) / overall_avg_ctr * 100
    )
)

print("High Performing Categories with CTR greater than Overall Average:")
print(high_performing[['product_category', 'ctr', 'pct_diff_vs_overall']])


High Performing Categories with CTR greater than Overall Average:
          product_category      ctr  pct_diff_vs_overall
1                    Books  0.10000            46.961679
2    Electronics & Gadgets  0.07913            16.291416
3  Electronics Accessories  0.10000            46.961679
5      Electronics Gadgets  0.07250             6.547217
8                  Kitchen  0.07000             2.873175
