<a href="https://colab.research.google.com/github/glaria/campaign-analysis/blob/main/marketing_groups.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from typing import List, Dict
import matplotlib.pyplot as plt
import seaborn as sns

class MarketingAnalyzer:
    """
    Target-vs-control campaign analysis on a customer-level DataFrame.

    The only column the code reads by fixed name is ``TGCG``, whose values
    ``'TARGET'`` and ``'CONTROL'`` split the customers into the two groups.
    Segment and KPI columns are chosen by the caller through
    ``set_analysis_columns``.
    """

    # Column layout of the frames returned by calculate_lift / analyze_segments.
    # Passing these explicitly keeps the layout stable even when there are no rows.
    _LIFT_COLUMNS = ['KPI', 'Target_Mean', 'Control_Mean', 'Lift_Percentage']
    _SEGMENT_COLUMNS = ['Segment_Column', 'Segment_Value', 'Sample_Size', 'Lift_Percentage']

    def __init__(self, data: pd.DataFrame):
        """
        Initialize the MarketingAnalyzer with campaign data

        Parameters:
        data (pd.DataFrame): DataFrame containing campaign data with columns:
            - CUSTOMERNUMBER: unique customer identifier (not read by this class)
            - TGCG: Target/Control group identifier ('TARGET' / 'CONTROL')
            - KPIs: performance metrics
            - Segmentation columns
        """
        # Work on a private copy so the caller's frame is never touched.
        self.data = data.copy()
        self.segment_columns = []
        self.kpi_columns = []

    def set_analysis_columns(self, segment_columns: List[str], kpi_columns: List[str]):
        """
        Set the columns to be used for segmentation and KPI analysis

        Parameters:
        segment_columns (list): Columns whose distinct values define segments
        kpi_columns (list): Columns holding the metrics to compare
        """
        # Copy the lists so later mutation by the caller cannot silently
        # change what this analyzer works on.
        self.segment_columns = list(segment_columns)
        self.kpi_columns = list(kpi_columns)

    def _select_segment(self, segment_def) -> pd.DataFrame:
        """Return the rows of self.data matching every criterion in segment_def."""
        subset = self.data
        for col, value in (segment_def or {}).items():
            if pd.api.types.is_scalar(value) and pd.isna(value):
                # `== NaN` never matches anything; a missing-value segment
                # has to be selected with isna().
                subset = subset[subset[col].isna()]
            else:
                subset = subset[subset[col] == value]
        return subset

    @staticmethod
    def _lift_percentage(target_mean, control_mean) -> float:
        """Relative difference of target over control in percent, NaN when undefined."""
        # An empty group (NaN mean) or a zero baseline has no meaningful
        # relative lift; report NaN instead of inf / a division warning.
        if pd.isna(target_mean) or pd.isna(control_mean) or control_mean == 0:
            return np.nan
        return ((target_mean - control_mean) / control_mean) * 100

    def _kpi_summary(self, target: pd.DataFrame, control: pd.DataFrame, kpi: str) -> dict:
        """Build one result row (means and lift) for a single KPI."""
        target_mean = target[kpi].mean()
        control_mean = control[kpi].mean()
        return {
            'KPI': kpi,
            'Target_Mean': target_mean,
            'Control_Mean': control_mean,
            'Lift_Percentage': self._lift_percentage(target_mean, control_mean)
        }

    def calculate_lift(self, segment_def: Dict[str, str] = None) -> pd.DataFrame:
        """
        Calculate lift for each KPI, optionally within a specific segment

        Parameters:
        segment_def (dict): Dictionary defining segment criteria (e.g., {'REGION': 'NORTH'}).
            All criteria must match. A missing value (None/NaN) selects the
            rows where that column is missing.

        Returns:
        pd.DataFrame: One row per configured KPI with columns KPI, Target_Mean,
            Control_Mean and Lift_Percentage. Lift_Percentage is NaN when the
            control mean is zero or either group is empty.
        """
        data = self._select_segment(segment_def)

        # Split once; the group membership does not depend on the KPI.
        target = data[data['TGCG'] == 'TARGET']
        control = data[data['TGCG'] == 'CONTROL']

        results = [self._kpi_summary(target, control, kpi) for kpi in self.kpi_columns]
        return pd.DataFrame(results, columns=self._LIFT_COLUMNS)

    def identify_marketing_groups(self, kpi: str, threshold_percentile: float = 50) -> pd.DataFrame:
        """
        Classify customers into the 4 marketing groups based on their performance

        Each row's KPI value is compared against two thresholds: the given
        percentile of the KPI among TARGET rows and the same percentile among
        CONTROL rows. The comparison is applied to every row regardless of
        the row's own group.

        NOTE(review): for a KPI whose two thresholds coincide (the notebook's
        recorded run on 'converted' labelled every row 'Sure Things'), the
        result collapses into a single group. Confirm this heuristic is what
        the analysis intends.

        Parameters:
        kpi (str): The KPI to use for classification
        threshold_percentile (float): Percentile to use as threshold (default: 50)

        Returns:
        pd.DataFrame: Copy of the data with an added 'Marketing_Group' column
        """
        in_target = self.data['TGCG'] == 'TARGET'
        in_control = self.data['TGCG'] == 'CONTROL'

        target_threshold = np.percentile(self.data.loc[in_target, kpi], threshold_percentile)
        control_threshold = np.percentile(self.data.loc[in_control, kpi], threshold_percentile)

        values = self.data[kpi]
        meets_target = values >= target_threshold
        meets_control = values >= control_threshold
        under_target = values < target_threshold
        under_control = values < control_threshold

        # Vectorized form of the row-by-row if/elif chain. Rows where no
        # comparison holds (e.g. a missing KPI value) fall through to the default.
        labels = np.select(
            [meets_target & meets_control,
             meets_target & under_control,
             under_target & meets_control],
            ['Sure Things', 'Persuadable', 'Sleeping Dogs'],
            default='Lost Causes'
        )

        result = self.data.copy()
        result['Marketing_Group'] = labels
        return result

    def analyze_segments(self, kpi: str) -> pd.DataFrame:
        """
        Analyze the effectiveness of each single-column segment

        For every configured segment column and every distinct value in it,
        computes the segment size and the KPI lift of target over control.

        Parameters:
        kpi (str): The KPI to analyze. It must be a column of the data; it
            does not have to be listed in kpi_columns.

        Returns:
        pd.DataFrame: Columns Segment_Column, Segment_Value, Sample_Size and
            Lift_Percentage, sorted by Lift_Percentage descending
            (undefined lifts last)
        """
        results = []

        for col in self.segment_columns:
            for segment_value in self.data[col].unique():
                segment = self._select_segment({col: segment_value})
                target = segment[segment['TGCG'] == 'TARGET']
                control = segment[segment['TGCG'] == 'CONTROL']

                # Only the requested KPI is evaluated.
                summary = self._kpi_summary(target, control, kpi)

                results.append({
                    'Segment_Column': col,
                    'Segment_Value': segment_value,
                    'Sample_Size': len(segment),
                    'Lift_Percentage': summary['Lift_Percentage']
                })

        segment_frame = pd.DataFrame(results, columns=self._SEGMENT_COLUMNS)
        return segment_frame.sort_values('Lift_Percentage', ascending=False)

    def plot_segment_performance(self, kpi: str):
        """
        Create a visualization of segment performance

        Draws one bar per segment value, coloured by the segment column it
        belongs to, and shows the figure.

        Parameters:
        kpi (str): The KPI to visualize
        """
        segment_analysis = self.analyze_segments(kpi)

        # Explicit figure/axes instead of the implicit pyplot state.
        fig, ax = plt.subplots(figsize=(12, 6))
        sns.barplot(data=segment_analysis,
                    x='Segment_Value',
                    y='Lift_Percentage',
                    hue='Segment_Column',
                    ax=ax)
        ax.set_title(f'Segment Performance - {kpi} Lift')
        ax.set_xlabel('Segment_Value')
        ax.set_ylabel('Lift_Percentage')
        ax.tick_params(axis='x', rotation=45)
        fig.tight_layout()
        plt.show()

In [4]:
# Colab-only step: upload the CSV into the runtime's working directory.
# Outside Colab the google.colab module is unavailable, so the upload is
# skipped and the file is expected to already sit next to the notebook.
try:
    from google.colab import files
except ImportError:
    files = None

uploaded = files.upload() if files is not None else {}

Saving kaggle_marketing_dataset.csv to kaggle_marketing_dataset.csv


In [7]:
# Load the campaign export (semicolon-delimited CSV) into a DataFrame.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, presumably an
# index written out with the file - confirm whether index_col=0 is wanted.
data = pd.read_csv(
    'kaggle_marketing_dataset.csv',
    delimiter=';',
)


In [8]:
# Peek at the first five rows to check the columns parsed as expected.
data.head(n=5)

Unnamed: 0,user id,converted,total ads,most ads day,most ads hour,TGCG
0,1069124,0,130,Monday,20,TARGET
1,1119715,0,93,Tuesday,22,TARGET
2,1144181,0,21,Tuesday,18,TARGET
3,1435133,0,355,Tuesday,10,TARGET
4,1015700,0,276,Friday,14,TARGET


In [9]:
# Columns to analyse: the metric(s) to compare and the columns to segment by.
kpi_columns = ['converted']
segment_columns = ['total ads', 'most ads day', 'most ads hour']

# Wrap the loaded data and register those columns on the analyzer.
analyzer = MarketingAnalyzer(data)
analyzer.set_analysis_columns(
    segment_columns=segment_columns,
    kpi_columns=kpi_columns,
)

In [10]:
# Lift of TARGET over CONTROL across the whole campaign (no segment filter).
overall_lift = analyzer.calculate_lift()
print("Overall Campaign Performance:", overall_lift, sep="\n")

Overall Campaign Performance:
         KPI  Target_Mean  Control_Mean  Lift_Percentage
0  converted     0.025547      0.017854        43.085064


In [12]:
# Label every customer with a marketing group, then count rows per label.
# NOTE(review): the recorded run puts all rows in 'Sure Things' - check
# whether 'converted' at the default percentile is a useful input here.
classified_data = analyzer.identify_marketing_groups('converted')
group_distribution = classified_data.loc[:, 'Marketing_Group'].value_counts()
print("\nMarketing Group Distribution:", group_distribution, sep="\n")


Marketing Group Distribution:
Marketing_Group
Sure Things    588101
Name: count, dtype: int64
