In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy.stats import beta
import plotly.express as px
from datetime import datetime

import warnings
# Suppress every warning category (Warning is the base class of all of them).
# NOTE(review): this also hides deprecation and dtype warnings from pandas/numpy;
# consider narrowing the category if a specific warning is the target.
warnings.filterwarnings('ignore', category=Warning)

In [5]:
# Location of the raw export and the highest brand rank kept for the analysis.
DATA_PATH = 'data/input/Conversion_Metrics_By_Type - Sheet2.csv'
MAX_RANK = 50

df = pd.read_csv(DATA_PATH)
# Convert source_date to datetime so it can be compared against a Timestamp cutoff.
df['source_date'] = pd.to_datetime(df['source_date'])
# Keep only rows whose rank is within the top MAX_RANK.
df = df[df['rank'] <= MAX_RANK]
# Fixed seed so the preview is identical on every Restart & Run All.
df.sample(5, random_state=0)

Unnamed: 0,source_date,month,brand,event_classification,customer_type,conv_count,ad_spend,rank
9808,2024-03-29,3,Oars + Alps,display,returning,1,43.0,48
6373,2024-03-09,3,Vegamour,display,new,2,100.0,10
6931,2024-03-12,3,Ursa Major,display,returning,3,29.94,27
6360,2024-03-09,3,Caden Lane,click,new,2,40.0,1
3462,2024-02-21,2,Snuggle Me Organic,display,returning,1,10.0,23


In [12]:
def calculate_customer_metrics(input_df, date_cutoff='2024-02-20'):
    """Compare each brand's new-customer share before and after a cutoff date.

    For every brand, the share of conversions and of ad spend attributed to
    rows with ``customer_type == 'new'`` is computed separately for rows dated
    strictly before ``date_cutoff`` and for rows dated on or after it, together
    with the after-minus-before difference.

    A brand that has rows in a period but none of them from new customers gets
    a share of 0 for that period. A brand with no rows at all in a period gets
    NaN for that period (and therefore a NaN delta).

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain the columns ``brand``, ``customer_type``, ``conv_count``,
        ``ad_spend``, ``source_date`` (comparable to a ``pd.Timestamp``) and
        ``rank``. The frame is not modified.
    date_cutoff : str or pandas.Timestamp, default '2024-02-20'
        Boundary between the "before" (``source_date < cutoff``) and "after"
        (``source_date >= cutoff``) periods.

    Returns
    -------
    pandas.DataFrame
        One row per brand with the columns ``brand``, ``new_conv_pct_before``,
        ``new_conv_pct_after``, ``delta_conv_pct``, ``new_ad_spend_pct_before``,
        ``new_ad_spend_pct_after``, ``delta_ad_spend_pct`` and ``rank``, sorted
        by ``rank``.
    """
    cutoff = pd.Timestamp(date_cutoff)
    value_cols = ['conv_count', 'ad_spend']

    def new_customer_share(data, suffix):
        """Per-brand new-customer share of conversions and ad spend in `data`."""
        totals = data.groupby('brand')[value_cols].sum()
        # Align on every brand present in the period so that brands without
        # any new-customer rows contribute 0 instead of disappearing.
        new_totals = (
            data[data['customer_type'] == 'new']
            .groupby('brand')[value_cols]
            .sum()
            .reindex(totals.index, fill_value=0)
        )
        share = new_totals / totals
        share.columns = [f'new_conv_pct_{suffix}', f'new_ad_spend_pct_{suffix}']
        return share

    # Explicit comparisons on both sides: rows with a missing date fall into
    # neither period.
    before_pct = new_customer_share(input_df[input_df['source_date'] < cutoff], 'before')
    after_pct = new_customer_share(input_df[input_df['source_date'] >= cutoff], 'after')

    # Outer join keeps brands that only appear in one period; the resulting
    # index is sorted by brand.
    combined = before_pct.join(after_pct, how='outer').rename_axis('brand')

    # Calculate deltas (after minus before)
    combined['delta_conv_pct'] = combined['new_conv_pct_after'] - combined['new_conv_pct_before']
    combined['delta_ad_spend_pct'] = combined['new_ad_spend_pct_after'] - combined['new_ad_spend_pct_before']

    final_df = combined.reset_index()[['brand', 'new_conv_pct_before', 'new_conv_pct_after', 'delta_conv_pct',
                                       'new_ad_spend_pct_before', 'new_ad_spend_pct_after', 'delta_ad_spend_pct']]

    # One rank per brand, taken from the first row in which the brand appears.
    # NOTE(review): assumes rank is constant per brand — confirm against the data.
    rank_df = input_df.drop_duplicates(subset='brand')[['brand', 'rank']]

    # Attach rank to the metrics by brand and order the result by it
    final_df = final_df.merge(rank_df, on='brand')

    return final_df.sort_values('rank')

# Compute the per-brand before/after metrics for `df`, the rank-filtered frame loaded above
calculate_customer_metrics(df)

Unnamed: 0,brand,new_conv_pct_before,new_conv_pct_after,delta_conv_pct,new_ad_spend_pct_before,new_ad_spend_pct_after,delta_ad_spend_pct,rank
6,Caden Lane,0.411145,0.419664,0.00852,0.517168,0.556287,0.03912,1
8,Caraway,0.416667,0.372642,-0.044025,0.532319,0.488085,-0.044235,2
39,True Classic,0.8,0.804348,0.004348,0.981723,0.948718,-0.033005,3
21,ILIA,,0.228739,,,0.228739,,4
5,Bodily,0.412556,0.360215,-0.052341,0.650111,0.590903,-0.059208,5
35,Sol de Janeiro,0.419729,0.409046,-0.010683,0.601575,0.580645,-0.02093,6
32,Nanit,0.263736,0.353414,0.089677,0.396694,0.492997,0.096303,7
12,Dagne Dover,0.471154,0.40708,-0.064074,0.544444,0.489362,-0.055083,8
26,Liquid I.V.,0.56044,0.683849,0.123409,0.953271,0.961353,0.008082,9
45,Vegamour,0.425197,0.489177,0.063981,0.783745,0.904,0.120255,10
