#### import

In [3]:
import pandas as pd
import numpy as np

#### read & load dataset

In [4]:
marketing = pd.read_csv(
    filepath_or_buffer='marketing.csv',
    parse_dates=['date_served', 'date_subscribed', 'date_canceled']
)

In [5]:
marketing.head()

Unnamed: 0,user_id,date_served,marketing_channel,variant,converted,language_displayed,language_preferred,age_group,date_subscribed,date_canceled,subscribing_channel,is_retained
0,a100000029,2018-01-01,House Ads,personalization,True,English,English,0-18 years,2018-01-01,NaT,House Ads,True
1,a100000030,2018-01-01,House Ads,personalization,True,English,English,19-24 years,2018-01-01,NaT,House Ads,True
2,a100000031,2018-01-01,House Ads,personalization,True,English,English,24-30 years,2018-01-01,NaT,House Ads,True
3,a100000032,2018-01-01,House Ads,personalization,True,English,English,30-36 years,2018-01-01,NaT,House Ads,True
4,a100000033,2018-01-01,House Ads,personalization,True,English,English,36-45 years,2018-01-01,NaT,House Ads,True


In [6]:
marketing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10037 entries, 0 to 10036
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   user_id              10037 non-null  object        
 1   date_served          10021 non-null  datetime64[ns]
 2   marketing_channel    10022 non-null  object        
 3   variant              10037 non-null  object        
 4   converted            10022 non-null  object        
 5   language_displayed   10037 non-null  object        
 6   language_preferred   10037 non-null  object        
 7   age_group            10037 non-null  object        
 8   date_subscribed      1856 non-null   datetime64[ns]
 9   date_canceled        577 non-null    datetime64[ns]
 10  subscribing_channel  1856 non-null   object        
 11  is_retained          1856 non-null   object        
dtypes: datetime64[ns](3), object(9)
memory usage: 941.1+ KB


In [12]:
def conversion_rate(dataframe, column_names):
    """
    Doc String
    """
    # conversion rate = total_converted / total_users
    
    total_converted = dataframe[dataframe['converted'] == True].groupby(column_names)['user_id'].nunique()
    total_users = dataframe.groupby(column_names)['user_id'].nunique()
    
    conversion_rate = total_converted / total_users
    conversion_rate = round(conversion_rate*100, 2)
    
    # Fill missing values with 0
    conversion_rate = conversion_rate.fillna(0)
    
    # Unstack and convert to pd.DataFrame
    conversion_rate = conversion_rate.unstack(1)
    
    return conversion_rate

In [15]:
conversion_rate(marketing, ['variant', 'age_group'])

age_group,0-18 years,19-24 years,24-30 years,30-36 years,36-45 years,45-55 years,55+ years
variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
control,8.18,8.35,8.62,9.57,9.06,9.06,10.21
personalization,23.09,34.18,26.92,4.92,4.92,5.12,5.63


In [21]:
def retention_rate(dataframe, column_names):
    """
    Doc String
    """
    # retention rate = total_retained / total_subscribed
    
    total_retained = dataframe[dataframe['is_retained'] == True].groupby(column_names)['user_id'].nunique()
    total_converted = dataframe[dataframe['converted'] == True].groupby(column_names)['user_id'].nunique()
    
    retention_rate = (total_retained / total_converted)
    
    # Fill missing values with 0
    retention_rate = retention_rate.fillna(0)
    
    # Unstack and convert to pd.DataFrame
    retention_rate = retention_rate.unstack(1)
    
    return retention_rate

In [24]:
retention_rate(marketing, ['language_displayed', 'subscribing_channel'])

subscribing_channel,Email,Facebook,House Ads,Instagram,Push
language_displayed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Arabic,1.0,,0.571429,,
English,0.900901,0.687783,0.591603,0.681034,0.701299
German,0.853659,,0.5,,
Spanish,1.25,0.0,0.588235,0.0,


In [None]:
def plotting(dataframe):
    pass