## Get Packages

In [2]:
import pandas as pd
import numpy as np
import sys
import itertools
import matplotlib.pyplot as plt

# Display option: None removes the cap on how many columns pandas shows,
# so wide DataFrames are rendered without column truncation.
pd.set_option('display.max_columns', None)

In [3]:
from importlib import reload
import mab

# Re-execute the already-imported module so edits to it are picked up
# without restarting the kernel (reload returns the same module object).
mab = reload(mab)

# Smoke test: print whatever mab_test() returns to confirm the module loaded.
smoke_message = mab.mab_test()
print(smoke_message)

Good - Abraham


## Create Synthetic Segment

In [5]:
# Number of synthetic rows requested for the first audience.
row_count = 100000

# Segmentation columns used for grouping throughout the notebook.
seg_cols = [
    'gender',
    'age',
    'income',
    'buyer',
    'region',
    'area',
    'parent',
]

# Segment labels; they appear to be listed in the same order as seg_cols
# (gender, age, income, buyer, region, area, parent) -- confirm against mab.
segments = [
    "Male", "Female",
    "Young", "Middle Age", "Older",
    "Low Income", "Medium Income", "High Income",
    "Prior Buyer", "First-Time Buyer",
    "North", "West", "South", "East",
    "Urban", "Suburban",
    "Non-Parent", "Parent",
]

combo_weights = mab.create_all_combo_weights()
Segment_df = mab.create_synthetic_sample(row_count=row_count)

# Summarise the generated weights per variant: mean, number of rows, and spread.
weights_by_variant = combo_weights.groupby('variant_assignment')
weights_by_variant.agg({'combos_weights': ['mean', 'count', 'std']})

Unnamed: 0_level_0,combos_weights,combos_weights,combos_weights
Unnamed: 0_level_1,mean,count,std
variant_assignment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Control,0.028581,576,0.022639
Variant A,0.149877,576,0.053187
Variant B,0.154397,576,0.054886
Variant C,0.150554,576,0.054881


## Create Outcome Metric

In [10]:
############
## Step 1 ##
############

## Full random assignment for Variant
# assign_variant is left at its default here (later steps pass
# assign_variant=False after assigning variants themselves).
# print_diagnostics=True presumably produces the summary table shown as output.
Segment_df = mab.add_conversion_rates(
    df=Segment_df,
    seg_cols=seg_cols,
    segments=segments,
    all_combos_weights=combo_weights,
    print_diagnostics=True,
)

Unnamed: 0_level_0,converted,converted,converted
Unnamed: 0_level_1,mean,count,sum
was_modified,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
False,0.028008,4106,115
True,0.217396,95894,20847


In [12]:
#############
## Step #2 ##
#############

# Take the Target group and build an optimization score to determine how ads should be allocated.
# For each variant, copy `converted` into a variant-specific column, but only on
# the rows assigned to that variant (other rows are left untouched -- NaN when
# the column is newly created).
variant_perf_cols = {
    'Variant A': 'Variant_a_performance',
    'Variant B': 'Variant_b_performance',
    'Variant C': 'Variant_c_performance',
}
for variant_label, perf_col in variant_perf_cols.items():
    is_variant = Segment_df['variant_assignment'] == variant_label
    Segment_df.loc[is_variant, perf_col] = Segment_df['converted']

## Performance Scores all interactions
# Mean of each variant column per combination of all seg_cols; the list-style
# agg creates a two-level column index, which droplevel flattens again.
perf_scores_all_interactions = (
    Segment_df
    .groupby(seg_cols)
    .agg({perf_col: ['mean'] for perf_col in variant_perf_cols.values()})
    .reset_index()
    .droplevel(1, axis=1)
)

In [14]:
###################
## Next Audience ##
###################
# Draw a fresh 300,000-row synthetic audience and hand it straight to the
# optimizer together with the performance scores computed in Step 2.
Segment_df_step2 = mab.assignment_with_optimization(
    df=mab.create_synthetic_sample(300000),
    prior_performance_scores=perf_scores_all_interactions,
    seg_cols=seg_cols,
    method='max',
    opt_target_size=0.20,
    learning_weight=2,
)

In [16]:
## Create performance score
# Variants were already assigned by the optimization step, so
# assign_variant=False is passed here.
Segment_df_step2 = mab.add_conversion_rates(
    df=Segment_df_step2,
    seg_cols=seg_cols,
    segments=segments,
    all_combos_weights=combo_weights,
    print_diagnostics=True,
    assign_variant=False,
)

# Conversion statistics per (target_control, core_membership) pair;
# dropna=False keeps the groups whose core_membership is missing.
conversion_stats = ['min', 'mean', 'max', 'count', 'std']
Segment_df_step2.groupby(['target_control', 'core_membership'], dropna=False).agg({'converted': conversion_stats})

Unnamed: 0_level_0,converted,converted,converted
Unnamed: 0_level_1,mean,count,sum
was_modified,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
False,0.031635,14920,472
True,0.230907,285080,65827


Unnamed: 0_level_0,Unnamed: 1_level_0,converted,converted,converted,converted,converted
Unnamed: 0_level_1,Unnamed: 1_level_1,min,mean,max,count,std
target_control,core_membership,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
control,,0,0.031635,1,14920,0.175033
target_opt,No,0,0.269541,1,51417,0.443726
target_opt,Yes,0,0.351027,1,8421,0.47732
target_org,,0,0.217597,1,225242,0.412613


In [18]:
###############################
## Let's turn it into a loop ##
###############################

## Run `steps` rounds. Round 0 is the baseline (variants assigned inside
## add_conversion_rates); every later round i passes opt_target_size=i/100 to
## the optimizer, so that value grows by 0.01 per round.
overall_performance = []
overall_target_performance = []
org_target_performance = []
opt_target_performance = []

steps = 5
row_count = 200000

# Variant label -> column that receives that variant's `converted` values.
variant_perf_cols = {
    'Variant A': 'Variant_a_performance',
    'Variant B': 'Variant_b_performance',
    'Variant C': 'Variant_c_performance',
}


def _performance_scores(df, rows=None):
    """Tag per-variant conversions on `df` and average them per segment combination.

    Parameters
    ----------
    df : DataFrame with 'variant_assignment', 'converted' and the seg_cols columns.
        Modified in place: one performance column per variant is written.
    rows : optional boolean mask aligned with `df`; when given, only these rows
        are included in the per-segment means.

    Returns
    -------
    DataFrame with one row per observed seg_cols combination and the mean of
    each variant performance column, with flat (single-level) column labels.
    """
    for variant_label, perf_col in variant_perf_cols.items():
        df.loc[df['variant_assignment'] == variant_label, perf_col] = df['converted']

    scored = df if rows is None else df[rows]
    # List-style agg yields a two-level column index; droplevel flattens it.
    return (
        scored
        .groupby(seg_cols)
        .agg({perf_col: ['mean'] for perf_col in variant_perf_cols.values()})
        .reset_index()
        .droplevel(1, axis=1)
    )


for i in range(steps):

    if i == 0:

        Segment_df = mab.create_synthetic_sample(row_count=row_count)
        Segment_df = mab.add_conversion_rates(df=Segment_df, seg_cols=seg_cols, segments=segments,
                                              all_combos_weights=combo_weights, print_diagnostics=False)

        ## Store results
        # No optimized group exists yet, so the organic and overall target
        # figures are the same number and the optimized slot is None.
        baseline_target_mean = Segment_df.loc[Segment_df['variant_assignment'] != 'Control', 'converted'].mean()
        overall_performance.append(Segment_df['converted'].mean())
        org_target_performance.append(baseline_target_mean)
        overall_target_performance.append(baseline_target_mean)
        opt_target_performance.append(None)

        ### For Next Iteration ###
        # Take the Target group and build an optimization score to determine how ads should be allocated
        perf_scores_all_interactions = _performance_scores(Segment_df)

    else:

        Segment_df_step2 = mab.create_synthetic_sample(row_count=row_count)
        Segment_df_step2 = mab.assignment_with_optimization(df=Segment_df_step2,
                                                            prior_performance_scores=perf_scores_all_interactions,
                                                            seg_cols=seg_cols, method='max',
                                                            opt_target_size=i/100, learning_weight=2)
        Segment_df_step2 = mab.add_conversion_rates(df=Segment_df_step2, seg_cols=seg_cols, segments=segments,
                                                    all_combos_weights=combo_weights, print_diagnostics=False,
                                                    assign_variant=False)
        Segment_df_step2 = Segment_df_step2.reset_index(drop=True)

        # Masks are built after reset_index so they align with the final frame.
        is_target = Segment_df_step2['target_control'] != 'control'
        is_organic = Segment_df_step2['target_control'] == 'target_org'
        is_optimized = Segment_df_step2['target_control'] == 'target_opt'

        ## Store Results
        overall_performance.append(Segment_df_step2['converted'].mean())
        overall_target_performance.append(Segment_df_step2.loc[is_target, 'converted'].mean())
        org_target_performance.append(Segment_df_step2.loc[is_organic, 'converted'].mean())
        opt_target_performance.append(Segment_df_step2.loc[is_optimized, 'converted'].mean())

        ## For Next Iteration ##
        # Scores for the next round are learned from non-control rows only.
        perf_scores_all_interactions = _performance_scores(Segment_df_step2, rows=is_target)

    # Report 1-based progress without mutating the loop variable.
    mab.progress_bar(i + 1, steps, 40)




Progress: [--------------------------------------->] 100%


In [None]:
# One column per tracked metric, one row per loop round (same order as appended).
result_columns = {
    'Overall Performance': overall_performance,
    'Performance on All Target': overall_target_performance,
    'Performance on Organic Target': org_target_performance,
    'Performance on Optimized Target': opt_target_performance,
}
final_results = pd.DataFrame(result_columns)

# Written relative to the current working directory; the index is included.
final_results.to_csv('Final Results.csv')

In [173]:
### Create Table of Assignments
# Row filters on target_control. Each uses `!=`, so rows labelled 'control'
# pass both filters along with the remaining target group.
not_organic = Segment_df_step2['target_control'] != 'target_org'
not_optimized = Segment_df_step2['target_control'] != 'target_opt'

seg_data = Segment_df_step2[not_organic]
name = 'Optimized'

# NOTE(review): the variable names are inverted relative to the table labels --
# org_table holds the 'Optimized' table and opt_table the 'Organic' one.
# Later cells rely on these names as-is, so they are kept unchanged here.
org_table = mab.get_variant_assignment_counts(
    df=Segment_df_step2[not_organic],
    table_name='Optimized',
    seg_cols=seg_cols,
)
opt_table = mab.get_variant_assignment_counts(
    df=Segment_df_step2[not_optimized],
    table_name='Organic',
    seg_cols=seg_cols,
)

# Side-by-side comparison of both tables.
pd.concat([org_table, opt_table], axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Optimized,Optimized,Optimized,Organic,Organic,Organic
Unnamed: 0_level_1,Unnamed: 1_level_1,Variant A,Variant B,Variant C,Variant A,Variant B,Variant C
gender,Female,0.359272,0.345124,0.295604,0.334754,0.331934,0.333311
gender,Male,0.247493,0.339447,0.413059,0.332184,0.334184,0.333632
age,Middle Age,0.334626,0.378441,0.286933,0.331136,0.332881,0.335983
age,Older,0.312847,0.290625,0.396528,0.334502,0.332725,0.332772
age,Young,0.258887,0.363601,0.377512,0.334705,0.333583,0.331712
income,High Income,0.258973,0.337438,0.403589,0.33191,0.333563,0.334527
income,Low Income,0.360777,0.323495,0.315728,0.332959,0.332159,0.334882
income,Medium Income,0.292395,0.365779,0.341825,0.335577,0.333394,0.331029
buyer,First-Time Buyer,0.311575,0.388498,0.299927,0.332973,0.332013,0.335014
buyer,Prior Buyer,0.292919,0.293683,0.413398,0.333986,0.334108,0.331905


In [171]:
# Standalone view of the table labelled 'Optimized'
# (rows whose target_control is anything other than 'target_org').
mab.get_variant_assignment_counts(
    df=Segment_df_step2.loc[Segment_df_step2['target_control'] != 'target_org'],
    table_name='Optimized',
    seg_cols=seg_cols,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Optimized,Optimized,Optimized
Unnamed: 0_level_1,Unnamed: 1_level_1,Variant A,Variant B,Variant C
gender,Female,0.359272,0.345124,0.295604
gender,Male,0.247493,0.339447,0.413059
age,Middle Age,0.334626,0.378441,0.286933
age,Older,0.312847,0.290625,0.396528
age,Young,0.258887,0.363601,0.377512
income,High Income,0.258973,0.337438,0.403589
income,Low Income,0.360777,0.323495,0.315728
income,Medium Income,0.292395,0.365779,0.341825
buyer,First-Time Buyer,0.311575,0.388498,0.299927
buyer,Prior Buyer,0.292919,0.293683,0.413398


In [177]:
# Scratch inspection: show the first column label of opt_table
# (a tuple, since the table has two-level column labels).
opt_table.columns[0]

('Organic', 'Variant A')

In [70]:
# Row-wise total of the columns at positions 1-3.
# NOTE(review): seg_counts is not defined in any visible cell -- presumably
# left over from a removed cell; define it or drop this check.
seg_counts.iloc[:, 1:4].sum(axis='columns')


0    3958
1    4089
dtype: int64

In [98]:
# Scratch inspection: display seg_counts.
# NOTE(review): seg_counts is not defined in any visible cell -- presumably
# left over from a removed cell; confirm it still exists on a fresh kernel.
seg_counts

Unnamed: 0,gender,Variant_a_performance,Variant_b_performance,Variant_c_performance
0,Female,0.359272,0.345124,0.295604
1,Male,0.247493,0.339447,0.413059


In [183]:
# Scratch check of the progress-message wording; independent of the analysis cells.
for i in range(10):
    print(f"On iteration {i + 1} out of {10}")

On iteration 1 out of 10
On iteration 2 out of 10
On iteration 3 out of 10
On iteration 4 out of 10
On iteration 5 out of 10
On iteration 6 out of 10
On iteration 7 out of 10
On iteration 8 out of 10
On iteration 9 out of 10
On iteration 10 out of 10


In [205]:
# Header cells (<th>) are centred through a table-level CSS rule;
# data cells are centred through set_properties.
centered_headers = [
    {'selector': 'th', 'props': [('text-align', 'center')]},
]

# Whole-percent formatting with a blue gradient shading the cell backgrounds.
(
    org_table.style
    .format('{:,.0%}')
    .background_gradient(cmap='Blues')
    .set_properties(**{'text-align': 'center'})
    .set_table_styles(centered_headers)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Optimized,Optimized,Optimized
Unnamed: 0_level_1,Unnamed: 1_level_1,Variant A,Variant B,Variant C
gender,Female,36%,35%,30%
gender,Male,25%,34%,41%
age,Middle Age,33%,38%,29%
age,Older,31%,29%,40%
age,Young,26%,36%,38%
income,High Income,26%,34%,40%
income,Low Income,36%,32%,32%
income,Medium Income,29%,37%,34%
buyer,First-Time Buyer,31%,39%,30%
buyer,Prior Buyer,29%,29%,41%
