In [1]:
import pandas as pd
import numpy as np

import os
import pathlib

from sklearn.metrics import confusion_matrix

In [2]:
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [3]:
# Resolve the current working directory to an absolute path and make it the
# process working directory, so the relative paths used below are anchored to it.
repo_path = pathlib.Path(".").resolve()
os.chdir(repo_path)

In [4]:
# Load the campaign data; later cells read its 'split', 'Response' and
# 'Predictions' columns.
modeling_data = pd.read_csv(filepath_or_buffer="Data/campaign_data_w_predictions.csv")

##### Defining some global variables

In [5]:
# Campaign-level constants shared by the calculations below.
total_cost = 6_720_000
n_customers = 2_240
rev_accepted = 3_674_000
# Revenue attributed to each customer whose Response label is 1.
rev_per_accepted = rev_accepted / modeling_data["Response"].value_counts().loc[1]
# Cost attributed to each contacted customer.
cost_per_customer = total_cost / n_customers
# Conversion around 15%

##### Building the test confusion matrix and creating variables for each value in it (i.e. True Positives, False Positives, etc.)

In [6]:
# Keep only the rows flagged as the test split; compute the mask once.
is_test_row = modeling_data["split"] == "test"
y_test = modeling_data.loc[is_test_row, "Response"]
y_pred = modeling_data.loc[is_test_row, "Predictions"]
n_test_customers = y_test.shape[0]
# Rows index the actual label, columns the predicted label.
cf = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cf)

[[370   6]
 [ 53  13]]


In [7]:
# Unpack the 2x2 matrix row by row: the actual-0 row holds (TN, FP),
# the actual-1 row holds (FN, TP).
(test_tn, test_fp), (test_fn, test_tp) = cf

### Checking the Conversion Rate
##### original campaign vs campaign under proposed model

In [8]:
# Share of responders in the whole test split vs. share of responders among
# the customers the model flags (TP / (TP + FP)), both as rounded percentages.
baseline_pct = round(y_test.value_counts(normalize=True)[1] * 100)
model_pct = round(test_tp * 100 / (test_tp + test_fp))
print("The proposed model have the potential of increasing the campaign success from \033[1m~{}%\033[0m to \033[1m~{}%\033[0m".
      format(baseline_pct, model_pct))

The proposed model have the potential of increasing the campaign success from [1m~15%[0m to [1m~68%[0m


### Original Sample Campaign Results
##### for the customers in test split

In [9]:
# Everyone in the test split is contacted; only Response == 1 rows bring revenue.
original_cost = n_test_customers * cost_per_customer
original_accepted = y_test.value_counts()[1]
original_revenue = rev_per_accepted * original_accepted
print("Campaign Cost: {}\n".format(original_cost))
print("Offer Accepted by: {}\n".format(original_accepted))
print("Revenue: {}\n".format(original_revenue))
print("Loss: {}\n".format(original_cost - original_revenue))

Campaign Cost: 1326000.0

Offer Accepted by: 66

Revenue: 732580.0604229607

Loss: 593419.9395770393



### Campaign Results with the Proposed Model
##### for the customers in test split

In [10]:
# Only predicted positives (TP + FP) are contacted; only TPs bring revenue.
model_cost = (test_tp + test_fp) * cost_per_customer
model_revenue = rev_per_accepted * test_tp
print("Campaign Cost: {}\n".format(model_cost))
print("Offer Accepted by: {}\n".format(test_tp))
print("Revenue: {}\n".format(model_revenue))
print("Profit: {}\n".format(model_revenue - model_cost))
# Predicted negatives (TN + FN) are not contacted; FNs are the responders missed.
print("Cost Cut: {}\n".format((test_tn + test_fn) * cost_per_customer))
print("Revenue Lost: {}\n".format(test_fn * rev_per_accepted))

Campaign Cost: 57000.0

Offer Accepted by: 13

Revenue: 144296.07250755286

Profit: 87296.07250755286

Cost Cut: 1269000.0

Revenue Lost: 588283.9879154079



We will now extrapolate the test confusion matrix to n customers and look at the resulting campaign figures.

### Extrapolating Model Results to n Customers
##### Obtaining actual population data to predict on, and then running these calculations on it, would be highly desirable
The following calculations assume that the cost of running the campaign increases linearly with the size of the target audience. Perhaps a different relationship/function could be modeled here.

In [11]:
# Predict campaign results for n customers scored by the model, of which only
# the predicted positives (true positives + false positives) are targeted.

def campaign_predictions(n, cf, unit_revenue=None, unit_cost=None):
    """Extrapolate a test confusion matrix to ``n`` customers and print the campaign results.

    The matrix is scaled so that its cells sum to (approximately) ``n`` while
    keeping the test proportions; each cell is rounded to the nearest integer
    (ties go to the even integer).

    Parameters
    ----------
    n : int
        Total number of customers the model is run on.
    cf : array-like of shape (2, 2)
        Test confusion matrix laid out as ``[[TN, FP], [FN, TP]]``.
    unit_revenue : float, optional
        Revenue per customer who accepts the offer. Defaults to the
        notebook-level ``rev_per_accepted``.
    unit_cost : float, optional
        Cost of targeting one customer. Defaults to the notebook-level
        ``cost_per_customer``.

    Returns
    -------
    numpy.ndarray
        The extrapolated 2x2 integer confusion matrix.

    Raises
    ------
    ValueError
        If ``cf`` is not 2x2 or contains no observations.
    """
    cf = np.asarray(cf)
    if cf.shape != (2, 2):
        raise ValueError("cf must be a 2x2 confusion matrix, got shape {}".format(cf.shape))
    total = cf.sum()
    if total == 0:
        raise ValueError("cf must contain at least one observation")

    # Read the notebook-level values only when the caller did not supply them;
    # reading a module-level name needs no `global` declaration.
    if unit_revenue is None:
        unit_revenue = rev_per_accepted
    if unit_cost is None:
        unit_cost = cost_per_customer

    # np.rint rounds half to even, like the built-in round().
    cf_extrap = np.rint(cf * n / total).astype(int)
    (n_tn, n_fp), (n_fn, n_tp) = cf_extrap

    campaign_cost = (n_tp + n_fp) * unit_cost
    revenue = unit_revenue * n_tp

    print("Customers Reached {}\n".format(n))
    print("Campaign Cost: {}\n".format(campaign_cost))
    print("Offer Accepted by: {}\n".format(n_tp))
    print("Revenue: {}\n".format(revenue))
    print("Profit: {}\n".format(revenue - campaign_cost))
    print("Cost Saved: {}\n".format((n_tn + n_fn) * unit_cost))
    print("Revenue Lost: {}\n".format(n_fn * unit_revenue))

    return cf_extrap
    

##### Campaign predictions if this model was run on customers before running the original campaign
Not the best practice but for the lack of more data, this is what it is.

In [12]:
# Extrapolate the test confusion matrix to n_customers.
campaign_predictions(n_customers, cf)

Customers Reached 2240

Campaign Cost: 288000.0

Offer Accepted by: 66

Revenue: 732580.0604229607

Profit: 444580.06042296067

Cost Saved: 6432000.0

Revenue Lost: 2985818.7311178246



array([[1875,   30],
       [ 269,   66]])

##### Campaign predictions if as many customers were reached as in the original campaign, but selected on the basis of model results.
This also means that the campaign costs the same as the original campaign.

In [13]:
# Fraction of test customers the model flags as targets (TP + FP over all cells).
perc_predicted_targets = (test_tp + test_fp) / cf.sum()
# Number of customers to score so that the flagged share is about n_customers.
n_total = round(n_customers / perc_predicted_targets)

In [14]:
# Extrapolate the test confusion matrix to n_total scored customers.
campaign_predictions(n_total, cf)

Customers Reached 52109

Campaign Cost: 6720000.0

Offer Accepted by: 1533

Revenue: 17015836.85800604

Profit: 10295836.858006041

Cost Saved: 149607000.0

Revenue Lost: 69350912.38670695



array([[43621,   707],
       [ 6248,  1533]])