# Set-up

>[Set-up](#scrollTo=5SzClYKmTPmR)

>[Load Data](#scrollTo=_5nYMts5qn2p)

>[Final output, if no budget given](#scrollTo=ALmjOdcxTPmX)

>[Optimization](#scrollTo=HFeOwROkou2N)



In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

In [2]:
!pip install pulp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
# install pulp
from pulp import *

In [4]:
# check current path
import os
print(os.getcwd())


# Load the Drive helper and mount
from google.colab import drive
# This will prompt for authorization.
drive.mount('/content/drive/')

path_gdrive = '/content/drive/Shareddrives/Enova/2023_submission_package/data'
os.chdir(path_gdrive)
print(os.getcwd())

'''
path_gdrive = '/Users/anthony/Desktop/Enova/data'
os.chdir(path_gdrive)
print(os.getcwd())'''

/content
Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/Shareddrives/Enova/2023_submission_package/data


"\npath_gdrive = '/Users/anthony/Desktop/Enova/data'\nos.chdir(path_gdrive)\nprint(os.getcwd())"

# Load Data

In [5]:
## Set up the data and random seed

# get validation
validation = pd.read_csv('optimization.csv') 
validation.head()

Unnamed: 0,promotion_type_suggest,ltv,conv_ind,probs_bronze,probs_silver,probs_gold
0,,0.0,0,0.102068,0.389531,0.707727
1,bronze,789.221262,1,0.986791,0.997621,0.999372
2,,0.0,0,0.029707,0.146658,0.394747
3,gold,2674.527593,1,0.573009,0.882809,0.966202
4,,0.0,0,0.181862,0.555121,0.82564


In [7]:
validation.shape

(10000, 6)

Since our work is almost 90% done. The rest of work is to:
- calculate profit: ltv - promotion cost
- make decision on promotion approval -- because sometimes the ltv is lower than the promotion cost

In [8]:
# get data
df = validation.copy()

# calculate profit
mapping = {'gold': 1400, 'silver': 1100, 'bronze': 500, 'none': 0}
df['promotion_type_suggest'] = df['promotion_type_suggest'].fillna('none')
df['cost'] = df['promotion_type_suggest'].map(mapping)
df['profit'] = df['ltv'] - df['cost']

# approve those with profit > 0
df['approval'] = df['profit'] > 0.0

# Final output, if no budget given

In [9]:
df_output = df.copy()

# remove 'none'
df_output['promotion_type_suggest'] = df['promotion_type_suggest'].replace('none', np.nan)

# preview
#df.head(5)

total_profit = df[df['approval'] == True]['profit'].sum()
print(f'Total profit generated from promotion: {total_profit}')

Total profit generated from promotion: 3414508.03474005


In [10]:
# adjust
df_output['approval'] = df_output['approval'].apply(lambda x: None if x == False else x)
df_output['promotion_type'] = df_output['promotion_type_suggest'] * df_output['approval']

# drop columns
df_output = df_output.drop(columns=['cost', 'approval', 'probs_bronze', 'probs_gold', 'probs_silver', 'promotion_type_suggest'])

In [11]:
df_output

Unnamed: 0,ltv,conv_ind,profit,promotion_type
0,0.000000,0,0.000000,
1,789.221262,1,289.221262,bronze
2,0.000000,0,0.000000,
3,2674.527593,1,1274.527593,gold
4,0.000000,0,0.000000,
...,...,...,...,...
9995,0.000000,0,0.000000,
9996,0.000000,0,0.000000,
9997,2538.689969,1,1138.689969,gold
9998,0.000000,0,0.000000,


In [12]:
# sanity check
total_profit = df_output[df_output['promotion_type'].notna()]['profit'].sum()
print(f'Total profit generated from promotion: {total_profit}')

# export to csv
df_output.to_csv('submission_1_no_constraint.csv', index=False)

Total profit generated from promotion: 3414508.03474005


# Optimization

In real-world, the company might not have unlimited budget for promotions. That is why we need a budget constrain and do the optimization within.

Recall that the `promotion_type_suggest` is made by finding the minimal possible values of conversion probabilities that is higher than our threshold probability. The goal is to find the least possible promotion while making sure the users will still have high chance (in our case is > 90%) to subscribe. So, we won't worry about the MaxMin complexity among different promotion suggestions. Instead, we can simply focus on maximing profit while having a upper bond for total promotion cost

Let's use 50,000 as the budget. This can be changed according to future demands.

In [13]:
# get data
df = validation.copy()

# calculate profit
mapping = {'gold': 1400, 'silver': 1100, 'bronze': 500, 'none': 0}
df['promotion_type_suggest'] = df['promotion_type_suggest'].fillna('none')
df['cost'] = df['promotion_type_suggest'].map(mapping)
df['profit'] = df['ltv'] - df['cost']

# set target column
df['approval'] = None

In [14]:
df.head()

Unnamed: 0,promotion_type_suggest,ltv,conv_ind,probs_bronze,probs_silver,probs_gold,cost,profit,approval
0,none,0.0,0,0.102068,0.389531,0.707727,0,0.0,
1,bronze,789.221262,1,0.986791,0.997621,0.999372,500,289.221262,
2,none,0.0,0,0.029707,0.146658,0.394747,0,0.0,
3,gold,2674.527593,1,0.573009,0.882809,0.966202,1400,1274.527593,
4,none,0.0,0,0.181862,0.555121,0.82564,0,0.0,


In [15]:
# Define the data for the problem as a DataFrame
prob = LpProblem("Promotion decision problem", sense=LpMaximize)

# Define the data and budget
df_lp = df.copy() # save a copy of df
df_lp = df_lp[['cost', 'profit']]
budget = 500000

# Define the decision variables
x = LpVariable.dicts("user", df_lp.index, lowBound=0, cat='Binary')

# Define the objective function
prob += sum([df_lp.loc[p, 'profit'] * x[p] for p in df_lp.index])

# Define the constraints
prob += sum([df_lp.loc[p, 'cost'] * x[p] for p in df_lp.index]) <= budget

# Solve the problem
prob.solve()


# Print the results
print("Status: ", prob.status)
print("Optimal Solution:")
#for p in df_lp.index:
#    print("{}: {}".format(p, x[p].varValue))
print("Total Profit: {}".format(prob.objective.value()))



Status:  1
Optimal Solution:
Total Profit: 1781865.1721916397


In [16]:
approval = []
for p in df_lp.index:
    decision = x[p].varValue
    if x[p].varValue == None:
        decision = 0.0
        approval.append(decision)
    else:
        approval.append(decision)

# check if the opmization is doing right
pd.Series(approval).value_counts()

0.0    9006
1.0     994
dtype: int64

In [17]:
df['approval'] = pd.Series(approval)
df.head(5)

Unnamed: 0,promotion_type_suggest,ltv,conv_ind,probs_bronze,probs_silver,probs_gold,cost,profit,approval
0,none,0.0,0,0.102068,0.389531,0.707727,0,0.0,0.0
1,bronze,789.221262,1,0.986791,0.997621,0.999372,500,289.221262,0.0
2,none,0.0,0,0.029707,0.146658,0.394747,0,0.0,0.0
3,gold,2674.527593,1,0.573009,0.882809,0.966202,1400,1274.527593,0.0
4,none,0.0,0,0.181862,0.555121,0.82564,0,0.0,0.0


In [18]:
df_output2 = df.copy()

# remove 'none' in suggestion
df_output2['promotion_type_suggest'] = df_output2['promotion_type_suggest'].replace('none', np.nan)
# adjust: convert binary to boolean, then remove False as None
df_output2['approval'] = df_output2['approval'].replace({0.0: False, 1.0: True})
df_output2['approval'] = df_output2['approval'].apply(lambda x: None if x == False else x)
df_output2['promotion_type'] = df_output2['promotion_type_suggest'] * df_output2['approval']

# drop columns
df_output2 = df_output2.drop(columns=['cost', 'approval', 'probs_bronze', 'probs_gold', 'probs_silver', 'promotion_type_suggest'])

In [19]:
# sanity check
#df_output2.head()
df_output2[df_output2['promotion_type'].notna()].head()

Unnamed: 0,ltv,conv_ind,profit,promotion_type
8,2213.363453,1,1713.363453,bronze
15,1986.264107,1,1486.264107,bronze
17,1609.183832,1,1109.183832,bronze
20,3135.470329,1,2635.470329,bronze
21,1796.056726,1,1296.056726,bronze


In [20]:
# export to csv
df_output2.to_csv('submission_2_optimized.csv', index=False)