In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import time
from datetime import datetime
import itertools

# hyperparameters
num_epochs = 1000
learning_rate = 0.001
l2_regularization_lambda = 0.1

# preprocessing
data = pd.read_csv("crowdbabble_data_processed.csv")
data.columns = [str.lower(i.replace(' ', '_').replace('?', '')) for i in list(data.columns)]
data.start_date = [datetime.strptime(i, '%m/%d/%y').strftime("%j") for i in data.start_date]
data.end_date = [datetime.strptime(i, '%m/%d/%y').strftime("%j") for i in data.end_date]

explanatory_data = data[['channel_id', 'customer_segment_id', 'location',
                       'campaign_id', 'marketing_goal_id', 
                       'start_date', 'end_date', 'video_campaign']]

response_data = data[['cpm', 'cpc', 'cpa', 'cpv']].as_matrix()
encoded_explanatory_data = pd.get_dummies(explanatory_data, 
                                          columns=['channel_id', 'customer_segment_id', 'location',
                                                   'campaign_id', 'marketing_goal_id', 
                                                   'start_date', 'end_date'])
encoded_explanatory_data_matrix = encoded_explanatory_data.as_matrix()

x_train, x_test, y_train, y_test = train_test_split(encoded_explanatory_data, response_data, test_size = .3)
# x_data, x_test, y_data, y_test = train_test_split(encoded_explanatory_data, response_data, test_size = .3)
# x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size = 0.5)
# np.save("x_valid", x_valid)
# np.save("y_valid", y_valid)
np.save("x_train", x_train)
np.save("y_train", y_train)
np.save("x_test", x_test)
np.save("y_test", y_test)

# set up tensorflow graph
inputs = tf.placeholder(tf.float32, [None, x_train.shape[1]])
answers = tf.placeholder(tf.float32, [None, y_train.shape[1]])

l1 = tf.contrib.layers.fully_connected(inputs, 
                                      200,
                                      activation_fn=None)
# l1 = tf.contrib.layers.fully_connected(l1, 
#                                      10,
#                                      activation_fn=None)
output = tf.contrib.layers.fully_connected(l1, 
                                      y_train.shape[1],
                                      activation_fn=tf.nn.relu)
loss = tf.reduce_mean(tf.squared_difference(output, answers))
# L2 regularization
for v in tf.trainable_variables():
    if not 'bias' in v.name.lower():
        loss += l2_regularization_lambda * tf.nn.l2_loss(v)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

In [2]:
# x_train = np.load("x_train.npy")
# y_train = np.load("y_train.npy")
# x_test = np.load("x_test.npy")
# y_test = np.load("y_test.npy")
# x_valid = np.load("x_valid.npy")
# y_valid = np.load("y_valid.npy")

In [13]:
# train
start_time = time.time()
best_valid_loss = float("inf")
for epoch in range(num_epochs):
    epoch_loss, _ = sess.run((loss, optimizer), feed_dict = {inputs:x_train, answers:y_train})
#     if epoch % 5 == 0:
#         valid_loss = sess.run(loss, feed_dict = {inputs:x_valid, answers:y_valid})
#         if valid_loss < (best_valid_loss - 0.5):
#             best_valid_loss = valid_loss
#             save_path = saver.save(sess, "saved-models/approach_1_best_model.ckpt")
    if epoch % 50 == 0:
        print("Epoch %d: %.2f%% complete, %d mins, Avg loss: %.5f" % (epoch, 
                                                                    epoch*100.0/num_epochs,
                                                                    (time.time() - start_time)/60,
                                                                    epoch_loss))                      
print("Duration: %d mins" % int((time.time() - start_time)/60))

Epoch 0: 0.00% complete, 0 mins, Avg loss: 817.58539
Epoch 50: 5.00% complete, 0 mins, Avg loss: 566.03766
Epoch 100: 10.00% complete, 1 mins, Avg loss: 331.92496
Epoch 150: 15.00% complete, 1 mins, Avg loss: 254.06219
Epoch 200: 20.00% complete, 2 mins, Avg loss: 224.16624
Epoch 250: 25.00% complete, 2 mins, Avg loss: 204.41512
Epoch 300: 30.00% complete, 3 mins, Avg loss: 190.10347
Epoch 350: 35.00% complete, 4 mins, Avg loss: 179.63498
Epoch 400: 40.00% complete, 4 mins, Avg loss: 172.12650
Epoch 450: 45.00% complete, 5 mins, Avg loss: 167.01678
Epoch 500: 50.00% complete, 5 mins, Avg loss: 163.82997
Epoch 550: 55.00% complete, 6 mins, Avg loss: 161.97314
Epoch 600: 60.00% complete, 6 mins, Avg loss: 160.88419
Epoch 650: 65.00% complete, 7 mins, Avg loss: 160.19954
Epoch 700: 70.00% complete, 8 mins, Avg loss: 159.72446
Epoch 750: 75.00% complete, 8 mins, Avg loss: 159.35803
Epoch 800: 80.00% complete, 9 mins, Avg loss: 159.04944
Epoch 850: 85.00% complete, 9 mins, Avg loss: 158.774

In [14]:
# test
test_loss, prediction = sess.run((loss, output), feed_dict = {inputs:x_test, answers:y_test})
print("MSE: %.2f" % test_loss)

MSE: 158.89


No validation set
1) 50,10: 
2) 25,10: 
3) 200: 158.81, 158.288

In [228]:
marketing_goal_names = ['Brand Engagement', 'Lead Generation', 'Acquisition', 'Discount coupons', 'Local/other']

In [157]:
# come up with all possible combinations of 'channel_id', 'customer_segment_id', 'location'
def create_combinations(campaign_id, marketing_goal_id, video_campaign):
    combinations = list(itertools.product(range(10), range(20), range(199)))
    dataset = np.zeros((len(combinations), x_train.shape[1]))
    for idx, combination in enumerate(combinations):
        dataset[idx, 0] = video_campaign
        dataset[idx, 1 + combination[0]] = 1
        dataset[idx, 1 + 10 + combination[1]] = 1
        dataset[idx, 1 + 10 + 20 + combination[2]] = 1
        dataset[idx, 1 + 10 + 20 + 199 + campaign_id - 1] = 1
        dataset[idx, 1 + 10 + 20 + 199 + 4 + marketing_goal_id - 1] = 1
        # assuming dates don't have an effect on costs. just choose first date for start and first for end
        start_date_id = end_date_id = 0
        dataset[idx, 1 + 10 + 20 + 199 + 4 + 5 + start_date_id] = 1
        dataset[idx, 1 + 10 + 20 + 199 + 4 + 5 + 76 + end_date_id] = 1
    return(dataset)

In [238]:
def get_optimal_strategy(campaign_id, marketing_goal_id, video_campaign):
    combinations_dataset = create_combinations(campaign_id, marketing_goal_id, video_campaign)
    fake_answers = np.zeros((combinations_dataset.shape[0], y_test.shape[1]))
    prediction = sess.run(output, feed_dict = {inputs:combinations_dataset, answers:fake_answers})
    
    # See which combination gives you the lowest CPM
    best_combination_idx = np.argmin(prediction[:,0])
    predicted_cpm_of_best_combination = prediction[best_combination_idx][0]
    
    ## find the observation from the real dataset that has the exact same combination
    encoded_explanatory_data_matrix_without_dates = encoded_explanatory_data_matrix[:, 0:(1 + 10 + 20 + 199 + 4 + 5)]
    combinations_dataset_without_dates = combinations_dataset[:, 0:(1 + 10 + 20 + 199 + 4 + 5)]
    best_combination_from_combinations_dataset_without_dates = combinations_dataset_without_dates[best_combination_idx, :]
    
    real_observation_of_best_combination_idx = np.where((encoded_explanatory_data_matrix_without_dates == best_combination_from_combinations_dataset_without_dates).all(axis=1))[0]
    # if there is no actual observation to compare the best combination to, just use the predicted one
    if real_observation_of_best_combination_idx.size == 0:
        real_cpm_of_best_combination = predicted_cpm_of_best_combination
    # otherwise use the actual observation
    else:
        real_observation_of_best_combination = encoded_explanatory_data_matrix_without_dates[real_observation_of_best_combination_idx, :]
        # get its CPM
        real_cpm_of_best_combination = response_data[real_observation_of_best_combination_idx, 0]
    
    ## find other combinations in the real dataset to compare the best combination with
    column_ids_of_constants = [0] + range(1 + 10 + 20 + 199, 1 + 10 + 20 + 199 + 4, 1) + range(1 + 10 + 20 + 199 + 4, 1 + 10 + 20 + 199 + 4 + 5, 1)
    encoded_explanatory_data_matrix_only_constants = encoded_explanatory_data_matrix[:, column_ids_of_constants]
    best_combination_constants = best_combination_from_combinations_dataset_without_dates[column_ids_of_constants]
    # find observations from the real dataset that have combinations that are slightly different from that of the recommended combination
    real_observations_of_suboptimal_combinations_idx = np.where((encoded_explanatory_data_matrix_only_constants == best_combination_constants).all(axis=1))[0]
    # get their cpms
    cpms_of_suboptimal_combinations = response_data[real_observations_of_suboptimal_combinations_idx,0]

    # find out what % of the other combinations the recommended combination performs better than
    how_many_combinations_is_best_combination_better_than = np.mean(real_cpm_of_best_combination > cpms_of_suboptimal_combinations)
    # find how_much_better the_best_combination is than_other_combinations
    how_much_better_is_the_best_combination_than_other_combinations_in_percent = 100.0*(real_cpm_of_best_combination/np.mean(cpms_of_suboptimal_combinations) - 1)
    # get spread of cpm of suboptimal combinations
    cpm_spread_of_suboptimal_combinations = np.percentile(cpms_of_suboptimal_combinations, [100, 75, 50, 25, 0])

    # convert combination into human readable format
    optimal_channel_id = np.where(best_combination_from_combinations_dataset_without_dates[1:11] == 1)[0][0] + 1
    optimal_customer_segment_id = np.where(best_combination_from_combinations_dataset_without_dates[11:31] == 1)[0][0] + 1
    optimal_location = np.where(best_combination_from_combinations_dataset_without_dates[31:230] == 1)[0][0] + 1

    print("For a %s video campaign with a Campaign ID of %d, and a Marketing Goal of %s, the optimal combination is:\n\nChannel ID: %d\nCustomer Segment ID: %d\nLocation: %d\n\nThis combination would perform %.2f%% better than other approaches." %
         (('' if video_campaign else 'non'), 
          campaign_id, 
          marketing_goal_names[marketing_goal_id - 1], 
          optimal_channel_id, 
          optimal_customer_segment_id, 
          10000+optimal_location, 
          how_much_better_is_the_best_combination_than_other_combinations_in_percent))

In [239]:
get_optimal_strategy(3, 3, 0)

For a non video campaign with a Campaign ID of 3, and a Marketing Goal of Acquisition, the optimal combination is:

Channel ID: 7
Customer Segment ID: 17
Location: 10168

This combination would perform 62.87% better than other approaches.


In [2]:
# model doesn't perform much better than the mean
mean_prediction = np.ones((y_test.shape)) * np.mean(y_test)
np.mean((mean_prediction - y_test)**2)