In [1]:
####################################
### Re-constructing FFF MMM Code ###
####################################

####################################
### Import various modules
####################################

import pandas as pd
import os
import numpy as np
import gc
from sklearn.linear_model import LinearRegression
from scipy.optimize import minimize
# path = os.getcwd()


In [2]:

####################################
### Import Raw Data File ###
#################################### 

df1 = pd.read_csv ('Sample_MMM_Data.csv')

In [12]:

####################################
### Set up Box Dummy Variables ###
####################################

df_temp1 = df1[["Box_Season"]]
df_temp2 = pd.get_dummies(df_temp1)

df_temp3 = pd.concat([df1, df_temp2], axis=1, join="inner")
df1 = df_temp3

del [[df_temp1, df_temp2, df_temp3]]
gc.collect()
df_temp1 = pd.DataFrame()
df_temp2 = pd.DataFrame()
df_temp3 = pd.DataFrame()

#####################################################
### Transformations (80% Daily Retention) on FBIG ###
#####################################################

df_temp1 = df1[["FBIG_Impressions"]]
no_rows = len(df_temp1.index)

df_temp2 = pd.DataFrame(np.zeros((no_rows,1)),dtype = float)
df_temp2.columns = ['FBIG_Imp_AdStock']

for i in range (0,no_rows):
   if i == 0:
        df_temp2.iloc[0,0] = df_temp1.iloc[0,0]
   else:
        df_temp2.iloc[i,0] = (df_temp2.iloc[i-1,0] * 0.8) + df_temp1.iloc[i,0]

df_temp3 = pd.concat([df1, df_temp2], axis=1, join="inner")
df1 = df_temp3

del [[df_temp1, df_temp2, df_temp3]]
gc.collect()
df_temp1 = pd.DataFrame()
df_temp2 = pd.DataFrame()

#####################################################
## Transformations (70% Daily Retention) on YT Imp ##
#####################################################

df_temp1 = df1[["YT_Impressions"]]
no_rows = len(df_temp1.index)

df_temp2 = pd.DataFrame(np.zeros((no_rows,1)),dtype = float)
df_temp2.columns = ['YT_Imp_AdStock']

for i in range (0,no_rows):
   if i == 0:
        df_temp2.iloc[0,0] = df_temp1.iloc[0,0]
   else:
        df_temp2.iloc[i,0] = (df_temp2.iloc[i-1,0] * 0.7) + df_temp1.iloc[i,0]

df_temp3 = pd.concat([df1, df_temp2], axis=1, join="inner")
df1 = df_temp3

del [[df_temp1, df_temp2, df_temp3]]
gc.collect()
df_temp1 = pd.DataFrame()
df_temp2 = pd.DataFrame()

#####################################################
## Create constant, add to modeling DataFrame #######
#####################################################
    
df_temp2 = pd.DataFrame(np.ones((no_rows,1)),dtype = float)
df_temp2.columns = ['Constant']

df_temp3 = pd.concat([df1, df_temp2], axis=1, join="inner")
df1 = df_temp3

del [[df_temp1, df_temp2, df_temp3]]
gc.collect()
df_temp1 = pd.DataFrame()
df_temp2 = pd.DataFrame()

#####################################################
## Run Regression, various variables against Volume #
#####################################################

y_vars = df1[["Volume"]]
x_vars = df1[["Promo_40%_Off", 
              "Promo_50%_Off",
              "Box_Season_Box_1",
              "Box_Season_Box_2",
              "Box_Season_Box_3",
              "Box_Season_Box_4",
              "FBIG_Imp_AdStock",
              "YT_Imp_AdStock",
              "Influencer_CC_Redemptions"]]

regression_results = LinearRegression().fit(x_vars, y_vars)
print(regression_results.score(x_vars, y_vars))
print(regression_results.coef_)

#####################################################
## Contribution percentages from all variables ######
#####################################################

coeff_array = regression_results.coef_
constant_coeff = regression_results.intercept_

sum_volume = y_vars['Volume'].sum()
sum_volume_from_indep_vars = np.sum([x_vars.values], axis=1)
sum_volume_from_indep_vars = sum_volume_from_indep_vars * coeff_array
sum_volume_from_constant = no_rows * constant_coeff

contrib_coeff_array = sum_volume_from_indep_vars / sum_volume
contrib_constant_coeff = sum_volume_from_constant / sum_volume

#####################################################
## Constrained Regression (Minimization Problem) ####
#####################################################

def constrained_reg_function(coeffs):
    no_rows = len(y_vars.index) 
    sum_errors_squared = 0
    sum_errors = 0
    for i in range (0,no_rows):
        pred_volume = (coeffs[0] * x_vars.iloc[i,0]) + (coeffs[1] * x_vars.iloc[i,1]) + (coeffs[2] * x_vars.iloc[i,2]) + (coeffs[3] * x_vars.iloc[i,3]) + (coeffs[4] * x_vars.iloc[i,4]) + (coeffs[5] * x_vars.iloc[i,5]) + (coeffs[6] * x_vars.iloc[i,6]) + (coeffs[7] * x_vars.iloc[i,7]) + (coeffs[8] * x_vars.iloc[i,8]) + (coeffs[9])
        error_squared = (pred_volume - y_vars.iloc[i,0]) ** 2
        sum_errors = (pred_volume - y_vars.iloc[i,0]) + sum_errors
        sum_errors_squared = error_squared + pred_volume
    total_value = (sum_errors ** 8) + sum_errors_squared
    return total_value

constrained_reg_coeff = [0,0,0,0,0,0,0,0,0,0]

constrained_reg_result = minimize(constrained_reg_function, constrained_reg_coeff,
                                  method = 'Nelder-Mead',
                                  bounds = ((-100,100), (-100,100), (-100,100), (-100,100), (-100,100), (-100,100), (-100,100), (-100,100), (-100,100), (-100,100)))
print (constrained_reg_result)


0.9847903205573091
[[ 1.09528721e+03  8.18904543e+02 -8.08693562e+08 -1.88396225e+08
  -1.06793472e+08  2.91946121e+09 -1.17569132e+09 -8.62203230e+08
  -2.94975246e+09 -5.77540693e+08  1.98182096e+08  1.03198032e+08
   1.00158446e+09  2.00201145e+09 -3.44717455e+09  1.45035945e+09
   6.06759769e+08  3.89245968e+08  3.89245968e+08  3.89245968e+08
  -3.70563226e+07 -3.70563226e+07 -3.70458806e+07 -3.70458806e+07
  -3.70563226e+07 -3.70563226e+07  4.98153629e+08  3.61333773e+09
   3.61333773e+09  3.61333773e+09 -5.66908341e+09 -5.66908341e+09
  -7.58818739e+07 -2.03183557e+08 -2.03183557e+08 -2.03183557e+08
   3.42716273e+08  3.42716273e+08 -3.84924887e-01]]


  warn('Method %s cannot handle constraints nor bounds.' % method,


 final_simplex: (array([[  294.59330787, -2285.57685804,  1038.71321265, -1794.53112164,
         -130.01314347,   183.04459624,    22.85567243,  -553.04113856,
          671.69994251,  4923.50000943],
       [  294.59328543, -2285.57693023,  1038.71326696, -1794.53109505,
         -130.01308373,   183.04461112,    22.85570333,  -553.04112228,
          671.69994969,  4923.49999156],
       [  294.59331854, -2285.57682012,  1038.71318488, -1794.53113267,
         -130.0131726 ,   183.04458875,    22.85565727,  -553.04114592,
          671.69993824,  4923.50001271],
       [  294.59331347, -2285.57683969,  1038.71319892, -1794.53112812,
         -130.01315844,   183.04459249,    22.85566468,  -553.04114259,
          671.69994064,  4923.5000134 ],
       [  294.59329125, -2285.57690486,  1038.71324929, -1794.53109893,
         -130.01310053,   183.04460649,    22.85569445,  -553.04112579,
          671.69994623,  4923.49998648],
       [  294.5933016 , -2285.57688419,  1038.71323106, -1

In [13]:
constrained_reg_result.x

array([  294.59330787, -2285.57685804,  1038.71321265, -1794.53112164,
        -130.01314347,   183.04459624,    22.85567243,  -553.04113856,
         671.69994251,  4923.50000943])