In [1]:
import numpy as np
import pandas as pd
import nevergrad as ng
from scipy.optimize import minimize, lsq_linear
from sklearn.linear_model import LinearRegression

import plotly.graph_objects as go

from tqdm import tqdm
from datetime import datetime

import import_ipynb
from GenerateRandomMMMData import GenerateRandomMMMData
from GeometricDecay import GeometricDecay
from LogisticGrowth import LogisticGrowth, LogisticGrowthNoOffset
from Hill import Hill

importing Jupyter notebook from GenerateRandomMMMData.ipynb
importing Jupyter notebook from LogisticGrowth.ipynb
importing Jupyter notebook from Hill.ipynb
importing Jupyter notebook from GeometricDecay.ipynb


In [2]:
class GeometricDecayMinimizer:
    """Fit per-media transform parameters plus a bounded linear model.

    Every media column is passed through ``GeometricDecay(theta)`` and then
    through ``Hill(scale, offset)``.  The transformed media columns and the
    untouched ``other`` columns are regressed on ``target`` with
    ``scipy.optimize.lsq_linear``; media coefficients are constrained to be
    non-negative, coefficients of ``other`` columns are unconstrained.

    The parameter vector handed to the objective has ``3 * len(medias)``
    entries laid out as ``[thetas..., scales..., offsets...]``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Must contain every column named in ``medias`` and ``other``.
        A copy is stored, the caller's frame is not modified.
    medias : list of str
        Columns that get the GeometricDecay -> Hill transform.
    other : list of str
        Columns that enter the regression untransformed.
    target : array-like
        Response values, one per row of ``data_df`` (matched by position).
    """

    def __init__(self, data_df, medias, other, target):
        self.data_df = data_df.copy()
        self.other = other
        self.medias = medias
        self.target = target

    def to_minimize(self, *args):
        """Objective: mean squared relative error of the refitted linear model.

        ``args[0]`` is the parameter vector described in the class docstring.
        Side effects: stores ``args`` on ``self.args`` and the ``lsq_linear``
        result on ``self.model``, so both always describe the *most recent*
        call, which is not necessarily the optimum an optimizer reports.

        Rows whose target is exactly zero are left out of the average because
        the relative error is undefined there.
        """
        self.args = args
        medias = self.transform_media(self.data_df)

        columns = self.other + self.medias
        # Unconstrained coefficients for `other`, non-negative ones for media.
        lb = [-np.inf] * len(self.other) + [0] * len(self.medias)
        ub = [np.inf] * len(columns)

        self.model = lsq_linear(medias[columns], self.target, bounds=[lb, ub])

        # Work positionally, exactly like lsq_linear does with its inputs.
        fitted = self._linear_combination(medias).to_numpy()
        target = np.asarray(self.target)
        nonzero = target != 0
        rel_err = (fitted[nonzero] - target[nonzero]) / target[nonzero]
        return np.power(rel_err, 2.0).mean()

    def transform_media(self, df):
        """Return a copy of ``df`` with every media column transformed.

        Reads the parameter vector from ``self.args[0]``; ``self.args`` is set
        by ``to_minimize`` (or assigned directly by the caller).
        """
        n_medias = len(self.medias)
        params = self.args[0]
        thetas = params[:n_medias]
        scales = params[n_medias:2 * n_medias]
        offset = params[2 * n_medias:]

        medias = df.copy()
        for i, m in enumerate(self.medias):
            GD = GeometricDecay(thetas[i])
            LG = Hill(scales[i], offset[i])
            medias[m] = LG(GD(medias[m]))

        return medias

    def _linear_combination(self, medias):
        """Sum ``self.model.x[i] * medias[column_i]`` over other + media columns.

        Always returns a pandas Series carrying ``medias.index``.  The
        accumulator is a Series from the start so the result type does not
        depend on how ``ndarray += Series`` is resolved by the installed
        pandas/NumPy versions.
        """
        total = pd.Series(0.0, index=medias.index)
        for coef, column in zip(self.model.x, self.other + self.medias):
            total = total + coef * medias[column]
        return total

    def predict(self, df):
        """Predict the target for ``df`` with the current parameters.

        Requires ``self.args`` and ``self.model`` to be set (both are set by
        ``to_minimize``).  Returns a pandas Series indexed like ``df``.
        """
        medias = self.transform_media(df)
        return self._linear_combination(medias)

    def nevergrad_optimize(self, budget=100):
        """Minimize the objective with nevergrad's ``NGOpt``.

        The search space has ``3 * len(self.medias)`` dimensions so that the
        candidate vector matches the slicing done in ``transform_media``.
        ``budget`` is the number of objective evaluations.
        """
        optimizer = ng.optimizers.NGOpt(
            parametrization=3 * len(self.medias), budget=budget
        )
        recommendation = optimizer.minimize(self.to_minimize)
        return recommendation

    def scipy_minimize(self, **kwargs):
        """Forward ``kwargs`` (``x0``, ``bounds``, ...) to ``scipy.optimize.minimize``."""
        return minimize(self.to_minimize, **kwargs)

In [3]:
if __name__ == '__main__':
    """generate random data and get true values"""
    # Noise-free synthetic data; a constant column is added so the linear
    # model can fit an intercept.
    GR = GenerateRandomMMMData(noise_intensity=0.0, size=100)
    data_df = GR.get_data()
    data_df['intercept'] = 1

    # Ground truth exposed by the generator.
    coefs, params, thetas = GR.coefs, GR.params, GR.thetas
    adstocked, saturated = GR.adstocked_media, GR.saturated_media

    # Same layout as the optimizer's parameter vector:
    # all thetas, then all shapes, then all offsets.
    true_x0 = [
        *thetas.values,
        *(params[m]['shape'] for m in params.keys()),
        *(params[m]['offset'] for m in params.keys()),
    ]

In [4]:
if __name__ == '__main__':
    """run MMM many times and choose model that minimizes objective"""
    # One (theta, scale, offset) triple per media column, laid out as
    # [thetas..., scales..., offsets...] to match the minimizer's slicing.
    n_media = len(GR.media_columns)
    bounds = [(0, 1)] * n_media + [(0.1, 1.2)] * n_media + [(0, 10)] * n_media

    r_list = []
    t1 = datetime.now()
    for i in tqdm(range(10)):
        GDM = GeometricDecayMinimizer(data_df, GR.media_columns, GR.var_columns+['intercept'], data_df['sales'])

        # Random restart drawn inside the box, lower bound included.
        # (Seed np.random beforehand to make the restarts reproducible.)
        x0 = [np.random.uniform(lo, hi) for lo, hi in bounds]

        r = GDM.scipy_minimize(x0=x0, tol=1e-3, bounds=bounds, jac='3-point')

        # GDM.model belongs to the last objective evaluation, which may be a
        # finite-difference probe; re-evaluate at r.x so the stored betas
        # correspond to the reported optimum.
        GDM.to_minimize(r.x)
        r_list.append([r.fun, r.x, GDM.model.x])

    # Columns: 'fun' = objective value, 'x' = transform parameters,
    # 'x0' = linear-model coefficients (name kept because later cells read it).
    r_df = pd.DataFrame(
        np.asarray(r_list, dtype='object'),
        columns = ['fun', 'x', 'x0']
    )
    r_df = r_df.sort_values(by='fun', ignore_index=True)
    print(f"elapsed time = {datetime.now() - t1}")
    display(r_df.head())
    display(true_x0)

100%|██████████| 10/10 [00:20<00:00,  2.04s/it]

elapsed time = 0:00:20.421361





Unnamed: 0,fun,x,x0
0,0.00086,"[0.040269837194097546, 0.42771708835254446, 0....","[69.25227607183744, 66.83303752886678, -43.068..."
1,0.002648,"[0.03193370870796236, 0.5326890876651101, 0.30...","[69.24125329275694, 66.76852938398966, -42.994..."
2,0.003371,"[0.0, 0.5705340959098584, 0.6127824453544889, ...","[69.29852182195287, 66.7802042202969, -43.0375..."
3,0.003784,"[0.25696661569158047, 0.6025687404408224, 0.12...","[69.22242750589291, 66.72260575322122, -43.014..."
4,0.015521,"[0.10180306745849908, 0.7293162344558586, 0.68...","[69.02989913117425, 66.81643819364719, -43.177..."


[0.041475079576498874,
 0.3218287999702445,
 0.03711266449399442,
 0.4258099666132048,
 0.8517077209192668,
 0.5694790285599257,
 8.89827341447375,
 7.626320954451106,
 2.107645024687054]

In [5]:
if __name__ == '__main__':
    """show final minimization value"""
    # Objective values of the five best restarts (r_df is sorted by 'fun').
    top_fun = r_df.head()['fun']
    display(top_fun)

    """compare true values with values from MMM"""
    # Row 0 is the best restart; both vectors use the
    # [thetas, scales, offsets] layout.
    best_x = r_df.loc[0, 'x']
    display(best_x)
    display(true_x0)

0     0.00086
1    0.002648
2    0.003371
3    0.003784
4    0.015521
Name: fun, dtype: object

array([0.04026984, 0.42771709, 0.17412664, 0.10011946, 0.24206767,
       0.4282807 , 9.19545885, 8.06411794, 8.79341105])

[0.041475079576498874,
 0.3218287999702445,
 0.03711266449399442,
 0.4258099666132048,
 0.8517077209192668,
 0.5694790285599257,
 8.89827341447375,
 7.626320954451106,
 2.107645024687054]

In [8]:
if __name__ == '__main__':
    """plot best model"""
    # Load the best restart (row 0 of the sorted results) into the minimizer
    # left over from the fitting loop: 'x0' holds the linear coefficients,
    # 'x' the transform parameters.
    GDM.model.x = r_df.loc[0,'x0']
    GDM.args = [r_df.loc[0, 'x']]
    y1 = GDM.predict(data_df)
    y2 = data_df['sales']

    # Use the data frame's index for the x axis so the plot does not depend
    # on the prediction carrying an index of its own.
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x = data_df.index, y = y1, name = 'mmm', mode='lines')
    )
    fig.add_trace(
        go.Scatter(x = data_df.index, y = y2, name = 'data', mode='markers')
    )
    fig.update_layout(
        title='Best model vs. data',
        xaxis_title='observation index',
        yaxis_title='sales',
    )
    fig.show()

In [9]:
if __name__ == '__main__':
    """save model and data"""
    # Row 0 of the sorted results is the best restart.
    best = r_df.loc[0]
    data_df.to_csv('mmm_data.csv', index=False)
    np.savetxt('params.csv', best['x'])   # transform parameters
    np.savetxt('betas.csv', best['x0'])   # linear-model coefficients