In [1]:
import pandas as pd
import numpy as np

import import_ipynb
from LogisticGrowth import LogisticGrowth
from Hill import Hill
from GeometricDecay import GeometricDecay

# Seed NumPy's legacy global RNG so every np.random.* draw made below is repeatable
# when the notebook is run top to bottom on a fresh kernel.
# NOTE(review): this statement is at module level (not under the `__main__` guard),
# so it also runs, and reseeds the global RNG, whenever this notebook is imported
# from another notebook -- confirm that side effect is intended.
np.random.seed(1)

importing Jupyter notebook from LogisticGrowth.ipynb
importing Jupyter notebook from Hill.ipynb
importing Jupyter notebook from GeometricDecay.ipynb


In [2]:
class GenerateRandomMMMData:
    """Build a random synthetic dataset with a known linear 'sales' response.

    The generated frame holds ``n_var`` plain columns (``var_i``) and
    ``n_media`` media columns (``media_i``), all drawn uniformly from
    [0, 1).  Each media column is passed through ``GeometricDecay`` and then
    ``Hill`` before entering the response.  ``sales`` is the intercept plus a
    coefficient-weighted sum of the plain columns and the transformed media
    columns, optionally inflated by multiplicative noise.

    All randomness comes from NumPy's global ``np.random`` state, so seed it
    beforehand for reproducible output.

    Parameters
    ----------
    n_var : int
        Number of non-media columns.
    n_media : int
        Number of media columns.
    size : int
        Number of rows.
    noise_intensity : float
        Upper bound of the relative noise added to ``sales`` (0 disables it).
    intercept : float
        Constant added to ``sales``.

    Attributes set by ``get_data``
    ------------------------------
    var_columns, media_columns : list of str
    data_df : pandas.DataFrame (features plus ``sales``)
    params : dict  ``{media: {'shape': ..., 'offset': ...}}`` passed to ``Hill``
    thetas : pandas.Series of per-media arguments passed to ``GeometricDecay``
    adstocked_media, saturated_media : dict of transformed media columns
    coefs : pandas.Series of the coefficients used to build ``sales``
    """

    def __init__(self, n_var=7, n_media=3, size=100, noise_intensity=0.05, intercept=10):
        self.n_var = n_var
        self.n_media = n_media
        self.size = size
        self.noise_intensity = noise_intensity
        self.intercept = intercept

    def _get_random_data(self):
        """Create the column name lists and the uniform-random feature frame."""
        self.var_columns = [f"var_{i}" for i in range(self.n_var)]
        self.media_columns = [f"media_{i}" for i in range(self.n_media)]

        feature_columns = self.var_columns + self.media_columns
        columns = {c: np.random.random(self.size) for c in feature_columns}

        # The explicit index keeps `size` rows even when there are no feature
        # columns at all (an intercept-only dataset).
        self.data_df = pd.DataFrame(columns, index=pd.RangeIndex(self.size))

    def _propagate_media(self):
        """Draw transform parameters and apply decay then Hill to each media column."""
        self.params = {}
        for m in self.media_columns:
            # Draw order (shape first, then offset) is kept fixed so a given
            # seed always yields the same parameters.
            shape = np.random.random(1)[0] + 0.1   # in [0.1, 1.1)
            offset = 10 * np.random.random(1)[0]   # in [0, 10)
            self.params[m] = {'shape': shape, 'offset': offset}
        self.thetas = pd.Series(np.random.random(self.n_media), index=self.media_columns)

        self.adstocked_media = {}
        self.saturated_media = {}
        for m in self.media_columns:
            decay = GeometricDecay(self.thetas[m])
            self.adstocked_media[m] = decay(self.data_df[m])

            # Arguments are looked up by key rather than relying on the
            # dict's insertion order.
            # NOTE(review): Hill is assumed to take (shape, offset) positionally,
            # as in the original call -- confirm against Hill.ipynb.
            saturation = Hill(self.params[m]['shape'], self.params[m]['offset'])
            self.saturated_media[m] = saturation(self.adstocked_media[m])

    def _generate_coefs(self):
        """Draw one coefficient per feature; plain-column coefficients may be negative.

        Magnitudes are uniform in [0, 100).  Each ``var_*`` coefficient is
        negated when its integer draw from {0..4} is 0 (one case in five);
        media coefficients stay non-negative.
        """
        # Index by the feature lists, not by data_df.columns: the frame gains
        # a 'sales' column later and would no longer match the number of
        # coefficients if this method ran again.
        feature_columns = self.var_columns + self.media_columns
        n_plain = len(self.var_columns)

        values = 100 * np.random.random(len(feature_columns))
        draws = np.random.randint(5, size=n_plain)
        values[:n_plain] *= np.where(draws == 0, -1.0, 1.0)

        self.coefs = pd.Series(values, index=feature_columns)

    def _generate_sales(self):
        """Draw coefficients and (re)build the ``sales`` column of ``data_df``."""
        self._generate_coefs()
        self.data_df['sales'] = 0.0

        for c in self.var_columns:
            self.data_df['sales'] += self.coefs[c]*self.data_df[c]

        for c in self.media_columns:
            self.data_df['sales'] += self.coefs[c]*self.saturated_media[c]

        self.data_df['sales'] += self.intercept

        # Multiplicative noise: np.random.random is in [0, 1), so each row is
        # scaled by a factor in [1, 1 + noise_intensity).  The draw is made
        # even when noise_intensity is 0, so the RNG stream does not depend
        # on that setting.
        noise = np.random.random(self.data_df.index.size)
        self.data_df['sales'] += self.data_df['sales']*self.noise_intensity*noise

    def get_data(self):
        """Generate a fresh dataset and return a copy of it.

        Returns
        -------
        pandas.DataFrame
            Feature columns followed by ``sales``.  The generating parameters
            remain available on the instance (``coefs``, ``params``,
            ``thetas``, ...).
        """
        self._get_random_data()
        self._propagate_media()
        self._generate_sales()
        return self.data_df.copy()


In [3]:
if __name__ == '__main__':
    # Demo run, skipped when this notebook is imported as a module:
    # build a noise-free dataset, then show the coefficients that generated
    # it followed by the resulting frame.
    GR = GenerateRandomMMMData(noise_intensity=0.0)
    data = GR.get_data()
    display(GR.coefs, data)

var_0      69.385541
var_1      67.035003
var_2     -43.047178
var_3     -76.778898
var_4     -53.600849
var_5      -3.985993
var_6      13.479312
media_0    19.341640
media_1    33.566380
media_2     5.231295
dtype: float64

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,media_0,media_1,media_2,sales
0,0.417022,0.326645,0.950176,0.811859,0.959434,0.087482,0.674564,0.138355,0.888386,0.245519,-26.947302
1,0.720324,0.527058,0.556653,0.874962,0.803961,0.227310,0.799777,0.290144,0.184384,0.911019,29.091217
2,0.000114,0.885942,0.915606,0.688413,0.032323,0.314377,0.080530,0.613871,0.585348,0.043534,33.366337
3,0.302333,0.357270,0.641566,0.569494,0.709387,0.174766,0.231702,0.324139,0.898205,0.950753,6.126448
4,0.146756,0.908535,0.390008,0.160971,0.465001,0.607094,0.207626,0.457360,0.446117,0.556407,85.532071
...,...,...,...,...,...,...,...,...,...,...,...
95,0.237027,0.931972,0.068209,0.843840,0.931861,0.198948,0.220284,0.808278,0.364378,0.974403,31.562677
96,0.903380,0.013952,0.377924,0.381016,0.936868,0.142518,0.302097,0.295289,0.774410,0.311703,39.520506
97,0.573679,0.234362,0.079626,0.749858,0.844330,0.377083,0.883029,0.544121,0.552768,0.668797,27.796633
98,0.002870,0.616778,0.982817,0.511141,0.920207,0.026628,0.543166,0.487921,0.889131,0.325967,-13.976650
