In [1]:
import pandas as pd
import numpy as np

import import_ipynb
from LogisticGrowth import LogisticGrowth
from GeometricDecay import GeometricDecay

np.random.seed(1)

importing Jupyter notebook from LogisticGrowth.ipynb
importing Jupyter notebook from GeometricDecay.ipynb


In [2]:
class GenerateRandomMMMData:
    def __init__(self, n_var=7, n_media=3, size=100, noise_intensity=0.05, intercept = 10):
        self.n_var = n_var
        self.n_media = n_media
        self.size = size
        self.noise_intensity = noise_intensity
        self.intercept = intercept
        
    def _get_random_data(self):
        self.var_columns  = [f"var_{i}" for i in range(self.n_var)]
        self.media_columns = [f"media_{i}" for i in range(self.n_media)]

        df = {c: np.random.random(self.size) for c in self.var_columns+self.media_columns}

        self.data_df = pd.DataFrame( df)
        
    def _propagate_media(self):
        self.params = {}
        for m in self.media_columns:
            self.params[m] = {
                'shape' : (self.size/20)*np.random.random(1),
                'offset':(self.size/40)*np.random.random(1),
            }
        self.thetas = pd.Series(np.random.random(self.n_media), index = self.media_columns)
        
        self.adstocked_media = {}
        self.saturated_media = {}
        for m in self.media_columns:
            GD = GeometricDecay(self.thetas[m])
            self.adstocked_media[m] = GD(self.data_df[m])
        
            LG = LogisticGrowth(*self.params[m].values())
            self.saturated_media[m] = LG(self.data_df[m])
        
    def _generate_coefs(self):
        self.coefs = pd.Series(np.random.random(self.n_var+self.n_media), index = self.data_df.columns)
        self.coefs[self.var_columns] *= -1+2*np.random.randint(4, size=len(self.var_columns))
        
    def _generate_sales(self):
        self._generate_coefs()
        self.data_df['sales'] = 0
        
        for c in self.var_columns:
            self.data_df['sales'] += self.coefs[c]*self.data_df[c]
            
        for c in self.media_columns:
            self.data_df['sales'] += self.coefs[c]*self.saturated_media[c]
            
        self.data_df['sales'] += self.intercept
        self.data_df['sales'] += self.noise_intensity*np.random.random(self.data_df.index.size)
            
    def get_data(self):
        self._get_random_data()
        self._propagate_media()
        self._generate_sales()
        return self.data_df.copy()


In [4]:
if __name__ == '__main__':
    GR = GenerateRandomMMMData()
    data = GR.get_data()
    display(GR.coefs)
    display(data)

var_0      0.692653
var_1      4.807265
var_2     -0.282411
var_3      0.225893
var_4      4.393953
var_5      2.011804
var_6     -0.203194
media_0    0.406439
media_1    0.112937
media_2    0.669955
dtype: float64

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,media_0,media_1,media_2,sales
0,0.438093,0.436896,0.871361,0.407425,0.396585,0.640005,0.487444,0.211770,0.172267,0.970009,15.837036
1,0.052713,0.109961,0.686625,0.144289,0.924608,0.717651,0.870256,0.873120,0.165106,0.981170,16.400945
2,0.547494,0.571274,0.939200,0.885650,0.845589,0.384170,0.290413,0.902311,0.228521,0.319852,18.058886
3,0.202117,0.325464,0.008124,0.665384,0.530862,0.618845,0.739327,0.817320,0.156496,0.034913,15.806346
4,0.853310,0.104100,0.363740,0.183679,0.840514,0.375461,0.995986,0.645938,0.875059,0.003447,15.785866
...,...,...,...,...,...,...,...,...,...,...,...
95,0.837249,0.418412,0.934159,0.660961,0.875138,0.928579,0.121104,0.317903,0.113353,0.077885,18.696343
96,0.947571,0.502573,0.500037,0.500221,0.839541,0.519199,0.190876,0.632888,0.358469,0.522839,18.337093
97,0.192349,0.552711,0.293004,0.344814,0.582397,0.240145,0.365411,0.016138,0.271148,0.992581,16.385713
98,0.105189,0.395849,0.905736,0.503870,0.073469,0.385910,0.535522,0.997628,0.908152,0.813449,13.481851
