In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
import arviz as az

import IPython

from meridian import constants
from meridian.data import load
from meridian.data import test_utils
from meridian.model import model
from meridian.model import spec
from meridian.model import prior_distribution
from meridian.analysis import optimizer
from meridian.analysis import analyzer
from meridian.analysis import visualizer
from meridian.analysis import summarizer
from meridian.analysis import formatter

# Report the resources of this runtime: RAM, then the GPU and CPU devices
# that TensorFlow can see.
from psutil import virtual_memory

# NOTE: `virtual_memory().total` is the machine's total memory, not the amount
# currently free; the message wording is kept unchanged.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Use the stable `tf.config.list_physical_devices` endpoint; the
# `tf.config.experimental` alias used before is deprecated.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Num CPUs Available: ", len(tf.config.list_physical_devices('CPU')))

Your runtime has 17.2 gigabytes of available RAM

Num GPUs Available:  0
Num CPUs Available:  1


<a name="load-data"></a>
## Step 1: Load the data

In [2]:
# Relative path to the input CSV; it is read with pandas for a quick summary
# and passed to the Meridian CSV loader as `csv_path`.
data_path = "../data/clean_data.csv"

In [3]:
# Quick sanity check of the raw CSV: summarise the numeric columns only.
# (`pd` is already imported in the notebook's first cell, so it is not
# re-imported here.)
df = pd.read_csv(data_path)
numeric_df = df.select_dtypes(include='number')

# NOTE(review): the recorded output lists an 'Unnamed: 0' column, so that
# column is numeric and is included in the summaries below.
print("Means:\n", numeric_df.mean())
print("Variances:\n", numeric_df.var())
print("Correlations:\n", numeric_df.corr())

Means:
 Unnamed: 0                           89.000000
dependantVar                        362.706215
Cinema                              920.903931
Cinema_exposure                     920.903931
Sponsoring_Activation              2372.881337
Sponsoring_Activation_exposure     2372.881337
Content                             334.081355
Content_exposure                    334.081355
Display                            4159.785143
Display_exposure                   4159.785143
OLA                                1096.209636
OLA_exposure                       1096.209636
OLV                                6481.474327
OLV_exposure                       6481.474327
OOH                               16893.084735
OOH_exposure                      16893.084735
Print                                67.796610
Print_exposure                       67.796610
Radio                             25480.499483
Radio_exposure                    25480.499483
SearchBing                          803.912315
Searc

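Load the data from the CSV file at `data_path` as follows. (The original Meridian tutorial uses the [simulated dataset in CSV format](https://github.com/google/meridian/blob/main/meridian/data/simulated_data/csv/geo_all_channels.csv); this notebook uses a local file instead.)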

1\. Map the column names to their corresponding variable types. For example, the column names 'GQV' and 'Competitor_Sales' are mapped to `controls`. The required variable types are `time`, `controls`, `population`, `kpi`, `revenue_per_kpi`, `media` and `media_spend`. If your data includes organic media or non-media treatments, you can add them using `organic_media` and `non_media_treatments` arguments. In this notebook, `controls` is an empty list and no `population` or `revenue_per_kpi` column is mapped. For the definition of each variable, see
[Collect and organize your data](https://developers.google.com/meridian/docs/user-guide/collect-data).

In [4]:
# Tell the loader which CSV column plays which role. No geo or population
# column is mapped here.
coord_to_columns = load.CoordToColumns(
    kpi='dependantVar',
    time='date',
    # No control variables; the argument is still passed explicitly (the
    # original author noted it is required even when empty).
    controls=[],
    # One exposure column per channel, in the same order as `media_spend`.
    media=[
        'Cinema_exposure',
        'Sponsoring_Activation_exposure',
        'Content_exposure',
        'Display_exposure',
        'OLA_exposure',
        'OLV_exposure',
        'OOH_exposure',
        'Print_exposure',
        'Radio_exposure',
        'SearchBing_exposure',
        'DemandGen_exposure',
        'Discovery_exposure',
        'Pmax_exposure',
        'GoogleDisplay_exposure',
        'SearchGoogle_exposure',
        'Social_exposure',
        'Télé_exposure',
    ],
    # One spend column per channel.
    media_spend=[
        'Cinema',
        'Sponsoring_Activation',
        'Content',
        'Display',
        'OLA',
        'OLV',
        'OOH',
        'Print',
        'Radio',
        'SearchBing',
        'DemandGen',
        'Discovery',
        'Pmax',
        'GoogleDisplay',
        'SearchGoogle',
        'Social',
        'Télé',
    ],
)


2\. Map the media variables and the media spends to the designated channel names intended for display in the two-page HTML output. In the following example, 'Cinema_exposure' and 'Cinema' are connected to the same channel, 'Cinema'.

In [5]:
# Display names for the report, keyed by CSV column. Each spend column already
# carries its channel's name, and the matching exposure column is that name
# plus an '_exposure' suffix, so both mappings resolve to the same channel.
correct_media_spend_to_channel = {
    channel: channel
    for channel in (
        'Cinema',
        'Sponsoring_Activation',
        'Content',
        'Display',
        'OLA',
        'OLV',
        'OOH',
        'Print',
        'Radio',
        'SearchBing',
        'DemandGen',
        'Discovery',
        'Pmax',
        'GoogleDisplay',
        'SearchGoogle',
        'Social',
        'Télé',
    )
}

# Derived from the spend mapping so the two dicts share keys' order and
# channel names.
correct_media_to_channel = {
    f'{channel}_exposure': channel
    for channel in correct_media_spend_to_channel
}


3\. Load the CSV data using `CsvDataLoader`. Note that `csv_path` is the path to the data file location.

In [6]:
# Read the CSV using the column roles and channel-name mappings defined in the
# previous cells; the loaded object is later passed to the model as
# `input_data`.
loader = load.CsvDataLoader(
    csv_path=data_path,
    coord_to_columns=coord_to_columns,
    media_to_channel=correct_media_to_channel,
    media_spend_to_channel=correct_media_spend_to_channel,
    # The KPI column is declared as a non-revenue quantity.
    kpi_type='non_revenue',
)
data = loader.load()

  self.df[geo_column_name] = self.df[geo_column_name].replace(
  if (constants.GEO) not in self.dataset.dims.keys():
  if constants.MEDIA_TIME not in self.dataset.dims.keys():


<a name="configure-model"></a>
## Step 2: Configure the model

Meridian uses a Bayesian framework and Markov Chain Monte Carlo (MCMC) algorithms to sample from the posterior distribution.

1\. Initialize the `Meridian` class by passing the loaded data and the customized model specification. One advantage of Meridian lies in its capacity to calibrate the model directly through ROI priors, as described in [Media Mix Model Calibration With Bayesian Priors](https://research.google/pubs/media-mix-model-calibration-with-bayesian-priors/). In this notebook, the ROI priors for all media channels are identical, with each being represented as LogNormal(-1.5, 1.5).

In [7]:
# A single LogNormal(loc, scale) distribution is supplied as the `roi_m` prior.
roi_mu = -1.5  # lower central ROI: exp(-1.5) is about 0.22
roi_sigma = 1.5  # deliberately wide, very permissive range
prior = prior_distribution.PriorDistribution(
    roi_m=tfp.distributions.LogNormal(
        loc=roi_mu,
        scale=roi_sigma,
        name=constants.ROI_M,
    ),
)
model_spec = spec.ModelSpec(prior=prior)

# Combine the loaded data with the customised specification.
mmm = model.Meridian(model_spec=model_spec, input_data=data)

I0000 00:00:1748601746.988981   14378 service.cc:148] XLA service 0x11dc12a30 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748601746.989194   14378 service.cc:156]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1748601747.016505   14378 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


2\. Use the `sample_prior()` and `sample_posterior()` methods to obtain samples from the prior and posterior distributions of model parameters. If you are using the T4 GPU runtime this step may take about 10 minutes for the provided data set.

In [8]:
%%time
# Sample from the prior first, then from the posterior. Both calls store their
# results on `mmm`, which the diagnostics cell reads afterwards.
# NOTE(review): the argument names suggest 10 chains with 2000 adaptation
# steps, 500 burn-in steps and 1000 kept draws each, seeded with 1 — confirm
# the exact semantics against the Meridian documentation.
mmm.sample_prior(500)
mmm.sample_posterior(n_chains=10, n_adapt=2000, n_burnin=500, n_keep=1000, seed=1)

2025-05-30 12:42:52.131498: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
W0000 00:00:1748601772.955490   14378 assert_op.cc:38] Ignoring Assert operator mcmc_retry_init/assert_equal_1/Assert/AssertGuard/Assert


CPU times: user 2min 16s, sys: 6.96 s, total: 2min 23s
Wall time: 2min 11s


For more information about configuring the parameters and using a customized model specification, such as setting different ROI priors for each media channel, see [Configure the model](https://developers.google.com/meridian/docs/user-guide/configure-model).

<a name="model-diagnostics"></a>
## Step 3: Run model diagnostics

After the model is built, you must assess convergence, debug the model if needed, and then assess the model fit.

1\. Assess convergence. Run the following code to generate r-hat statistics. An R-hat close to 1.0 indicates convergence. R-hat < 1.2 indicates approximate convergence and is a reasonable threshold for many problems.

In [9]:
# Build the diagnostics helper for the sampled model and draw the R-hat boxplot.
# NOTE(review): in the recorded run this cell raised MCMCSamplingError with a
# maximum R-hat of about 1.36e12, far above the 1.2 threshold quoted in the
# text, so the chains did not converge. Revisit the data and priors before
# relying on anything computed from `mmm`.
model_diagnostics = visualizer.ModelDiagnostics(mmm)
model_diagnostics.plot_rhat_boxplot()

MCMCSamplingError: MCMC sampling failed with a maximum R-hat value of 1359308390400.0.