In [12]:
!pip install --upgrade google-meridian[colab,and-cuda] --quiet
!pip install google-cloud-storage --quiet


In [55]:
from google.cloud import bigquery
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
import arviz as az
import re
import pickle
import os
from google.cloud import storage
import matplotlib.pyplot as plt


import IPython
from IPython.display import display, Markdown
import time

from meridian import constants
from meridian.data import load
from meridian.data import test_utils
from meridian.model import model
from meridian.model import spec
from meridian.model import prior_distribution
from meridian.analysis import optimizer
from meridian.analysis import analyzer
from meridian.analysis import visualizer
from meridian.analysis import summarizer
from meridian.analysis import formatter



In [14]:
pd.set_option('display.float_format', lambda x: '%.4f' % x)
project_id="meridianmmm-462513"
dataset_id="mmm_dataset"
table_id="mmm_table"
organisation_id="784d6aa3cda59f59f2400332b2420a49"

In [15]:

def generate_total_spend_query_casted(project_id: str, dataset_id: str, table_id: str) -> str:
    """
    Generates a BigQuery query that:
    - Casts all *_SPEND columns to FLOAT64
    - Handles NULLs with IFNULL
    - Aggregates total spend per ORGANISATION_ID and CURRENCY_CODE
    - Orders by TOTAL_SPEND descending

    Returns:
        str: Executable SQL query string
    """
    from google.cloud import bigquery
    client = bigquery.Client(project=project_id)

    table_ref = f"{project_id}.{dataset_id}.{table_id}"
    table = client.get_table(table_ref)

    spend_exprs = []
    for f in table.schema:
        if f.name.endswith("_SPEND"):
            spend_exprs.append(f"IFNULL(SAFE_CAST(`{f.name}` AS FLOAT64), 0)")

    if not spend_exprs:
        raise ValueError("No *_SPEND columns found.")

    total_expr = " + ".join(spend_exprs)

    return f"""
    SELECT
      ORGANISATION_ID,
      CURRENCY_CODE,
      SUM({total_expr}) AS TOTAL_SPEND
    FROM `{table_ref}`
    GROUP BY ORGANISATION_ID, CURRENCY_CODE
    ORDER BY TOTAL_SPEND DESC
    """


In [16]:

query_sum = generate_total_spend_query_casted(
    project_id,
    dataset_id,
    table_id
)

In [17]:
def run_query_return_df(query: str, project_id: str) -> 'pd.DataFrame':
    """
    Executes a BigQuery SQL query and returns the result as a pandas DataFrame.

    Args:
        query (str): SQL query to execute.
        project_id (str): GCP project ID.

    Returns:
        pd.DataFrame: Result of the query.
    """
    from google.cloud import bigquery
    import pandas as pd

    client = bigquery.Client(project=project_id)
    query_job = client.query(query)
    return query_job.result().to_dataframe()


In [18]:
sum_spend = run_query_return_df(query_sum, project_id)

In [19]:
def convert_spend_to_eur(df, fx_rates, spend_col='TOTAL_SPEND', currency_col='CURRENCY_CODE'):
    """
    Returns a DataFrame with ORGANISATION_ID and TOTAL_SPEND_EUR, all spend converted to EUR and sorted descending.

    Args:
        df (pd.DataFrame): Input DataFrame.
        fx_rates (dict): Dict of currency code to EUR conversion rates.
        spend_col (str): Name of spend column.
        currency_col (str): Name of currency code column.

    Returns:
        pd.DataFrame: DataFrame with ORGANISATION_ID and TOTAL_SPEND_EUR columns, sorted descending.
    """
    df = df.copy()
    df['TOTAL_SPEND_EUR'] = df.apply(
        lambda row: row[spend_col] / fx_rates.get(row[currency_col], 1), axis=1
    )
    result = df[['ORGANISATION_ID', 'TOTAL_SPEND_EUR']].sort_values('TOTAL_SPEND_EUR', ascending=False)
    return result


In [20]:
fx_rates = {
    'GBP': 0.84241, 'USD': 1.139893, 'EUR': 1, 'AUD': 1.754467, 'CAD': 1.56117,
    'HKD': 8.945029, 'NZD': 1.892901, 'JPY': 165.07827, 'DKK': 7.459725,
    'CLP': 1065.242251, 'NOK': 11.524783, 'SEK': 10.984543,
    'CHF': 0.937039, 'ZAR': 20.279118
}

eur_spend_df = convert_spend_to_eur(sum_spend, fx_rates)
display(eur_spend_df)

Unnamed: 0,ORGANISATION_ID,TOTAL_SPEND_EUR
1,784d6aa3cda59f59f2400332b2420a49,25656973.8397
2,7569a6a9c156a0f9398fa6cfd51df5bb,15115414.9896
5,ba773ebd7ec0a08f1d042187d086ccb4,12545006.5619
7,429c8d00704a9ef6307b49f22d5dfade,10476425.2066
8,882ce7e286d66facc66518783e2192c7,8366939.1506
...,...,...
83,6dded930ff616b1ec8a3094208238208,20703.5104
116,f1504c791a2b4aa6fac0861259e0e212,10861.8725
117,882ce7e286d66facc66518783e2192c7,1446.4428
118,882ce7e286d66facc66518783e2192c7,415.8699


In [21]:
from google.cloud import bigquery
import pandas as pd

def get_business_data(organisation_id, project_id, dataset, table):
    """
    Query BigQuery to get all rows for a given organisation id.

    Args:
        organisation_id (str or int): The id to filter on.
        project_id (str): GCP project id.
        dataset (str): Dataset name.
        table (str): Table name.

    Returns:
        pd.DataFrame: All rows matching organisation id.
    """
    client = bigquery.Client(project=project_id)
    query = f"""
        SELECT *
        FROM `{project_id}.{dataset}.{table}`
        WHERE ORGANISATION_ID = @org_id
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("org_id", "STRING", str(organisation_id))
        ]
    )
    query_job = client.query(query, job_config=job_config)
    return query_job.to_dataframe()


In [22]:
organisation_data = get_business_data(
    organisation_id,
    project_id,
    dataset_id,
    table_id
)

In [23]:
def convert_spend_to_eur(df: pd.DataFrame, fx_rates: dict) -> pd.DataFrame:
    # Identify all spend columns ending with '_SPEND'
    spend_cols = [col for col in df.columns if col.endswith('_SPEND')]
    df_new = df.copy()
    for col in spend_cols:
        # Make new column name
        new_col = col + '_EUR'
        # Use row-wise conversion based on CURRENCY_CODE
        df_new[new_col] = df_new.apply(
            lambda row:
                row[col] / fx_rates.get(row['CURRENCY_CODE'], 1)
                if pd.notnull(row[col]) and row['CURRENCY_CODE'] in fx_rates
                else row[col],
            axis=1
        )
        # Optionally drop the old spend column
        df_new.drop(columns=[col], inplace=True)
    return df_new


In [24]:
org_data_converted = convert_spend_to_eur(organisation_data, fx_rates)
del organisation_data

In [25]:
def drop_unused_columns(df):
    cols_to_drop = [
        "MMM_TIMESERIES_ID",
        "ORGANISATION_ID",
        "ORGANISATION_VERTICAL",
        "ORGANISATION_SUBVERTICAL",
        "ORGANISATION_MARKETING_SOURCES",
        "ORGANISATION_PRIMARY_TERRITORY_NAME",
        "CURRENCY_CODE",
        "FIRST_PURCHASES",
        "FIRST_PURCHASES_UNITS",
        "FIRST_PURCHASES_ORIGINAL_PRICE",
        "FIRST_PURCHASES_GROSS_DISCOUNT",
        "ALL_PURCHASES_UNITS",
        "ALL_PURCHASES_ORIGINAL_PRICE",
        "ALL_PURCHASES_GROSS_DISCOUNT",

    ]
    # Drop only if column exists to avoid KeyError
    return df.drop(columns=[col for col in cols_to_drop if col in df.columns])


In [26]:
org_data_cleaned = drop_unused_columns(org_data_converted)
del org_data_converted


In [27]:
def drop_low_coverage_channels(df, geo_col='TERRITORY_NAME', min_geos=2, spend_suffix='_SPEND_EUR'):
    """
    Drops entire channel groups when spend columns don't meet minimum geo coverage.

    Args:
        df: Your dataframe
        geo_col: Column name with geography info (default: 'TERITORY_NAME')
        min_geos: Minimum number of geos required (default: 2)
        spend_suffix: What spend columns end with (default: '_SPEND_EUR')

    Returns:
        Cleaned dataframe with low-coverage channels removed
    """
    # Find all spend columns
    spend_cols = [col for col in df.columns if col.endswith(spend_suffix)]

    # Find channels to drop and their related columns
    all_columns_to_drop = []
    dropped_channels = []

    for spend_col in spend_cols:
        # Count how many geos have spending > 0 for this channel
        geos_with_spend = df[df[spend_col] > 0][geo_col].nunique()

        if geos_with_spend < min_geos:
            # Get channel name (e.g., 'TIKTOK' from 'TIKTOK_SPEND_EUR')
            channel_name = spend_col.replace(spend_suffix, '')

            # Find ALL columns for this channel
            channel_columns = [col for col in df.columns if col.startswith(channel_name + '_')]

            all_columns_to_drop.extend(channel_columns)
            dropped_channels.append(f"{channel_name} (only in {geos_with_spend} geos)")

    # Drop the columns
    cleaned_df = df.drop(columns=all_columns_to_drop)

    # Print what was dropped
    if dropped_channels:
        print(f"Dropped {len(dropped_channels)} channels:")
        for channel in dropped_channels:
            print(f"  - {channel}")
        print(f"Removed {len(all_columns_to_drop)} columns total")
    else:
        print("No channels were dropped - all meet minimum geo requirements")

    return cleaned_df

In [28]:
org_data_dropped = drop_low_coverage_channels(org_data_cleaned)
del org_data_cleaned

Dropped 1 channels:
  - TIKTOK (only in 0 geos)
Removed 3 columns total


In [29]:
def impute_nans_with_zero(df):
    """
    Imputes NaN values with 0 in all numeric columns of the DataFrame.
    Returns a new DataFrame with imputed values.
    """
    numeric_cols = df.select_dtypes(include='number').columns
    return df.copy().fillna({col: 0 for col in numeric_cols})


In [30]:
org_data_imputed = impute_nans_with_zero(org_data_dropped)
del org_data_dropped


In [31]:
def filter_balanced_panel(df, geo_col='TERRITORY_NAME', date_col='DATE_DAY'):
    """
    Filters the DataFrame to keep only the dates that appear for all geos.
    Returns a balanced panel with complete time series for every geo.
    """
    # 1. Find all unique geos
    geos = df[geo_col].unique()

    # 2. Find dates available for each geo
    dates_per_geo = {g: set(df[df[geo_col] == g][date_col]) for g in geos}

    # 3. Intersection of all date sets = dates present for all geos
    common_dates = set.intersection(*dates_per_geo.values())

    # 4. Filter DataFrame to keep only common_dates
    filtered_df = df[df[date_col].isin(common_dates)].copy()

    # 5. (Optional) Sort for neatness
    filtered_df = filtered_df.sort_values([geo_col, date_col]).reset_index(drop=True)

    return filtered_df




In [32]:
org_data_balanced = filter_balanced_panel(org_data_imputed)
del org_data_imputed

In [33]:
def add_population_column(df, territory_col, population_dict):
    """
    Adds a POPULATION column to the dataframe based on the territory.

    Args:
        df (pd.DataFrame): Input DataFrame.
        territory_col (str): Column name containing territory info (e.g., 'TERRITORY').
        population_dict (dict): Dictionary with populations. E.g.,
            {'US': 332_000_000, 'UK': 68_000_000, 'ALL_TERRITORIES': 400_000_000}

    Returns:
        pd.DataFrame: DataFrame with added 'POPULATION' column.
    """
    def lookup_population(territory):
        return population_dict.get(territory, None)

    df = df.copy()
    df['POPULATION'] = df[territory_col].apply(lookup_population)
    return df


In [34]:
population_dict = {
    'US': 332000000,
    'UK': 68000000,
    'All Territories': 332000000 + 68000000,
}

org_data_balanced = add_population_column(org_data_balanced, 'TERRITORY_NAME', population_dict)

In [35]:
def aggregate_weekly_grouped(df):
    """
    Groups by DATE_DAY, TERRITORY_NAME, and POPULATION.
    Sums all other numeric columns.
    DATE_DAY is set to the week start (Monday).
    """
    df = df.copy()
    df['DATE_DAY'] = pd.to_datetime(df['DATE_DAY'])
    # Set to week start (Monday) for all DATE_DAY
    df['DATE_WEEK'] = df['DATE_DAY'] - pd.to_timedelta(df['DATE_DAY'].dt.weekday, unit='d')

    groupby_cols = ['DATE_WEEK', 'TERRITORY_NAME', 'POPULATION']
    # Only sum numeric columns not in groupby
    num_cols = [col for col in df.select_dtypes(include='number').columns if col not in groupby_cols]

    df_grouped = df.groupby(groupby_cols, as_index=False)[num_cols].sum()
    return df_grouped

In [36]:
df_weekly = aggregate_weekly_grouped(org_data_balanced)
del org_data_balanced

In [37]:
def apply_smart_scaling(data):
    """
    Scale variables to similar magnitudes without removing any
    """
    data_scaled = data.copy()

    # Scale large impression variables
    impression_vars = [col for col in data.columns if 'IMPRESSIONS' in col]
    for var in impression_vars:
        data_scaled[var] = data_scaled[var] / 1000000  # Scale to millions

    # Scale population
    data_scaled['POPULATION'] = data_scaled['POPULATION'] / 1000000  # Scale to millions

    # Scale large spend variables
    large_spend_vars = ['META_FACEBOOK_SPEND_EUR', 'META_INSTAGRAM_SPEND_EUR',
                        'GOOGLE_SHOPPING_SPEND_EUR', 'GOOGLE_PMAX_SPEND_EUR']
    for var in large_spend_vars:
        if var in data_scaled.columns:
            data_scaled[var] = data_scaled[var] / 1000  # Scale to thousands

    # Scale large click variables
    large_click_vars = ['META_FACEBOOK_CLICKS', 'META_INSTAGRAM_CLICKS',
                        'GOOGLE_SHOPPING_CLICKS', 'GOOGLE_PMAX_CLICKS']
    for var in large_click_vars:
        if var in data_scaled.columns and data_scaled[var].max() > 50000:
            data_scaled[var] = data_scaled[var] / 1000  # Scale to thousands

    return data_scaled

In [38]:
df_weekly = apply_smart_scaling(df_weekly)

In [39]:
def save_df_and_show_path(df, filename="df_weekly.csv"):
    """
    Saves a DataFrame to a CSV in Colab and prints its full path.
    """
    path = f"/content/{filename}"
    df.to_csv(path, index=False)
    print(f"✅ Saved to: {path}")
    return path


In [40]:
save_df_and_show_path(df_weekly)


✅ Saved to: /content/df_weekly.csv


'/content/df_weekly.csv'

In [41]:
def create_meridian_mappings(columns):
    """
    Smart function: channels with spend = paid, channels without spend = organic.

    Args:
        columns: List of your column names

    Returns:
        dict: Contains 'media', 'media_spend', 'organic_media' lists and mapping dicts
    """
    # Find all channels and their metrics
    channels = {}

    for col in columns:
        # Extract channel name using regex
        if col.endswith('_SPEND_EUR'):
            channel = col.replace('_SPEND_EUR', '')
            if channel not in channels:
                channels[channel] = {}
            channels[channel]['spend'] = col

        elif col.endswith('_IMPRESSIONS'):
            channel = col.replace('_IMPRESSIONS', '')
            if channel not in channels:
                channels[channel] = {}
            channels[channel]['impressions'] = col

        elif col.endswith('_CLICKS'):
            channel = col.replace('_CLICKS', '')
            if channel not in channels:
                channels[channel] = {}
            channels[channel]['clicks'] = col

    # Smart logic: has spend = paid, no spend = organic
    media = []
    media_spend = []
    organic_media = []
    media_to_channel = {}
    media_spend_to_channel = {}

    for channel, metrics in channels.items():
        # Choose impressions over clicks for media
        media_col = metrics.get('impressions') or metrics.get('clicks')

        if 'spend' in metrics:  # Paid channel
            if media_col:
                media.append(media_col)
                media_to_channel[media_col] = channel
            media_spend.append(metrics['spend'])
            media_spend_to_channel[metrics['spend']] = channel
        else:  # Organic channel
            if media_col:
                organic_media.append(media_col)

    return {
        'media': media,
        'media_spend': media_spend,
        'organic_media': organic_media,
        'media_to_channel': media_to_channel,
        'media_spend_to_channel': media_spend_to_channel
    }

In [42]:
config = create_meridian_mappings(df_weekly.columns)
del df_weekly

In [43]:
coord_to_columns = load.CoordToColumns(
    time='DATE_WEEK',
    geo='TERRITORY_NAME',
    population='POPULATION',
    kpi='ALL_PURCHASES',  # or whatever your conversion column is
    media=config['media'],
    media_spend=config['media_spend'],
    organic_media=config['organic_media']
)

correct_media_to_channel = config['media_to_channel']
correct_media_spend_to_channel = config['media_spend_to_channel']

In [44]:
loader = load.CsvDataLoader(
    csv_path="/content/df_weekly.csv",
    kpi_type='non_revenue',
    coord_to_columns=coord_to_columns,
    media_to_channel=correct_media_to_channel,
    media_spend_to_channel=correct_media_spend_to_channel,
)
data = loader.load()



In [45]:

def save_obj_and_show_path(obj, filename="my_object.pkl"):
    """
    Pickles a Python object to disk in Colab and prints its full path.
    """
    path = f"/content/{filename}"
    with open(path, "wb") as f:
        pickle.dump(obj, f)
    print(f"✅ Saved to: {path}")
    return path


In [None]:
roi_mu = 0.2     # Mu for ROI prior for each media channel.
roi_sigma = 0.9  # Sigma for ROI prior for each media channel.
prior = prior_distribution.PriorDistribution(
    roi_m=tfp.distributions.LogNormal(roi_mu, roi_sigma, name=constants.ROI_M)
)
model_spec = spec.ModelSpec(prior=prior)

mmm = model.Meridian(input_data=data, model_spec=model_spec)

display(Markdown("⏳ **Sampling from prior...**"))
#mmm.sample_prior(500)
display(Markdown("✅ **Done sampling prior!**"))

display(Markdown("⏳ **Sampling from posterior... (this may take a while)**"))
#mmm.sample_posterior(n_chains=4, n_adapt=2000, n_burnin=500, n_keep=1000, seed=1)
display(Markdown("✅ **Done sampling posterior!**"))

#save_obj_and_show_path(mmm, "meridian_model.pkl")
#del mmm




⏳ **Sampling from prior...**

✅ **Done sampling prior!**

⏳ **Sampling from posterior... (this may take a while)**

✅ **Done sampling posterior!**

In [None]:
roi_mu = 0.1     # Mu for ROI prior for each media channel.
roi_sigma = 1  # Sigma for ROI prior for each media channel.
prior = prior_distribution.PriorDistribution(
    roi_m=tfp.distributions.LogNormal(roi_mu, roi_sigma, name=constants.ROI_M)
)
model_spec = spec.ModelSpec(prior=prior)

mmm2 = model.Meridian(input_data=data, model_spec=model_spec)


display(Markdown("⏳ **Sampling from prior...**"))
#mmm2.sample_prior(500)
display(Markdown("✅ **Done sampling prior!**"))

display(Markdown("⏳ **Sampling from posterior... (this may take a while)**"))
#mmm2.sample_posterior(n_chains=5, n_adapt=3000, n_burnin=700, n_keep=2000, seed=1)
display(Markdown("✅ **Done sampling posterior!**"))

#save_obj_and_show_path(mmm2, "meridian_model2.pkl")
#del mmm2


⏳ **Sampling from prior...**

✅ **Done sampling prior!**

⏳ **Sampling from posterior... (this may take a while)**

✅ **Done sampling posterior!**

In [None]:
roi_mu = 0.3     # Mu for ROI prior for each media channel.
roi_sigma = 0.9  # Sigma for ROI prior for each media channel.
prior = prior_distribution.PriorDistribution(
    roi_m=tfp.distributions.LogNormal(roi_mu, roi_sigma, name=constants.ROI_M)
)
model_spec = spec.ModelSpec(prior=prior)

mmm3 = model.Meridian(input_data=data, model_spec=model_spec)


display(Markdown("⏳ **Sampling from prior...**"))
mmm3.sample_prior(2000)
display(Markdown("✅ **Done sampling prior!**"))

display(Markdown("⏳ **Sampling from posterior... (this may take a while)**"))
mmm3.sample_posterior(n_chains=5, n_adapt=3000, n_burnin=700, n_keep=2000, seed=1)
display(Markdown("✅ **Done sampling posterior!**"))

save_obj_and_show_path(mmm3, "meridian_model3.pkl")
del mmm3




⏳ **Sampling from prior...**

✅ **Done sampling prior!**

⏳ **Sampling from posterior... (this may take a while)**

✅ **Done sampling posterior!**

✅ Saved to: /content/meridian_model3.pkl


In [None]:
roi_mu = 0.8     # Mu for ROI prior for each media channel.
roi_sigma = 0.8  # Sigma for ROI prior for each media channel.
prior = prior_distribution.PriorDistribution(
    roi_m=tfp.distributions.LogNormal(roi_mu, roi_sigma, name=constants.ROI_M)
)
model_spec = spec.ModelSpec(prior=prior)

mmm4 = model.Meridian(input_data=data, model_spec=model_spec)


display(Markdown("⏳ **Sampling from prior...**"))
mmm4.sample_prior(2000)
display(Markdown("✅ **Done sampling prior!**"))

display(Markdown("⏳ **Sampling from posterior... (this may take a while)**"))
mmm4.sample_posterior(n_chains=5, n_adapt=8000, n_burnin=2000, n_keep=2000, seed=1)
display(Markdown("✅ **Done sampling posterior!**"))


save_obj_and_show_path(mmm4, "meridian_model4.pkl")
del mmm4

⏳ **Sampling from prior...**

✅ **Done sampling prior!**

⏳ **Sampling from posterior... (this may take a while)**

✅ **Done sampling posterior!**

✅ Saved to: /content/meridian_model4.pkl


In [46]:
roi_mu = 0.5     # Mu for ROI prior for each media channel.
roi_sigma = 0.5  # Sigma for ROI prior for each media channel.
prior = prior_distribution.PriorDistribution(
    roi_m=tfp.distributions.LogNormal(roi_mu, roi_sigma, name=constants.ROI_M)
)
model_spec = spec.ModelSpec(prior=prior)

mmm5 = model.Meridian(input_data=data, model_spec=model_spec)


display(Markdown("⏳ **Sampling from prior...**"))
mmm5.sample_prior(1000)
display(Markdown("✅ **Done sampling prior!**"))

display(Markdown("⏳ **Sampling from posterior... (this may take a while)**"))
mmm5.sample_posterior(n_chains=4, n_adapt=4000, n_burnin=500, n_keep=1000, seed=1)
display(Markdown("✅ **Done sampling posterior!**"))

save_obj_and_show_path(mmm5, "meridian_model5.pkl")
del mmm5

⏳ **Sampling from prior...**

✅ **Done sampling prior!**

⏳ **Sampling from posterior... (this may take a while)**

✅ **Done sampling posterior!**

✅ Saved to: /content/meridian_model5.pkl


In [47]:

def upload_files_to_gcs(bucket_name):
    """
    Upload all files from /content/ to Google Cloud Storage bucket.

    Args:
        bucket_name (str): Name of the GCS bucket
    """
    client = storage.Client()
    bucket = client.bucket(bucket_name)

    files = os.listdir("/content/")

    for filename in files:
        file_path = f"/content/{filename}"

        if os.path.isfile(file_path):
            blob = bucket.blob(filename)
            blob.upload_from_filename(file_path)
            print(f"Uploaded: {filename}")

# Usage
upload_files_to_gcs("rezameridianmmm")

Uploaded: meridian_model5.pkl
Uploaded: df_weekly.csv


In [48]:
def download_from_gcs(bucket_name, source_blob_name, destination_file_name):
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)
    print(f"Downloaded: {source_blob_name} to {destination_file_name}")

In [49]:
bucket_name = 'rezameridianmmm'

model_files = [
    'meridian_model.pkl',
    'meridian_model2.pkl',
    'meridian_model3.pkl',
    'meridian_model4.pkl',
    'meridian_model5.pkl'
]

for model_file in model_files:
    download_from_gcs(bucket_name, model_file, f'/content/{model_file}')

Downloaded: meridian_model.pkl to /content/meridian_model.pkl
Downloaded: meridian_model2.pkl to /content/meridian_model2.pkl
Downloaded: meridian_model3.pkl to /content/meridian_model3.pkl
Downloaded: meridian_model4.pkl to /content/meridian_model4.pkl
Downloaded: meridian_model5.pkl to /content/meridian_model5.pkl


In [50]:
def load_model(file_path):
    """
    Unpickle and load a model from a file.

    Parameters:
    - file_path (str): Path to the pickle file.

    Returns:
    - Loaded model object.
    """
    with open(file_path, 'rb') as file:
        model = pickle.load(file)
    return model

In [51]:
model1 = load_model('/content/meridian_model.pkl')
model2 = load_model('/content/meridian_model2.pkl')
model3 = load_model('/content/meridian_model3.pkl')
model4 = load_model('/content/meridian_model4.pkl')
model5 = load_model('/content/meridian_model5.pkl')

In [59]:
model_diagnostics1 = visualizer.ModelDiagnostics(model1)
model_diagnostics1.plot_rhat_boxplot()

In [61]:
model_diagnostics2 = visualizer.ModelDiagnostics(model2)
model_diagnostics2.plot_rhat_boxplot()


In [62]:
model_diagnostics3 = visualizer.ModelDiagnostics(model3)
model_diagnostics3.plot_rhat_boxplot()


In [60]:
model_diagnostics4 = visualizer.ModelDiagnostics(model4)
model_diagnostics4.plot_rhat_boxplot()


In [63]:
model_diagnostics5 = visualizer.ModelDiagnostics(model5)
model_diagnostics5.plot_rhat_boxplot()


In [64]:
model_fit = visualizer.ModelFit(model5)
model_fit.plot_model_fit()

In [66]:
mmm_summarizer = summarizer.Summarizer(model5)


filepath = '/content'
start_date = '2021-01-25'
end_date = '2024-01-15'
mmm_summarizer.output_model_results_summary(
    'summary_output.html', filepath, start_date, end_date
)

  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  diff_b_a = subtract(b, a)


In [67]:
IPython.display.HTML(filename='/content/summary_output.html')

Dataset,R-squared,MAPE,wMAPE
All Data,0.47,14%,13%
