# Libraries and functions

In [None]:
!pip install arm-mango

Collecting arm-mango
  Downloading arm_mango-1.5.1-py3-none-any.whl.metadata (24 kB)
Downloading arm_mango-1.5.1-py3-none-any.whl (29 kB)
Installing collected packages: arm-mango
Successfully installed arm-mango-1.5.1


In [None]:
import random
import warnings

import numpy as np

# Single seed shared by every generator seeded in this cell.
SEED = 123
random.seed(SEED)
# NOTE(review): Prophet appears to draw its uncertainty samples from NumPy's
# global generator, which random.seed() does not touch -- seed it as well so
# the interval bounds (and the outlier flags derived from them) are repeatable.
np.random.seed(SEED)

# Silences every warning for the rest of the session. This also hides pandas
# and Prophet deprecation notices; narrow the filter while debugging.
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
import numpy as np
import glob
import os

import plotly.express as px
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

##Data normalization & PCA
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import FunctionTransformer

import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.graph_objects as go
from plotly.subplots import make_subplots


from prophet import Prophet

import itertools

import time
import json
import datetime


from mango import scheduler, Tuner
from scipy.stats import uniform


from google.colab import auth
from google.cloud import bigquery

In [None]:
def cargar_datos(query):
    """Run `query` on BigQuery and return the result set as a DataFrame.

    :param query: SQL text handed to `bigquery.Client().query`
    :return: whatever `.to_dataframe()` yields for that query job; no column
             is parsed or converted here
    """
    # A new client is created on every call, with no explicit arguments.
    bq_client = bigquery.Client()
    query_job = bq_client.query(query)
    return query_job.to_dataframe()

In [None]:
########################## Anomaly detection ################################


def detect_anomalies(forecast):
  '''
  Flag observations that fall outside the forecast interval and score how far
  outside they are.

  :param forecast: DataFrame with the columns 'ds', 'trend', 'yhat',
                   'yhat_lower', 'yhat_upper' and 'fact' (the observed value)
  :return: Copy of those six columns (the input is not modified) plus
           'anomaly':    1 if fact > yhat_upper, -1 if fact < yhat_lower, else 0
           'importance': distance to the violated bound divided by fact,
                         0.0 for rows that are not anomalies
  '''
  forecasted = forecast[['ds','trend', 'yhat', 'yhat_lower', 'yhat_upper', 'fact']].copy()

  above = forecasted['fact'] > forecasted['yhat_upper']
  below = forecasted['fact'] < forecasted['yhat_lower']

  forecasted['anomaly'] = 0
  forecasted.loc[above, 'anomaly'] = 1
  forecasted.loc[below, 'anomaly'] = -1

  # The column must start as float: the scores written below are fractions,
  # and writing floats into an integer column is an incompatible-dtype
  # assignment in recent pandas.
  forecasted['importance'] = 0.0
  # A fact of exactly 0 yields an infinite score; that is left as is.
  forecasted.loc[above, 'importance'] = \
      (forecasted['fact'] - forecasted['yhat_upper'])/forecasted['fact']
  forecasted.loc[below, 'importance'] = \
      (forecasted['yhat_lower'] - forecasted['fact'])/forecasted['fact']

  return forecasted

def outliers_detection(df):
  '''
  Fit a default Prophet model to a single series and return the observations
  that fall outside its prediction interval.

  :param df: Two-column DataFrame: the datetime column first, the variable to
             inspect second (the column names themselves are irrelevant)
  :return: Rows of the detect_anomalies() output whose 'anomaly' is not 0
           (ds, trend, yhat, yhat_lower, yhat_upper, fact, anomaly, importance)
  '''
  # Work on a private copy so the caller's frame keeps its own column names.
  data = df.copy()
  data.columns = ['ds','y']
  data['ds'] = pd.to_datetime(data['ds'])
  # NOTE(review): Prophet appears to return its predictions ordered by 'ds'.
  # Sorting the input the same way keeps 'fact' aligned row by row with the
  # forecast even when the caller passes unsorted data -- confirm.
  data = data.sort_values('ds', kind='mergesort').reset_index(drop=True)

  #### prophet
  m = Prophet()
  m = m.fit(data)
  forecast = m.predict(data)
  forecast['fact'] = data['y']

  pred = detect_anomalies(forecast)

  return pred[pred.anomaly != 0]


########################## Convert to datetime ################################

def toDate(dataSet, atributeName, formato):
  '''
  Parse a column holding date indicators into datetimes.

  :param dataSet: DataFrame that contains the column to parse
  :param atributeName: Name of the column with the date indicator
  :param formato: strptime format of the values, e.g. '%Y%m%d %H:%M:%S'
  :return: New datetime Series; `dataSet` itself is left untouched
  '''
  # Each value is stringified first so numeric indicators such as 20240131 can
  # be parsed with `formato`. The string form is NOT written back into the
  # caller's frame.
  as_text = dataSet[atributeName].apply(lambda x : str(x))
  return pd.to_datetime(as_text, format=formato)

########################## Error metrics ################################

def mape(y_true, y_pred):
  '''
  Mean Absolute Percentage Error
  :param y_true: Real values (list, Series or array; flattened before use)
  :param y_pred: Predicted values, same number of elements as y_true
  :return: MAPE expressed in percent; inf/nan if y_true contains zeros
  '''
  # Flatten both inputs: a column vector (n, 1) against a flat vector (n,)
  # would otherwise broadcast to an (n, n) matrix and return a wrong number
  # without raising.
  y_true, y_pred = np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def rmse(y_true, y_pred):
  '''
  Root Mean Squared Error
  :param y_true: Real values (list, Series or array; flattened before use)
  :param y_pred: Predicted values, same number of elements as y_true
  :return: RMSE
  '''
  # Flatten both inputs so an (n, 1) column and an (n,) vector are compared
  # element by element instead of broadcasting to (n, n).
  y_true, y_pred = np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()
  # The squared difference is already non-negative, so no abs() is needed.
  return np.sqrt(np.mean((y_true - y_pred)**2))

def mae(y_true, y_pred):
  '''
  Mean Absolute Error
  :param y_true: Real values (list, Series or array; flattened before use)
  :param y_pred: Predicted values, same number of elements as y_true
  :return: MAE
  '''
  # Flatten both inputs so an (n, 1) column and an (n,) vector are compared
  # element by element instead of broadcasting to (n, n).
  y_true, y_pred = np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()
  return np.mean(np.abs(y_true - y_pred))

def correlation(y_true,y_pred):
  '''
  Correlation between the predictions and the real values of the previous
  timestep, i.e. y_pred[t] is paired with y_true[t-1].
  :param y_true: Real values (list, Series or array; flattened before use)
  :param y_pred: Predicted values, same number of elements as y_true
  :return: Pearson correlation at lag -1 (nan when fewer than two pairs remain)
  '''
  # Flatten first: slicing an (n, 1) column with [:-1] would otherwise hand
  # np.corrcoef a 2-D array, which it interprets as several variables.
  y_true, y_pred = np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()
  y_true = y_true[:-1]
  y_pred = y_pred[1:]
  return np.corrcoef(y_true,y_pred)[0,1]



def cross_validation_prophet(data,prophet_model,scal,init_split,folds,params,freq,lista_exogenas,variable,verbose=0):
  '''
  Rolling-window evaluation: the training window keeps a fixed length and
  slides forward one fold at a time; each fold is tested on the rows that
  immediately follow its training window.
  (Original author's note on this function: not to be used with hourly data.)

  :param data: Frame sliced positionally into train/test windows
  :param prophet_model: Callable (train, test, params, freq, lista_exogenas) -> (model, predict)
  :param scal: Scaler forwarded to evaluation_dataframe
  :param init_split: Fraction of `data` used for the first training window
  :param folds: Number of windows
  :param params: Prophet parameters forwarded to `prophet_model`
  :param freq: Frequency forwarded to `prophet_model`
  :param lista_exogenas: Regressor names forwarded to `prophet_model`
  :param variable: Name of the target column in the test window
  :param verbose: 1 prints progress, sizes and elapsed time
  :return: (all_errors_test, all_errors_train), one [mape, rmse, mae] list per fold
  '''
  started_at = time.time()

  n_rows = len(data)
  base_train = int(init_split*int(n_rows))
  # The rows that do not divide evenly among the folds are added to the
  # training window, so every test fold has the same size.
  init_train_size = base_train + (n_rows - base_train)%folds
  fold_size = int((n_rows - init_train_size)/folds)

  windows = [
      (data[k*fold_size:init_train_size + k*fold_size],
       data[init_train_size + k*fold_size:init_train_size + (k+1)*fold_size])
      for k in range(folds)
  ]

  all_errors_train, all_errors_test = [], []
  for k, (fold_train, fold_test) in enumerate(windows):
    if verbose==1:
      print(f"train {k+1}/{folds}. Train size = {len(fold_train)}")

    # The same fold_train object goes to both calls: prophet_model renames
    # its columns and evaluation_dataframe reads the renamed ones.
    model,predict = prophet_model(fold_train,fold_test,params,freq,lista_exogenas)
    errores_test,errores_train = evaluation_dataframe(predict,fold_train,fold_test,scal,variable,verbose)

    all_errors_test.append(errores_test)
    all_errors_train.append(errores_train)

  if verbose == 1:
    print(f"rolling window test size = {fold_size} --> {round(fold_size/len(data)*100,2)}%")
    print(f"init train size =  {init_train_size} --> {round(init_train_size/len(data)*100,2)}%")
    print(f'Time: {time.time() - started_at}')

  return all_errors_test, all_errors_train

def evaluation_dataframe(predict,df_train,df_test,scal,variable,verbose=0):
  '''
  Score a fitted model on its training and test windows, on the original
  (un-scaled) scale of the target.

  :param predict: Prophet prediction frame; 'ds' is the date, 'yhat' the prediction
  :param df_train: Training window with columns 'ds' and 'y' (the target)
  :param df_test: Test window; its target is read from column `variable`
  :param scal: Scaler used on the target; only its inverse_transform is used
  :param variable: Name of the target column in `df_test`
  :param verbose: 1 prints every metric and both window sizes
  :return: ([mape_test, rmse_test, mae_test], [mape_train, rmse_train, mae_train])
  '''
  # Rows up to the last training date are in-sample; later rows are the test horizon.
  last_train_date = df_train['ds'].iloc[-1]
  is_train = predict['ds'] <= last_train_date
  is_test = predict['ds'] > last_train_date

  y_test = _inverse_to_1d(scal, df_test[variable])
  yhat_test = _inverse_to_1d(scal, predict[is_test]['yhat'])
  y_train = _inverse_to_1d(scal, df_train['y'])
  yhat_train = _inverse_to_1d(scal, predict[is_train]['yhat'])

  mape_test = mape(y_test, yhat_test)
  rmse_test = rmse(y_test, yhat_test)
  mae_test = mae(y_test, yhat_test)

  mape_train = mape(y_train, yhat_train)
  rmse_train = rmse(y_train, yhat_train)
  mae_train = mae(y_train, yhat_train)

  if verbose == 1:
    print(f"MAPE_test = {mape_test.round(2)}%")
    # The value is a root mean squared error, so it is labelled RMSE.
    print(f"RMSE_test = {rmse_test.round(2)}")
    print(f"MAE_test = {mae_test.round(2)}")
    print(f"Tamaño Test = {len(df_test)}")
    print("-----------------------------")
    print(f"MAPE_train = {mape_train.round(2)}%")
    print(f"RMSE_train = {rmse_train.round(2)}")
    print(f"MAE_train = {mae_train.round(2)}")
    print(f"Tamaño train = {len(df_train)}")
    print("---------------------------------")

  return [mape_test,rmse_test,mae_test],[mape_train,rmse_train,mae_train]

def _inverse_to_1d(scal, values):
  '''Undo the target scaling and return the result as a flat 1-D array.'''
  # scikit-learn scalers such as StandardScaler/MinMaxScaler reject 1-D input,
  # so the values are passed as a single column and flattened again afterwards.
  column = np.asarray(values).reshape(-1, 1)
  return np.asarray(scal.inverse_transform(column)).ravel()

def prophet_model(df_train,df_test,params,freq,lista_exogenas):
  '''
  Fit Prophet on the training window and predict over the training history
  plus the test horizon.

  Side effect kept on purpose: the columns of `df_train` are renamed IN PLACE
  to ['ds', 'y'] + lista_exogenas. evaluation_dataframe and later notebook
  cells read df_train['ds'] / df_train['y'] after this call.

  :param df_train: Training window; columns ordered as date, target, then regressors
  :param df_test: Test window with the same column order; only its length and,
                  when regressors are used, its regressor values are needed
  :param params: Keyword arguments for the Prophet constructor
  :param freq: Frequency of the future dates (e.g. 'D')
  :param lista_exogenas: Names of the extra regressors ([] for none)
  :return: (fitted model, prediction frame covering history + len(df_test) periods)
  '''
  df_train.columns = ['ds','y']  + lista_exogenas
  model = Prophet(**params)
  for item in lista_exogenas:
    model.add_regressor(item)
  # NOTE(review): holidays are fixed to Mexico ('MX') -- confirm this matches
  # the market of the campaign being modelled.
  model.add_country_holidays(country_name='MX')
  model.fit(df_train)

  future = model.make_future_dataframe(periods=len(df_test),freq=freq,include_history=True)

  if len(lista_exogenas) > 0 :
    # Regressor values are required for every date to predict, test horizon
    # included. Merging the training rows alone drops all future dates, which
    # leaves nothing to compare the test window against.
    test_regressors = df_test.copy()
    test_regressors.columns = ['ds','y'] + lista_exogenas
    data_future = pd.concat([df_train, test_regressors], ignore_index=True)
    future = future.merge(data_future[['ds'] + lista_exogenas],on='ds')

  predict = model.predict(future)

  return model,predict

def create_param_combinations(**param_dict):
    """Build the full parameter grid.

    Each keyword maps a parameter name to an iterable of candidate values.
    The result has one column per keyword (in the order given) and one row
    per element of the Cartesian product of the candidate values.
    """
    column_names = list(param_dict.keys())
    grid_rows = list(itertools.product(*param_dict.values()))
    return pd.DataFrame(grid_rows, columns=column_names)
def get_key(val,my_dict):
    """Return the first key of `my_dict` whose value equals `val`.

    When no value matches, the string "key doesn't exist" is returned
    instead of raising.
    """
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, "key doesn't exist")


# Reuse this function for the data treatment.
def data_preparation(bool_outliers,inital_date,val_size,scaler,lista_exogenas,variable,variable_fecha,campana_especifica,verbose=0):
  '''
  Load one campaign from BigQuery and turn it into a gap-free, scaled daily series.

  Steps: filter the campaign, reindex to a complete daily calendar,
  interpolate the target, optionally clip outliers to the Prophet interval
  bounds, impute and standardise the exogenous regressors, scale the target,
  and set aside the tail of the series for validation.

  :param bool_outliers: True replaces detected outliers by yhat_lower / yhat_upper
  :param inital_date: Rows before this date are discarded from both outputs
  :param val_size: Fraction of the series held out at the end for validation
  :param scaler: 'standard', 'exp' (log transform, inverted with exp) or
                 'minMax'; any other value leaves the target unscaled
  :param lista_exogenas: Names of the regressor columns to keep ([] for none)
  :param variable: Name of the target column
  :param variable_fecha: Name of the date column
  :param campana_especifica: Value of the "Campaign" column to keep
  :param verbose: 1 prints the resulting sizes
  :return: (data_eva, scal, df_filt): the series without the validation tail,
           the fitted target scaler, and the full series; both frames are
           indexed by `variable_fecha`
  :raises ValueError: when the campaign has no rows
  '''
  query_sabana = "SELECT * FROM `ga4-advance-analytics-alk-ktr.ZONE_STAGGING.forecast_tabla_final`"
  df = cargar_datos(query_sabana)

  columnas = [variable_fecha, variable] + lista_exogenas

  # Keep the requested campaign only. The regressor columns are selected here
  # as well; without them any non-empty lista_exogenas fails further down.
  df = df.loc[df["Campaign"] == campana_especifica, columnas].copy()
  if df.empty:
    raise ValueError(f"No rows found for campaign {campana_especifica!r}")
  df[variable_fecha] = pd.to_datetime(df[variable_fecha])

  ## Fill in missing days
  # NOTE(review): reindex requires unique dates -- assumes the table holds at
  # most one row per campaign and day; confirm.
  df = df.set_index(variable_fecha)
  rango_fechas = pd.date_range(start=df.index.min(), end=df.index.max(), freq='D')
  df_completo = df.reindex(rango_fechas)

  # Linear interpolation of the target. The result is assigned back instead of
  # using inplace=True on a column selection, which is not guaranteed to
  # update the frame.
  df_completo[variable] = df_completo[variable].interpolate(method='linear')

  df = df_completo.reset_index()
  df.columns = columnas

  ## Validation size: number of leading rows that are NOT held out
  size_val = len(df) - int(val_size*len(df))

  # Outliers: clip each flagged day to the interval bound it violated.
  if bool_outliers:
    outliers = outliers_detection(df[[variable_fecha,variable]])
    for fila in outliers.itertuples(index=False):
      limite = fila.yhat_upper if fila.anomaly == 1 else fila.yhat_lower
      # A scalar is assigned, so the result does not depend on the outlier
      # frame and `df` happening to share the same index labels.
      df.loc[df[variable_fecha] == fila.ds, variable] = limite

  # Extra regressors: time indicators, lags, exogenous variables
  df_filt = df[columnas].copy()

  # Impute regressors by interpolation, then 0 for whatever is still missing
  for item in lista_exogenas:
    df_filt[item] = df_filt[item].interpolate().fillna(0)

  # Standardise each regressor with its own scaler
  for item in lista_exogenas:
    df_filt[item] = StandardScaler().fit_transform(np.array(df_filt[item]).reshape(-1, 1))

  # Scaler for the target
  if scaler == 'standard':
    scal = StandardScaler()
  elif scaler == 'exp':
    scal = FunctionTransformer(np.log,inverse_func=np.exp)
  elif scaler == 'minMax':
    scal = MinMaxScaler(feature_range=(-1, 1))
  else:
    scal = FunctionTransformer(None,inverse_func=None)

  df_filt[variable] = scal.fit_transform(np.array(df_filt[variable]).reshape(-1, 1))

  # Set aside a portion of data for validation: recommended 5%
  data_eva = df_filt[:size_val]

  # Drop everything before the initial date and index both frames by date
  data_eva = data_eva[data_eva[variable_fecha]>=inital_date].copy()
  data_eva = data_eva.set_index(variable_fecha)

  df_filt = df_filt[df_filt[variable_fecha]>=inital_date].copy()
  df_filt = df_filt.set_index(variable_fecha)

  if verbose == 1:
    print(f"Full data: {len(df_filt)}")
    print(f"Data size whitout validation: {len(data_eva)}")
    print(f"Validation size: {len(df_filt) - len(data_eva)}")

  # train + test data, target scaler, complete data
  return data_eva,scal,df_filt


# Data preparation

In [None]:
# Run configuration, consumed by data_preparation() and the cells below.
# Target column to forecast and the date column of the source table.
variable = "impressions"
variable_fecha = "Date"
# Value of the "Campaign" column to keep.
campana_especifica = "AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB"
# Fraction of the series held out at the end for validation.
val_size = 0.05
bool_outliers = True  # True clips detected outliers to the Prophet interval bounds
# Names of extra regressor columns ([] = none).
lista_exogenas = []
scaler = "None" # 'standard', 'exp' (log transform) or 'minMax'; any other value, like this one, leaves the target unscaled
verbose = 1
# Rows before this date are discarded by data_preparation().
inital_date = '2021-01-01'
# Key into the frequency map defined in the next cell.
granularity = 'daily'

In [None]:
# Granularity label -> frequency string passed as `freq` when the future
# dates are generated (see prophet_model).
freq_dic = {'daily':'D','hourly':'H','monthly':'MS'}
# Raises KeyError if `granularity` is not one of the three labels above.
freq = freq_dic[granularity]

In [None]:
# Load and prepare the campaign series, then move the date index back into a
# regular column so later cells can address it by name.
data_eva, scal, df_complete = data_preparation(
    bool_outliers, inital_date, val_size, scaler, lista_exogenas,
    variable, variable_fecha, campana_especifica, verbose,
)
data_eva = data_eva.reset_index()
df_complete = df_complete.reset_index()

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmph5a2awn9/24dro1i1.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmph5a2awn9/54ai2cqw.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=54613', 'data', 'file=/tmp/tmph5a2awn9/24dro1i1.json', 'init=/tmp/tmph5a2awn9/54ai2cqw.json', 'output', 'file=/tmp/tmph5a2awn9/prophet_modelzxrlqy9l/prophet_model-20250521143123.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:31:23 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:31:23 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Full data: 888
Data size whitout validation: 844
Validation size: 44


In [None]:
# One line per series (every exogenous regressor, then the target) over the
# complete prepared history.
plots = lista_exogenas + [variable]

fig = go.Figure()
for columna in plots:
  serie = go.Scatter(
      x=df_complete[variable_fecha],
      y=df_complete[columna],
      name=columna,
  )
  fig.add_trace(serie)

fig.update_layout(
    title='Histórico Mensual',
    xaxis_title="Fecha",
    xaxis_tickformat='%d %B <br>%Y',
    yaxis_title="all variables",
    autosize=False,
    width=900,
    height=500,
    legend=dict(
        x=.1,
        y=.92,
        bordercolor="Black",
        borderwidth=1,
        orientation="v",
    ),
    font=dict(family='helvetica light', size=18, color='black'),
)
fig.show()

# Single training Prophet


In [None]:
# Hand-picked Prophet settings for the daily series; the dict is unpacked into
# the Prophet constructor by prophet_model() and reFit().
# The values in trailing comments are presumably earlier trial values -- confirm.

params = {
    'changepoint_prior_scale': 0.03, # 0.05
    'seasonality_mode': 'multiplicative',
    'changepoint_range': 0.4, # 0.5
    'holidays_prior_scale': 10,
    # NOTE(review): Prophet appears to read an integer seasonality setting as
    # the number of Fourier terms; 20 terms for a weekly cycle looks high -- confirm.
    'yearly_seasonality': 3, #5
    'weekly_seasonality':20,
    'n_changepoints':25
          }

# Fraction of data_eva used for training; the remainder is the test window.
split = 0.9

In [None]:
## Chronological split: the first `split` share of data_eva trains the model,
## the remaining rows test it.
n_train = int(split*len(data_eva))
# Explicit copies: later cells rename the columns of both frames and add
# columns to them, which must not be done on slices of data_eva.
train = data_eva[:n_train].copy()
test = data_eva[n_train:].copy()

In [None]:
# Fit once on `train`, predict over history + test horizon, and print the
# error metrics on the original scale.
# Note: prophet_model renames the columns of `train` to ['ds', 'y'] in place.
model, predict = prophet_model(
    df_train=train,
    df_test=test,
    params=params,
    freq=freq,
    lista_exogenas=lista_exogenas,
)
lista_errores_test, lista_errores_train = evaluation_dataframe(
    predict, train, test, scal, variable, verbose=1
)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmph5a2awn9/muvhm4bc.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmph5a2awn9/2ij23xlf.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=39406', 'data', 'file=/tmp/tmph5a2awn9/muvhm4bc.json', 'init=/tmp/tmph5a2awn9/2ij23xlf.json', 'output', 'file=/tmp/tmph5a2awn9/prophet_modelrlyuso0z/prophet_model-20250521143149.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:31:49 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:31:49 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


MAPE_test = 30.91%
MSE_test = 61914.45
MAE_test = 49304.92
Tamaño Test = 85
-----------------------------
MAPE_train = 47.65%
MSE_train = 52745.53
MAE_train = 42451.79
Tamaño train = 759
---------------------------------


In [None]:
# Training data, test data and the model output on one chart, all on the
# original scale. `train` is read through the 'ds'/'y' names it received from
# prophet_model; `test` still carries its original column names here.
trazas = [
    (train['ds'], train['y'], dict(color='black', width=2, dash='solid'), 'Training'),
    (test[variable_fecha], test[variable], dict(color='blue', width=2), 'Test'),
    (predict['ds'], predict['yhat'], dict(color='green', width=2), 'Forecast'),
]

fig = go.Figure()
for fechas, valores, linea, etiqueta in trazas:
    fig.add_trace(
        go.Scatter(
            x=fechas,
            y=scal.inverse_transform(valores),
            line=linea,
            name=etiqueta,
        )
    )

fig.update_layout(
    title='Prophet model:',
    xaxis_title="Fecha",
    xaxis_tickformat='%d %B <br>%Y',
    yaxis_title=f"{variable}",
    autosize=False,
    width=900,
    height=500,
    legend=dict(
        x=.6,
        y=1.1,
        bordercolor="Black",
        borderwidth=1,
        orientation="h",
    ),
    font=dict(family='helvetica light', size=18, color='black'),
)
fig.show()

# hyperparameters

In [None]:
# Search space for the tuner. Lists and ranges are discrete choices;
# scipy's uniform(loc, scale) is continuous on [loc, loc + scale].
param_space = {
    'growth': ['linear', 'logistic', 'flat'],
    'n_changepoints': range(0, 55, 5),
    'changepoint_range': uniform(0.5, 0.5),         # [0.5, 1.0]
    'yearly_seasonality': [True, False],
    'weekly_seasonality': [True, False],
    'daily_seasonality': [True, False],
    'seasonality_mode': ['additive', 'multiplicative'],
    'seasonality_prior_scale': uniform(5.0, 15.0),  # [5.0, 20.0]
    'changepoint_prior_scale': uniform(0.0, 0.1),   # [0.0, 0.1]
    'interval_width': uniform(0.2, 0.8),            # [0.2, 1.0]
    'uncertainty_samples': [500, 1000, 1500, 2000],
}

In [None]:
# Give the test window Prophet's column names so objective_function can read
# test['y']. This mutates `test`: cells above that read test[variable_fecha] /
# test[variable] no longer work once this cell has run.
test.columns = ["ds","y"]

In [None]:
def objective_function(args_list):
    """Loss function for the tuner: test MAPE of Prophet for each candidate.

    Reads the notebook globals `train`, `test` (both with columns 'ds', 'y'),
    `freq` and `scal`.

    :param args_list: Iterable of parameter dicts, each unpacked into Prophet(...)
    :return: (params_evaluated, results): the candidates in the order received
             and their losses; a candidate that fails gets FAILURE_LOSS
    """
    # Loss reported for candidates that cannot be fitted or scored.
    # NOTE(review): a candidate whose real MAPE is above this value ranks
    # worse than a failing one -- confirm that 35.0 is meant as the upper bound.
    FAILURE_LOSS = 35.0

    params_evaluated = []
    results = []

    for params in args_list:
        try:
            model = Prophet(**params)
            model.add_country_holidays(country_name='MX')
            model.fit(train)
            future = model.make_future_dataframe(periods=len(test), freq=freq)
            forecast = model.predict(future)
            # The last len(test) rows of the forecast are the test horizon.
            predictions_tuned = forecast.tail(len(test))
            error = mape(scal.inverse_transform(test['y']), scal.inverse_transform(predictions_tuned['yhat']))
        except Exception:
            # Deliberate best effort: invalid regions of the search space are
            # penalised instead of stopping the search. Only Exception is
            # caught, so Ctrl-C / kernel interrupts still stop the tuner.
            error = FAILURE_LOSS

        params_evaluated.append(params)
        results.append(error)

    return params_evaluated, results

In [None]:
#conf_Dict = dict()
#conf_Dict['initial_random'] = 10
#conf_Dict['num_iteration'] = 50
#
#tuner = Tuner(param_space, objective_function, conf_Dict)
#results = tuner.minimize()
#print('best parameters:', results['best_params'])
#print('best loss:', results['best_objective'])

# Salida de entrenamiento


In [None]:
# Keep the date and point prediction only, back on the original scale, under
# the column names of the output table.
# Not re-runnable: after this cell `predict` no longer has 'ds' / 'yhat'.
predict = predict[["ds","yhat"]].copy()
# The values go through the scaler as a single 2-D column because
# scikit-learn scalers reject 1-D input; the result is flattened again.
predict["yhat"] = np.asarray(
    scal.inverse_transform(predict["yhat"].to_numpy().reshape(-1, 1))
).ravel()
predict = predict.rename(columns={"ds": "Date", "yhat": "Predicted Impressions"})

In [None]:
# Stack the train and test windows into one labelled frame for the output
# table. New frames are built instead of renaming `train` / `test` in place,
# so this cell can be re-run and the earlier cells keep working.
columnas_salida = ["Date","Real Impressions"]

entrenamiento = pd.concat([
    train.set_axis(columnas_salida, axis=1).assign(Set="Train"),
    test.set_axis(columnas_salida, axis=1).assign(Set="Test"),
])
entrenamiento["Month"] = entrenamiento["Date"].dt.month
entrenamiento["Campaign"] = campana_especifica
entrenamiento["Granularidad"] = granularity
# Back to the original scale; passed as a single 2-D column because
# scikit-learn scalers reject 1-D input.
entrenamiento["Real Impressions"] = np.asarray(
    scal.inverse_transform(entrenamiento["Real Impressions"].to_numpy().reshape(-1, 1))
).ravel()



In [None]:
# Attach the prediction of each date to the observed values (left join, so
# every observed row is kept) and fix the column order of the output table.
columnas_salida_1 = [
    "Date", "Month", "Campaign",
    "Real Impressions", "Predicted Impressions", "Set",
]
salida_1 = entrenamiento.merge(predict, on="Date", how="left")[columnas_salida_1]

In [None]:
# Two decimals are enough for the exported impression counts.
columnas_redondeo = ["Real Impressions", "Predicted Impressions"]
salida_1[columnas_redondeo] = salida_1[columnas_redondeo].round(2)
#

In [None]:
def guardar_en_bigquery(df, tabla_destino, if_exists='replace'):
    """Save a DataFrame to a BigQuery table and print a confirmation.

    :param df: DataFrame to upload (through its `to_gbq` method)
    :param tabla_destino: Destination table name, passed as `destination_table`
    :param if_exists: Forwarded to `to_gbq`; the default 'replace' overwrites
                      an existing table, which is what this function always did
    """
    df.to_gbq(destination_table=tabla_destino, if_exists=if_exists)
    print(f'DataFrame guardado en BigQuery en la tabla: {tabla_destino}')

In [None]:
# Export the train/test comparison; guardar_en_bigquery replaces the table if it already exists.
guardar_en_bigquery(salida_1, 'ga4-advance-analytics-alk-ktr.ZONE_STAGGING.SF_entrenamiento_prophet')

100%|██████████| 1/1 [00:00<00:00, 8224.13it/s]

DataFrame guardado en BigQuery en la tabla: ga4-advance-analytics-alk-ktr.ZONE_STAGGING.SF_entrenamiento_prophet





Unnamed: 0,Date,Month,Campaign,Real Impressions,Predicted Impressions,Set
0,2022-07-28,7,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,101562.00,139552.22,Train
1,2022-07-29,7,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,252747.00,151040.02,Train
2,2022-07-30,7,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,264057.27,156664.84,Train
3,2022-07-31,7,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,238241.00,169032.76,Train
4,2022-08-01,8,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,99186.76,141279.72,Train
...,...,...,...,...,...,...
839,2024-11-13,11,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,126654.00,70541.86,Test
840,2024-11-14,11,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,154006.00,65525.69,Test
841,2024-11-15,11,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,97990.00,73436.43,Test
842,2024-11-16,11,AK_COL_MAX_PEF_CPC_AON_TLP_Apple_Feb20_EXP_FEB,178114.00,77513.54,Test


# Salida de forecast

In [None]:
# best params
#best parameters: {'changepoint_prior_scale': 0.04846681613315475, 'changepoint_range': 0.6759636212275814, 'daily_seasonality': True, 'growth': 'flat', 'interval_width': 0.5662169920787155, 'n_changepoints': 0, 'seasonality_mode': 'additive', 'seasonality_prior_scale': 11.831205779427954, 'uncertainty_samples': 1500, 'weekly_seasonality': False, 'yearly_seasonality': False}
#best loss: 25.67372014704618
#params = {'changepoint_prior_scale': 0.04846681613315475, 'changepoint_range': 0.6759636212275814, 'daily_seasonality': True, 'growth': 'flat', 'interval_width': 0.5662169920787155, 'n_changepoints': 0, 'seasonality_mode': 'additive', 'seasonality_prior_scale': 11.831205779427954, 'uncertainty_samples': 1500, 'weekly_seasonality': False, 'yearly_seasonality': False}

In [None]:
def reFit(df, params,verbose=0):
  '''
  Fit a Prophet model on a complete dataset (no train/test split).

  Reads the notebook global `lista_exogenas` for the regressor names.

  :param df: Frame with columns ordered as date, target, then one column per
             name in lista_exogenas; it is NOT modified
  :param params: Keyword arguments for the Prophet constructor
  :param verbose: Currently unused; kept so existing calls keep working
  :return: The fitted Prophet model
  '''
  # Rename on a copy so the caller's frame keeps its own column names, and
  # include the regressor names so the rename also fits frames with
  # exogenous columns.
  data = df.copy()
  data.columns = ['ds','y'] + lista_exogenas

  model = Prophet(**params)
  for item in lista_exogenas:
    model.add_regressor(item)
  model.add_country_holidays(country_name='MX')
  model.fit(data)

  return model

In [None]:
# Refit on the complete series (validation tail included).
model_final = reFit(df_complete, params)

# Future frame: the history plus the next 30 periods at the configured
# frequency. Passing `freq` keeps this in line with prophet_model(); for the
# 'daily' granularity it is the same 30 days as the default.
forecast_horizon = 30
future = model_final.make_future_dataframe(periods=forecast_horizon, freq=freq)

# Make the forecast
forecast = model_final.predict(future)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmph5a2awn9/3ejyur52.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmph5a2awn9/i70jujnk.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=66744', 'data', 'file=/tmp/tmph5a2awn9/3ejyur52.json', 'init=/tmp/tmph5a2awn9/i70jujnk.json', 'output', 'file=/tmp/tmph5a2awn9/prophet_modeluqs_ojef/prophet_model-20250521143928.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:39:28 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:39:28 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
# Shape the forecast into the output table: date, month, point prediction and
# interval bounds, all on the original scale.
# Not re-runnable: after this cell `forecast` no longer has the 'ds' / 'yhat' columns.
forecast = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].rename(columns={
    'ds': 'Date',
    'yhat': 'Predicción',
    'yhat_lower': 'Lower Bound',
    'yhat_upper': 'Upper Bound',
})
forecast["Month"] = forecast["Date"].dt.month
forecast = forecast[['Date','Month','Predicción','Lower Bound','Upper Bound']].copy()

for columna in ['Predicción','Lower Bound','Upper Bound']:
    # Single 2-D column in, flat vector out: scikit-learn scalers reject 1-D input.
    valores = forecast[columna].to_numpy().reshape(-1, 1)
    forecast[columna] = np.asarray(scal.inverse_transform(valores)).ravel().round(2)

# Keep the future rows only: everything after the last date the model was
# fitted on. For the data this notebook was run with, that is the same cut as
# the former fixed date 2025-01-01, but it stays correct when the table grows.
filter_date = model_final.history_dates.max()

# Apply the filter
forecast = forecast[forecast['Date'] > filter_date]


In [None]:
# Forecast with its interval. Each trace has its own legend entry; with the
# same name on all three, the bounds and the prediction cannot be told apart.
fig = go.Figure()
fig.add_trace(go.Scatter(x=forecast['Date'], y= forecast['Predicción'],
                    line=dict(color='black', width=2,dash='solid'),
                    name='pred'))
fig.add_trace(go.Scatter(x=forecast['Date'], y= forecast['Lower Bound'],
                    line=dict(color='red', width=2,dash='solid'),
                    name='lower bound'))
fig.add_trace(go.Scatter(x=forecast['Date'], y= forecast['Upper Bound'],
                    line=dict(color='red', width=2,dash='solid'),
                    name='upper bound'))
fig.update_layout(
    legend=dict(x=.6, y=1.1,bordercolor="Black",borderwidth=1),
    legend_orientation="h",
    autosize=False,
    width=900,
    height=500,
    title='Prophet model:',
    xaxis_title="Fecha",
    xaxis_tickformat = '%d %B <br>%Y',
    yaxis_title=f"{variable}",
    font=dict(
        family='helvetica light',
        size=18,
        color='black'))
fig.show()

In [None]:
# Preview only; the complete forecast is exported to BigQuery below.
forecast.head(10)

Unnamed: 0,Date,Month,Predicción,Lower Bound,Upper Bound
888,2025-01-01,1,173853.19,107670.73,237528.83
889,2025-01-02,1,171927.2,108564.54,240477.74
890,2025-01-03,1,182770.77,116342.28,253388.37
891,2025-01-04,1,191514.52,119834.08,255735.36
892,2025-01-05,1,208074.38,141332.56,279005.26
893,2025-01-06,1,181115.97,114360.66,243266.7
894,2025-01-07,1,188376.2,122443.29,250213.42
895,2025-01-08,1,189572.35,122565.46,255626.56
896,2025-01-09,1,183209.87,119287.22,247433.67
897,2025-01-10,1,193739.45,127257.87,260253.73


In [None]:
# Export the forecast; guardar_en_bigquery replaces the table if it already exists.
guardar_en_bigquery(forecast, 'ga4-advance-analytics-alk-ktr.ZONE_STAGGING.SF_prediccion_prophet')

100%|██████████| 1/1 [00:00<00:00, 8439.24it/s]

DataFrame guardado en BigQuery en la tabla: ga4-advance-analytics-alk-ktr.ZONE_STAGGING.SF_prediccion_prophet



