In [1]:
import numpy as np
import pandas as pd
from scipy.stats import truncnorm

In [2]:
# General configuration
np.random.seed(42)  # fixed seed for NumPy's global RNG, for reproducibility
n_days = 180  # number of simulated days (180 days ≈ 6 months)

# Daily budget range (€): minimum and maximum daily spend
min_spend, max_spend = 200, 1000

In [3]:
# Per-platform simulation parameters:
#   cpm   - divides spend to get impressions (spend / cpm * 1000)
#   ctr   - multiplies impressions to get clicks
#   alpha - coefficient on log(impressions + 1) in the conversion rate
#   beta  - constant term of the conversion rate
platforms = {
    'Google Ads': dict(cpm=5, ctr=0.02, alpha=0.001, beta=0.03),
    'Facebook Ads': dict(cpm=4, ctr=0.015, alpha=0.0008, beta=0.025),
    'Programmatic': dict(cpm=6, ctr=0.012, alpha=0.0006, beta=0.02),
}

In [4]:
# Empty module-level list (the generator function below builds its own local list)
data = list()

In [5]:
# Sample date: the first day of the simulated period
date = pd.Timestamp(ts_input='2023-01-01')

In [6]:
# One uniform sample in [min_spend, max_spend); note that this draw advances
# NumPy's global random state, so every later draw depends on it having run.
spend = np.random.uniform(low=min_spend, high=max_spend)

In [7]:
# Display the sample date and the sampled spend as a tuple
(date, spend)

(Timestamp('2023-01-01 00:00:00'), 499.63209507788997)

In [8]:
# Print each platform name followed by its parameter dict, one per line
for platform, params in platforms.items():
    print(platform, params, sep='\n')

Google Ads
{'cpm': 5, 'ctr': 0.02, 'alpha': 0.001, 'beta': 0.03}
Facebook Ads
{'cpm': 4, 'ctr': 0.015, 'alpha': 0.0008, 'beta': 0.025}
Programmatic
{'cpm': 6, 'ctr': 0.012, 'alpha': 0.0006, 'beta': 0.02}


In [9]:
# Generate spend values from a truncated normal distribution
def generate_normal_spend(min_spend, max_spend, mean_spend, std_spend, n_days, random_state=None):
    """Sample ``n_days`` spend values from a normal distribution truncated to a range.

    Parameters
    ----------
    min_spend, max_spend : float
        Lower and upper truncation limits; every sample lies within
        ``[min_spend, max_spend]``. ``min_spend`` must be strictly smaller
        than ``max_spend``.
    mean_spend : float
        Mean (``loc``) of the underlying, untruncated normal distribution.
    std_spend : float
        Standard deviation (``scale``) of the underlying normal distribution;
        must be strictly positive.
    n_days : int
        Number of samples to draw.
    random_state : optional
        Forwarded unchanged to ``rvs``. With the default ``None`` the
        distribution draws from NumPy's global random state, so a prior
        ``np.random.seed(...)`` call still controls the result.

    Returns
    -------
    numpy.ndarray
        Array holding the ``n_days`` sampled spend values.

    Raises
    ------
    ValueError
        If ``std_spend`` is not strictly positive or ``min_spend`` is not
        strictly smaller than ``max_spend``.
    """
    # Written as "not (x > 0)" so that NaN is rejected as well.
    if not std_spend > 0:
        raise ValueError(f"std_spend must be strictly positive, got {std_spend!r}")
    if not min_spend < max_spend:
        raise ValueError(
            f"min_spend must be smaller than max_spend, got {min_spend!r} and {max_spend!r}"
        )

    # truncnorm takes its limits in standardized units, i.e. as the number of
    # standard deviations between each limit and the mean.
    lower_bound = (min_spend - mean_spend) / std_spend
    upper_bound = (max_spend - mean_spend) / std_spend
    spend_distribution = truncnorm(lower_bound, upper_bound, loc=mean_spend, scale=std_spend)

    # Draw one spend value per day
    return spend_distribution.rvs(size=n_days, random_state=random_state)

In [10]:
# Parameters of the truncated normal spend distribution:
# mean and standard deviation of the spend
mean_spend, std_spend = 600, 150

In [13]:
# Generate the data again, this time with normally distributed spend
def generate_data_with_normal_spend(platform, params, n_days):
    """Simulate ``n_days`` of daily advertising metrics for one platform.

    Daily spend comes from ``generate_normal_spend`` called with the
    module-level ``min_spend``, ``max_spend``, ``mean_spend`` and
    ``std_spend``. For each day, counting from 2023-01-01:

    * impressions = (spend / cpm) * 1000, converted with ``int()``
    * clicks = impressions * ctr, converted with ``int()``
    * conversion rate = alpha * log(impressions + 1) + beta, clamped to
      [0, 1]; it is 0 when there are no impressions
    * conversions = clicks * conversion rate, converted with ``int()``

    Parameters
    ----------
    platform : value stored as-is in the second field of every record
    params : mapping providing the keys 'cpm', 'ctr', 'alpha' and 'beta'
    n_days : number of daily records to produce

    Returns
    -------
    list
        One ``[date, platform, spend, impressions, clicks, conversions]``
        list per day, in chronological order.
    """
    daily_spend = generate_normal_spend(min_spend, max_spend, mean_spend, std_spend, n_days)
    first_day = pd.Timestamp('2023-01-01')

    rows = []
    for offset, amount in enumerate(daily_spend):
        n_impressions = int((amount / params['cpm']) * 1000)
        n_clicks = int(n_impressions * params['ctr'])

        if n_impressions <= 0:
            # No impressions means nothing can convert
            rate = 0
        else:
            raw_rate = params['alpha'] * np.log(n_impressions + 1) + params['beta']
            rate = max(0, min(raw_rate, 1))  # keep the rate inside [0, 1]

        rows.append([
            first_day + pd.Timedelta(days=offset),
            platform,
            amount,
            n_impressions,
            n_clicks,
            int(n_clicks * rate),
        ])

    return rows

# Simulate every platform and gather all of the rows in a single list
all_data_with_normal_spend = []
for platform, params in platforms.items():
    platform_data = generate_data_with_normal_spend(platform, params, n_days)
    all_data_with_normal_spend += platform_data

# Turn the rows into a DataFrame with labelled columns
columns = [
    'Fecha',
    'Plataforma',
    'Gasto (€)',
    'Impresiones',
    'Clics',
    'Conversiones',
]
df_with_normal_spend = pd.DataFrame(data=all_data_with_normal_spend, columns=columns)

# Write the new dataset to disk, leaving out the index column
df_with_normal_spend.to_csv(
    'simulated_ad_data_with_normal_spend.csv',
    index=False,
)