<a href="https://colab.research.google.com/github/brunobobadilla06/Portfolio-Proyectos/blob/main/Analisis_de_Marketing_En_coderhouse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# 1) Importación de librerías
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Seed NumPy's legacy global random generator so that any np.random-based
# draws in this notebook are repeatable across runs.
np.random.seed(42)


In [None]:


# Location of the raw campaign export inside the Colab runtime.
ruta_csv = "/content/marketing_digital.csv"

# Load the raw data into the frame every later cell works from.
marketing = pd.read_csv(ruta_csv)

# Preview the first rows to confirm the file parsed as expected.
marketing.head()


In [None]:

# Parse the date column so the .dt accessors used in later cells work.
marketing["fecha"] = pd.to_datetime(marketing["fecha"])

# Denominators with zeros masked to NaN: dividing by a raw zero would yield
# +/-inf, which then turns every downstream mean/plot for that group into inf.
# With NaN the row is simply ignored by pandas aggregations (skipna default).
impresiones_validas = marketing["impresiones"].replace(0, np.nan)
clics_validos = marketing["clics"].replace(0, np.nan)
leads_validos = marketing["leads"].replace(0, np.nan)

# Click-through rate: clicks per impression.
marketing["ctr"] = marketing["clics"] / impresiones_validas
# Cost per click.
marketing["cpc"] = marketing["costo"] / clics_validos
# Cost per lead.
marketing["costo_por_lead"] = marketing["costo"] / leads_validos
# Share of leads that ended up converting.
marketing["tasa_conversion"] = marketing["conversiones"] / leads_validos

marketing.head()


In [None]:


# Total number of leads generated by each channel.
leads_canal = marketing.groupby("canal", as_index=False).agg(leads=("leads", "sum"))

# Bar chart drawn on an explicit Axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
sns.barplot(data=leads_canal, x="canal", y="leads", ax=ax)
ax.set_title("Leads generados por canal")
fig.tight_layout()
plt.show()


In [None]:

# Rows where the ratio was computed with a zero denominator hold +/-inf; a
# single such row would make the whole channel average inf. Treat them as
# missing so mean() (which skips NaN) averages only the well-defined rows.
cpl_por_fila = marketing["costo_por_lead"].replace([np.inf, -np.inf], np.nan)

# Unweighted mean of the per-row cost per lead for each channel.
# NOTE(review): this weights every row equally regardless of volume; if a
# volume-weighted figure is wanted, use sum(costo) / sum(leads) per channel.
cpl_canal = (
    marketing.assign(costo_por_lead=cpl_por_fila)
    .groupby("canal", as_index=False)["costo_por_lead"]
    .mean()
)

sns.barplot(data=cpl_canal, x="canal", y="costo_por_lead")
plt.title("Costo por Lead promedio por canal")
plt.tight_layout()
plt.show()


In [None]:

# Bucket every record into its calendar month, represented by the timestamp
# of the first instant of that month.
marketing["mes"] = marketing["fecha"].dt.to_period("M").dt.start_time

# One row per month with the total leads collected in it.
leads_mensuales = (
    marketing.groupby("mes", as_index=False)["leads"]
    .sum()
    .rename(columns={"leads": "leads_mensuales"})
)

# Line chart of the monthly totals, with tilted date labels for legibility.
fig, ax = plt.subplots()
ax.plot(leads_mensuales["mes"], leads_mensuales["leads_mensuales"])
ax.set_title("Evolución mensual de leads")
ax.tick_params(axis="x", labelrotation=45)
fig.tight_layout()
plt.show()


In [None]:

# Add the two model features in one step:
#   mes_num   - calendar month number taken from the month timestamp
#   tendencia - 0-based position of the row in the monthly table
leads_mensuales = leads_mensuales.assign(
    mes_num=leads_mensuales["mes"].dt.month,
    tendencia=np.arange(len(leads_mensuales)),
)

leads_mensuales.head()


In [None]:

# Feature matrix (month number + trend index) and target (monthly leads).
columnas_modelo = ["mes_num", "tendencia"]
X = leads_mensuales[columnas_modelo]
y = leads_mensuales["leads_mensuales"]

# Splitter that yields 4 (train, test) index pairs over the rows in order.
tscv = TimeSeriesSplit(n_splits=4)
errores = []

for train_idx, test_idx in tscv.split(X):
    # Slice features and target for this fold.
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

    # Fresh forest per fold; fit() returns the estimator itself.
    modelo = RandomForestRegressor(n_estimators=300, random_state=42).fit(X_train, y_train)

    # Record the mean absolute error on this fold's test rows.
    pred = modelo.predict(X_test)
    errores.append(mean_absolute_error(y_test, pred))

# Average MAE across the folds (displayed as the cell output).
np.mean(errores)


In [None]:

# Number of months to project beyond the last observed month.
horizonte = 3

# Derive the calendar months being forecast from the data itself instead of
# assuming the history ends in December (which is what a fixed [1, 2, 3]
# implies). MonthBegin(1) moves to the first day of the following month.
ultimo_mes = leads_mensuales["mes"].max()
meses_futuros = pd.date_range(
    ultimo_mes + pd.offsets.MonthBegin(1), periods=horizonte, freq="MS"
)

futuro = pd.DataFrame({
    "mes_num": meses_futuros.month,
    # Continue the 0-based trend index right after the last training row.
    "tendencia": np.arange(len(X), len(X) + horizonte),
})

# Fit the forecasting model on the full history. The `modelo` left over from
# the cross-validation loop was trained only on the last fold's training
# rows, so it never saw the most recent observations.
modelo_final = RandomForestRegressor(n_estimators=300, random_state=42)
modelo_final.fit(X, y)

# NOTE(review): tree ensembles predict from values seen in training, so
# `tendencia` beyond the training range will not extend the trend; confirm
# this model is appropriate for extrapolation.
futuro["leads_proyectados"] = modelo_final.predict(futuro[["mes_num", "tendencia"]])
futuro
