# Marketing Analytics POC Notebook
This notebook loads the marketing data warehouse, computes KPIs, performs exploratory data analysis (EDA), and fits a simple baseline model for predicting conversions.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy as sa

# Connect to the marketing data warehouse (a SQLite file at an absolute,
# environment-specific path — adjust when running elsewhere).
engine = sa.create_engine("sqlite:////mnt/data/marketing_dw.db")

# Load the fact table and every dimension table in full.
fact = pd.read_sql("SELECT * FROM fact_marketing_performance", engine)
platforms = pd.read_sql("SELECT * FROM dim_platform", engine)
campaigns = pd.read_sql("SELECT * FROM dim_campaign", engine)
creatives = pd.read_sql("SELECT * FROM dim_creative", engine)
countries = pd.read_sql("SELECT * FROM dim_country", engine)

# Denormalise the star schema into one wide frame.
# validate="many_to_one" makes pandas raise MergeError if a dimension key is
# duplicated, instead of silently fanning out fact rows (which would inflate
# every downstream sum of spend / revenue / conversions).
df = (
    fact
    .merge(platforms, on="platform_key", validate="many_to_one")
    .merge(campaigns, on="campaign_key", validate="many_to_one")
    .merge(creatives, on="creative_key", validate="many_to_one")
    .merge(countries, on="country_key", validate="many_to_one")
)

# The joins are inner joins, so fact rows whose key is missing from a
# dimension are dropped. Surface that instead of losing rows silently.
if len(df) != len(fact):
    print(f"Warning: {len(fact) - len(df)} of {len(fact)} fact rows were dropped by the dimension joins")

# Derived KPI columns.
# Every denominator has zeros replaced by NaN first (the guard `cac` already
# used), so a row with zero impressions / clicks / spend yields NaN rather
# than +/-inf. NaN is skipped by pandas aggregations, whereas a single inf
# would turn a group mean into inf.
df['ctr'] = df['clicks'] / df['impressions'].replace(0, np.nan)             # click-through rate
df['cpc'] = df['spend'] / df['clicks'].replace(0, np.nan)                   # cost per click
df['cpm'] = df['spend'] / (df['impressions'].replace(0, np.nan) / 1000)     # cost per 1000 impressions
df['cvr'] = df['conversions'] / df['clicks'].replace(0, np.nan)             # conversions per click
df['roas'] = df['revenue'] / df['spend'].replace(0, np.nan)                 # revenue per unit of spend
df['cac'] = df['spend'] / df['new_customers'].replace(0, np.nan)            # spend per new customer

df.head()

## Top Creatives by ROAS

In [None]:
# Creative-level ROAS = total revenue / total spend for the creative.
# Averaging the row-level `roas` column instead would weight a tiny-spend row
# the same as a large one, and any row with zero spend (inf ratio) would push
# its creative to the top of the ranking.
creative_totals = df.groupby("creative_name")[["revenue", "spend"]].sum()
creative_roas = (
    creative_totals["revenue"] / creative_totals["spend"].replace(0, np.nan)
).rename("roas")

# sort_values places NaN (creatives with no spend) last, so they never reach the top 10.
creative_roas.sort_values(ascending=False).head(10)

## CTR vs ROAS Scatter Plot

In [None]:
# Keep only rows where both ratios are finite: NaN/inf values (from zero
# impressions or zero spend) cannot be placed on the axes.
finite_mask = np.isfinite(df["ctr"]) & np.isfinite(df["roas"])

fig, ax = plt.subplots()
sns.scatterplot(
    data=df[finite_mask],
    x="ctr",
    y="roas",
    hue="platform_name",
    size="spend",
    ax=ax,
)
# The original title contained mojibake ("â€”"), most likely a mis-decoded em dash.
ax.set_title("CTR vs ROAS — Creative Efficiency")
plt.show()

## Simple Predictive Model (Conversions)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# Baseline model: predict conversions from the raw volume and spend columns.
features = df[["impressions", "clicks", "spend"]]
target = df["conversions"]

# Hold out 20% of the rows for evaluation; the split is seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Seed the forest as well: without random_state the fitted trees — and so the
# R2, MAE and importances below — change on every run, even though the split
# above is fixed.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
pred = model.predict(X_test)

print("R2:", r2_score(y_test, pred))
print("MAE:", mean_absolute_error(y_test, pred))

# Feature importances of the fitted forest, labelled by feature name.
pd.Series(model.feature_importances_, index=features.columns)