In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# -------------------------------
# PARAMETERS
# -------------------------------
num_customers = 200
max_touches_per_customer = 5
start_date = datetime(2025, 8, 1)
end_date = datetime(2025, 9, 30)  # inclusive: a touch may land on this day

channels = ["Facebook Ads", "Google Ads", "Email", "Instagram", "Blog Post", "Organic Search", "YouTube"]
conversion_rate = 0.45  # 45% of customers eventually convert
random_seed = 42  # fixed so that Restart & Run All regenerates the identical dataset

# Two random sources are used below (stdlib `random` and NumPy); seed both.
random.seed(random_seed)
rng = np.random.default_rng(random_seed)

# -------------------------------
# GENERATE DATA
# -------------------------------
# Number of whole days a touch can be offset from start_date (loop-invariant).
span_days = (end_date - start_date).days

# Zero-pad IDs to at least 3 digits, widening when num_customers needs more,
# so that the string sort on CustomerID below always matches numeric order.
id_width = max(3, len(str(num_customers)))

records = []

for cid in range(1, num_customers + 1):
    customer_id = f"C{cid:0{id_width}d}"
    num_touches = random.randint(2, max_touches_per_customer)

    # Touch dates are drawn at day granularity (time-of-day is always midnight)
    # and sorted so Touchpoint 1..n is chronological.
    touch_dates = sorted(
        start_date + timedelta(days=random.randint(0, span_days))
        for _ in range(num_touches)
    )

    # Channels are sampled with replacement: a customer may see one channel twice.
    channels_used = random.choices(channels, k=num_touches)

    # Assume each touch has some ad spend (for ROI calc later), rounded to cents
    spends = rng.uniform(10, 80, size=num_touches).round(2)

    # Mark the final conversion (some customers won't convert)
    converted = rng.random() < conversion_rate
    last_touch = num_touches - 1

    for i in range(num_touches):
        records.append({
            "CustomerID": customer_id,
            "Touchpoint": i + 1,
            "Channel": channels_used[i],
            "Timestamp": touch_dates[i].strftime("%Y-%m-%d %H:%M:%S"),
            "Spend": spends[i],
            # Only the last touch of a converting customer carries the flag.
            "ConversionFlag": 1 if (converted and i == last_touch) else 0,
        })

# Convert to DataFrame
df = pd.DataFrame(records)

# Assign random revenue only to converting touches, rounded to cents like Spend;
# every other row gets 0.
df["Revenue"] = np.where(
    df["ConversionFlag"] == 1,
    rng.uniform(200, 1000, len(df)).round(2),
    0.0,
)

# -------------------------------
# FINAL TOUCHES
# -------------------------------
# Touchpoint is the explicit tie-breaker: same-day touches share an identical
# timestamp string, and their original sequence must be kept.
df = df.sort_values(["CustomerID", "Timestamp", "Touchpoint"]).reset_index(drop=True)

# Save to CSV
df.to_csv("marketing_touchpoints.csv", index=False)

print("✅ Dataset generated and saved as 'marketing_touchpoints.csv'")
print(df.head(10))

✅ Dataset generated and saved as 'marketing_touchpoints.csv'
  CustomerID  Touchpoint         Channel            Timestamp  Spend  \
0       C001           1       Blog Post  2025-08-09 00:00:00  20.89   
1       C001           2       Blog Post  2025-08-10 00:00:00  38.15   
2       C001           3           Email  2025-08-12 00:00:00  16.83   
3       C002           1       Blog Post  2025-08-06 00:00:00  15.56   
4       C002           2  Organic Search  2025-08-09 00:00:00  22.18   
5       C002           3      Google Ads  2025-09-19 00:00:00  68.71   
6       C003           1           Email  2025-08-15 00:00:00  18.94   
7       C003           2    Facebook Ads  2025-08-16 00:00:00  27.07   
8       C003           3  Organic Search  2025-09-06 00:00:00  16.93   
9       C003           4         YouTube  2025-09-19 00:00:00  70.40   

   ConversionFlag     Revenue  
0               0    0.000000  
1               0    0.000000  
2               0    0.000000  
3               0 

In [2]:
# Offer the generated CSV as a browser download when running in Google Colab.
# The `google.colab` package is only importable inside Colab, so the import is
# guarded: elsewhere the cell reports the skip instead of aborting Run All.
try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab; skipping browser download of 'marketing_touchpoints.csv'.")
else:
    files.download('marketing_touchpoints.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>