# In [2]:
import pandas as pd
import random
import uuid
from datetime import datetime, timedelta

def random_date(start, end):
    """Return a random point between ``start`` and ``end``.

    A fraction is drawn with ``random.random()`` and applied to the span
    ``end - start``; the scaled span is then added to ``start``.

    Args:
        start: Lower bound of the range (this file passes ``datetime`` values).
        end: Upper bound of the range (same type as ``start``).

    Returns:
        ``start`` shifted forward by a random fraction of ``end - start``.
    """
    span = end - start
    fraction = random.random()
    return start + span * fraction

# Time window from which session start times are drawn
start_date = datetime(year=2023, month=1, day=1)
end_date = datetime(year=2024, month=1, day=1)

# Dataset size
num_sessions = 30_000        # number of sessions to simulate
num_customers = 4_000        # customer ids are drawn from 1..num_customers
max_events_per_session = 5   # inclusive upper bound on events in one session

# Traffic sources, and the mediums that may be paired with each source
sources = ['Google', 'Facebook', 'Twitter', 'Bing', 'Instagram', 'Direct']
mediums = {
    'Google': ['CPC', 'Organic'],
    'Facebook': ['SocialMedia'],
    'Twitter': ['SocialMedia'],
    'Bing': ['CPC', 'Organic'],
    'Instagram': ['SocialMedia'],
    'Direct': ['Direct'],
}

# Value pools for the remaining tracking parameters
campaigns = ['Campaign-1', 'Campaign-2', 'Campaign-3', 'Campaign-4']
contents = ['Ad-1', 'Ad-2', 'Ad-3']
terms = ['Sunglasses', 'Bags', 'Belts', 'Watches']

# Create a list to hold the data.
# Each row is [customer_id, session_id, timestamp, url, revenue].
data = []

for _ in range(num_sessions):
    customer_id = random.randint(1, num_customers)
    session_id = uuid.uuid4()
    session_start = random_date(start_date, end_date)
    num_events = random.randint(1, max_events_per_session)
    has_purchased = False

    # The first event happens at session start; every later event follows the
    # previous one by a random 1-10 minute gap. Accumulating the gaps (instead
    # of multiplying the event index by a fresh random number) guarantees that
    # timestamps never go backwards within a session.
    timestamp = session_start

    for event_index in range(num_events):
        if event_index > 0:
            timestamp += timedelta(minutes=random.randint(1, 10))

        # Ensure session length does not exceed one day
        if timestamp - session_start >= timedelta(days=1):
            break

        source = random.choice(sources)
        medium = random.choice(mediums[source])

        # Direct traffic carries no campaign/content/term parameters.
        if source == 'Direct':
            campaign = ''
            content = ''
            term = ''
        else:
            campaign = random.choice(campaigns)
            content = random.choice(contents)
            term = random.choice(terms)

        # Empty parameters are left out of the query string entirely.
        tracking_params = (
            f"source={source}&medium={medium}"
            + (f"&campaign={campaign}" if campaign else "")
            + (f"&content={content}" if content else "")
            + (f"&term={term}" if term else "")
        )

        url = f"http://dummy-url.com?{tracking_params}"

        # Randomly decide if this event is a purchase, but only allow one
        # purchase per session (20% chance per event until one has occurred).
        if not has_purchased and random.random() < 0.2:
            revenue = round(random.uniform(10.0, 500.0), 2)
            has_purchased = True
        else:
            revenue = 0

        data.append([customer_id, session_id, timestamp, url, revenue])

# Build the table; the column order mirrors the layout of each row in `data`
column_names = ['customer_id', 'session_id', 'timestamp', 'url', 'revenue']
df = pd.DataFrame(data, columns=column_names)

# Write the table to disk, omitting the DataFrame index
df.to_csv('marketing-dummy-data.csv', index=False)