In [8]:
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta

# Create a Faker instance for generating synthetic data.
# Both random sources are seeded so that "Restart Kernel -> Run All" regenerates
# the same dataset: Faker values (names, phrases, dates) come from Faker's own
# generator, while budgets/channels/durations come from the stdlib `random` module.
SEED = 42
fake = Faker()
Faker.seed(SEED)
random.seed(SEED)

# Function to generate random date ranges for campaigns (defaults: last 10 years, 10-90 days long)
def generate_date_range(earliest='-10y', min_days=10, max_days=90):
    """Return a random ``(start_date, end_date)`` pair for one campaign.

    Args:
        earliest: Lower bound passed to ``fake.date_between`` as ``start_date``
            (default ``'-10y'``); the upper bound is always ``'today'``.
        min_days: Shortest campaign duration in days (inclusive).
        max_days: Longest campaign duration in days (inclusive).

    Returns:
        Tuple ``(start_date, end_date)`` where ``end_date`` is ``start_date``
        plus a uniformly drawn whole number of days in ``[min_days, max_days]``.
        Because the start may be as late as today, the end date can lie in
        the future.

    Raises:
        ValueError: If ``min_days`` is greater than ``max_days``.
    """
    # Fail early with a readable message instead of randint's "empty range" error.
    if min_days > max_days:
        raise ValueError(
            f"min_days ({min_days}) must not exceed max_days ({max_days})"
        )

    start_date = fake.date_between(start_date=earliest, end_date='today')
    end_date = start_date + timedelta(days=random.randint(min_days, max_days))
    return start_date, end_date

# Generate synthetic campaigns (100 by default).
# The count and the option lists live in one place so the loop, the data and
# the final message cannot drift apart.
NUM_CAMPAIGNS = 100
CAMPAIGN_CHANNELS = ["Email", "Social Media", "Online Ads", "Direct Mail"]
TARGET_AUDIENCES = ["Adults", "Teens", "Seniors", "Professionals"]

campaigns_data = []
for campaign_id in range(1, NUM_CAMPAIGNS + 1):  # campaign ids are 1-based
    campaign_name = fake.catch_phrase()
    description = fake.paragraph(nb_sentences=3)
    start_date, end_date = generate_date_range()
    budget = round(random.uniform(1000, 50000), 2)
    channel = random.choice(CAMPAIGN_CHANNELS)
    target_audience = random.choice(TARGET_AUDIENCES)
    # Actual spend lands within +/-20% of the planned budget.
    total_cost = budget * random.uniform(0.8, 1.2)

    campaigns_data.append({
        "campaign_id": campaign_id,
        "campaign_name": campaign_name,
        "description": description,
        "start_date": start_date,
        "end_date": end_date,
        "budget": budget,
        "target_audience": target_audience,
        "channel": channel,
        "total_cost": round(total_cost, 2)
    })

# Create a DataFrame from the synthetic data (one row per campaign)
campaigns_df = pd.DataFrame(campaigns_data)

# Save the DataFrame to a CSV file.
# NOTE(review): absolute, machine-specific path. The events cell reads
# campaigns.csv from this same location, so change both together.
csv_filename = "/Users/ekinderdiyok/Documents/Projects/mealkit-delivery/campaigns.csv"
campaigns_df.to_csv(csv_filename, index=False)

# Report the number of rows actually written rather than a hard-coded figure.
print(f"CSV file '{csv_filename}' has been created with {len(campaigns_df)} synthetic campaigns.")

CSV file '/Users/ekinderdiyok/Documents/Projects/mealkit-delivery/campaigns.csv' has been created with 100 synthetic campaigns.


In [11]:
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta

# Create a Faker instance for generating synthetic data.
# Seed Faker and the stdlib `random` module so re-running this cell on a
# fresh kernel reproduces the same events.
SEED = 42
fake = Faker()
Faker.seed(SEED)
random.seed(SEED)

# Load the campaigns data from the CSV file.
# NOTE(review): absolute, machine-specific path shared with the campaigns cell.
campaigns_df = pd.read_csv("/Users/ekinderdiyok/Documents/Projects/mealkit-delivery/campaigns.csv")

# Ensure 'start_date' and 'end_date' are datetime objects
campaigns_df['start_date'] = pd.to_datetime(campaigns_df['start_date'])
campaigns_df['end_date'] = pd.to_datetime(campaigns_df['end_date'])

# Number of synthetic events to generate
num_events = 1000

# Define possible values for event attributes
event_types = ["click", "impression", "conversion", "signup", "page_view"]
channels = ["Email", "Social Media", "Online Ads", "Direct Mail"]

# Build the campaign_id -> (start, end) lookup once instead of filtering the
# DataFrame for every event. Only the first row per campaign_id is used, and
# campaigns whose end date is not strictly after the start date (or whose
# dates are missing) are excluded up front, so every draw yields an event.
unique_campaigns = campaigns_df.drop_duplicates(subset="campaign_id")
usable_campaigns = unique_campaigns[
    unique_campaigns["end_date"] > unique_campaigns["start_date"]
]
campaign_windows = {
    cid: (window_start, window_end)
    for cid, window_start, window_end in zip(
        usable_campaigns["campaign_id"],
        usable_campaigns["start_date"],
        usable_campaigns["end_date"],
    )
}
# Sample from the ids actually present in the file rather than assuming 1..100.
campaign_ids = list(campaign_windows)
if not campaign_ids:
    raise ValueError("campaigns.csv contains no campaign with a valid date range")

# Generate synthetic events data
events_data = []
for _ in range(num_events):
    event_id = fake.uuid4()  # Unique event identifier
    campaign_id = random.choice(campaign_ids)
    start_date, end_date = campaign_windows[campaign_id]

    # Generate an event date within the campaign's duration
    event_date = fake.date_time_between_dates(datetime_start=start_date, datetime_end=end_date)

    customer_id = random.randint(1, 500)  # Assuming there are 500 customers
    event_type = random.choice(event_types)
    # NOTE(review): the event channel is drawn independently of the campaign's
    # own 'channel' column — confirm that this is intended.
    channel = random.choice(channels)

    # NOTE(review): no order_id / value columns are emitted. They used to be
    # computed for conversion/signup events but were never stored in the
    # record; add them to the dict below if the events schema needs them.
    events_data.append({
        "event_id": event_id,
        "campaign_id": campaign_id,
        "customer_id": customer_id,
        "event_type": event_type,
        "event_date": event_date,
        "channel": channel,
    })

# Create a DataFrame from the synthetic events data
events_df = pd.DataFrame(events_data)

# Save the DataFrame to a CSV file
csv_filename = "/Users/ekinderdiyok/Documents/Projects/mealkit-delivery/events.csv"
events_df.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created with {len(events_data)} synthetic events.")

CSV file '/Users/ekinderdiyok/Documents/Projects/mealkit-delivery/events.csv' has been created with 1000 synthetic events.
