In [9]:
import csv
import random
from typing import List, Dict, Tuple

# Event categories that a generated event can be assigned.
EVENT_TYPES = [
    "Virtual event",
    "Conferences and seminars",
    "Festivals and fairs",
    "Sports events",
    "Community and Charity events",
    "Entertainment and media events",
]

# Sponsorship categories that a generated sponsor can be assigned.
SPONSOR_TYPES = [
    "Media Sponsorship",
    "Food Stalls",
    "Philanthropy",
    "Merchandise",
    "In Kind",
    "Influencer",
    "Financial",
]

def generate_sponsor(event_type: str) -> Tuple[str, float]:
    """Randomly pick a sponsor type for an event and assign it a cost.

    Args:
        event_type: The event category the sponsor is attached to. Only
            used to rule out "Food Stalls" for a "Virtual event".

    Returns:
        A ``(sponsor_type, cost)`` tuple, where ``cost`` is rounded to two
        decimal places.
    """
    chosen = random.choice(SPONSOR_TYPES)
    needs_redraw = chosen == "Food Stalls" and event_type == "Virtual event"
    if needs_redraw:
        # "Food Stalls" is not allowed for virtual events: draw again from
        # every other sponsor type.
        alternatives = [name for name in SPONSOR_TYPES if name != "Food Stalls"]
        chosen = random.choice(alternatives)

    # The cost starts as a uniform draw, then is scaled by sponsor type:
    # doubled for "Financial", halved for "Food Stalls" and "Merchandise",
    # left as-is for everything else.
    scale_by_type = {"Financial": 2, "Food Stalls": 0.5, "Merchandise": 0.5}
    cost = random.uniform(1000, 100000) * scale_by_type.get(chosen, 1)

    return chosen, round(cost, 2)

# Inclusive (low, high) bounds for expected footfall, keyed by event type.
_FOOTFALL_BOUNDS = {
    "Virtual event": (500, 10000),
    "Conferences and seminars": (100, 5000),
    "Community and Charity events": (100, 5000),
    "Festivals and fairs": (1000, 100000),
    "Sports events": (1000, 100000),
}
# Bounds used for any event type not listed in _FOOTFALL_BOUNDS.
_DEFAULT_FOOTFALL_BOUNDS = (500, 50000)


def generate_event() -> Dict:
    """Build one synthetic event record with sponsors and financial figures.

    The event type is chosen at random, expected footfall is drawn from a
    range that depends on that type, and between one and five sponsors are
    generated. Budget is a random 60-90% share of the summed sponsor costs,
    revenue is the budget times a random 1.1-2.0 multiplier, and ROI is the
    percentage gain of revenue over budget.

    Returns:
        A dict with the keys "Event Type", "Sponsors" (a list of
        ``(sponsor_type, cost)`` tuples), "Expected Footfall", "Budget",
        "Total Revenue" and "ROI"; the three monetary/percentage values are
        rounded to two decimal places.
    """
    kind = random.choice(EVENT_TYPES)

    low, high = _FOOTFALL_BOUNDS.get(kind, _DEFAULT_FOOTFALL_BOUNDS)
    footfall = random.randint(low, high)

    sponsor_count = random.randint(1, 5)
    sponsor_list = []
    for _ in range(sponsor_count):
        sponsor_list.append(generate_sponsor(kind))

    sponsorship_sum = sum(pair[1] for pair in sponsor_list)

    spend = sponsorship_sum * random.uniform(0.6, 0.9)
    income = spend * random.uniform(1.1, 2.0)
    roi_percent = (income - spend) / spend * 100

    record = {
        "Event Type": kind,
        "Sponsors": sponsor_list,
        "Expected Footfall": footfall,
        "Budget": round(spend, 2),
        "Total Revenue": round(income, 2),
        "ROI": round(roi_percent, 2),
    }
    return record

def generate_dataset(num_rows: int) -> List[Dict]:
    """Generate a list of independent synthetic event records.

    Args:
        num_rows: Number of events to generate. Zero or a negative value
            yields an empty list.

    Returns:
        A list containing ``num_rows`` dicts, each produced by
        ``generate_event()``.
    """
    rows: List[Dict] = []
    for _ in range(num_rows):
        rows.append(generate_event())
    return rows

# Generate the dataset
dataset = generate_dataset(10000)

# Name the output file once so the open() call and the confirmation message
# cannot drift apart.
output_path = 'event_sponsorship_roi_dataset.csv'

# Write to CSV
# newline='' lets the csv module control line endings itself; the encoding is
# pinned so the file does not depend on the platform's locale default.
with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Event Type', 'Sponsor Types', 'Sponsor Costs', 'Expected Footfall', 'Budget', 'Total Revenue', 'ROI']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for event in dataset:
        # Each event carries a list of (sponsor_type, cost) tuples; flatten
        # it into two parallel pipe-delimited columns so a row stays flat.
        sponsor_types = "|".join(sponsor_type for sponsor_type, _ in event['Sponsors'])
        sponsor_costs = "|".join(str(cost) for _, cost in event['Sponsors'])
        writer.writerow({
            'Event Type': event['Event Type'],
            'Sponsor Types': sponsor_types,
            'Sponsor Costs': sponsor_costs,
            'Expected Footfall': event['Expected Footfall'],
            'Budget': event['Budget'],
            'Total Revenue': event['Total Revenue'],
            'ROI': event['ROI']
        })

print(f"Dataset generated and saved as '{output_path}'")

Dataset generated and saved as 'event_sponsorship_roi_dataset.csv'
