In [1]:
import csv
import random
from datetime import datetime, timedelta

def generate_event_type():
    """Pick one event category at random.

    Returns:
        One of 'Cultural Fest', 'Hackathon', 'Sports Fest', 'Music Event'
        or 'Conference'.
    """
    categories = [
        'Cultural Fest',
        'Hackathon',
        'Sports Fest',
        'Music Event',
        'Conference',
    ]
    return random.choice(categories)

def generate_target_audience(event_type):
    """Choose a random target audience for the given event type.

    Args:
        event_type: Event category; must be a key of the lookup table below.

    Returns:
        One audience label drawn from the three listed for ``event_type``.

    Raises:
        KeyError: If ``event_type`` is not one of the known categories.
    """
    audience_by_event = {
        'Cultural Fest': ('Students', 'Young Adults', 'Families'),
        'Hackathon': ('Developers', 'Tech Enthusiasts', 'Students'),
        'Sports Fest': ('Athletes', 'Sports Fans', 'Families'),
        'Music Event': ('Music Lovers', 'Young Adults', 'Teenagers'),
        'Conference': ('Professionals', 'Industry Experts', 'Entrepreneurs'),
    }
    candidates = audience_by_event[event_type]
    return random.choice(candidates)

def generate_sponsors(event_type, budget):
    """Build a textual description of a random set of sponsors.

    Between 3 and 8 sponsors are generated. Each one is encoded as
    ``"<level>:<cost>:<benefits>"`` and the entries are joined with ``"; "``.

    Args:
        event_type: Currently unused by the body.
        budget: Event budget; each sponsor's cost is 5% to 20% of it,
            truncated to an int.

    Returns:
        A single string containing every sponsor entry.
    """
    tiers = ['Platinum', 'Gold', 'Silver', 'Bronze']

    def make_entry():
        # Draw order (tier, cost fraction, ticket count) is fixed so that a
        # seeded run produces the same sequence of values.
        tier = random.choice(tiers)
        fraction = random.uniform(0.05, 0.2)
        tickets = random.randint(2, 10)
        amount = int(budget * fraction)
        perks = f"{tier} booth, Logo on materials, {tickets} free tickets"
        return f"{tier}:{amount}:{perks}"

    count = random.randint(3, 8)
    return "; ".join(make_entry() for _ in range(count))

def generate_attendees(event_type):
    """Draw a random attendee count within the range for the event type.

    Args:
        event_type: Event category; must be a key of the range table below.

    Returns:
        An int between the category's lower and upper bound, both inclusive.

    Raises:
        KeyError: If ``event_type`` is not one of the known categories.
    """
    bounds_by_event = {
        'Cultural Fest': (5000, 50000),
        'Hackathon': (500, 5000),
        'Sports Fest': (10000, 100000),
        'Music Event': (5000, 100000),
        'Conference': (1000, 10000),
    }
    lowest, highest = bounds_by_event[event_type]
    return random.randint(lowest, highest)

def generate_budget(attendees):
    """Derive a random event budget from the attendee count.

    Args:
        attendees: Number of attendees.

    Returns:
        ``attendees`` multiplied by a random per-attendee spend between 10
        and 50, truncated to an int.
    """
    spend_per_attendee = random.uniform(10, 50)
    return int(attendees * spend_per_attendee)

def generate_revenue(budget):
    """Derive a random revenue figure from the budget.

    Args:
        budget: Event budget.

    Returns:
        ``budget`` scaled by a random factor between 0.8 and 1.5 (80% to
        150% of budget), truncated to an int.
    """
    scale = random.uniform(0.8, 1.5)
    return int(budget * scale)

def calculate_roi(revenue, budget):
    """Compute return on investment as a percentage of the budget.

    Args:
        revenue: Revenue earned.
        budget: Amount spent; must be non-zero.

    Returns:
        ``(revenue - budget) / budget * 100`` rounded to two decimals.

    Raises:
        ZeroDivisionError: If ``budget`` is zero.
    """
    profit = revenue - budget
    percentage = profit / budget * 100
    return round(percentage, 2)

def generate_contests(event_type):
    """Draw a random number of contests for the given event type.

    Args:
        event_type: Event category; must be a key of the range table below.

    Returns:
        An int between the category's lower and upper bound, both inclusive.

    Raises:
        KeyError: If ``event_type`` is not one of the known categories.
    """
    bounds_by_event = {
        'Cultural Fest': (10, 50),
        'Hackathon': (5, 20),
        'Sports Fest': (20, 100),
        'Music Event': (5, 30),
        'Conference': (10, 50),
    }
    fewest, most = bounds_by_event[event_type]
    return random.randint(fewest, most)

def generate_dataset(num_rows):
    """Generate a list of synthetic event records.

    Args:
        num_rows: Number of records to produce.

    Returns:
        A list of ``num_rows`` dicts, one per event, keyed by column name.
    """

    def build_row():
        # The helpers are called in a fixed order so a seeded run is
        # reproducible; the dependent values (budget, sponsors, revenue)
        # are derived from the ones drawn before them.
        kind = generate_event_type()
        audience = generate_target_audience(kind)
        headcount = generate_attendees(kind)
        spend = generate_budget(headcount)
        sponsor_text = generate_sponsors(kind, spend)
        income = generate_revenue(spend)
        return_pct = calculate_roi(income, spend)
        contest_count = generate_contests(kind)

        return {
            'Event Type': kind,
            'Target Audience': audience,
            'Sponsors Details': sponsor_text,
            'Total Attendees': headcount,
            'Budget': spend,
            'Revenue': income,
            'ROI': return_pct,
            'Contests': contest_count,
            'Emails Sent': random.randint(5000, 100000),
            'Social Media Posts': random.randint(50, 500),
            'Social Media Mentions': random.randint(1000, 100000),
            'Hashtag Usage': random.randint(5000, 500000),
            # Interactions range from half to twice the attendee count.
            'Interactions': random.randint(int(headcount * 0.5), int(headcount * 2)),
        }

    return [build_row() for _ in range(num_rows)]

# Build 5000 synthetic event records.
dataset = generate_dataset(5000)

# Persist the records as CSV; the first record's keys supply the header row.
with open('event_sponsorship_roi_dataset.csv', 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=dataset[0].keys())
    writer.writeheader()
    writer.writerows(dataset)

print("Dataset generated and saved as 'event_sponsorship_roi_dataset.csv'")

Dataset generated and saved as 'event_sponsorship_roi_dataset.csv'


: 

In [3]:
import csv
import random
from datetime import datetime, timedelta

def generate_event_type():
    """Return a randomly selected event category.

    Returns:
        One of 'Cultural Fest', 'Hackathon', 'Sports Fest', 'Music Event'
        or 'Conference'.
    """
    options = [
        'Cultural Fest',
        'Hackathon',
        'Sports Fest',
        'Music Event',
        'Conference',
    ]
    picked = random.choice(options)
    return picked

def generate_target_audience(event_type):
    """Return a random audience label appropriate for ``event_type``.

    Args:
        event_type: Event category; must be a key of the table below.

    Returns:
        One of the three audience labels registered for the category.

    Raises:
        KeyError: If ``event_type`` is not one of the known categories.
    """
    audience_table = {
        'Cultural Fest': ('Students', 'Young Adults', 'Families'),
        'Hackathon': ('Developers', 'Tech Enthusiasts', 'Students'),
        'Sports Fest': ('Athletes', 'Sports Fans', 'Families'),
        'Music Event': ('Music Lovers', 'Young Adults', 'Teenagers'),
        'Conference': ('Professionals', 'Industry Experts', 'Entrepreneurs'),
    }
    labels = audience_table[event_type]
    return random.choice(labels)

def generate_sponsors(event_type, budget):
    """Describe a random set of 3 to 8 sponsors as one string.

    Every sponsor becomes ``"<level>:<cost>:<benefits>"``; the entries are
    joined with ``"; "``.

    Args:
        event_type: Currently unused by the body.
        budget: Event budget; each sponsor's cost is 5% to 20% of it,
            truncated to an int.

    Returns:
        The joined sponsor descriptions.
    """
    levels = ['Platinum', 'Gold', 'Silver', 'Bronze']
    how_many = random.randint(3, 8)

    entries = []
    while len(entries) < how_many:
        # Draw order per sponsor: level, cost fraction, free-ticket count.
        chosen_level = random.choice(levels)
        share = random.uniform(0.05, 0.2)
        free_tickets = random.randint(2, 10)
        price = int(budget * share)
        perks = f"{chosen_level} booth, Logo on materials, {free_tickets} free tickets"
        entries.append(f"{chosen_level}:{price}:{perks}")

    return "; ".join(entries)

def generate_attendees(event_type):
    """Return a random attendee count within the category's range.

    Args:
        event_type: Event category; must be a key of the range table below.

    Returns:
        An int between the category's bounds, both inclusive.

    Raises:
        KeyError: If ``event_type`` is not one of the known categories.
    """
    limits = {
        'Cultural Fest': (5000, 50000),
        'Hackathon': (500, 5000),
        'Sports Fest': (10000, 100000),
        'Music Event': (5000, 100000),
        'Conference': (1000, 10000),
    }
    minimum, maximum = limits[event_type]
    return random.randint(minimum, maximum)

def generate_budget(attendees):
    """Return a random budget proportional to the attendee count.

    Args:
        attendees: Number of attendees.

    Returns:
        ``attendees`` times a random rate between 10 and 50 per attendee,
        truncated to an int.
    """
    rate = random.uniform(10, 50)
    total = attendees * rate
    return int(total)

def generate_revenue(budget):
    """Return a random revenue between 80% and 150% of the budget.

    Args:
        budget: Event budget.

    Returns:
        ``budget`` times a random factor between 0.8 and 1.5, truncated to
        an int.
    """
    factor = random.uniform(0.8, 1.5)
    earned = budget * factor
    return int(earned)

def calculate_roi(revenue, budget):
    """Return the ROI of an event as a percentage, rounded to two decimals.

    Args:
        revenue: Revenue earned.
        budget: Amount spent; must be non-zero.

    Returns:
        ``(revenue - budget) / budget * 100`` rounded to two decimals.

    Raises:
        ZeroDivisionError: If ``budget`` is zero.
    """
    net_gain = revenue - budget
    roi_percent = net_gain / budget * 100
    return round(roi_percent, 2)

def generate_contests(event_type):
    """Return a random contest count within the category's range.

    Conferences always yield 0 because their range is ``(0, 0)``.

    Args:
        event_type: Event category; must be a key of the range table below.

    Returns:
        An int between the category's bounds, both inclusive.

    Raises:
        KeyError: If ``event_type`` is not one of the known categories.
    """
    limits = {
        'Cultural Fest': (10, 50),
        'Hackathon': (1, 5),
        'Sports Fest': (20, 100),
        'Music Event': (5, 30),
        'Conference': (0, 0),
    }
    minimum, maximum = limits[event_type]
    return random.randint(minimum, maximum)

def is_virtual(event_type):
    """Decide at random whether an event is held virtually.

    Only hackathons and conferences can be virtual, with a 20% probability;
    every other event type is never virtual and consumes no random draw.

    Args:
        event_type: Event category.

    Returns:
        True if the event is virtual, otherwise False.
    """
    if event_type not in ['Hackathon', 'Conference']:
        return False
    return random.random() < 0.2  # 20% probability

def is_ticket_required():
    """Decide at random whether an event requires a ticket.

    Returns:
        True with 90% probability, otherwise False.
    """
    draw = random.random()
    return draw < 0.9  # 90% probability

def generate_dataset(num_rows):
    """Generate synthetic event records including virtual/ticket flags.

    Args:
        num_rows: Number of records to produce.

    Returns:
        A list of ``num_rows`` dicts, one per event, keyed by column name.
    """

    def build_row():
        # Helpers run in a fixed order so that a seeded run is reproducible.
        kind = generate_event_type()
        audience = generate_target_audience(kind)
        headcount = generate_attendees(kind)
        spend = generate_budget(headcount)
        sponsor_text = generate_sponsors(kind, spend)
        income = generate_revenue(spend)
        return_pct = calculate_roi(income, spend)
        contest_count = generate_contests(kind)
        held_online = is_virtual(kind)
        needs_ticket = is_ticket_required()

        return {
            'Event Type': kind,
            'Target Audience': audience,
            'Sponsors Details': sponsor_text,
            'Total Attendees': headcount,
            'Budget': spend,
            'Revenue': income,
            'ROI': return_pct,
            'Contests': contest_count,
            'Emails Sent': random.randint(5000, 100000),
            'Social Media Posts': random.randint(50, 500),
            'Social Media Mentions': random.randint(1000, 100000),
            'Hashtag Usage': random.randint(5000, 500000),
            'isVirtual': held_online,
            'isTicketRequired': needs_ticket,
        }

    return [build_row() for _ in range(num_rows)]

# Generate the dataset
dataset = generate_dataset(5000)

# Single source of truth for the output path, so the confirmation message
# below always names the file that was actually written.
output_path = 'event_sponsorship_roi_dataset2.csv'

# Write to CSV; the first record's keys supply the header row.
with open(output_path, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=dataset[0].keys())
    writer.writeheader()
    writer.writerows(dataset)

print(f"Dataset generated and saved as '{output_path}'")

Dataset generated and saved as 'event_sponsorship_roi_dataset.csv'


In [4]:
import pandas as pd
import numpy as np

# Set the random seed for reproducibility
np.random.seed(42)

# Define the possible values for the categorical variables
event_types = ["Virtual event", "Conferences and seminars", "Festivals and fairs", "Sports events", "Community and Charity events", "Entertainment and media events"]
sponsor_types = ["Media Sponsorship", "Food Stalls", "Philanthropy", "Merchandise", "In Kind", "Influencer", "Financial"]

# (footfall range, budget range) for each event type. The upper bounds are
# exclusive because np.random.randint excludes `high`.
event_ranges = {
    "Virtual event": ((100, 1000), (5000, 10000)),
    "Conferences and seminars": ((200, 3000), (10000, 50000)),
    "Festivals and fairs": ((500, 20000), (20000, 100000)),
    "Sports events": ((500, 20000), (20000, 100000)),
    "Community and Charity events": ((500, 20000), (20000, 100000)),
    "Entertainment and media events": ((1000, 50000), (50000, 300000)),
}

# Sponsor cost range for each sponsor type (upper bound exclusive).
sponsor_cost_ranges = {
    "Media Sponsorship": (5000, 50000),
    "Food Stalls": (1000, 20000),
    "Philanthropy": (5000, 50000),
    "Merchandise": (1000, 10000),
    "In Kind": (1000, 10000),
    "Influencer": (5000, 30000),
    "Financial": (5000, 50000),
}

# Create an empty list to store rows of data (one row per event/sponsor pair)
data = []

# Generate 5000 unique events
num_events = 5000
for event_id in range(1, num_events + 1):
    # Randomly select event type, then footfall and budget from its ranges.
    # The draw order (type, footfall, budget, sponsor count, then per sponsor:
    # type, cost, revenue multiplier) is fixed so the seeded output is stable.
    event_type = np.random.choice(event_types)
    footfall_range, budget_range = event_ranges[event_type]
    footfall = np.random.randint(*footfall_range)
    budget = np.random.randint(*budget_range)

    # Decide the number of sponsors for this event
    num_sponsors = np.random.randint(1, 6)  # Each event could have between 1 to 5 sponsors

    total_revenue = 0
    event_sponsors = []

    for _ in range(num_sponsors):
        # Randomly select sponsor type and a cost within that type's range
        sponsor_type = np.random.choice(sponsor_types)
        sponsor_cost = np.random.randint(*sponsor_cost_ranges[sponsor_type])

        # Each sponsor contributes 1.5x to 3x of its cost to the event revenue
        sponsor_revenue = sponsor_cost * np.random.uniform(1.5, 3.0)
        total_revenue += sponsor_revenue

        event_sponsors.append((sponsor_type, sponsor_cost))

    # Calculate ROI for the event as a fraction of its budget
    roi = (total_revenue - budget) / budget

    # Emit this event's rows only now that the event-level totals are known.
    # This avoids rescanning the whole `data` list after every event.
    # NOTE: the last column ("Ticket Required") is never populated and is
    # written out empty.
    data.extend(
        [event_id, event_type, kind, cost, footfall, budget, total_revenue, roi, None]
        for kind, cost in event_sponsors
    )

# Create a DataFrame
columns = ["Event ID", "Event Type", "Sponsor Type", "Sponsor's Cost", "Expected Footfall", "Budget of the Event", "Total Revenue Generated", "Return on Investment", "Ticket Required"]
df = pd.DataFrame(data, columns=columns)

# Save to CSV
df.to_csv("event_sponsorship_data3.csv", index=False)

print("CSV file generated successfully!")

CSV file generated successfully!


In [9]:
import csv
import random
from typing import List, Dict, Tuple

# Categories an event can belong to.
EVENT_TYPES = [
    "Virtual event",
    "Conferences and seminars",
    "Festivals and fairs",
    "Sports events",
    "Community and Charity events",
    "Entertainment and media events",
]

# Kinds of sponsorship an event can receive.
SPONSOR_TYPES = [
    "Media Sponsorship",
    "Food Stalls",
    "Philanthropy",
    "Merchandise",
    "In Kind",
    "Influencer",
    "Financial",
]

def generate_sponsor(event_type: str) -> Tuple[str, float]:
    """Generate one random sponsor for an event.

    A "Food Stalls" sponsor drawn for a "Virtual event" is redrawn from the
    remaining sponsor types. The base cost is uniform in [1000, 100000]; it
    is doubled for "Financial" sponsors and halved for "Food Stalls" and
    "Merchandise".

    Args:
        event_type: Category of the event being sponsored.

    Returns:
        A ``(sponsor_type, cost)`` pair with the cost rounded to two decimals.
    """
    chosen = random.choice(SPONSOR_TYPES)
    needs_redraw = event_type == "Virtual event" and chosen == "Food Stalls"
    if needs_redraw:
        alternatives = [kind for kind in SPONSOR_TYPES if kind != "Food Stalls"]
        chosen = random.choice(alternatives)

    cost = random.uniform(1000, 100000)
    cost_multipliers = {"Financial": 2, "Food Stalls": 0.5, "Merchandise": 0.5}
    if chosen in cost_multipliers:
        cost *= cost_multipliers[chosen]

    return chosen, round(cost, 2)

def generate_event() -> Dict:
    """Generate one synthetic event with sponsors and financial figures.

    The budget is 60% to 90% of the summed sponsor costs and the revenue is
    1.1x to 2.0x the budget, so the resulting ROI is always positive.

    Returns:
        A dict with the keys "Event Type", "Sponsors" (a list of
        ``(sponsor_type, cost)`` pairs), "Expected Footfall", "Budget",
        "Total Revenue" and "ROI" (a percentage). The monetary values and
        the ROI are rounded to two decimals.
    """
    event_type = random.choice(EVENT_TYPES)

    # Inclusive footfall bounds per event type; any type not listed here
    # (i.e. "Entertainment and media events") uses the default bounds.
    footfall_bounds = {
        "Virtual event": (500, 10000),
        "Conferences and seminars": (100, 5000),
        "Community and Charity events": (100, 5000),
        "Festivals and fairs": (1000, 100000),
        "Sports events": (1000, 100000),
    }
    low, high = footfall_bounds.get(event_type, (500, 50000))
    expected_footfall = random.randint(low, high)

    sponsor_count = random.randint(1, 5)
    sponsors = []
    for _ in range(sponsor_count):
        sponsors.append(generate_sponsor(event_type))

    total_sponsorship = sum(pair[1] for pair in sponsors)

    # Budget is a fraction of the sponsorship money raised.
    budget_share = random.uniform(0.6, 0.9)
    budget = total_sponsorship * budget_share

    # Revenue always exceeds the budget, which guarantees a positive ROI.
    revenue_multiplier = random.uniform(1.1, 2.0)
    total_revenue = budget * revenue_multiplier

    roi = (total_revenue - budget) / budget * 100

    event = {}
    event["Event Type"] = event_type
    event["Sponsors"] = sponsors
    event["Expected Footfall"] = expected_footfall
    event["Budget"] = round(budget, 2)
    event["Total Revenue"] = round(total_revenue, 2)
    event["ROI"] = round(roi, 2)
    return event

def generate_dataset(num_rows: int) -> List[Dict]:
    """Generate ``num_rows`` synthetic events.

    Args:
        num_rows: Number of events to produce.

    Returns:
        A list with one ``generate_event()`` result per requested row.
    """
    events = []
    for _ in range(num_rows):
        events.append(generate_event())
    return events

# Build 10000 synthetic events.
dataset = generate_dataset(10000)

# Write to CSV. Each event's sponsor pairs are flattened into two
# pipe-delimited columns (types and costs); the remaining fields are copied
# through unchanged.
with open('event_sponsorship_roi_dataset.csv', 'w', newline='') as csvfile:
    fieldnames = ['Event Type', 'Sponsor Types', 'Sponsor Costs', 'Expected Footfall', 'Budget', 'Total Revenue', 'ROI']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for event in dataset:
        sponsor_types = "|".join([kind for kind, _ in event['Sponsors']])
        sponsor_costs = "|".join([str(amount) for _, amount in event['Sponsors']])

        csv_row = {
            key: event[key]
            for key in ('Event Type', 'Expected Footfall', 'Budget', 'Total Revenue', 'ROI')
        }
        csv_row['Sponsor Types'] = sponsor_types
        csv_row['Sponsor Costs'] = sponsor_costs
        writer.writerow(csv_row)

print("Dataset generated and saved as 'event_sponsorship_roi_dataset.csv'")

Dataset generated and saved as 'event_sponsorship_roi_dataset.csv'
