In [None]:
import random
import string
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

In [None]:
def generate_random_address():
    """Return a random 10-character alphanumeric string used as a fake address.

    Characters are drawn with replacement from ASCII letters (both cases)
    and digits using the module-level ``random`` generator.
    """
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=10)
    return "".join(picked)


def generate_random_datetime(start, end):
    """Return a random datetime in the closed interval [start, end].

    The offset from ``start`` is a whole number of seconds; any fractional
    part of the interval length is truncated before drawing.
    """
    span_seconds = int((end - start).total_seconds())
    offset = random.randint(0, span_seconds)  # both endpoints are possible
    return start + timedelta(seconds=offset)


def generate_votes(max_projects, max_votes):
    """Generate a random votes array.

    Picks a random number of distinct project indices from
    ``range(max_projects)`` and pairs each with a random amount.

    Args:
        max_projects: Size of the project pool; indices are drawn from
            ``range(max_projects)``.
        max_votes: Upper bound on the number of votes to generate.

    Returns:
        A list of ``{"amount": ..., "projectId": ...}`` dicts, one per
        selected project. ``amount`` is the string form of an integer in
        [1000, 1000000] and ``projectId`` is ``f"proj{index}"``. The list
        has between 1 and ``min(max_votes, max_projects)`` entries, or is
        empty when that bound is below 1.
    """
    # random.sample raises ValueError when asked for more items than the
    # population holds, so the vote count must never exceed max_projects.
    vote_cap = min(max_votes, max_projects)
    if vote_cap < 1:
        # No projects to pick from, or no votes allowed.
        return []

    num_votes = random.randint(1, vote_cap)
    projects = random.sample(range(max_projects), num_votes)
    return [
        {"amount": str(random.randint(1000, 1000000)), "projectId": f"proj{proj_id}"}
        for proj_id in projects
    ]

In [None]:
# Configuration for the synthetic ballot dataset
num_rows = 1000
max_projects_in_ballot = 500
max_votes = 20
start_date = datetime(2023, 9, 1)
end_date = datetime(2023, 12, 1)

# Seed the stdlib RNG so that reruns produce the same rows
random.seed(42)

# Build one record per synthetic ballot
data = []
for _ in range(num_rows):
    has_published = random.choice([True, False])
    # A published ballot always counts as voted; the second coin flip is
    # only drawn for unpublished ballots.
    has_voted = has_published or random.choice([True, False])

    # Each timestamp is drawn from the window starting at the previous one,
    # so created <= updated <= published (when published).
    created_at = generate_random_datetime(start_date, end_date)
    updated_at = generate_random_datetime(created_at, end_date)

    # NOTE(review): votes are only generated for published ballots, so a row
    # can have "Has voted" True with an empty "Votes" list — confirm this is
    # the intended shape of the dummy data.
    if has_published:
        published_at = generate_random_datetime(updated_at, end_date)
        votes = generate_votes(max_projects_in_ballot, max_votes)
    else:
        published_at = None
        votes = []

    projects_in_ballot = len(votes)

    row = {
        "Address": generate_random_address(),
        "Has voted": has_voted,
        "Has published": has_published,
        "Published at": published_at,
        "Created at": created_at,
        "Updated at": updated_at,
        "Projects in ballot": projects_in_ballot,
        "Votes": votes,
    }
    data.append(row)

In [None]:
# Assemble the generated records into a DataFrame
df = pd.DataFrame(data=data)
df.head(n=5)  # Preview the first five rows

In [None]:
# Save the DataFrame to a CSV file. to_csv does not create missing parent
# directories, so make sure the output folder exists before writing;
# otherwise a fresh checkout fails on Restart & Run All.
output_path = Path("data/dummy_data_rpgf3.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)