In [None]:
import pandas as pd
import random
import string
from datetime import datetime, timedelta

In [None]:
def generate_random_address():
    """Generate a random address-like string."""
    return "".join(random.choices(string.ascii_letters + string.digits, k=10))


def generate_random_datetime(start, end):
    """Generate a random datetime between 'start' and 'end'."""
    return start + timedelta(
        seconds=random.randint(0, int((end - start).total_seconds()))
    )


def generate_votes(max_projects, max_votes):
    """Generate a random votes array."""
    num_votes = random.randint(1, max_votes)
    projects = random.sample(range(max_projects), num_votes)
    return [
        {"amount": str(random.randint(1000, 1000000)), "projectId": f"proj{proj_id}"}
        for proj_id in projects
    ]

In [None]:
# Set parameters
num_rows = 1000
max_projects_in_ballot = 500
max_votes = 20
start_date = datetime(2023, 9, 1)
end_date = datetime(2023, 12, 1)

# Generate data
data = []
for _ in range(num_rows):
    has_voted = random.choice([True, False])
    has_published = random.choice([True, False])
    created_at = generate_random_datetime(start_date, end_date)
    updated_at = generate_random_datetime(created_at, end_date)
    published_at = (
        generate_random_datetime(created_at, updated_at) if has_published else None
    )
    projects_in_ballot = (
        random.randint(1, max_projects_in_ballot) if has_published else None
    )
    votes = generate_votes(max_projects_in_ballot, max_votes) if has_published else []

    row = {
        "Address": generate_random_address(),
        "Has voted": has_voted,
        "Has published": has_published,
        "Published at": published_at,
        "Created at": created_at,
        "Updated at": updated_at,
        "Projects in ballot": projects_in_ballot,
        "Votes": votes,
    }
    data.append(row)

In [11]:
# Create DataFrame
df = pd.DataFrame(data)
df.head()  # Display the first few rows of the DataFrame

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
0,5R3d7IeoAB,True,False,NaT,2023-11-06 10:51:07,2023-11-13 09:28:20,,[]
1,K66AqbQbXq,False,False,NaT,2023-10-30 22:32:19,2023-11-14 00:08:19,,[]
2,l6UWM6Bf4X,True,True,2023-11-02 18:36:08,2023-10-24 21:11:07,2023-11-08 13:11:34,240.0,"[{'amount': '644777', 'projectId': 'proj274'},..."
3,vWgy70oDLH,False,False,NaT,2023-09-05 00:10:01,2023-10-21 02:55:52,,[]
4,JEkkCBV3nk,True,False,NaT,2023-09-19 13:30:58,2023-09-30 02:28:27,,[]


In [None]:
df.to_csv("data/dummy_data_rpgf3.csv", index=False)  # Save the DataFrame to a CSV file