In [4]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Define the size of the dataset
n_samples = 1000

# Specify options for categorical data
checklistname_options = ['Checklist A', 'Checklist B', 'Checklist C']
archived_options = [True, False]
bay_options = ['Bay 1', 'Bay 2', 'Bay 3']
position_options = ['Position 1', 'Position 2', 'Position 3']
module_options = ['Module 1', 'Module 2', 'Module 3']
workstation_options = ['Workstation 1', 'Workstation 2', 'Workstation 3']
project_options = ['Project X', 'Project Y', 'Project Z']
project_status_options = ['Ongoing', 'Completed', 'Pending']
project_type_options = ['Type A', 'Type B', 'Type C']
tenant_options = ['Tenant 1', 'Tenant 2', 'Tenant 3']

# Generate continuous data ranges
durationms_range = (1000, 50000)  # milliseconds
durationmin_range = (1, 60)  # minutes

# Generate datetime ranges
start_date = datetime(2020, 1, 1)
end_date = datetime(2024, 1, 1)

def random_date(start, end):
    """Generate a random datetime between `start` and `end`."""
    return start + timedelta(
        seconds=random.randint(0, int((end - start).total_seconds())),
    )

# Create the dummy dataset
data = {
    'uid': [f'uid_{i}' for i in range(n_samples)],
    'checklistname': np.random.choice(checklistname_options, n_samples),
    'createdat': [random_date(start_date, end_date) for _ in range(n_samples)],
    'updatedat': [random_date(start_date, end_date) for _ in range(n_samples)],
    'archived': np.random.choice(archived_options, n_samples),
    'Bay': np.random.choice(bay_options, n_samples),
    'Position': np.random.choice(position_options, n_samples),
    'Module': np.random.choice(module_options, n_samples),
    'Workstation': np.random.choice(workstation_options, n_samples),
    'Project': np.random.choice(project_options, n_samples),
    'ProjectStatus': np.random.choice(project_status_options, n_samples),
    'ProjectType': np.random.choice(project_type_options, n_samples),
    'ProjectStartDate': [random_date(start_date, end_date) for _ in range(n_samples)],
    'ProjectEndDate': [random_date(start_date, end_date) for _ in range(n_samples)],
    'tenant': np.random.choice(tenant_options, n_samples),
    'durationms': np.random.uniform(durationms_range[0], durationms_range[1], n_samples).astype(int),
    'durationmin': np.random.uniform(durationmin_range[0], durationmin_range[1], n_samples).astype(int),
}

# Create a DataFrame
df = pd.DataFrame(data)

# Ensure 'updatedat' is always after 'createdat'
df['updatedat'] = df.apply(lambda row: max(row['createdat'], row['updatedat']), axis=1)

# Ensure 'ProjectEndDate' is always after 'ProjectStartDate'
df['ProjectEndDate'] = df.apply(lambda row: max(row['ProjectStartDate'], row['ProjectEndDate']), axis=1)

# Display the first few rows of the dummy dataset
print(df.head())

     uid checklistname           createdat           updatedat  archived  \
0  uid_0   Checklist B 2023-06-01 02:02:07 2023-06-01 02:02:07     False   
1  uid_1   Checklist B 2023-06-23 06:42:56 2023-06-23 06:42:56      True   
2  uid_2   Checklist C 2021-08-22 15:54:42 2021-08-22 15:54:42     False   
3  uid_3   Checklist B 2020-09-06 06:35:07 2021-12-29 06:29:05     False   
4  uid_4   Checklist A 2020-09-26 04:04:14 2020-09-26 04:04:14      True   

     Bay    Position    Module    Workstation    Project ProjectStatus  \
0  Bay 1  Position 3  Module 3  Workstation 2  Project Z     Completed   
1  Bay 3  Position 1  Module 1  Workstation 2  Project Y       Ongoing   
2  Bay 2  Position 3  Module 1  Workstation 2  Project Y       Pending   
3  Bay 2  Position 3  Module 3  Workstation 1  Project Y       Pending   
4  Bay 1  Position 2  Module 3  Workstation 2  Project X       Ongoing   

  ProjectType    ProjectStartDate      ProjectEndDate    tenant  durationms  \
0      Type C 2022-

In [5]:
df.to_csv('180 days.csv')