Examples class below:
- Date (01/15/2019, 15/02/2018, Dec 24th 2016, April 9 2015, etc.)


In [11]:
# Imports

import random
import pandas as pd
import numpy as np


In [3]:
# Generate 20K Dates 

start = datetime(2020, 1, 1)
end = datetime(2025, 1, 1)

delta_seconds = int((end - start).total_seconds())

datetimes = [
    start + timedelta(seconds=random.randint(0, delta_seconds))
    for _ in range(20000)
]

dates = pd.DataFrame({"datetime": datetimes})
dates["date"] = dates["datetime"].dt.date

dates.head(2)


Unnamed: 0,datetime,date
0,2021-12-05 04:39:45,2021-12-05
1,2021-12-15 20:01:15,2021-12-15


In [8]:
# Generate for formats for each dates

# Ensure correct dtypes
dates["date"] = pd.to_datetime(dates["date"])

# Simple formats
dates["mmddyyyy"] = dates["date"].dt.strftime("%m/%d/%Y")
dates["ddmmyyyy"] = dates["date"].dt.strftime("%d/%m/%Y")

# Long format (April 9 2015)
# Linux / Mac: "%B %-d %Y"
# Windows: "%B %#d %Y"
dates["long"] = dates["date"].dt.strftime("%B %-d %Y")

# Ordinal suffix helper
def ordinal(n):
    if 11 <= n % 100 <= 13:
        return "th"
    return {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")

day = dates["date"].dt.day
suffix = day.map(ordinal)

dates["ordinal"] = (
    dates["date"].dt.strftime("%b ") +
    day.astype(str) +
    suffix +
    dates["date"].dt.strftime(" %Y")
)

dates.head()


Unnamed: 0,datetime,date,mmddyyyy,ddmmyyyy,long,ordinal
0,2021-12-05 04:39:45,2021-12-05,12/05/2021,05/12/2021,December 5 2021,Dec 5th 2021
1,2021-12-15 20:01:15,2021-12-15,12/15/2021,15/12/2021,December 15 2021,Dec 15th 2021
2,2020-09-06 10:48:34,2020-09-06,09/06/2020,06/09/2020,September 6 2020,Sep 6th 2020
3,2020-11-27 04:55:15,2020-11-27,11/27/2020,27/11/2020,November 27 2020,Nov 27th 2020
4,2023-10-28 00:03:57,2023-10-28,10/28/2023,28/10/2023,October 28 2023,Oct 28th 2023


In [12]:
# Randomly select dates in different formats

# List of format columns
format_columns = [
    "mmddyyyy",
    "ddmmyyyy",
    "long",
    "ordinal"
]

# Randomly select one format per row
dates["random_date_format"] = dates[format_columns].apply(
    lambda row: np.random.choice(row.values),
    axis=1
)

print(dates[["date", "random_date_format"]])



            date random_date_format
0     2021-12-05    December 5 2021
1     2021-12-15         12/15/2021
2     2020-09-06       Sep 6th 2020
3     2020-11-27      Nov 27th 2020
4     2023-10-28    October 28 2023
...          ...                ...
19995 2021-10-24         24/10/2021
19996 2021-12-01    December 1 2021
19997 2020-06-02         02/06/2020
19998 2021-02-14         14/02/2021
19999 2020-09-21         09/21/2020

[20000 rows x 2 columns]


In [14]:
dates.to_csv("Sub_datasets/dates.csv")