In [1]:
import numpy as np
import pandas as pd

from collections import OrderedDict
from datetime import datetime, timedelta
from typing import Any, Iterable

In [2]:
def random_date_str(N: int=1, datefmt: str='%Y-%m-%d') -> np.ndarray:
    """ Return N random date strings.

    Every date is built by adding a uniformly drawn whole number of seconds
    to 2000-01-01 00:00:00. The largest possible offset is the full span up
    to 2020-01-01 00:00:00, so both end points can be drawn.

    Parameters
    ----------
    N : int
        Number of date strings to generate.
    datefmt : str
        ``strftime`` format applied to every generated datetime.

    Returns
    -------
    np.ndarray
        1-D array holding N formatted strings. The array always has a
        unicode string dtype, including the empty array returned for N == 0.
    """
    first = datetime.strptime('2000-01-01', '%Y-%m-%d')
    last = datetime.strptime('2020-01-01', '%Y-%m-%d')

    # Length of the sampling window in whole seconds
    datediff = last - first
    secdiff = datediff.days*24*3600 + datediff.seconds

    # randint excludes `high`, hence the +1 to make `last` itself reachable
    randsecs = np.random.randint(low=0, high=secdiff+1, size=N)

    # int() converts the numpy integer into a plain int before it is
    # handed to timedelta
    resdates = [
        (first + timedelta(seconds=int(seconds))).strftime(datefmt)
        for seconds in randsecs
    ]

    # An explicit string dtype keeps the result type stable: without it an
    # empty list would be turned into a float64 array.
    return np.array(resdates, dtype=str)

In [3]:
def random_category(categories: Iterable[Any], N: int=1) -> np.ndarray:
    """ Pick N random elements (with replacement) from the supplied categories.

    Parameters
    ----------
    categories : Iterable[Any]
        Values to draw from. Any iterable is accepted (list, tuple,
        generator, set, ...); it is consumed once and copied into a list.
    N : int
        Number of elements to draw.

    Returns
    -------
    np.ndarray
        Array with the N drawn elements, in draw order.

    Raises
    ------
    ValueError
        Raised by ``np.random.randint`` when ``categories`` is empty and
        N > 0, because there is no valid index to draw.
    """
    # Materialise first: len() and positional indexing below are not
    # available on generic iterables such as generators or sets.
    pool = list(categories)

    # One random position per requested element; `high` is exclusive
    choices = np.random.randint(low=0, high=len(pool), size=N)

    return np.array([pool[choice] for choice in choices])

In [4]:
# Quick check: draw five labels from a short list that includes an empty string
random_category(categories=['Low', 'Medium', 'High', ''], N=5)

array(['', 'Low', 'Low', 'Low', ''], dtype='<U3')

In [5]:
# Construct test data

# Seed NumPy's global generator so that a fresh kernel run regenerates
# exactly the same rows; both helper functions draw from np.random.
np.random.seed(42)

# Number of rows to generate
data_len = 100

# Category pools; the trailing empty string is one of the possible values
levels = ['Low', 'Medium', 'High', 'Critical', '']
settings = [
    'stun',
    'heat',
    'disintegrate',
    'field burst',
    'luvetric pulse',
    'expanding energy pulse',
    'proximity blast',
    '',
]

# One array of length data_len per column, kept in insertion order
data = OrderedDict()
data['date'] = random_date_str(data_len)
data['level'] = random_category(levels, data_len)
data['setting'] = random_category(settings, data_len)

In [7]:
# Turn the column dict into a frame (one column per key) and display it
df = pd.DataFrame(data=data)
df

Unnamed: 0,date,level,setting
0,2002-11-03,,field burst
1,2008-08-26,Critical,stun
2,2003-08-13,,
3,2001-10-17,,field burst
4,2007-04-10,,expanding energy pulse
...,...,...,...
95,2011-07-01,,heat
96,2017-05-18,,heat
97,2003-07-13,High,
98,2013-01-13,Medium,proximity blast


In [7]:
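# Uncomment to write the generated frame to the test-data CSV.
# Left disabled so that running the notebook does not touch the file.
#df.to_csv('../../dataworks/tests/testdata/testdata.csv')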