In [1]:
import numpy as np
import pandas as pd

from collections import OrderedDict
from datetime import datetime, timedelta
from typing import Any, Iterable

In [2]:
def random_date_str(N: int=1, datefmt: str='%Y-%m-%d') -> np.ndarray:
    """ Return N random date strings.

    Each date is ``2000-01-01 00:00:00`` plus a random whole number of
    seconds, chosen so that the result never falls after
    ``2020-01-01 00:00:00``. The offsets are drawn with
    ``np.random.randint``, i.e. from NumPy's global random state, so calling
    ``np.random.seed`` beforehand makes the output reproducible.

    Parameters
    ----------
    N : int
        Number of date strings to generate.
    datefmt : str
        ``strftime`` format applied to every generated datetime.

    Returns
    -------
    np.ndarray
        One-dimensional array holding the N formatted strings. The array has
        a string dtype even when N is 0.
    """
    first = datetime.strptime('2000-01-01', '%Y-%m-%d')
    last = datetime.strptime('2020-01-01', '%Y-%m-%d')

    # Width of the sampling window, expressed in whole seconds
    datediff = last - first
    secdiff = datediff.days*24*3600 + datediff.seconds

    # `high` is exclusive, so +1 keeps `last` itself reachable
    randsecs = np.random.randint(low=0, high=secdiff+1, size=N)

    # Convert each NumPy integer to a plain Python int before building the
    # timedelta, then format the shifted datetime
    resdates = [
        (first + timedelta(seconds=int(seconds))).strftime(datefmt)
        for seconds in randsecs
    ]

    # Explicit dtype: an empty list would otherwise become a float64 array
    return np.array(resdates, dtype=str)

In [3]:
def random_category(categories: Iterable[Any], N: int=1) -> np.ndarray:
    """ Pick random elements from the supplied list

    Elements are selected with replacement: N random positions are drawn
    with ``np.random.randint`` (NumPy's global random state) and the element
    at each position is collected.

    Parameters
    ----------
    categories : Iterable[Any]
        Candidate values. Any iterable is accepted (list, tuple, generator,
        ...); it is consumed once and copied into a list.
    N : int
        Number of elements to pick.

    Returns
    -------
    np.ndarray
        Array built from the N picked elements, in the order they were drawn.

    Raises
    ------
    ValueError
        Propagated from ``np.random.randint`` when ``categories`` is empty
        and at least one element is requested.
    """
    # Materialise once so len() and positional indexing work for every
    # iterable, not only for sequences
    pool = list(categories)

    choices = np.random.randint(low=0, high=len(pool), size=N)

    return np.array([pool[choice] for choice in choices])

In [4]:
# Quick sanity check of the sampler: draw five values from a small pool
# (the empty string is one of the possible picks).
random_category(categories=['Low', 'Medium', 'High', ''], N=5)

array(['Medium', 'Low', '', 'High', 'High'], dtype='<U6')

In [5]:
# Construct test data
# Seed NumPy's global RNG first: both helper functions draw from np.random,
# so without a seed every run of this cell would produce different data.
np.random.seed(42)

# Number of rows to generate
data_len = 50

# Candidate values per categorical column. Each list ends with an empty
# string, so some rows will carry an empty value in that column.
levels = ['Low', 'Medium', 'High', 'Critical', '']
settings = ['stun', 'heat', 'disintegrate', 'field burst', 'luvetric pulse', 'expanding energy pulse', 'proximity blast', '']

# Column name -> array of data_len values; insertion order is the column order
data = OrderedDict()
data['date'] = random_date_str(data_len)
data['level'] = random_category(levels, data_len)
data['setting'] = random_category(settings, data_len)

In [6]:
# Turn the column-name -> values mapping into a DataFrame (one column per key).
df = pd.DataFrame.from_dict(data)

In [10]:
# Write the generated frame to CSV. All other to_csv options stay at their
# defaults, so the row index is written as the first column.
df.to_csv(path_or_buf='../../dataworks/tests/testdata/testdata.csv')

In [9]:
# IPython shell escape: list the contents of the parent directory.
# NOTE(review): presumably a check of where the relative paths in this
# notebook resolve from — confirm, or drop the cell from the final notebook.
!ls ..

README.md  data  jupyter
