In [29]:
import pandas as pd
import numpy as np
import random
import copy
from datetime import datetime

In [30]:
def make_categorical(choices, n):
    """Return a list of ``n`` values drawn from ``choices`` with replacement.

    Sampling uses the module-level ``random`` generator, so the result is
    reproducible only if the caller seeds ``random`` beforehand.
    """
    sampled = random.choices(population=choices, k=n)
    return sampled

def make_datetime(min, max, n):
    """Draw ``n`` random timestamps, at one-second resolution, between two bounds.

    Parameters
    ----------
    min, max : datetime-like
        Lower (inclusive) and upper (exclusive) bound; anything accepted by
        ``pd.Timestamp``. Naive values are taken at face value, so the result
        does not depend on the local time zone of the machine running the
        code; tz-aware values are converted to UTC. Both bounds are floored
        to whole seconds. The names shadow the built-ins but are kept because
        callers pass them as keyword arguments.
    n : int
        Number of timestamps to draw.

    Returns
    -------
    pandas.DatetimeIndex
        ``n`` tz-naive timestamps, in no particular order.

    Raises
    ------
    ValueError
        If ``min`` is not strictly earlier than ``max`` at second resolution.
    """
    # `Timestamp.value` is nanoseconds since the epoch. Unlike
    # `datetime.timestamp()` it does not reinterpret a naive value in the
    # local time zone, which used to shift the samples outside [min, max).
    low = pd.Timestamp(min).value // 10**9
    high = pd.Timestamp(max).value // 10**9
    if low >= high:
        raise ValueError(f'min ({min}) must be earlier than max ({max})')

    # Explicit int64: the platform default integer can be 32-bit, which
    # cannot hold epoch seconds past 2038.
    seconds = np.random.randint(low, high, size=n, dtype=np.int64)
    return pd.to_datetime(seconds, unit='s')

In [31]:
def make_dataframe(columns, n_rows, sort_by=None):
    """Build a DataFrame of randomly generated columns.

    Parameters
    ----------
    columns : dict
        Maps each column name to a spec dict. The spec's ``'type'`` entry
        selects the generator (``'categorical'`` -> ``make_categorical``,
        ``'datetime'`` -> ``make_datetime``); every other entry is forwarded
        to that generator as a keyword argument.
    n_rows : int
        Number of values generated for every column.
    sort_by : optional
        Passed to ``DataFrame.sort_values`` when not ``None``. The index is
        not reset, so rows keep their pre-sort labels.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If a spec's ``'type'`` is not one of the recognized values.
    """
    # Deep-copy first: 'type' is popped from each spec below and the caller's
    # dict must stay untouched.
    specs = copy.deepcopy(columns)

    generated = {}
    for name, spec in specs.items():
        kind = spec.pop('type')
        if kind == 'datetime':
            generated[name] = make_datetime(**spec, n=n_rows)
            continue
        if kind == 'categorical':
            generated[name] = make_categorical(**spec, n=n_rows)
            continue
        raise ValueError(f'column type {kind} is not recognized')

    frame = pd.DataFrame(generated)
    if sort_by is None:
        return frame
    return frame.sort_values(sort_by)

In [32]:
# Seed both generators the helpers draw from (`random` for the categorical
# columns, `np.random` for the datetime column) so that re-running the
# notebook regenerates exactly the same test data.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Column specs: 'type' selects the generator, the remaining keys are its arguments.
columns = {
    'user': {'type': 'categorical', 'choices': ['user_a', 'user_b', 'user_c']},
    'event_name': {'type': 'categorical', 'choices': ['view_home_page', 'view_product_page', 'view_cart']},
    'product': {'type': 'categorical', 'choices': [None, 'shirt', 'dress', 'shoes']},
    'event_time': {
        'type': 'datetime',
        'min': datetime(2022,1,1),
        'max': datetime(2022,2,3),
    }
}

df = make_dataframe(columns=columns, n_rows=30, sort_by=['user', 'event_time'])

In [33]:
# Constraint: only 'view_product_page' events carry a product, so blank out
# the randomly drawn product on every other event.
df.loc[df['event_name'].ne('view_product_page'), ['product']] = None

In [34]:
# Preview the first rows only instead of rendering the whole frame.
df.head(10)

Unnamed: 0,user,event_name,product,event_time
3,user_a,view_home_page,,2022-01-02 22:40:14
12,user_a,view_home_page,,2022-01-07 20:18:20
18,user_a,view_home_page,,2022-01-15 04:14:30
0,user_a,view_product_page,shirt,2022-01-15 16:48:27
25,user_a,view_product_page,,2022-01-20 09:15:01
22,user_a,view_cart,,2022-01-26 09:10:32
15,user_a,view_home_page,,2022-01-29 06:08:29
16,user_a,view_cart,,2022-01-31 01:45:55
13,user_a,view_product_page,shirt,2022-01-31 14:38:12
2,user_a,view_product_page,dress,2022-02-02 23:28:31


In [35]:
# Write the fixture used by the tests. The index (the pre-sort row labels) is
# written too, as the first, unnamed CSV column.
df.to_csv('../tests/test_data.csv', index=True)