# Imports

In [50]:
from faker import Faker
from faker.providers import BaseProvider
from datetime import datetime
from json import dumps
import pandas as pd
import random
import collections
import glob
import os

# Generating fake `event_id`: random UUIDs 

In [51]:
# Build the generator, reseed Faker with a freshly drawn integer (so every
# execution of this cell yields different values), then sample one UUID4.
fake = Faker()
Faker.seed(random.randrange(99999999999999999999))
fake_event_id = fake.uuid4()
print(fake_event_id)

c7963524-1397-4692-9314-b0f66e07552d


# Generating fake `timestamp`: random timestamps within the last 3 years

In [52]:
# Sample a datetime between three years ago and now, rendered as
# "YYYY-MM-DDTHH:MM:SS" (no timezone suffix).
fake_timestamp = (
    fake.date_time_between(start_date='-3y', end_date='now')
    .strftime("%Y-%m-%dT%H:%M:%S")
)
print(fake_timestamp)

2020-09-30T03:42:50


# Generating fake `domain`: random values drawn from a list of valid domain names

In [53]:
class ProjectDomainProvider(BaseProvider):
    """Faker provider that yields one of the known event domain names."""

    def project_domain_name(self):
        """Return one domain name picked at random from the fixed list.

        The pick goes through ``self.random_element`` rather than the global
        ``random`` module, so it draws from the Faker generator's own RNG and
        is reproducible when ``Faker.seed`` is given a fixed value.
        """
        list_project_domain_names = ['account', 'transaction']
        return self.random_element(list_project_domain_names)

# Register the provider so `fake.project_domain_name()` becomes available.
fake.add_provider(ProjectDomainProvider)

fake_project_domain_name = fake.project_domain_name()
print(fake_project_domain_name)

transaction


# Generating fake `status`: random values based on list

In [54]:
class StatusTypeProvider(BaseProvider):
    """Faker provider that yields one of the known status values."""

    def status_type(self):
        """Return one status picked at random from the fixed list.

        The pick goes through ``self.random_element`` rather than the global
        ``random`` module, so it draws from the Faker generator's own RNG and
        is reproducible when ``Faker.seed`` is given a fixed value.
        """
        list_status_types = ['ACTIVE', 'INACTIVE', 'SUSPENDED', 'BLOCKED', 'DELETED']
        return self.random_element(list_status_types)

# Register the provider so `fake.status_type()` becomes available.
fake.add_provider(StatusTypeProvider)

fake_status_type = fake.status_type()
print(fake_status_type)

INACTIVE


# Generating custom fake `uuid`: random values based on list

In [55]:
class CustomUUIDProvider(BaseProvider):
    """Faker provider that yields ids from a small fixed pool of UUID-shaped strings.

    Because the pool has only five entries, repeated calls return duplicate
    ids; `write_fake_data` uses this provider when `unique_uuid` is false.
    """

    def custom_uuid(self):
        """Return one id picked at random from the fixed pool.

        The pick goes through ``self.random_element`` rather than the global
        ``random`` module, so it draws from the Faker generator's own RNG and
        is reproducible when ``Faker.seed`` is given a fixed value.
        """
        list_uuids = [
            '1a1a1a1a-1a1a-1a1a-1a1a-1a1a1a1a1a1a',
            '2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b',
            '3c3c3c3c-3c3c-3c3c-3c3c-3c3c3c3c3c3c',
            '4d4d4d4d-4d4d-4d4d-4d4d-4d4d4d4d4d4d',
            '5e5e5e5e-5e5e-5e5e-5e5e-5e5e5e5e5e5e',
        ]
        return self.random_element(list_uuids)

# Defining `write_fake_data` and `read_fake_data` functions

In [56]:
def write_fake_data(fake, length, destination_path, unique_uuid = True):
    """Generate fake "status-change" events and dump them to a JSON file.

    The file is named ``fake_events_<YYYYmmddHHMMSS>.json`` and is written
    inside ``destination_path`` as a JSON array with one object per event.

    Parameters
    ----------
    fake : Faker-like generator
        Must provide ``uuid4``, ``date_time_between``, ``random_number`` and
        ``sentence``, plus the custom ``project_domain_name`` and
        ``status_type`` methods (and ``custom_uuid`` when ``unique_uuid`` is
        false).
    length : int
        Number of events to generate.
    destination_path : str
        Directory that receives the file. A trailing separator is optional,
        and the directory is created when it does not exist. An empty string
        means the current working directory.
    unique_uuid : bool, default True
        When true, ``event_id`` comes from ``fake.uuid4()``; otherwise it
        comes from ``fake.custom_uuid()``.

    Returns
    -------
    None
        Prints ``Done.`` once the file has been written.
    """
    # NOTE: the timestamp has one-second resolution, so two calls within the
    # same second target the same file name and the later one overwrites.
    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = 'fake_events_' + current_time + '.json'

    database = []
    for _ in range(length):
        uuid = fake.uuid4() if unique_uuid else fake.custom_uuid()
        database.append(collections.OrderedDict([
            ('event_id', uuid),
            ('timestamp', datetime.strftime(fake.date_time_between(start_date='-3y', end_date='now'),"%Y-%m-%dT%H:%M:%S")),
            ('domain', fake.project_domain_name()),
            ('event_type', "status-change"),
            ('data', collections.OrderedDict([
                ('id', fake.random_number(digits=6)),
                ('old_status', fake.status_type()),
                ('new_status', fake.status_type()),
                ('reason', fake.sentence(nb_words=5))
            ]))
        ]))

    # os.makedirs('') raises, so only create the directory when one is given.
    if destination_path:
        os.makedirs(destination_path, exist_ok=True)

    # os.path.join inserts the separator only when it is missing, so both
    # 'dir/' and 'dir' land the file inside the directory.
    output_path = os.path.join(destination_path, filename)
    with open(output_path, 'w', encoding='utf-8') as output:
        output.write(dumps(database, indent=4, sort_keys=False, default=str))

    print("Done.")

def read_fake_data(json_filepath):
    """Load every JSON file matching a glob pattern into a single DataFrame.

    Parameters
    ----------
    json_filepath : str
        Glob pattern (e.g. ``'some/dir/*.json'``) selecting the files to read.

    Returns
    -------
    pandas.DataFrame
        The frames read from each matching file, concatenated in sorted path
        order. Each file keeps its own row index, so index labels repeat
        across files. An empty DataFrame is returned when nothing matches.
    """
    # glob returns paths in an OS-dependent order; sort for a stable row order.
    json_files = sorted(os.path.normpath(i) for i in glob.glob(json_filepath))
    if not json_files:
        # pd.concat raises ValueError on an empty list, so short-circuit here.
        return pd.DataFrame()
    df = pd.concat([pd.read_json(f) for f in json_files])
    return df

# Writing and reading fake data

In [57]:
def run(unique_uuid = True, length = 10, destination_path = None):
    """Write one batch of fake events, then load and print all batches found.

    Parameters
    ----------
    unique_uuid : bool, default True
        Forwarded to `write_fake_data`: true uses random UUID4 event ids,
        false draws ids from `CustomUUIDProvider`.
    length : int, default 10
        Number of events to generate in this batch.
    destination_path : str, optional
        Directory for the generated JSON file. Defaults to the original
        hard-coded input folder when omitted.

    Notes
    -----
    Every ``*.json`` file in the destination directory is read back, not only
    the one just written, so the printed frame grows with each call.
    """
    if destination_path is None:
        destination_path = 'C:/Users/Eder/Documents/EDER/projetos/pismo_recruiting_technical_case/work/data/input/'
    # Guarantee a trailing separator so the concatenation below (and any
    # concatenation done by write_fake_data) yields a path inside the folder.
    destination_path = os.path.join(destination_path, '')

    fake = Faker()
    Faker.seed(random.randrange(0, 99999999999999999999, 1))
    fake.add_provider(ProjectDomainProvider)
    fake.add_provider(StatusTypeProvider)
    fake.add_provider(CustomUUIDProvider)

    write_fake_data(fake, length, destination_path, unique_uuid)

    json_filepath = destination_path + '*.json'
    fake_data = read_fake_data(json_filepath)
    print(fake_data)

In [61]:
# Generate a batch whose event ids are random UUID4 values.
run(unique_uuid=True)

Done.
                               event_id           timestamp       domain  \
0  9debcba6-83b4-4308-99d9-b24b624d5253 2020-10-16 17:11:31      account   
1  2cba02a0-b0fb-4098-956d-1b2fed98936e 2020-12-27 21:13:15      account   
2  9168b868-3409-4050-959c-c00201a68ee4 2022-04-19 17:57:59      account   
3  39178f86-8589-4fb7-be07-67c9eb0ca297 2023-05-28 06:49:45  transaction   
4  c901596c-b8f4-486c-bf65-5ef24c7eaa47 2023-05-06 16:46:24  transaction   
5  9aeafe7a-d85f-49a6-97d3-bcabc85db4a5 2021-10-06 08:20:45  transaction   
6  b4e6c3d0-0ac8-411c-a585-afd7d9d5594a 2020-11-03 06:56:34  transaction   
7  0680e9c5-cff3-40e4-a02a-ba822e0abb00 2022-05-24 23:03:26      account   
8  61f09824-fc6b-486d-a8a3-091f1255994b 2022-02-10 19:32:23      account   
9  b85674cc-f983-485f-8740-c999c4caaaca 2023-05-07 16:22:23  transaction   

      event_type                                               data  
0  status-change  {'id': 751464, 'old_status': 'ACTIVE', 'new_st...  
1  status-change 

In [62]:
# Generate a batch whose event ids come from the fixed CustomUUIDProvider
# pool, so repeated ids are expected.
run(unique_uuid=False)

Done.
                               event_id           timestamp       domain  \
0  9debcba6-83b4-4308-99d9-b24b624d5253 2020-10-16 17:11:31      account   
1  2cba02a0-b0fb-4098-956d-1b2fed98936e 2020-12-27 21:13:15      account   
2  9168b868-3409-4050-959c-c00201a68ee4 2022-04-19 17:57:59      account   
3  39178f86-8589-4fb7-be07-67c9eb0ca297 2023-05-28 06:49:45  transaction   
4  c901596c-b8f4-486c-bf65-5ef24c7eaa47 2023-05-06 16:46:24  transaction   
5  9aeafe7a-d85f-49a6-97d3-bcabc85db4a5 2021-10-06 08:20:45  transaction   
6  b4e6c3d0-0ac8-411c-a585-afd7d9d5594a 2020-11-03 06:56:34  transaction   
7  0680e9c5-cff3-40e4-a02a-ba822e0abb00 2022-05-24 23:03:26      account   
8  61f09824-fc6b-486d-a8a3-091f1255994b 2022-02-10 19:32:23      account   
9  b85674cc-f983-485f-8740-c999c4caaaca 2023-05-07 16:22:23  transaction   
0  4d4d4d4d-4d4d-4d4d-4d4d-4d4d4d4d4d4d 2021-03-16 14:34:00  transaction   
1  4d4d4d4d-4d4d-4d4d-4d4d-4d4d4d4d4d4d 2020-09-21 14:29:11  transaction   
2  2b2