# Imports

In [1]:
from faker import Faker
from faker.providers import BaseProvider
from datetime import datetime
from json import dumps
import pandas as pd
import random
import collections
import glob
import os

# Generating fake `event_id`: random UUIDs 

In [2]:
fake = Faker()
Faker.seed(random.randrange(0, 99999999999999999999, 1))
fake_event_id = fake.uuid4()
print(fake_event_id)

d9410c8c-28d9-4884-9a29-5cb951e6b100


# Generating fake `timestamp`: random timestamps with values until 3 years ago

In [3]:
fake_timestamp = datetime.strftime(fake.date_time_between(start_date='-3y', end_date='now'),"%Y-%m-%dT%H:%M:%S")
print(fake_timestamp)

2022-07-27T07:27:09


# Generating fake `domain`: random values based on valid grade names list

In [4]:
class ProjectDomainProvider(BaseProvider):
    def project_domain_name(self):
        list_project_domain_names = ['account','transaction']
        return random.choice(list_project_domain_names)

fake.add_provider(ProjectDomainProvider)

fake_project_domain_name = fake.project_domain_name()
print(fake_project_domain_name)

transaction


# Generating fake `status`: random values based on list

In [5]:
class StatusTypeProvider(BaseProvider):
    def status_type(self):
        list_status_types = ['ACTIVE','INACTIVE','SUSPENDED','BLOCKED', 'DELETED']
        return random.choice(list_status_types)

fake.add_provider(StatusTypeProvider)

fake_status_type = fake.status_type()
print(fake_status_type)

DELETED


# Generating custom fake `uuid`: random values based on list

In [6]:
class CustomUUIDProvider(BaseProvider):
    def custom_uuid(self):
        list_uuids = [
            '1a1a1a1a-1a1a-1a1a-1a1a-1a1a1a1a1a1a',
            '2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b'
            ]
        return random.choice(list_uuids)

# Defining `write_fake_data` and `read_fake_data` functions

In [7]:
def write_fake_data(fake, length, destination_path, unique_uuid = True):

    database = []
    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = 'fake_events_'+current_time

    for x in range(length):
        uuid = fake.uuid4() if unique_uuid else fake.custom_uuid()
        project_domain_name = fake.project_domain_name()
        event_type = project_domain_name + "-status-change"

        database.append(collections.OrderedDict([
            ('event_id', uuid),
            ('timestamp', datetime.strftime(fake.date_time_between(start_date='-3y', end_date='now'),"%Y-%m-%dT%H:%M:%S")),
            ('domain', project_domain_name),
            ('event_type', event_type),
            ('data', collections.OrderedDict([
                ('id', fake.random_number(digits=6)),
                ('old_status', fake.status_type()),
                ('new_status', fake.status_type()),
                ('reason', fake.sentence(nb_words=5))
            ]))
        ]))

    with open('%s%s.json' % (destination_path, filename), 'w') as output:
        output.write(dumps(database, indent=4, sort_keys=False, default=str))

    print("Done.")

def read_fake_data(json_filepath):
    json_files = [os.path.normpath(i) for i in glob.glob(json_filepath)]
    df = pd.concat([pd.read_json(f) for f in json_files])
    return df

# Writing and reading fake data

In [8]:
def run(unique_uuid = True):
    fake = Faker()
    Faker.seed(random.randrange(0, 99999999999999999999, 1))
    fake.add_provider(ProjectDomainProvider)
    fake.add_provider(StatusTypeProvider)
    fake.add_provider(CustomUUIDProvider)

    length = 10
    destination_path = 'C:/Users/Eder/Documents/EDER/projetos/pismo_recruiting_technical_case/work/data/raw/events/'
    write_fake_data(fake, length, destination_path,unique_uuid)

    json_filepath = destination_path+'*.json'
    fake_data = read_fake_data(json_filepath)
    print(fake_data)

In [9]:
run()

Done.
                               event_id           timestamp       domain  \
0  bd59afd5-f34d-4fd9-a225-6e3105708a1f 2021-08-19 20:38:31      account   
1  a4048d06-4978-43c1-9324-b66a5920c63a 2022-03-23 10:13:43  transaction   
2  b38fc522-bdaf-4d1b-a189-59f5f4d12763 2021-03-11 17:28:24      account   
3  d24bc668-664d-4ad7-ac09-277d3c41f3f8 2021-06-18 23:33:26  transaction   
4  42995860-51b3-4c92-a87a-bcb01d6bc049 2021-11-17 00:47:35      account   
5  18d88ff4-63b9-4e48-acf6-90031ce0494b 2023-05-31 14:23:17      account   
6  e910c319-cfcd-482f-b750-e51aa38852fd 2021-05-29 06:03:49  transaction   
7  323ebf7a-83c2-4fb4-81b1-11499e8baff9 2021-02-12 18:30:36      account   
8  8caf9558-0696-4fcb-91e1-ad4b45d92cbe 2021-02-04 20:42:57  transaction   
9  87eae111-5589-464e-b131-68f1c35f496c 2021-10-19 14:04:38      account   

                  event_type  \
0      account-status-change   
1  transaction-status-change   
2      account-status-change   
3  transaction-status-change 

In [11]:
run(unique_uuid = False)

Done.
                               event_id           timestamp       domain  \
0  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2022-09-17 06:45:48      account   
1  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2020-12-01 18:41:47      account   
2  1a1a1a1a-1a1a-1a1a-1a1a-1a1a1a1a1a1a 2020-10-18 19:36:53      account   
3  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2023-01-01 13:12:16      account   
4  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2021-09-27 17:55:07      account   
5  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2021-04-05 00:10:46  transaction   
6  1a1a1a1a-1a1a-1a1a-1a1a-1a1a1a1a1a1a 2023-07-07 10:32:12      account   
7  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2022-04-29 19:01:50      account   
8  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2022-05-24 23:29:52  transaction   
9  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2021-12-09 23:37:08      account   
0  1a1a1a1a-1a1a-1a1a-1a1a-1a1a1a1a1a1a 2022-08-16 07:50:27  transaction   
1  2b2b2b2b-2b2b-2b2b-2b2b-2b2b2b2b2b2b 2023-03-04 21:44:03  transaction   
2  2b2