In [250]:
try:
    from faker import Faker
except (ModuleNotFoundError, ImportError):
    !pip install faker
    from faker import Faker

from itertools import repeat, cycle, count
from dataclasses import dataclass
import enum
from uuid import uuid4
import json
import datetime as dt
import random
from pprint import pprint
import pandas as pd
from pathlib import Path

In [138]:
seed = 37
counter = count()   # gerador de números sequênciais
random.seed(seed)
Faker.seed(seed)    # Semente para números pseudo-aleatórios
fake = Faker('pt_BR')   # Faker para português brasileiro
fake.seed_instance(seed)   # Semente para números pseudo-aleatórios

class FormaPagamento(enum.Enum):
    """Formas de pagamento."""
    GRATIS = 0
    DINHEIRO = enum.auto()
    DEBITO = enum.auto()
    CREDITO = enum.auto()
    PIX = enum.auto()
    
class LocalConsumo(enum.Enum):
    DELIVERY=1
    NOLOCAL=enum.auto()

Regiao = enum.Enum('Regiao', 'NORTE NORDESTE CENTROESTE SUDESTE SUL'.split())


@dataclass
class Estado:
    sigla: str
    nome: str
    capital: str
    regiao: Regiao
    

@dataclass
class Loja:
    nome: str
    cidade: str
    estado: Estado
    regiao: Regiao

## Brainstorms

In [3]:
display(FormaPagamento.GRATIS)
display(LocalConsumo.NOLOCAL)

<FormaPagamento.GRATIS: 0>

<LocalConsumo.NOLOCAL: 2>

In [4]:
help(repeat)

Help on class repeat in module itertools:

class repeat(builtins.object)
 |  repeat(object [,times]) -> create an iterator which returns the object
 |  for the specified number of times.  If not specified, returns the object
 |  endlessly.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __length_hint__(...)
 |      Private method returning an estimate of len(list(it)).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __repr__(self, /)
 |      Return repr(self).
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



In [5]:
help(cycle)

Help on class cycle in module itertools:

class cycle(builtins.object)
 |  cycle(iterable, /)
 |  
 |  Return elements from the iterable until it is exhausted. Then repeat the sequence indefinitely.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __setstate__(...)
 |      Set state information for unpickling.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



In [6]:
a, b, c, d, e = repeat(True, 5)

In [7]:
z=zip((1,2,3,4,5,6), cycle([True]))
list(z)

[(1, True), (2, True), (3, True), (4, True), (5, True), (6, True)]

In [8]:
fake.name()

'Maria Carvalho'

In [9]:

Regiao.CENTROESTE

<Regiao.CENTROESTE: 3>

In [10]:
uuid4().hex

'3d4683a4dc214ca59ae6abd11a1bc77a'

In [11]:
uuid4().int

222748887400216239420142960413937851943

In [12]:
uuid4()

UUID('16d29e36-533c-4ebc-a226-e5d636ac009e')

In [13]:
uuid4().fields

(2919839887, 62908, 19088, 167, 217, 194178242576003)

In [14]:
next(counter)

0

## Geração de dados

In [15]:
with open('../data_files/estados_br.json') as f:
    estadosBr = json.load(f)
estadosBr

[{'SIGLA': 'AC', 'ESTADO': 'Acre', 'CAPITAL': 'Rio Branco', 'REGIAO': 'NORTE'},
 {'SIGLA': 'AL',
  'ESTADO': 'Alagoas',
  'CAPITAL': 'Maceió',
  'REGIAO': 'NORDESTE'},
 {'SIGLA': 'AP', 'ESTADO': 'Amapá', 'CAPITAL': 'Macapá', 'REGIAO': 'NORTE'},
 {'SIGLA': 'AM', 'ESTADO': 'Amazonas', 'CAPITAL': 'Manaus', 'REGIAO': 'NORTE'},
 {'SIGLA': 'BA',
  'ESTADO': 'Bahia',
  'CAPITAL': 'Salvador',
  'REGIAO': 'NORDESTE'},
 {'SIGLA': 'CE',
  'ESTADO': 'Ceará',
  'CAPITAL': 'Fortaleza',
  'REGIAO': 'NORDESTE'},
 {'SIGLA': 'DF',
  'ESTADO': 'Distrito Federal',
  'CAPITAL': 'Brasília',
  'REGIAO': 'CENTROESTE'},
 {'SIGLA': 'ES',
  'ESTADO': 'Espírito Santo',
  'CAPITAL': 'Vitória',
  'REGIAO': 'SUDESTE'},
 {'SIGLA': 'GO',
  'ESTADO': 'Goiás',
  'CAPITAL': 'Goiânia',
  'REGIAO': 'CENTROESTE'},
 {'SIGLA': 'MA',
  'ESTADO': 'Maranhão',
  'CAPITAL': 'São Luís',
  'REGIAO': 'NORDESTE'},
 {'SIGLA': 'MT',
  'ESTADO': 'Mato Grosso',
  'CAPITAL': 'Cuiabá',
  'REGIAO': 'CENTROESTE'},
 {'SIGLA': 'MS',
  'ESTADO':

In [16]:
{
    'cod_pedido': None,
    'data': None,
    'loja': None,
    'cidade': None,
    'estado': None,
    'regiao': None,
    'tamanho': None,
    'local_consumo': None,
    'preco': None,
    'forma_pagamento': None,
    'data': None,
    'ano_mes': None
}

{'cod_pedido': None,
 'data': None,
 'loja': None,
 'cidade': None,
 'estado': None,
 'regiao': None,
 'tamanho': None,
 'local_consumo': None,
 'preco': None,
 'forma_pagamento': None,
 'ano_mes': None}

In [34]:
dt.datetime.fromisoformat('2020-01-01T20:20:20.123456-03:00')

datetime.datetime(2020, 1, 1, 20, 20, 20, 123456, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=75600)))

In [35]:
dt.datetime.fromisoformat('2020-01-01T20:20:20.123456-03:00').strftime("%F")

'2020-01-01'

In [127]:
fake.date_between(start_date=dt.datetime.strptime('2020-12-31', '%Y-%m-%d'), end_date=dt.datetime.now())

datetime.date(2021, 2, 18)

In [135]:
fake.date_between(start_date=dt.datetime.strptime('2020-12-31', '%Y-%m-%d'), end_date=dt.datetime.now()).strftime("%F")

'2022-01-03'

In [188]:
LocalConsumo.__members__.keys()

dict_keys(['DELIVERY', 'NOLOCAL'])

In [247]:
random.seed(37)
products = tuple(zip(['200ml', '300ml', '500ml', '700ml', '1000ml'], (18, 25.5, 40, 55, 75.5)))
# cities = random.choices(estadosBr, k=10)
cities = random.sample(estadosBr, k=10)
lojas = [{'loja': f'loja{(x +1):02}', **cities[x]} for x in range(len(cities))]
registros = []
# pprint(cities)
for _ in range(7*10**4):
    d = fake.date_between(start_date=dt.datetime.strptime('2020-12-31', '%Y-%m-%d'), end_date=dt.datetime.now())
    p = random.choices(products, [1,1,2,2,1], k=1)[0]
    city = random.choice(lojas)
    registro = {
        'cod_pedido': uuid4().hex,
        'data': d.strftime('%F'),
        'loja': city.get('loja'),
        'cidade': city.get('CAPITAL'),
        'estado': city.get('ESTADO'),
        'regiao': city.get('REGIAO'),
        'tamanho': p[0],
        'local_consumo': random.choice(list(LocalConsumo.__members__.keys())),
        'preco': p[1],
        'forma_pagamento': random.choice([FormaPagamento.PIX.name, FormaPagamento.CREDITO.name, FormaPagamento.DEBITO.name, FormaPagamento.DINHEIRO.name]),
        'ano_mes': d.strftime("%Y-%m")
    }
    registros.append(registro)
print(registros)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [248]:
df = pd.DataFrame(registros)
df

Unnamed: 0,cod_pedido,data,loja,cidade,estado,regiao,tamanho,local_consumo,preco,forma_pagamento,ano_mes
0,fc9c53e49cb649ed9d0ab7991fdf3e9a,2021-10-28,loja02,Porto Alegre,Rio Grande do Sul,SUL,500ml,NOLOCAL,40.0,DEBITO,2021-10
1,8b59eab421bc486b97e711cd3e461879,2022-05-29,loja07,Recife,Pernambuco,NORDESTE,1000ml,NOLOCAL,75.5,CREDITO,2022-05
2,471f469d3d7d4b289e2753f8f6c5846a,2021-04-09,loja01,Porto Velho,Rondônia,NORTE,1000ml,DELIVERY,75.5,DINHEIRO,2021-04
3,cac59816964b4e32a5ab542a46c13fae,2022-06-02,loja05,Palmas,Tocantins,NORTE,200ml,NOLOCAL,18.0,DINHEIRO,2022-06
4,6d7e12b449f743b5bc19a279188bbdca,2022-02-28,loja10,João Pessoa,Paraíba,NORDESTE,700ml,NOLOCAL,55.0,PIX,2022-02
...,...,...,...,...,...,...,...,...,...,...,...
69995,f512b4b80c5242aba4dd2bd61a1bca95,2022-03-14,loja05,Palmas,Tocantins,NORTE,700ml,DELIVERY,55.0,CREDITO,2022-03
69996,9060c1f734dc478d97e7c125334ae359,2021-07-09,loja04,São Paulo,São Paulo,SUDESTE,500ml,NOLOCAL,40.0,DEBITO,2021-07
69997,7466aaef9a004909a8e9a3fda59a822e,2022-04-13,loja05,Palmas,Tocantins,NORTE,500ml,DELIVERY,40.0,CREDITO,2022-04
69998,fa0b66e664154eee91d61dd42d4305ba,2021-07-06,loja09,Campo Grande,Mato Grosso do Sul,CENTROESTE,700ml,DELIVERY,55.0,CREDITO,2021-07


In [252]:
df.to_json("../data_files/vendas.json", indent=2, orient='records')