# Exercise 1 - operational vs analytical

In [19]:
from faker.providers import BaseProvider
from faker import Faker
from dataclasses import dataclass
from datetime import datetime

fake = Faker()
name_pool = [fake.name() for _ in range(5)]


@dataclass
class Transaction:
    id: int
    name: str
    datetime: datetime
    amount: float


class TransactionProvider(BaseProvider):
    __provider__ = "transaction"

    def transaction(self) -> Transaction:
        return Transaction(
            fake.unique.uuid4(),
            fake.random_element(elements=name_pool),
            fake.date_time_this_century(),
            fake.pyfloat(min_value=-120, max_value=100),
        )


fake.add_provider(TransactionProvider)
N = int(500)
transactions = [fake.transaction() for _ in range(N)]

In [20]:
transactions

[Transaction(id='687199f6-9eb2-496e-a81c-b3cdf6dd1818', name='James Barnes', datetime=datetime.datetime(2015, 6, 21, 23, 0, 49, 70666), amount=-111.56576),
 Transaction(id='94d79adb-e48a-4730-bc62-91ccaa5c5bf4', name='Adam Boone', datetime=datetime.datetime(2021, 10, 30, 9, 35, 20, 822688), amount=-62.291),
 Transaction(id='c0632a65-e9db-4398-a029-9a731fb2c79e', name='Adam Boone', datetime=datetime.datetime(2013, 3, 9, 2, 26, 55, 131712), amount=15.458772),
 Transaction(id='47d737af-f695-449f-9367-677ca313f1b3', name='Adam Boone', datetime=datetime.datetime(2006, 11, 23, 16, 56, 12, 284446), amount=-67.23414),
 Transaction(id='79bc96f8-e2f7-43da-af8b-cbc06f01c6f7', name='Adam Boone', datetime=datetime.datetime(2023, 9, 14, 13, 15, 33, 824677), amount=-81.259254192),
 Transaction(id='3ea35d12-93e8-4594-bb96-97c68a0f42cf', name='Joshua Jackson', datetime=datetime.datetime(2004, 1, 30, 20, 48, 3, 647075), amount=-99.36852),
 Transaction(id='f6957e52-63dc-4e3a-8d39-0f3a1f009c19', name='Ada

In [24]:
%%timeit

max([transaction.amount for transaction in transactions])

17.9 µs ± 1.2 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [27]:
%%timeit

sum([transaction.amount for transaction in transactions if transaction.name == 'Joshua Jackson'])

26.5 µs ± 3.37 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [22]:
columnar = {
    "id": [t.id for t in transactions],
    "name": [t.name for t in transactions],
    "datetime": [t.datetime for t in transactions],
    "amount": [t.amount for t in transactions],
    }

In [25]:
%%timeit

max(columnar["amount"])

5.58 µs ± 54.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [26]:
%%timeit

sum(
    [
        amount
        for amount, name in zip(columnar["amount"], columnar["name"])
        if name == "Joshua Jackson"
    ]
)

19.6 µs ± 4.41 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [32]:
for transaction in transactions:
    if transaction.id == "c8b0d86e-9fba-442c-b33b-05b1e71daeb5":
        print(transaction)
        break

33.8 µs ± 6.38 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [33]:
%%timeit

for i, id in enumerate(columnar["id"]):
    if id == "c8b0d86e-9fba-442c-b33b-05b1e71daeb5":
        break

88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.248094978558 2001-08-18 01:53:19.787093 Joshua Jackson
88.24809497855

KeyboardInterrupt: 