# Examples

## `pythonkit`

In [1]:
import datetime as dt
import functools
import time
import traceback

import time_machine
from toolz import curried

import onekit.pythonkit as pk

### `stopwatch`

In [2]:
# Pre-configure the stopwatch for this walkthrough rather than relying on its
# defaults: a human-readable timestamp format and `flush=False`.
# NOTE(review): "%-d" (day of month without zero padding) is a platform-specific
# strftime directive - presumably unsupported on Windows; confirm where docs are built.
stopwatch = functools.partial(pk.stopwatch, fmt="%a, %-d %b %Y %H:%M:%S", flush=False)

# Move the clock to a fixed starting point so the printed timestamps are reproducible.
# NOTE(review): the destination is a naive datetime - check how time_machine
# interprets it relative to the local timezone.
traveller = time_machine.travel(dt.datetime(2023, 1, 1, 12))
traveller.start();

Use as a context manager.

Example 1: Measure total elapsed time of multiple statements.

In [3]:
# Everything inside the `with` body counts towards one measurement,
# so the two sleeps are reported as a single elapsed time.
with stopwatch("example 1"):
    time.sleep(0.05)
    time.sleep(0.05)

Sun, 1 Jan 2023 12:00:00 -> Sun, 1 Jan 2023 12:00:00 = 0.101233s - example 1


Example 2: Measure total elapsed time of multiple `stopwatch` instances.

In [4]:
# Time two separate blocks, keeping a handle on each stopwatch via `as`.
with stopwatch("example 2 - stopwatch 1") as sw1:
    time.sleep(0.05)

with stopwatch("example 2 - stopwatch 2") as sw2:
    time.sleep(0.05)

# Adding the instances gives their combined elapsed time (shown as the cell output).
sw1 + sw2

Sun, 1 Jan 2023 12:00:00 -> Sun, 1 Jan 2023 12:00:00 = 0.050622s - example 2 - stopwatch 1
Sun, 1 Jan 2023 12:00:00 -> Sun, 1 Jan 2023 12:00:00 = 0.050875s - example 2 - stopwatch 2


0.101497s - total elapsed time

Use as a decorator.

In [5]:
@stopwatch("example 3")
def func_with_supplied_label():
    """Sleep for 0.1 s; each call is timed and reported under the label `example 3`."""
    time.sleep(0.1)

In [6]:
# Calling the decorated function prints one timing line for this call.
func_with_supplied_label()

Sun, 1 Jan 2023 12:00:00 -> Sun, 1 Jan 2023 12:00:00 = 0.100615s - example 3


In [7]:
@stopwatch()
def func_with_no_supplied_label():
    """Sleep for 0.1 s; with no label given, the timing line shows the function name."""
    time.sleep(0.1)

In [8]:
# The printed label falls back to the name of the decorated function.
func_with_no_supplied_label()

Sun, 1 Jan 2023 12:00:00 -> Sun, 1 Jan 2023 12:00:00 = 0.100686s - func_with_no_supplied_label


In [9]:
# End the time travel started in the stopwatch setup cell so later cells see the real clock.
traveller.stop()

### `flatten`

In [10]:
# Deliberately messy input: mixed nesting depths, a tuple, bare scalars, and an empty list.
irregular_list = [
    ["one", 2],
    3,
    [(4, "five")],
    [[["six"]]],
    "seven",
    [],
]

# Extra positional arguments are flattened together with the first one;
# strings stay whole rather than being split into characters (see the output).
list(pk.flatten(irregular_list, 8, [9, ("ten",)]))

['one', 2, 3, 4, 'five', 'six', 'seven', 8, 9, 'ten']

### `highlight_string_differences`

Use in an `assert` statement to get more information:

In [11]:
# Two strings that differ in exactly one character (index 1: "e" vs "a").
lft_str = "hello"
rgt_str = "hallo"

The common approach of printing both values may not be informative enough to spot subtle differences.

In [12]:
try:
    # Baseline: the failure message simply interpolates both values.
    assert lft_str == rgt_str, f"{lft_str} != {rgt_str}"
except AssertionError:
    # Print the traceback instead of letting the cell fail, so the notebook keeps running.
    traceback.print_exc()

Traceback (most recent call last):
  File "/tmp/ipykernel_18859/54865336.py", line 2, in <module>
    assert lft_str == rgt_str, f"{lft_str} != {rgt_str}"
AssertionError: hello != hallo


With `highlight_string_differences`, it is easier to spot subtle differences.

In [13]:
def get_string_diff(lft_str: str, rgt_str: str) -> str:
    """Build an assertion message: a header line followed by the highlighted diff.

    The second part is whatever ``pk.highlight_string_differences`` returns for
    the two strings; it is placed on the line(s) after the header.
    """
    header = "lft_str != rgt_str"
    highlighted = pk.highlight_string_differences(lft_str, rgt_str)
    return "\n".join([header, highlighted])


try:
    # Same comparison as before, but the message now carries a marker under
    # the position where the two strings differ.
    assert lft_str == rgt_str, get_string_diff(lft_str, rgt_str)
except AssertionError:
    # Print the traceback instead of letting the cell fail, so the notebook keeps running.
    traceback.print_exc()

Traceback (most recent call last):
  File "/tmp/ipykernel_18859/788221307.py", line 6, in <module>
    assert lft_str == rgt_str, get_string_diff(lft_str, rgt_str)
AssertionError: lft_str != rgt_str
hello
 |   
hallo


### Date computation

#### `daycount`

In [14]:
# month sequence - first date
# NOTE(review): daycount is presumably an endless day-by-day sequence, which is
# why the pipeline stays lazy and ends with take(5) - confirm against its docs.
curried.pipe(
    pk.daycount(dt.date(2022, 1, 1)),
    curried.filter(lambda d: d.day == 1),  # keep only the first day of each month
    curried.map(pk.date_to_str),
    curried.take(5),  # stop after five matches
    list,
)

['2022-01-01', '2022-02-01', '2022-03-01', '2022-04-01', '2022-05-01']

In [15]:
# month sequence - last date
curried.pipe(
    pk.daycount(dt.date(2022, 1, 1)),
    curried.filter(lambda d: d.day == 1),  # one representative date per month
    # swap each month's first day for that month's last date
    curried.map(lambda d: pk.last_date_of_month(d.year, d.month)),
    curried.map(pk.date_to_str),
    curried.take(5),  # stop after five months
    list,
)

['2022-01-31', '2022-02-28', '2022-03-31', '2022-04-30', '2022-05-31']

In [16]:
# Monday sequence
curried.pipe(
    pk.daycount(dt.date(2022, 1, 1)),
    # pk.weekday is compared against the abbreviated day name "Mon"
    curried.filter(lambda d: pk.weekday(d) == "Mon"),
    curried.map(pk.date_to_str),
    curried.take(5),  # first five Mondays on or after the start date
    list,
)

['2022-01-03', '2022-01-10', '2022-01-17', '2022-01-24', '2022-01-31']

In [17]:
# pick every 7th day
curried.pipe(
    pk.daycount(dt.date(2022, 1, 1)),
    curried.take_nth(7),  # keeps the start date itself, then every 7th date after it
    curried.map(pk.date_to_str),
    curried.take(5),
    list,
)

['2022-01-01', '2022-01-08', '2022-01-15', '2022-01-22', '2022-01-29']

Task: The digits of 22 February 2022 form [a palindrome and an ambigram](https://en.wikipedia.org/wiki/Twosday) in dd-mm-yyyy format.
List the next five dates with these properties.

In [18]:
def format_date(d: dt.date, with_hyphen: bool) -> str:
    """Render ``d`` in day-month-year order with zero-padded day and month.

    With ``with_hyphen`` the parts are joined by hyphens (``22-02-2022``);
    otherwise they are concatenated into eight digits (``22022022``).
    """
    separator = "-" if with_hyphen else ""
    return d.strftime(separator.join(["%d", "%m", "%Y"]))


def is_palindrome_date(d: dt.date) -> bool:
    """Return True when the ddmmyyyy digits of ``d`` read the same in both directions."""
    digits = format_date(d, False)
    return all(front == back for front, back in zip(digits, reversed(digits)))


def is_ambigram_date(d: dt.date) -> bool:
    """Return True when the ddmmyyyy form of ``d`` uses only the digits 0, 1, 2 and 8."""
    allowed_digits = frozenset("0128")
    return allowed_digits.issuperset(format_date(d, False))


def show_date(d: dt.date) -> str:
    """Show ``d`` in hyphenated form next to its digits-only form."""
    hyphenated = format_date(d, True)
    compact = format_date(d, False)
    return hyphenated + " ↦ " + compact


# Start on the day after 22 February 2022 so that date itself is not listed.
curried.pipe(
    pk.daycount(dt.date(2022, 2, 23)),
    curried.filter(is_palindrome_date),
    curried.filter(is_ambigram_date),  # both properties must hold
    curried.map(show_date),
    curried.take(5),  # the task asks for the next five such dates
    list,
)

['08-02-2080 ↦ 08022080',
 '18-02-2081 ↦ 18022081',
 '28-02-2082 ↦ 28022082',
 '10-12-2101 ↦ 10122101',
 '20-12-2102 ↦ 20122102']

#### `daterange`

In [19]:
# month sequence - first date
# daterange is given an explicit end date; the output shows exactly five month
# starts for January to May 2022, so take(5) does not trim anything here.
curried.pipe(
    pk.daterange(dt.date(2022, 1, 1), dt.date(2022, 5, 31)),
    curried.filter(lambda d: d.day == 1),  # keep only the first day of each month
    curried.map(pk.date_to_str),
    curried.take(5),
    list,
)

['2022-01-01', '2022-02-01', '2022-03-01', '2022-04-01', '2022-05-01']

In [20]:
# month sequence - last date
curried.pipe(
    pk.daterange(dt.date(2022, 1, 1), dt.date(2022, 5, 31)),
    curried.filter(lambda d: d.day == 1),  # one representative date per month
    # swap each month's first day for that month's last date
    curried.map(lambda d: pk.last_date_of_month(d.year, d.month)),
    curried.map(pk.date_to_str),
    curried.take(5),
    list,
)

['2022-01-31', '2022-02-28', '2022-03-31', '2022-04-30', '2022-05-31']

## `sparkkit`

In [21]:
import os

from pyspark.sql import SparkSession

import onekit.sparkkit as sk

In [22]:
# Minimal single-threaded local session, sized for the tiny example data below.
spark = (
    SparkSession.builder.master("local[1]")  # run locally with one worker thread
    .appName("spark-session-docs")
    .config("spark.sql.shuffle.partitions", 1)  # one shuffle partition is enough here
    .config("spark.default.parallelism", os.cpu_count())
    .config("spark.rdd.compress", False)
    .config("spark.shuffle.compress", False)
    .config("spark.dynamicAllocation.enabled", False)
    .config("spark.executor.cores", 1)
    .config("spark.executor.instances", 1)
    .config("spark.ui.enabled", False)  # no web UI for a docs run
    .config("spark.ui.showConsoleProgress", False)  # keep progress bars out of cell output
    .getOrCreate()
)
# Only takes effect once the session exists, so the startup warnings
# shown in the output below are still printed.
spark.sparkContext.setLogLevel("ERROR")

23/11/20 08:02:47 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [23]:
# Combine three DataFrames with the same columns (x, y) into one;
# the displayed result lists the rows in argument order.
df = sk.union(
    spark.createDataFrame([dict(x=1, y=2), dict(x=3, y=4)]),
    spark.createDataFrame([dict(x=5, y=6), dict(x=7, y=8)]),
    spark.createDataFrame([dict(x=0, y=1), dict(x=2, y=3)]),
)

In [24]:
# Print the combined rows as a text table.
df.show()

+---+---+
|  x|  y|
+---+---+
|  1|  2|
|  3|  4|
|  5|  6|
|  7|  8|
|  0|  1|
|  2|  3|
+---+---+



In [25]:
# Shut down the Spark session created above now that the example is done.
spark.stop()