In [None]:
#|default_exp utils

# General utilities

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|export

import json
import os
import random
import re
from contextlib import contextmanager
from operator import eq
from pathlib import Path
from typing import Dict, List, Tuple, Union, Iterable, TypeVar, Generator

import numpy as np
import pandas as pd
from fastcore.basics import patch

In [None]:
#|export

def set_seed(seed):
    """Seed NumPy's global RNG and Python's `random` module from one integer.

    NumPy receives `seed` reduced modulo ``2**32 - 1``; `random` receives `seed`
    unchanged.
    """
    numpy_seed = seed % (2**32 - 1)
    np.random.seed(numpy_seed)
    random.seed(seed)

In [None]:
#|hide
# Seed NumPy and `random` so that later cells drawing random numbers start from a fixed state.
set_seed(42)

In [None]:
#|export
from collections import Counter

def most_common(lst):
    """Return the element that occurs most often in `lst`.

    When several elements share the highest count, the one encountered first wins.
    An empty collection raises `IndexError`.
    """
    ranked = Counter(lst).most_common(1)  # at most one (element, count) pair
    element, _count = ranked[0]
    return element

In [None]:
#|hide
# Clear winner: 3 occurs four times.
test_eq(most_common([1,1,1,2,2,3,3,3,3,4,4]), 3)
# Tie between 1 and 3 (three occurrences each): the element seen first is returned.
test_eq(most_common([1,1,1,2,2,3,3,3,4,4]), 1)
# A single-element collection returns that element.
test_eq(most_common([0]), 0)

In [None]:
#|export

@patch
def ls_sorted(self:Path):
    """List the directory's entries ordered by the integer value of their names.

    The sort key is each entry's name with its final suffix removed, converted with
    `int`, so `2.txt` sorts before `10.txt`. An entry whose name (minus suffix) is not
    an integer literal makes `int` raise `ValueError`.
    """
    # `Path.ls` is not a pathlib method: fastcore patches it onto `Path` when
    # `fastcore.xtras` is imported. Import it here so this method does not rely on
    # some other module having triggered that patch first.
    import fastcore.xtras  # noqa: F401
    return self.ls().sorted(key=lambda f: int(f.with_suffix('').name))

In [None]:
#|export

# ref: https://dev.to/teckert/changing-directory-with-a-python-context-manager-2bj8
@contextmanager
def context_chdir(path: Union[Path, str]):
    """Temporarily switch the working directory to `path`.

    The directory that was current on entry is restored when the block exits,
    including when the body (or the switch itself) raises.
    """
    previous_cwd = Path.cwd()
    try:
        os.chdir(path)
        yield
    finally:
        os.chdir(previous_cwd)

In [None]:
#|hide
import tempfile

# Use a throwaway directory rather than a fixed system path such as '/opt', which
# does not exist on every platform.
origin = os.getcwd()
with tempfile.TemporaryDirectory() as tmp_dir:
    with context_chdir(tmp_dir):
        # Compare resolved paths: the temporary directory may sit behind a symlink.
        test_eq(Path(os.getcwd()).resolve(), Path(tmp_dir).resolve())
# The previous working directory must be restored once the context exits.
test_eq(os.getcwd(), origin)

In [None]:
#|export
from datetime import datetime

def generate_time_id(dt=None):
    """Generate a string id of the form ``YYYY-MM-DDTHH-MM-SS`` from a datetime.

    Args:
        dt: The datetime to format. When omitted (or falsy), ``datetime.now()`` is used.

    Returns:
        str: The ISO rendering of ``dt`` truncated to whole seconds, without any UTC
        offset, and with every ``:`` replaced by ``-``.
    """
    moment = dt or datetime.now()
    # Strip the UTC offset up front. Cutting at the '.' alone only removes the offset
    # when a fractional part is present, so an aware datetime with zero microseconds
    # would otherwise keep a trailing "+HH-MM" and break the fixed 19-character shape.
    naive_iso = moment.replace(tzinfo=None).isoformat()
    # Drop the fractional seconds (if any), then swap the colons for dashes.
    return naive_iso.split('.', 1)[0].replace(':', '-')

In [None]:
#|hide

# A fixed datetime maps to a known id: the colons of the time part become dashes.
test_eq(generate_time_id(datetime(2022, 1, 1, 1, 1, 1)), '2022-01-01T01-01-01')

# With no argument the current time is used, so only the shape of the id can be checked.
time_id = generate_time_id()
# 'YYYY-MM-DDTHH-MM-SS' is 19 characters long ...
test_eq(len(time_id), 19)
# ... and contains four dashes: two in the date and two replacing the colons.
test_eq(time_id.count('-'), 4)

In [None]:
#|export

T = TypeVar("T")


def chunk_random(lst: List[T], min_chunk: int = 2, max_chunk: int = 4) -> Generator[List[T], None, None]:
    """
    Lazily splits a list into consecutive, random-sized chunks.

    Chunk sizes are drawn with `random.randint`, so the split depends on the state of
    Python's global `random` generator.

    Args:
        lst (list): The list to be split into chunks.
        min_chunk (int, optional): The minimum size of each chunk. Defaults to 2.
        max_chunk (int, optional): The maximum size of each chunk. Defaults to 4.

    Yields:
        list: The next chunk (a slice) of the original list.

    Notes:
        * A non-empty list shorter than `min_chunk` is yielded once, unchanged, as the
          only chunk. An empty list yields nothing.
        * Once chunking has started, a trailing remainder shorter than `min_chunk` is
          dropped, so the chunks may not cover every element of `lst`.

    """
    # Not enough elements for even one regular chunk: hand the list back as-is.
    if len(lst) < min_chunk:
        # This is a generator function, so the list has to be yielded; a
        # `return [lst]` would only attach the value to StopIteration and the
        # caller would receive no chunk at all.
        if len(lst) > 0:
            yield lst
        return

    start = 0  # index of the first element not yet placed in a chunk
    while start < len(lst):
        remaining = len(lst) - start
        if remaining < min_chunk:
            # The leftover tail is too short to form a chunk and is discarded.
            break
        # Never ask for more elements than are left.
        chunk_size = random.randint(min_chunk, min(max_chunk, remaining))
        yield lst[start : start + chunk_size]
        start += chunk_size


In [None]:
#|hide
# Chunk sizes come from `random`, so the exact split depends on the RNG state.
# In the recorded output 23 is missing: a remainder shorter than `min_chunk` is dropped.
list(chunk_random(list(range(24))))

[[0, 1, 2, 3],
 [4, 5],
 [6, 7],
 [8, 9, 10, 11],
 [12, 13, 14],
 [15, 16],
 [17, 18],
 [19, 20],
 [21, 22]]

In [None]:
#|export

def jprint(obj, indent=2, **kwargs):
    """Print `obj` serialized as JSON with the given `indent`.

    Any extra keyword arguments are forwarded to `print` (for example `file=` or `end=`).
    """
    rendered = json.dumps(obj, indent=indent)
    print(rendered, **kwargs)

In [None]:
# Nested dicts are rendered with the default two-space indentation per level.
jprint({"a": 1, "b": {"c": 2, "d": {"e": 3, "f": 4}}})

{
  "a": 1,
  "b": {
    "c": 2,
    "d": {
      "e": 3,
      "f": 4
    }
  }
}


In [None]:
#|export

def is_in(target, collection: Iterable, eq_fn=eq) -> bool:
    """Tell whether any item of `collection` matches `target` according to `eq_fn`.

    `eq_fn` is called as ``eq_fn(item, target)`` and defaults to `operator.eq`.
    Iteration stops at the first match.
    """
    return any(eq_fn(candidate, target) for candidate in collection)

In [None]:
#|export

def split_camel_case(input_str):
    """Split a camelCase / PascalCase string into its words.

    A word ends where a lowercase letter is followed by an uppercase one, where an
    uppercase letter is followed by an uppercase-then-lowercase pair, or at the end
    of the string. An empty string gives an empty list.
    """
    word_pattern = '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)'
    # The pattern has no capturing groups, so findall returns each full match.
    return re.findall(word_pattern, input_str)

In [None]:
# A lower-to-upper transition marks a word boundary.
test_eq(split_camel_case("camelCase"), ["camel", "Case"])
test_eq(split_camel_case("CamelCase"), ["Camel", "Case"])
# Single words come back as a one-element list.
test_eq(split_camel_case("camel"), ["camel"])
test_eq(split_camel_case("Camel"), ["Camel"])
# The empty string yields no words.
test_eq(split_camel_case(""), [])
# Underscores are not treated as boundaries.
test_eq(split_camel_case("snake_case"), ["snake_case"])

In [None]:
#|export

def print_dict_schema(d, indent=0):
    """
    Print an indented outline of a nested dictionary: one line per key with the
    type name of its value.

    Nested dictionaries are expanded one level deeper. For a list value only its
    first element is shown, labelled ``[0]``. A non-dict input is printed as
    ``str(d)`` at the current indentation.

    :param d: The dictionary (or leaf value) to print.
    :param indent: Current indentation level (used internally for recursion).
    """
    pad = '  ' * indent
    if not isinstance(d, dict):
        print(pad + str(d))
        return

    for key, value in d.items():
        print(pad + str(key) + " " + f"({type(value).__name__})")
        if isinstance(value, dict):
            print_dict_schema(value, indent + 1)
        elif isinstance(value, list):
            # Only the first element is inspected; an empty list prints nothing more.
            for index, item in enumerate(value[:1]):
                print('  ' * (indent + 1) + f'[{index}]' + " " + f"({type(item).__name__})")
                # Dict items are expanded; anything else hits the non-dict branch
                # above and is printed as str(item) at the same depth.
                print_dict_schema(item, indent + 2)

In [None]:
# Sample input covering the handled cases: scalar values, nested dicts, and a list of dicts.
obj = {
    "a": 1,
    "b": {
        "c": 2,
        "d": {
            "e": 3,
            "f": 4
        }
    },
    "g": [
        {
            "h": 5,
            "i": 6
        }
    ]
}

# Each key is printed with its value's type name; nested levels are indented by two spaces.
print_dict_schema(obj)

a (int)
b (dict)
  c (int)
  d (dict)
    e (int)
    f (int)
g (list)
  [0] (dict)
    h (int)
    i (int)


In [None]:
#|hide
# Run nbdev's export step; per nbdev convention this writes the cells marked for export
# into the module named by the notebook's default_exp directive.
import nbdev; nbdev.nbdev_export()