In [None]:
#|default_exp utils

# General utilities

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|export

import os
import re
from contextlib import contextmanager
from pathlib import Path
from typing import Dict, List, Tuple, Union

import pandas as pd
from fastcore.basics import patch

In [None]:
#|export
from collections import Counter

def most_common(lst):
    """Return the element that occurs most often in `lst`.

    Raises `IndexError` when `lst` is empty.
    """
    tally = Counter(lst)
    # most_common(1) gives a list holding at most one (element, count) pair.
    top_entries = tally.most_common(1)
    winner = top_entries[0]
    return winner[0]

In [None]:
#|hide
# Each pair is (input collection, expected most common element).
for sample, expected in [
    ([1,1,1,2,2,3,3,3,3,4,4], 3),
    ([1,1,1,2,2,3,3,3,4,4], 1),  # 1 and 3 both occur three times; 1 is expected
    ([0], 0),
]:
    test_eq(most_common(sample), expected)

In [None]:
#|export

@patch
def ls_sorted(self:Path):
    "Like `ls`, but entries are ordered by the integer value of their name with the suffix removed"
    def _numeric_stem(entry):
        # e.g. `10.png` -> 10; `int` raises ValueError when the stem is not a number.
        return int(entry.stem)

    return self.ls().sorted(key=_numeric_stem)

In [None]:
#|export

# ref: https://dev.to/teckert/changing-directory-with-a-python-context-manager-2bj8
@contextmanager
def context_chdir(path: Union[Path, str]):
    """Run the body of the `with` block with `path` as the working directory.

    The working directory that was active on entry is restored on exit,
    including when the body (or the initial `os.chdir`) raises.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(path)
        yield
    finally:
        # Always go back to where we started.
        os.chdir(previous_dir)

In [None]:
#|hide
import tempfile

# Use a throwaway directory instead of a fixed system path so the check does
# not depend on a particular directory existing on the machine.
origin = os.getcwd()
with tempfile.TemporaryDirectory() as tmp_dir:
    with context_chdir(tmp_dir):
        # Compare resolved paths: the temp dir may be reached through a symlink.
        test_eq(os.path.realpath(os.getcwd()), os.path.realpath(tmp_dir))
    # We are back in the original directory before the temp dir is removed.
    test_eq(os.getcwd(), origin)

In [None]:
#|export
from datetime import datetime

def generate_time_id(dt=None):
    """Generate a string id with one-second resolution from `dt`, or from the current time.

    The id is the ISO 8601 text of the datetime with the fractional seconds
    removed and every ':' replaced by '-', e.g. '2022-01-01T01-01-01'.
    When `dt` carries a UTC offset it is kept (rendered as e.g. '+00-00').
    """
    moment = dt or datetime.now()
    # Remove only the fractional-seconds part. Cutting at the last '.' would
    # also discard the UTC offset that follows it on timezone-aware datetimes,
    # making the id shape depend on whether the microsecond field is zero.
    without_fraction = re.sub(r'\.\d+', '', moment.isoformat(), count=1)
    return without_fraction.replace(':', '-')

In [None]:
#|hide

# A fixed datetime maps to a predictable id.
fixed_moment = datetime(2022, 1, 1, 1, 1, 1)
test_eq(generate_time_id(fixed_moment), '2022-01-01T01-01-01')

# Called without an argument, the id has the same shape: 19 characters, 4 dashes.
time_id = generate_time_id()
test_eq((len(time_id), time_id.count('-')), (19, 4))

## Nested dictionary utils

In [None]:
#|export

def flatten_dict(d: Dict, sep='.') -> Dict:
    """Collapse a nested dict into a single level, joining nested keys with `sep`.

    The flattening itself is delegated to `pandas.json_normalize`; when that
    produces no records an empty dict is returned.
    """
    frame = pd.json_normalize(d, sep=sep)
    rows = frame.to_dict(orient='records')
    return rows[0] if rows else {}

def unflatten_dict(d: Dict, sep='.') -> Dict:
    """Rebuild a nested dict from a flat one whose keys are `sep`-joined paths.

    Every key is split on `sep`; all parts but the last name the nested dicts
    to walk through (created when absent) and the last part receives the value.
    """
    nested = {}
    for flat_key, value in d.items():
        *parents, leaf = flat_key.split(sep)
        cursor = nested
        for part in parents:
            if part not in cursor:
                cursor[part] = {}
            cursor = cursor[part]
        cursor[leaf] = value
    return nested

In [None]:
#|hide

# Dicts with no nesting (including the empty one) should come back unchanged
# from both conversions.
noop_cases = [{}, {'a': 1, 'b': 2}]
for d in noop_cases:
    for convert in (flatten_dict, unflatten_dict):
        test_eq(convert(d), d)

In [None]:
#|hide

# Fixture: a config-like dict nested up to three levels deep.
nested_dict = {
    'dataset_path': 'a/b/c/d',
    'train': {
        'lr': 1e-4,
        'n_epoch': 10,
        'early_stop': {
            'patience': 10,
            'metric': 'val_loss',
        }
    },
    'wandb': {
        'username': 'bdsaglam',
        'project': 'project-x',
    }
}

# The same data in flat form, with nested keys joined by '/'.
flat_dict = {
    'dataset_path': 'a/b/c/d',
    'train/lr': 0.0001,
    'train/n_epoch': 10,
    'train/early_stop/patience': 10,
    'train/early_stop/metric': 'val_loss',
    'wandb/username': 'bdsaglam',
    'wandb/project': 'project-x',
}

# Each helper should map one fixture onto the other.
test_eq(flatten_dict(nested_dict, sep='/'), flat_dict)
test_eq(unflatten_dict(flat_dict, sep='/'), nested_dict)

In [None]:
#|export

class NestedDict(dict):
    """A `dict` with helpers for reading and writing values along a path of keys.

    A path is either a list/tuple of keys or a single string whose parts are
    separated by `sep`.
    """

    def __init__(self, data, sep='.'):
        """Initialise from `data` (passed to `dict`); `sep` is used to split string paths."""
        super().__init__(data)
        self.sep = sep

    def _split_path(self, keys: Union[str, List, Tuple]) -> List:
        """Return `keys` as a new list, splitting a string path on `self.sep`."""
        if isinstance(keys, str):
            return keys.split(self.sep)
        # Copy so the caller's sequence is never modified; this also accepts tuples.
        return list(keys)

    def at(self, keys: Union[str, List, Tuple], default=None):
        """Return the value reached by following `keys`, or `default`.

        `default` is returned when a key is missing or when the path runs
        through a value that is not a mapping.
        """
        from collections.abc import Mapping

        node = self
        for key in self._split_path(keys):
            # Only descend through mappings: `in` on a str or list value would
            # test for a substring/element rather than a key.
            if not isinstance(node, Mapping) or key not in node:
                return default
            node = node[key]
        return node

    def set(self, keys: Union[str, List, Tuple], value):
        """Store `value` at the location named by `keys`.

        Intermediate dicts are created for any missing key along the way.
        Raises `IndexError` when `keys` is empty.
        """
        path = self._split_path(keys)
        last_key = path.pop()
        node = self
        for key in path:
            if key not in node:
                node[key] = dict()
            node = node[key]
        node[last_key] = value

    def flat(self) -> Dict:
        """Return a flattened copy of the data, with nested keys joined by `self.sep`."""
        return flatten_dict(self, sep=self.sep)

    @classmethod
    def from_flat_dict(cls, data, sep='.'):
        """Build an instance from a flat dict whose keys are `sep`-joined paths."""
        # Pass `sep` on to the instance so `at`, `set` and `flat` use the same
        # separator the flat keys were written with.
        return cls(unflatten_dict(data, sep=sep), sep=sep)
     

In [None]:
#|hide
from copy import deepcopy

# Wrap a deep copy under its own name: the `set` calls below write into the
# nested dicts, and the plain `nested_dict` fixture should stay untouched so
# the cells that use it can be re-run.
config = NestedDict(deepcopy(nested_dict), sep='.')

# Reading: string paths and key sequences address the same values.
test_eq(config.at('wandb'), config['wandb'])
test_eq(config.at(['wandb']), config['wandb'])
test_eq(config.at('wandb.username'), 'bdsaglam')
test_eq(config.at(['train', 'lr']), config['train']['lr'])

# Missing paths give the default (None unless specified).
test_eq(config.at('a.b.c'), None)
test_eq(config.at('train.non-existing-field'), None)
test_eq(config.at('train.non-existing-field', 0), 0)

# Writing: overwrite existing values at the top level and one level down.
config.set('dataset_path', '/newpath')
test_eq(config.at('dataset_path'), '/newpath')
config.set('train.lr', 1)
test_eq(config.at('train.lr'), 1)

# Writing under a key that does not exist yet creates the intermediate dict.
config.set('train.optimizer.name', 'adam')
config.set('train.optimizer.momentum', 0.9)
test_eq(config.at('train.optimizer.name'), 'adam')
test_eq(config.at('train.optimizer.momentum'), 0.9)

In [None]:
#|hide
# Keys joined by '/' should expand into nested dicts; plain keys stay as they are.
flat_source = {'a/b/c': 1, 'd': 2}
nested_dict = NestedDict.from_flat_dict(flat_source, sep='/')
expected_nesting = {'a': {'b': {'c': 1}}, 'd': 2}
test_eq(dict(nested_dict), expected_nesting)

In [None]:
#|export

def split_camel_case(input_str):
    """Split a camelCase / PascalCase string into a list of its words.

    A word ends where an ASCII lowercase letter is followed by an uppercase
    one, where an uppercase letter is followed by an uppercase-then-lowercase
    pair (so "HTTPServer" gives "HTTP", "Server"), or at the end of the string.
    An empty string gives an empty list.
    """
    word_pattern = '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)'
    # The pattern has no capturing groups, so findall returns the whole matches.
    return re.findall(word_pattern, input_str)

In [None]:
# Each pair is (input string, expected list of words).
for text, expected_words in [
    ("camelCase", ["camel", "Case"]),
    ("CamelCase", ["Camel", "Case"]),
    ("camel", ["camel"]),
    ("Camel", ["Camel"]),
    ("", []),
    ("snake_case", ["snake_case"]),  # no case boundary, so nothing is split
]:
    test_eq(split_camel_case(text), expected_words)

In [None]:
#|hide
# Run nbdev's export step; presumably this writes the cells marked `#|export`
# to the module named by `#|default_exp` at the top (`utils`) — see nbdev docs.
import nbdev; nbdev.nbdev_export()