Example notebook

To do: DATA CONFIG BUILDER

Walk through a directory tree and write an index of every file found in it to the given config file, e.g. `data.yaml`.

```sh
bluprint index <dirname> <config.yaml>
```

In [1]:
import os
from pathlib import Path, PurePath, PosixPath
from omegaconf import OmegaConf

In [None]:
def index_files_in_dir(
	index_dir: str | PosixPath,
	skip_dot_files: bool = True,
) -> list[tuple[str, ...]]:
	indexed_files = []
	index_dir_part_id = len(Path(index_dir).parts)
	for root, _dirs, files in os.walk(index_dir):
		for file in files:
			file_path = Path(root) / file
			if file.startswith('.') and skip_dot_files:
				continue
			indexed_files.append(file_path.parts[index_dir_part_id:])
	return sorted(indexed_files)

In [2]:
# Exploratory walk over the test tree: print every file found under each
# directory and collect the non-dot files as tuples of path parts relative to
# `index_dir`.
indexed_files_as_parts = []
index_dir = '../tests/'
# Number of leading path components contributed by `index_dir` itself; slicing
# them off a file's parts leaves the path relative to `index_dir`.
index_dir_part_id = len(Path(index_dir).parts)
for root, _dirs, files in os.walk(index_dir):
	print(f'Root: {root}')
	for file in files:
		file_path = Path(root) / file
		relative_parts = file_path.parts[index_dir_part_id:]
		print(f'- File: {relative_parts}')
		# Dot files are printed above but not added to the index.
		if not file.startswith('.'):
			indexed_files_as_parts.append(relative_parts)

Root: ../tests/
Root: ../tests/demo
- File: ('demo', 'test_demo.py')
Root: ../tests/demo/__pycache__
- File: ('demo', '__pycache__', 'test_demo.cpython-311-pytest-7.4.3.pyc')
Root: ../tests/data_config_builder
Root: ../tests/data_config_builder/fixtures
Root: ../tests/workflow
- File: ('workflow', 'conftest.py')
- File: ('workflow', 'test_workflow.py')
Root: ../tests/workflow/snapshots
- File: ('workflow', 'snapshots', 'test.log')
Root: ../tests/workflow/fixtures
- File: ('workflow', 'fixtures', 'test1.ipynb')
- File: ('workflow', 'fixtures', 'test2.ipynb')
- File: ('workflow', 'fixtures', 'test4.ipynb')
- File: ('workflow', 'fixtures', 'workflows.yaml')
- File: ('workflow', 'fixtures', 'test3.ipynb')
Root: ../tests/workflow/__pycache__
- File: ('workflow', '__pycache__', 'conftest.cpython-311-pytest-7.4.3.pyc')
- File: ('workflow', '__pycache__', 'test_workflow.cpython-311-pytest-7.4.3.pyc')
Root: ../tests/yaml
- File: ('yaml', 'test_yaml.py')
Root: ../tests/yaml/snapshots
- File: ('y

In [None]:
# Sort the collected part tuples in place.
# NOTE(review): this mutates the list built by an earlier cell, so cells that
# look entries up by position give different results before and after it runs.
indexed_files_as_parts.sort()

In [21]:
# Display a sorted copy of the index; `sorted` leaves the original list untouched.
sorted(indexed_files_as_parts)

[('demo', '__pycache__', 'test_demo.cpython-311-pytest-7.4.3.pyc'),
 ('demo', 'test_demo.py'),
 ('workflow', '__pycache__', 'conftest.cpython-311-pytest-7.4.3.pyc'),
 ('workflow', '__pycache__', 'test_workflow.cpython-311-pytest-7.4.3.pyc'),
 ('workflow', 'conftest.py'),
 ('workflow', 'fixtures', 'test1.ipynb'),
 ('workflow', 'fixtures', 'test2.ipynb'),
 ('workflow', 'fixtures', 'test3.ipynb'),
 ('workflow', 'fixtures', 'test4.ipynb'),
 ('workflow', 'fixtures', 'workflows.yaml'),
 ('workflow', 'snapshots', 'test.log'),
 ('workflow', 'test_workflow.py'),
 ('yaml', '__pycache__', 'conftest.cpython-311-pytest-7.4.3.pyc'),
 ('yaml', '__pycache__', 'test_yaml.cpython-311-pytest-7.4.3.pyc'),
 ('yaml', 'fixtures', 'config.yaml'),
 ('yaml', 'fixtures', 'test.yaml'),
 ('yaml', 'snapshots', 'prefixed_test.yaml'),
 ('yaml', 'test_yaml.py')]

In [19]:
# Show one index entry as a tuple with every part converted through `str`.
tuple(str(part) for part in indexed_files_as_parts[5])

('workflow', 'fixtures', 'test1.ipynb')

[('demo', '__pycache__', 'test_demo.cpython-311-pytest-7.4.3.pyc'),
 ('demo', 'test_demo.py'),
 ('workflow', '__pycache__', 'conftest.cpython-311-pytest-7.4.3.pyc'),
 ('workflow', '__pycache__', 'test_workflow.cpython-311-pytest-7.4.3.pyc'),
 ('workflow', 'conftest.py'),
 ('workflow', 'fixtures', 'test1.ipynb'),
 ('workflow', 'fixtures', 'test2.ipynb'),
 ('workflow', 'fixtures', 'test3.ipynb'),
 ('workflow', 'fixtures', 'test4.ipynb'),
 ('workflow', 'fixtures', 'workflows.yaml'),
 ('workflow', 'snapshots', 'test.log'),
 ('workflow', 'test_workflow.py'),
 ('yaml', '__pycache__', 'conftest.cpython-311-pytest-7.4.3.pyc'),
 ('yaml', '__pycache__', 'test_yaml.cpython-311-pytest-7.4.3.pyc'),
 ('yaml', 'fixtures', 'config.yaml'),
 ('yaml', 'fixtures', 'test.yaml'),
 ('yaml', 'snapshots', 'prefixed_test.yaml'),
 ('yaml', 'test_yaml.py')]

In [4]:
# Rebuild a relative path from one entry's parts. `Path` accepts the parts as
# separate segments, so no manual '/' join is needed.
# NOTE(review): which entry sits at index 1 depends on whether the list has
# already been sorted in place by another cell.
Path(*indexed_files_as_parts[1])

PosixPath('demo/__pycache__/test_demo.cpython-311-pytest-7.4.3.pyc')

In [5]:
# Turn every indexed file into a `key=value` dotlist entry:
#   key   - directory names plus the file stem, joined with '.', with any '.'
#           inside a component replaced by '_' so it cannot act as a separator
#   value - the file's relative path, joined with '/'
config_dotlist = []

for index in indexed_files_as_parts:
	key_parts = [part.replace('.', '_') for part in index[:-1]]
	# The last component is the file name; only its stem goes into the key.
	key_parts.append(Path(index[-1]).stem.replace('.', '_'))
	dotted_key = '.'.join(key_parts)
	relative_path = '/'.join(index)
	config_dotlist.append(f'{dotted_key}={relative_path}')

config_dotlist

['demo.test_demo=demo/test_demo.py',
 'demo.__pycache__.test_demo_cpython-311-pytest-7_4_3=demo/__pycache__/test_demo.cpython-311-pytest-7.4.3.pyc',
 'workflow.conftest=workflow/conftest.py',
 'workflow.test_workflow=workflow/test_workflow.py',
 'workflow.snapshots.test=workflow/snapshots/test.log',
 'workflow.fixtures.test1=workflow/fixtures/test1.ipynb',
 'workflow.fixtures.test2=workflow/fixtures/test2.ipynb',
 'workflow.fixtures.test4=workflow/fixtures/test4.ipynb',
 'workflow.fixtures.workflows=workflow/fixtures/workflows.yaml',
 'workflow.fixtures.test3=workflow/fixtures/test3.ipynb',
 'workflow.__pycache__.conftest_cpython-311-pytest-7_4_3=workflow/__pycache__/conftest.cpython-311-pytest-7.4.3.pyc',
 'workflow.__pycache__.test_workflow_cpython-311-pytest-7_4_3=workflow/__pycache__/test_workflow.cpython-311-pytest-7.4.3.pyc',
 'yaml.test_yaml=yaml/test_yaml.py',
 'yaml.snapshots.prefixed_test=yaml/snapshots/prefixed_test.yaml',
 'yaml.fixtures.config=yaml/fixtures/config.yaml',
 

In [6]:
# Build an OmegaConf config from the `key=value` dotlist entries; the dots in
# each key become nesting levels, as the displayed result shows.
cfg = OmegaConf.from_dotlist(config_dotlist)
cfg

{'demo': {'test_demo': 'demo/test_demo.py', '__pycache__': {'test_demo_cpython-311-pytest-7_4_3': 'demo/__pycache__/test_demo.cpython-311-pytest-7.4.3.pyc'}}, 'workflow': {'conftest': 'workflow/conftest.py', 'test_workflow': 'workflow/test_workflow.py', 'snapshots': {'test': 'workflow/snapshots/test.log'}, 'fixtures': {'test1': 'workflow/fixtures/test1.ipynb', 'test2': 'workflow/fixtures/test2.ipynb', 'test4': 'workflow/fixtures/test4.ipynb', 'workflows': 'workflow/fixtures/workflows.yaml', 'test3': 'workflow/fixtures/test3.ipynb'}, '__pycache__': {'conftest_cpython-311-pytest-7_4_3': 'workflow/__pycache__/conftest.cpython-311-pytest-7.4.3.pyc', 'test_workflow_cpython-311-pytest-7_4_3': 'workflow/__pycache__/test_workflow.cpython-311-pytest-7.4.3.pyc'}}, 'yaml': {'test_yaml': 'yaml/test_yaml.py', 'snapshots': {'prefixed_test': 'yaml/snapshots/prefixed_test.yaml'}, 'fixtures': {'config': 'yaml/fixtures/config.yaml', 'test': 'yaml/fixtures/test.yaml'}, '__pycache__': {'conftest_cpython-3

In [7]:
# Write the generated config to disk.
# NOTE(review): 'abc.yaml' is a hard-coded name resolved against the current
# working directory; presumably a stand-in for the user-supplied config file,
# confirm before reuse.
OmegaConf.save(cfg, 'abc.yaml')

In [8]:
# cfg['workflow']['fixtures']['test1'] == 'test1.ipynb'

In [9]:
from bluprint.config import load_config_yaml, load_data_yaml

# Load a fixture config through bluprint's loader to inspect what it returns.
# NOTE(review): this rebinds `cfg`, replacing the config built from the dotlist.
cfg = load_config_yaml('test.yaml', config_dir='tests/yaml/fixtures/')
cfg

{'groups': {'g1': {'opt11': 1, 'opt12': 'a'}, 'opt2': '/b/', 'opt3': ['c', 'd']}, 'list': ['1', '2', {'3': ['/a/3a', '3b/3b2', '3c', 's3://example-bucket/path/to/object']}]}

In [10]:
# Check which Python type the loaded config holds for the '/b/' option.
type(cfg['groups']['opt2'])

str

In [11]:
# `Path.parts` ignores the trailing slash and keeps '..' as its own component.
Path('../demo/').parts

('..', 'demo')

In [12]:
# A relative file path splits into one part per component, with the file name last.
Path('../demo/somewhere/folder/data.txt').parts

('..', 'demo', 'somewhere', 'folder', 'data.txt')

In [13]:
# Separate segments passed to `Path` are joined into a single path.
Path('somewhere', 'folder', 'data.txt')

PosixPath('somewhere/folder/data.txt')