## This notebook is an adjunct to the design discussion in [How to Refactor Armory](https://docs.google.com/document/d/1MJwGz7UzuVOhbEEPqij4GmGGtK2QV46b9N-sY6xIFOM/edit?usp=sharing) and will not make sense without it.

In [24]:
# Inner Loop Example

from pathlib import Path

def length_of(path):
    """Return the number of lines in the text file at ``path``.

    Args:
        path: anything accepted by the built-in ``open`` (str or path-like).

    Returns:
        The line count as an int; an empty file yields 0.

    Raises:
        OSError: if the file cannot be opened (propagated from ``open``).
    """
    # Use a context manager so the handle is closed even if reading fails,
    # and count lazily instead of materialising every line in a list.
    with open(path) as handle:
        return sum(1 for _ in handle)

def sum_lines_dir(dir):
    """Total the line counts of every entry matched by ``dir.glob('*')``.

    ``dir`` must provide a ``glob`` method (e.g. a ``pathlib.Path``); each
    match is handed to ``length_of`` and the results are added together.
    An empty directory yields 0.
    """
    return sum(length_of(entry) for entry in dir.glob('*'))

# Demo input: an absolute, machine-specific directory, so the printed total
# depends on the host this cell runs on.
# NOTE: the module-level name `dir` shadows the built-in dir() for the rest
# of the kernel session.
dir = Path("/etc/apt/sources.list.d")
print(f"sum_lines_dir({dir})  = {sum_lines_dir(dir)}")

sum_lines_dir(/etc/apt/sources.list.d)  = 156


In [25]:
# Inner Loop Made Outer

import random

def sum_line_count(files: list):
    """Return the combined line count of every path in ``files``.

    Each element is passed to ``length_of``; an empty list yields 0.
    """
    total = 0
    for path in files:
        total += length_of(path)
    return total

# Only `Path` is imported in this notebook (`from pathlib import Path`); the
# bare module name `pathlib` is never bound, so reference `Path` directly to
# keep this cell working after Restart & Run All.
files = list(Path('/etc/apt/sources.list.d').glob('*'))

# we want the whole batch
print("sum line counts of all files", sum_line_count(files))

# now we want to process only a little sample of the files
# (cap k at the population size: random.sample raises ValueError when k is
# larger than the number of available files)
for file in random.sample(files, k=min(3, len(files))):
    print("small sample line count for", file, sum_line_count([file]))

sum line counts of all files 156
small sample line count for /etc/apt/sources.list.d/mmstick76-ubuntu-alacritty-focal.list 2
small sample line count for /etc/apt/sources.list.d/openshot_developers-ubuntu-ppa-bionic.list.save 2
small sample line count for /etc/apt/sources.list.d/kxstudio-debian.gcc5.list.dpkg-new 8


In [26]:
# Refactor Ontology

from dataclasses import dataclass

class Model:
    """Toy stand-in for a trainable model in the ontology sketch."""

    def train(self, x, y):
        """Return the product ``x * y``."""
        product = x * y
        return product

class Dataset:
    """Toy stand-in for a dataset in the ontology sketch."""

    def training(self):
        """Return the training data: the integers 0 through 3 as a ``range``."""
        split = range(0, 4)
        return split

class Weights:
    """Placeholder type for trained weights; defines no attributes or methods."""

@dataclass
class Run:
    """Pairs a model with a dataset for the refactoring sketch.

    Attributes:
        model: the Model whose ``train`` method is invoked.
        dataset: the Dataset whose ``training`` data is fed to the model.

    NOTE(review): ``pipeline`` also assigns ``self.weights``, which is not a
    declared dataclass field.
    """
    model: Model
    dataset: Dataset

    def train(self) -> Weights:
        """Pass the dataset's training data to the model's ``train`` and return the result.

        NOTE(review): ``Model.train`` as defined in this notebook takes
        ``(x, y)``, so this single-argument call raises ``TypeError`` with
        that class -- confirm the intended signature.
        """
        fit = self.model.train
        return fit(self.dataset.training())

    def pipeline(self, model: Model, dataset: Dataset):
        """Load weights for ``(model, dataset)``, train if none were loaded, then store them.

        NOTE(review): load/store use the ``model``/``dataset`` arguments while
        training (via ``self.train``) uses ``self.model``/``self.dataset``.
        ``Weights.load`` and ``Weights.store`` are not defined on the
        ``Weights`` class in this notebook -- confirm where they come from.
        """
        loaded = Weights.load(model, dataset)
        # Any falsy load result falls back to a fresh training run.
        self.weights = loaded or self.train()
        Weights.store(model, dataset, self.weights)