In [71]:
import yaml
import pathlib as pl
import queue
import enum
import dataclasses

refcon_repo = pl.Path("/home/ebertp/work/code/cubi/reference-container")
refcon_file = refcon_repo.joinpath("config", "ref_container", "ncbi-mm11-GRCm39_v0.yaml")

DIR_RES = pl.Path("results")
DIR_PROC = pl.Path("proc")


class ReferenceProvider(enum.Enum):
    ftp = 0
    FTP = 0
    sftp = 1
    SFTP = 1
    aws = 2
    amazon = 2
    AWS = 2
    gcp = 3
    google = 3
    gcloud = 3
    GCP = 3
    local = 100
    LOCAL = 100
    localhost = 100
    module = 1000
    MODULE = 1000


class GetterOperation(enum.Enum):
    download = 0
    load = 0
    LOAD = 0
    rename = 1
    RENAME = 1
    decompress = 2
    inflate = 2
    DECOMPRESS = 2
    extract = 3
    EXTRACT = 3
    compress = 4
    deflate = 4
    COMPRESS = 4
    derive = 5
    DERIVE = 5
    module = 6
    MODULE = 6

    
class ShellBuilder:
    pass
    
    


class ReferenceFile:
    __slots__ = (
        "container",
        "source_num",
        "provider",
        "uri",
        "parent",
        "getter",
        "name",
        "alias1",
        "alias2",
        "local_path",
        "shell"
    )
    
    def __init__(self, file_spec, container, source_num, provider, uri, no_payload=False):
        self.container = container
        transformation = self._split_file_spec(file_spec)
        self._parse_transformation(transformation, provider, uri)
        if transformation is not None:
            self.parent = None
            self.shell = None
        else:
            self.parent = None
            self.shell = None
        if no_payload:
            self.local_path = DIR_PROC.joinpath(container, self.name)
        else:
            self.local_path = DIR_RES.joinpath(container, "payload", self.name)
        return None

    def __str__(self):
        info_string = f"Reference file: {self.name}"
        return info_string
        
    def _split_file_spec(self, file_spec):
        if isinstance(file_spec[-1], dict):
            transformation = file_spec[-1]
            identifier = file_spec[:-1]
        else:
            transformation = None
            identifier = file_spec
        self.name = identifier[0]
        try:
            self.alias1 = identifier[1]
        except IndexError:
            self.alias1 = None
        try:
            self.alias2 = identifier[2]
        except IndexError:
            self.alias2 = None
        return transformation
    
    def _parse_transformation(self, transformation_spec, provider, uri):
        if provider == ReferenceProvider.MODULE:
            self.getter = GetterOperation.MODULE
        elif transformation_spec is None:
            self.getter = GetterOperation.LOAD
        else:
            transform_op = list(transformation_spec.keys())[0]
            transform_info = transformation_spec[transform_op]
            self.getter = GetterOperation[transform_op]
            if self.getter = GetterOperation.RENAME:
            
        
            

class ReferenceContainer:
    __slots__ = (
        "name",
        "version",
        "full_name",
        "id",
        "wd",
        "base_image",
        "labels",
        "sources",
        "payload"
    )

    def __init__(self, work_dir, config_record):
        self.name = config_record["name"]
        self.version = int(config_record["version"])
        self.full_name = f"{self.name}_v{self.version}"
        self.id = self.full_name
        self.wd = work_dir
        self.base_image = config_record["base_image"]
        self.labels = config_record["labels"]
        self._parse_sources_listing(config_record["sources"])
        self.payload = [source_file.local_path for source_file in self.sources]
        return None
    
    def _parse_sources_listing(self, data_sources):
        
        prio_sources = queue.PriorityQueue()
        for source_num, data_source in enumerate(data_sources, start=1):
            provider = ReferenceProvider[data_source["provider"]]
            if provider == ReferenceProvider.LOCAL:
                uri = "localhost"
            else:
                uri = data_source["prefix"]
            prio_sources.put(
                (
                    provider.value,
                    (
                        source_num,
                        provider,
                        uri,
                        data_source["files"]
                    )
                ) 
            )
            
        all_source_files = []
        while 1:
            try:
                prio, source_spec = prio_sources.get_nowait()
                sources = self._parse_source_spec(*source_spec)
                all_source_files.extend(sources)
            except queue.Empty:
                break
        self.sources = all_source_files
        return

    def _parse_source_spec(self, num, provider, location, files):
        
        source_files = []
        for file_spec in files:
            ref_file = ReferenceFile(
                file_spec,
                self.id,
                num, provider, location
            )
            source_files.append(ref_file)
            print(dir(ref_file))
            if ref_file.parent is not None:
                parent_ref_file = ReferenceFile(
                    [ref_file.parent],
                    self.id,
                    num, provider, location,
                    no_payload=True
                )
                
        return source_files
        
    
    def get_base_image(self, as_path=False):
        if as_path:
            rel_path = pl.Path("container", f"{self.base_image}.sif")
            base_image = self.wd.joinpath(rel_path)
        else:
            base_image = self.base_image
        return base_image


content = yaml.load(open(refcon_file, "rb"), Loader=yaml.SafeLoader)

for key, values in content.items():
    if key.startswith("metadata_"):
        refcon = ReferenceContainer(pl.Path(".").resolve(), values)



GetterOperation.decompress
['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '_parse_transformation', '_split_file_spec', 'alias1', 'alias2', 'container', 'getter', 'local_path', 'name', 'parent', 'provider', 'shell', 'source_num', 'uri']
GetterOperation.rename
['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '_parse_transformation', '_split_file_spec', 'alias1', 'alias2', 'container', 'getter', 'local_path', 'name', 'parent', 'provi