In [2]:
import pathlib
from dataclasses import dataclass

In [40]:
@dataclass
class DataFiles:
    """Structured datatype representation that contains all files in a selected dataset

    Attributes:
    -----------
    metdata: str
        path to metdata directory
    plate_data: list[str]
        list of plate data (parque or sqlite files)


    """

    dataset_dir: str | pathlib.Path

    # extracted files
    metadata: str = None
    plate_data: list[str] = None
    barcode: str = None

    # extracting file paths and setting into dataclass attributes
    def __post_init__(self):
        self._extract_content_files()

    def _extract_content_files(self):
        """extracts all files within given dataset folder and sets the DataFile dataclass 
        attributes

        Raises
        ------
        TypeError
            raised if dataset_dir is not a str or pathlib.Path object.
            raised if plate data is not parquet or sqlite file
        """
        accepted_plate_ext = ("parquet", "sqlite")

        # get all top level files
        if not isinstance(self.dataset_dir, (str, pathlib.Path)):
            raise TypeError(
                "dataset_dir must be a string or pathlib.Path object"
            )
        if isinstance(self.dataset_dir, str):
            self.dataset_dir = pathlib.Path(self.dataset_dir).resolve(
                strict=True
            )

        # get all files
        all_files = list(self.dataset_dir.glob("*"))

        # get data files 
        plate_data = [str(fpath.name) for fpath in all_files if fpath.suffix == ".parquet" or fpath.suffix == ".sqlite"]
        self.plate_data = plate_data

        # get metadata_dir
        meta_data_path = [str(fpath.name) for fpath in all_files if fpath.is_dir()]
        self.metadata = meta_data_path[0]

        # get barcode 
        barcode_path = [str(fpath.name) for fpath in all_files if fpath.suffix == ".txt"]
        self.barcode = barcode_path[0]

        

In [41]:
# resolve the sample dataset directory to an absolute path;
# strict=True makes this fail if the path does not exist
data_test = pathlib.Path("./datasets/standard_sqlite").resolve(strict=True)
# display the resolved path
data_test

PosixPath('/home/erikserrano/Development/CytoSnake/cytosnake/tests/functional/datasets/standard_sqlite')

In [42]:
# build the DataFiles container; file discovery runs in __post_init__
df = DataFiles(dataset_dir=data_test)

In [45]:

# show the name of the metadata directory found in the dataset
df.metadata

'metadata'