In [1]:
import os

# Record the kernel's starting directory the first time this cell runs.
# A bare os.chdir("../") is relative to the *current* directory, so re-running
# the cell would climb one more level each time and break the relative paths
# (configs, params, artifacts) that later cells rely on.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

# Same target as os.chdir("../") on a fresh kernel, but idempotent on re-run.
os.chdir(os.path.join(NOTEBOOK_START_DIR, ".."))

In [46]:
from deepClassifier.utils import create_directories, read_yaml
from deepClassifier.config import *
from dataclasses import dataclass
from pathlib import Path
from ensure import ensure_annotations
from glob import glob

In [47]:
@dataclass(frozen=True)
class DataSplitConfig:
    """Immutable settings consumed by the data-split step.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    raw_data_dir: Path    # directory that is searched for the raw input files
    train_data_dir: Path  # output directory for the training portion
    test_data_dir: Path   # output directory for the test portion
    split_ratio: float    # split proportion; NOTE(review): confirm whether this is the train or the test share


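# NOTE: leftover draft, not active code. It lists "eval_data_dir", whereas the
# dataclass above uses "test_data_dir" (and also has "split_ratio").
# DataSplitConfig = ("DataSplitConfig", ["raw_data_dir", "train_data_dir", "eval_data_dir"])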

In [60]:
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-stage config objects.

    Attributes:
        config: parsed content of the main config file.
        params: parsed content of the params file.
        secrets: parsed content of the secrets file.
    """

    @ensure_annotations
    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        secrets_filepath=SECRETS_FILE_PATH,
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: location of the main config YAML.
            params_filepath: location of the params YAML.
            secrets_filepath: location of the secrets YAML.
        """
        # Loaded in this order: config, params, secrets.
        self.config = read_yaml(path_to_yaml=config_filepath)
        self.params = read_yaml(path_to_yaml=params_filepath)
        self.secrets = read_yaml(path_to_yaml=secrets_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    @ensure_annotations
    def get_data_split_config(self) -> DataSplitConfig:
        """Build the data-split settings and create the train/test output directories.

        The raw-data location is taken from the ``data_ingestion`` section of the
        config, the output directories from its ``data_split`` section, and the
        ratio from ``SPLIT_RATIO`` in the params.

        Returns:
            A ``DataSplitConfig`` whose directory fields are ``Path`` objects.
        """
        split_section = self.config.data_split
        train_dir = split_section.train_data_dir
        test_dir = split_section.test_data_dir

        # The output directories are created as a side effect of building the config.
        create_directories([train_dir, test_dir])

        return DataSplitConfig(
            raw_data_dir=Path(self.config.data_ingestion.root_dir),
            train_data_dir=Path(train_dir),
            test_data_dir=Path(test_dir),
            split_ratio=self.params.SPLIT_RATIO,
        )

In [61]:
# Load the YAML settings using the default file locations.
config = ConfigurationManager()

# Despite its name, this holds the data-split settings; the name is kept
# because a later cell refers to it.
base_model_config = config.get_data_split_config()

[2022-09-16 22:24:45,090: INFO: common]: yaml file: configs\config.yaml loaded successfully
[2022-09-16 22:24:45,093: INFO: common]: yaml file: params.yaml loaded successfully
[2022-09-16 22:24:45,096: INFO: common]: yaml file: configs\secrets.yaml loaded successfully
[2022-09-16 22:24:45,097: INFO: common]: created directory at: artifacts
[2022-09-16 22:24:45,099: INFO: common]: created directory at: artifacts/data_split/train
[2022-09-16 22:24:45,100: INFO: common]: created directory at: artifacts/data_split/test


In [70]:
class DataSplit:
    """Data-split step driven by a ``DataSplitConfig``.

    At present the step only discovers the raw files and reports on them;
    no train/test split is performed yet.
    """

    @ensure_annotations
    def __init__(self, config: DataSplitConfig):
        """Store the settings used by this step.

        Args:
            config: data-split settings (raw/train/test directories and ratio).
        """
        self.config = config

    def split_data(self):
        """Find the raw files and print how many were found plus the first and last path.

        Files are looked up three directory levels below ``config.raw_data_dir``.
        When nothing matches, a short message is printed instead of failing with
        an ``IndexError``.

        Returns:
            None.
        """
        pattern = f"{self.config.raw_data_dir}/*/*/*"

        # glob() makes no ordering guarantee; sort so the report (and any later
        # split built on this list) is deterministic across platforms.
        paths = sorted(glob(pattern))

        if not paths:
            # Guard: indexing paths[0] / paths[-1] on an empty list would raise.
            print(0, f"no files matched {pattern}")
            return

        print(len(paths), paths[0], paths[-1])

In [71]:
# Build the data-split step from the settings prepared in the earlier cell and
# run it; split_data() prints its summary rather than returning a value.
obj = DataSplit(config=base_model_config)
obj.split_data()

24998 artifacts\data_ingestion\PetImages\Cat\0.jpg artifacts\data_ingestion\PetImages\Dog\9999.jpg
