In [3]:
import os

In [None]:
%pwd

In [5]:
# NOTE(review): presumably moves from the notebook's folder up to the project root so
# that relative config paths resolve — confirm. The path is relative, so this cell is
# not idempotent: re-running it moves the working directory up one more level each time.
os.chdir("../")

In [None]:
%pwd

# Entity

In [7]:
from dataclasses import dataclass
from pathlib import Path

In [8]:
@dataclass(frozen=True)
class DataIngestionEntity:
    """Immutable bundle of settings consumed by the data-ingestion step.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    """
    # Encrypted archive; handed to ``openssl enc -d`` as its ``-in`` file.
    encrypted_dataset: Path
    # Destination of the decrypted archive (``-out``); later opened with ``tarfile``.
    zip_dataset: Path
    # Directory the decrypted archive is extracted into.
    unzip_dir: Path
    # Passphrase given to OpenSSL to decrypt the dataset.
    password: str

In [9]:
from pathlib import Path
from lesionSeg.constant import *
from lesionSeg.Utils.common import read_yaml, create_directory

In [10]:
class ConfiguratioManager:
    """Load the project's params and config YAML files and build stage settings."""

    def __init__(self, parmas_file = PARAMS_FILE_PATH, config_file = CONFIG_FILE_PATH):
        """Read both YAML files and create the artifact root directory.

        Args:
            parmas_file: Location of the parameters YAML file.
            config_file: Location of the configuration YAML file.
        """
        loaded_params = read_yaml(parmas_file)
        loaded_config = read_yaml(config_file)
        self.parmas = loaded_params
        self.config = loaded_config

        # The config's ``artifact_root`` entry is created up front.
        create_directory([loaded_config.artifact_root])

    def data_ingestion_config(self) -> DataIngestionEntity:
        """Build the settings object for the data-ingestion stage.

        Reads the ``data_ingestion`` section of the config, loads the YAML file named
        by its ``secret_dir`` entry and takes ``dataset_password`` from it.

        Returns:
            DataIngestionEntity: Paths wrapped in ``Path`` plus the dataset password.
        """
        section = self.config.data_ingestion
        secrets = read_yaml(Path(section.secret_dir))

        encrypted_path = Path(section.encrypted_dataset)
        archive_path = Path(section.zip_dataset)
        extract_dir = Path(section.unzip_dir)

        return DataIngestionEntity(
            encrypted_dataset=encrypted_path,
            zip_dataset=archive_path,
            unzip_dir=extract_dir,
            password=secrets.dataset_password,
        )


In [11]:
import shutil
import sys
import tarfile
from lesionSeg.Exception.exception import CustomeException
from lesionSeg.logging import logger
import subprocess

In [14]:
class DataIngestion:
    """Decrypt, extract and rename the dataset described by a ``DataIngestionEntity``."""

    def __init__(self, config: DataIngestionEntity):
        """Store the ingestion settings.

        Args:
            config: Paths and password of the encrypted dataset.
        """
        self.config = config

    def rename_folder(self, unzip_dir):
        """Rename the first directory in ``unzip_dir`` containing "ATLAS" to ``data_ingestion``.

        Only directories are considered, so an archive file that also has "ATLAS" in
        its name and lives in the same folder is never renamed by mistake. Nothing
        happens when no matching directory exists.

        Args:
            unzip_dir: Directory that holds the extracted dataset.

        Raises:
            OSError: If the rename itself fails (e.g. the target already exists and
                is not empty).
        """
        for entry in os.listdir(unzip_dir):
            source = os.path.join(unzip_dir, entry)
            if "ATLAS" in entry and os.path.isdir(source):
                os.rename(source, os.path.join(unzip_dir, "data_ingestion"))
                logger.info(f"File Name Changed from {entry} to data_ingestion")
                break

    def decrypt_dataset(self, unzip_dir, zip_file):
        """Decrypt ``config.encrypted_dataset`` into ``zip_file`` using OpenSSL.

        Runs ``openssl enc -aes-256-cbc -md sha256 -d -a`` and fails loudly when
        OpenSSL exits with a non-zero status instead of logging a false success.

        Args:
            unzip_dir: Unused here; kept for backward compatibility with callers.
            zip_file: Path the decrypted archive is written to.

        Raises:
            CustomeException: If OpenSSL is not installed, cannot be run, or reports
                a decryption failure.
        """
        encrypted_file = self.config.encrypted_dataset
        try:
            if not shutil.which('openssl'):
                message = "OpenSSL is not Installed. Please Installed OpenSSL"
                raise ModuleNotFoundError(message)

            # Hand the passphrase over through the child's environment rather than
            # the command line, where it would be visible in process listings.
            password_var = "LESIONSEG_DATASET_PASSWORD"
            child_env = dict(os.environ)
            child_env[password_var] = str(self.config.password)

            cmd = [
                "openssl",
                "enc",
                "-aes-256-cbc",
                "-md", "sha256",
                "-d",  # decrypt
                "-a",  # input is base64 encoded
                "-in", str(encrypted_file),
                "-out", str(zip_file),
                "-pass", f"env:{password_var}",
            ]

            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=child_env,
            )
            # subprocess.run does not raise on a non-zero exit by itself.
            if result.returncode != 0:
                raise RuntimeError(
                    f"OpenSSL decryption failed (exit code {result.returncode}): "
                    f"{result.stderr.strip()}"
                )
            logger.info(f"Dataset Decrypted Successfully at: {zip_file}")

        except Exception as e:
            raise CustomeException(e,sys)

    def extract_dataset(self):
        """Decrypt the dataset, unpack the resulting tar archive and rename its folder.

        Raises:
            CustomeException: If decryption or extraction fails.
        """
        unzip_dir = self.config.unzip_dir
        zip_file = self.config.zip_dataset
        self.decrypt_dataset(unzip_dir, zip_file)

        # Unzip File
        try:
            # The context manager closes the archive even when extraction raises.
            with tarfile.open(zip_file) as archive:
                # NOTE(review): extractall() trusts member paths inside the archive;
                # consider passing filter="data" on Python versions that support it.
                archive.extractall(unzip_dir)
            logger.info(f"Dataset Unzipped at: {unzip_dir}")
        except Exception as e:
            raise CustomeException(e,sys)

        self.rename_folder(unzip_dir)
        

In [None]:
# Run the ingestion stage end to end: load the configuration, build the stage
# settings, then decrypt and extract the dataset.
try:
    config = ConfiguratioManager()
    ingestion_config = config.data_ingestion_config()
    data_ingestion = DataIngestion(config = ingestion_config)
    data_ingestion.extract_dataset()
except Exception:
    # Bare ``raise`` re-raises the active exception without rebinding it.
    raise