In [1]:
import os

In [2]:
%pwd

'f:\\MLOps\\MLOps\\Kidney-Decease-Classification-End-to-End-MLflow-DVC\\research'

In [3]:
os.chdir('../')

In [4]:
%pwd

'f:\\MLOps\\MLOps\\Kidney-Decease-Classification-End-to-End-MLflow-DVC'

## Entity

In [11]:
# An entity is the typed object a configuration function returns (here, what `get_data_ingestion_config` hands back).


from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Read-only bundle of settings consumed by the data-ingestion stage.

    Instances are frozen: fields cannot be reassigned after construction.

    Attributes:
        root_dir: Directory created for the ingestion stage.
        source_URL: Address the dataset archive is downloaded from.
        local_data_file: File the downloaded archive is written to.
        unzip_dir: Directory the archive is extracted into.
    """

    root_dir: Path
    source_URL: str
    local_data_file: Path
    unzip_dir: Path
    

## Configuration 

In [12]:
from cnnClassifier.constants import *
from cnnClassifier.utils.utils import read_yaml, create_directories
from cnnClassifier.exception import CustomException
import sys
from cnnClassifier.logger import logging

In [13]:
class ConfigurationManager:
    """
    Single access point for the project's YAML-driven settings.

    Loads the configuration and parameter files at construction time, makes
    sure the top-level artifacts directory exists, and hands out
    stage-specific configuration entities on request.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """
        Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path to the configuration YAML file.
            params_filepath: Path to the parameters YAML file.

        Raises:
            CustomException: If either YAML file cannot be read.
        """
        try:
            self.config = read_yaml(config_filepath)
            self.params = read_yaml(params_filepath)
        except Exception as e:
            logging.error(f"Failed to read YAML files: {e}")
            raise CustomException(e, sys)

        # This call sits outside the handler above, so a failure while
        # creating the directory propagates without being wrapped.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """
        Build the configuration entity for the data-ingestion stage.

        Reads the ``data_ingestion`` section of the loaded configuration,
        creates that section's root directory, and copies the section's
        values into a DataIngestionConfig.

        Returns:
            DataIngestionConfig: Settings for the data-ingestion stage.

        Raises:
            CustomException: If the section cannot be read, the directory
                cannot be created, or the entity cannot be built.
        """
        try:
            ingestion_section = self.config.data_ingestion
            create_directories([ingestion_section.root_dir])

            return DataIngestionConfig(
                root_dir=ingestion_section.root_dir,
                source_URL=ingestion_section.source_URL,
                local_data_file=ingestion_section.local_data_file,
                unzip_dir=ingestion_section.unzip_dir,
            )
        except Exception as e:
            raise CustomException(e, sys)

## Components

In [19]:
import os 
import zipfile
import gdown
from cnnClassifier.logger import logger
from cnnClassifier.utils.utils import get_size

In [20]:
class DataIngestion:
    """Downloads the dataset archive from Google Drive and unpacks it.

    All locations (source URL, archive path, extraction directory) are read
    from the configuration object supplied at construction time.
    """

    # Direct-download endpoint; the Drive file id is appended to it.
    _DRIVE_DOWNLOAD_PREFIX = "https://drive.google.com/uc?export=download&id="

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_URL``, ``local_data_file`` and
                ``unzip_dir``.
        """
        self.config = config

    @staticmethod
    def _extract_file_id(dataset_url: str) -> str:
        """Pull the Google Drive file id out of a share or download link.

        Recognises ``...?id=<id>`` and ``.../d/<id>/...`` links; any other
        shape falls back to the second-to-last ``/``-separated piece, which
        is what this class used before.
        """
        # Imported locally so the cell does not depend on a separate import cell.
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(dataset_url)

        id_values = parse_qs(parsed.query).get("id")
        if id_values:
            return id_values[0]

        segments = [part for part in parsed.path.split("/") if part]
        if "d" in segments:
            after_d = segments.index("d") + 1
            if after_d < len(segments):
                return segments[after_d]

        return dataset_url.split("/")[-2]

    def download_file(self) -> str:
        """Download the dataset archive to ``config.local_data_file``.

        The parent directory of the target file is created when missing.

        Returns:
            str: Path of the downloaded archive.

        Raises:
            CustomException: If the file id cannot be determined, the target
                directory cannot be created, or the download fails.
        """
        try:
            dataset_url = self.config.source_URL
            # Pass gdown a plain string even if the config holds a path object.
            zip_download_path = str(self.config.local_data_file)

            # Create the directory the archive is written to, rather than
            # a fixed location that may not match the configuration.
            target_dir = os.path.dirname(zip_download_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            logger.info(f"Downloading data from {dataset_url} into file {zip_download_path}")

            file_id = self._extract_file_id(dataset_url)
            downloaded = gdown.download(self._DRIVE_DOWNLOAD_PREFIX + file_id, zip_download_path)

            # NOTE(review): some gdown releases appear to report failure by
            # returning None instead of raising; confirm against the
            # installed version.
            if downloaded is None:
                raise RuntimeError(f"gdown could not download {dataset_url}")

            logger.info(f"Downlaoded data from {dataset_url} into file {zip_download_path}")
            return zip_download_path

        except Exception as e:
            raise CustomException(e, sys) from e

    def extract_zip_file(self) -> None:
        """Extract the downloaded archive into ``config.unzip_dir``.

        The extraction directory is created when missing. Errors from
        ``os.makedirs`` or ``zipfile`` propagate unchanged.

        Returns:
            None
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)
            

## Pipeline

In [21]:
# Run the data-ingestion stage end to end: load settings, download the
# archive, then extract it.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config = data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()

except CustomException:
    # The stage methods already raise CustomException; re-raise it as is
    # instead of wrapping it in a second CustomException.
    raise
except Exception as e:
    # Anything else (e.g. errors from extract_zip_file) is wrapped once,
    # keeping the original exception as the explicit cause.
    raise CustomException(e, sys) from e

Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3&confirm=t&uuid=bcf90125-4ce1-4eb3-9f60-ee3c08621859
To: f:\MLOps\MLOps\Kidney-Decease-Classification-End-to-End-MLflow-DVC\artifacts\data_ingestion\data.zip
100%|██████████| 57.7M/57.7M [02:05<00:00, 461kB/s]
