In [1]:
import os

In [4]:
%pwd

'e:\\progs\\Kidney_Disease_Classification_DL'

In [3]:
# Step one directory up so that the relative paths used by later cells
# (e.g. "artifacts/data_ingestion") resolve from the parent folder.
# NOTE: this is not idempotent - every re-run of this cell climbs one more
# level, so execute it exactly once per kernel session.
os.chdir("../")

In [46]:
from dataclasses import dataclass
from pathlib import Path

# The @dataclass decorator marks the class below as an "entity": rather than a
# typical Python class with behaviour, it is a plain bundle of typed fields that
# other files can import and read.

@dataclass(frozen=True)
class DataIngestionConfig:
    """Read-only settings consumed by the data-ingestion step.

    ``frozen=True`` means the fields cannot be reassigned after construction.

    NOTE(review): the path fields are annotated as ``Path``, but
    ``ConfigurationManager.get_data_ingestion_config`` passes the config values
    through without converting them, so at runtime they may be plain strings -
    confirm before relying on ``Path`` methods.
    """

    # Working directory of the ingestion step; created by ConfigurationManager.
    root_dir: Path
    # Share link the dataset archive is downloaded from.
    source_URL: str
    # Local file the downloaded archive is written to (and later read from).
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path

In [47]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [48]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read the config and params YAML files and create the artifacts root.

        Args:
            config_filepath: Location of the main config YAML file.
            params_filepath: Location of the params YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is the first directory requested from the loaded config.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ``DataIngestionConfig`` from the ``data_ingestion`` section.

        The section's ``root_dir`` is passed to ``create_directories`` before
        the config object is returned.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )
            
    

In [49]:
import os
import zipfile
import gdown
from cnnClassifier import logger
from cnnClassifier.utils.common import get_size

In [50]:
class DataIngestion:
    """Downloads the dataset archive from Google Drive and unpacks it locally.

    The source URL and every local path are read from the config object given
    to the constructor.
    """

    # Direct-download endpoint handed to gdown; the Drive file id is appended.
    _DRIVE_DOWNLOAD_PREFIX = "https://drive.google.com/uc?export=download&id="

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_URL``, ``local_data_file`` and
                ``unzip_dir``.
        """
        self.config = config

    @staticmethod
    def _extract_file_id(dataset_url: str) -> str:
        """Return the Drive file id from a share link like ``.../file/d/<id>/view``."""
        segments = dataset_url.split("/")
        if "d" in segments[:-1]:
            # Share-link layout: the id is the path segment right after "d".
            # Drop a query string in case the link has no trailing "/view".
            candidate = segments[segments.index("d") + 1].split("?", 1)[0]
        elif len(segments) >= 2:
            # Other layouts: fall back to the second-to-last segment.
            candidate = segments[-2]
        else:
            candidate = ""

        if not candidate:
            raise ValueError(
                f"Cannot find a Google Drive file id in URL: {dataset_url!r}"
            )
        return candidate

    def download_file(self) -> str:
        """Download the archive at ``config.source_URL`` to ``config.local_data_file``.

        The parent directory of the target file is created when missing.

        Returns:
            The local path of the downloaded archive, as a string.

        Raises:
            ValueError: If no file id can be extracted from the source URL.
            Exception: Anything raised by ``gdown.download`` propagates unchanged.
        """
        dataset_url = self.config.source_URL
        # NOTE(review): gdown appears to treat a non-str ``output`` as an open
        # file object, so a Path-valued config is converted to str here -
        # confirm against the installed gdown version.
        zip_download_dir = str(self.config.local_data_file)

        # Create the directory the file actually goes into, not a fixed path.
        parent_dir = os.path.dirname(zip_download_dir)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

        file_id = self._extract_file_id(dataset_url)
        gdown.download(self._DRIVE_DOWNLOAD_PREFIX + file_id, zip_download_dir)

        logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")
        return zip_download_dir

    def extract_zip_file(self) -> None:
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        The target directory is created when it does not exist yet.
        Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
            
            

In [51]:
# Run the ingestion stage end to end: load the configuration, download the
# archive, then unpack it. No try/except wrapper is needed - catching an
# exception only to re-raise it adds nothing, and letting it propagate shows
# the original traceback in the notebook.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_file()
data_ingestion.extract_zip_file()

[2024-11-03 18:41:11,527: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-11-03 18:41:11,529: INFO: common: yaml file: params.yaml loaded successfully]
[2024-11-03 18:41:11,530: INFO: common: created directory at: artifacts]
[2024-11-03 18:41:11,532: INFO: common: created directory at: artifacts/data_ingestion]
[2024-11-03 18:41:11,533: INFO: 2288733769: Downloading data from https://drive.google.com/file/d/1kQlUBk_BVEJLUNKdEUcNaFIayBitxVH4/view?usp=sharing into file artifacts/data_ingestion/data.zip]


Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1kQlUBk_BVEJLUNKdEUcNaFIayBitxVH4
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1kQlUBk_BVEJLUNKdEUcNaFIayBitxVH4&confirm=t&uuid=ac75a986-93b0-4827-a2b2-1702d7b5e425
To: e:\progs\Kidney_Disease_Classification_DL\artifacts\data_ingestion\data.zip
100%|██████████| 940M/940M [01:26<00:00, 10.8MB/s] 

[2024-11-03 18:42:42,712: INFO: 2288733769: Downloaded data from https://drive.google.com/file/d/1kQlUBk_BVEJLUNKdEUcNaFIayBitxVH4/view?usp=sharing into file artifacts/data_ingestion/data.zip]



