In [1]:
import os

In [2]:
pwd

'c:\\Users\\harsh\\OneDrive\\Desktop\\Job\\ResumeProjects\\SterileBrowsing\\SterileBrowsingMedicalImgs\\research'

In [3]:
# Move from the notebook folder ("research", per the working directory shown
# above) up to the project root so that relative paths such as
# config/config.yaml and artifacts/ resolve. The guard keeps the cell
# idempotent: re-running it will not climb a further level.
if os.path.basename(os.getcwd()) == "research":
    os.chdir(os.pardir)

In [4]:
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings consumed by the data-ingestion stage.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction.
    """

    # Working directory of the ingestion stage.
    root_dir: Path
    # Link to the dataset archive to download.
    source_URL: str
    # Path the downloaded archive is stored at.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path

In [5]:
from cnnImageClassification.constants import *
from cnnImageClassification.utils.common import read_yaml, create_directories

In [6]:
class ConfigManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(
            self,
            config__filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config__filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config__filepath)
        self.params = read_yaml(params_filepath)

        # Everything the pipeline produces lives under this directory.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the DataIngestionConfig from the ``data_ingestion`` section.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            A DataIngestionConfig populated with the section's ``root_dir``,
            ``source_URL``, ``local_data_file`` and ``unzip_dir`` values.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        field_names = ("root_dir", "source_URL", "local_data_file", "unzip_dir")
        return DataIngestionConfig(
            **{name: getattr(section, name) for name in field_names}
        )

In [7]:
import os
import gdown
import urllib.request as request
import zipfile
from cnnImageClassification import logger
from cnnImageClassification.utils.common import get_size

In [8]:
class DataIngestion:
    """Downloads the dataset archive and unpacks it, driven by a DataIngestionConfig."""

    def __init__(self, config: DataIngestionConfig):
        """Store the ingestion settings.

        Args:
            config: Supplies ``source_URL``, ``local_data_file`` and ``unzip_dir``.
        """
        self.config = config

    def download_file(self):
        """Download the archive with gdown unless it already exists locally.

        The file id is taken to be the second-to-last ``/``-separated segment
        of ``source_URL`` (for example ``.../file/d/<id>/view?...``) and is
        turned into a ``https://drive.google.com/uc?id=<id>`` download URL.

        Returns:
            None
        """
        # NOTE(review): gdown appears to treat only ``str`` outputs as file
        # paths, so a Path-typed config value is coerced here; confirm against
        # the installed gdown version.
        target = str(self.config.local_data_file)

        if os.path.exists(target):
            logger.info(f"File already exists with size {get_size(Path(target))}")
            return

        file_id = self.config.source_URL.split("/")[-2]
        # Log before starting so the message reflects what is about to happen.
        logger.info(f"Downloading file {target}")
        gdown.download(f'https://drive.google.com/uc?id={file_id}', target, quiet=False)

    def extract_zip_file(self):
        """Extract the downloaded archive into ``unzip_dir``.

        The destination directory is created if it does not exist yet, then
        every member of ``local_data_file`` is extracted into it.

        Returns:
            None
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)



In [9]:
# Run the data-ingestion stage end to end: load config, download, extract.
try:
    config = ConfigManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
except Exception:
    # Record the failure in the project log, then re-raise unchanged so the
    # notebook still stops with the original traceback.
    logger.exception("Data ingestion failed")
    raise


[2024-10-28 18:14:59,938: INFO: common{c:\users\harsh\onedrive\desktop\job\resumeprojects\sterilebrowsing\sterilebrowsingmedicalimgs\src\cnnImageClassification\utils\common.py: Reading yaml file from config\config.yaml loaded suucessfully]
[2024-10-28 18:14:59,940: INFO: common{c:\users\harsh\onedrive\desktop\job\resumeprojects\sterilebrowsing\sterilebrowsingmedicalimgs\src\cnnImageClassification\utils\common.py: Reading yaml file from params.yaml loaded suucessfully]
[2024-10-28 18:14:59,941: INFO: common{c:\users\harsh\onedrive\desktop\job\resumeprojects\sterilebrowsing\sterilebrowsingmedicalimgs\src\cnnImageClassification\utils\common.py: Directory created at artifacts]
[2024-10-28 18:14:59,942: INFO: common{c:\users\harsh\onedrive\desktop\job\resumeprojects\sterilebrowsing\sterilebrowsingmedicalimgs\src\cnnImageClassification\utils\common.py: Directory created at artifacts/data_ingestion]


Downloading...
From (original): https://drive.google.com/uc?id=1DXvi_5cuMNIkjqWhEERPOvZEwme2EjeM
From (redirected): https://drive.google.com/uc?id=1DXvi_5cuMNIkjqWhEERPOvZEwme2EjeM&confirm=t&uuid=bf890241-a627-4885-a7ab-2f2c9aebdae4
To: c:\Users\harsh\OneDrive\Desktop\Job\ResumeProjects\SterileBrowsing\SterileBrowsingMedicalImgs\artifacts\data_ingestion\Dataset.zip
100%|██████████| 51.7M/51.7M [00:00<00:00, 55.4MB/s]

[2024-10-28 18:15:05,012: INFO: 1085507142{C:\Users\harsh\AppData\Local\Temp\ipykernel_20260\1085507142.py: Downloading fileartifacts/data_ingestion/Dataset.zip]



