In [1]:
import os

In [2]:
%pwd

'c:\\Users\\gupta\\Documents\\GitHub\\Brain_Tumor_Detection\\research'

In [3]:
# Step from the notebook's research/ folder up to the project root so the
# relative paths used below resolve. The guard makes the cell safe to re-run:
# without it every extra execution would climb one more directory level.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\gupta\\Documents\\GitHub\\Brain_Tumor_Detection'

This notebook implements the data ingestion step: it downloads a ZIP file from a URL, saves it locally, and then extracts its contents into a configured directory.

In [5]:
'''
@dataclass(frozen=True): This decorator creates an immutable class where instances are read-only once created.
DataIngestionConfig: This class holds the configuration needed for data ingestion. It stores:
root_dir: The root directory for storing data.
source_URL: The URL from which data will be downloaded.
local_data_file: The path where the downloaded file will be saved.
unzip_dir: The directory where the extracted files will be stored.

'''

#For Modular code: Copy this part in config_entity.py file present in entity folder
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Settings consumed by the data ingestion step.

    The dataclass is frozen, so fields cannot be reassigned after the
    instance has been created.
    """

    # Root directory for storing the ingestion data.
    root_dir: Path
    # URL the data is downloaded from.
    source_URL: str
    # Path where the downloaded file is saved.
    local_data_file: Path
    # Directory where the extracted files are stored.
    unzip_dir: Path

In [6]:
#For Modular code: Copy this part in configuration.py file present in config folder
from CNN_Classifier.constants import *
from CNN_Classifier.utils.common import read_yaml, create_directories

In [7]:
'''
config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH: Default file paths of the two YAML files (config and params).
self.config = read_yaml(config_filepath): Reads the YAML file and returns its content as a ConfigBox.
create_directories([self.config.artifacts_root]): Because read_yaml returns a ConfigBox rather than a plain dictionary,
                                                  the content saved in "self.config" can be accessed directly by key
                                                  (attribute style), with no need for index lookups. Here we read the
                                                  value of the key "artifacts_root" and create a directory with that name.


'''
#For Modular code: Copy this part in configuration.py file present in config folder
#Update the entity
#This class manages configurations by reading them from YAML files.
class ConfigurationManager:
    """Reads the project's YAML files and builds per-step configuration objects.

    Args:
        config_filepath: Location of the main configuration YAML file.
        params_filepath: Location of the parameters YAML file.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root must exist before any step writes below it.
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data ingestion settings from the ``data_ingestion`` section.

        Creates the section's ``root_dir`` as a side effect.

        Returns:
            A DataIngestionConfig whose path fields are ``pathlib.Path``
            objects, matching the types declared on the dataclass.
        """
        section = self.config.data_ingestion

        create_directories([section.root_dir])

        # The YAML values are not Path objects (the download step has to wrap
        # one in Path() itself), so convert here to honour the annotations.
        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_URL=section.source_URL,
            local_data_file=Path(section.local_data_file),
            unzip_dir=Path(section.unzip_dir),
        )

In [8]:
#For Modular code: Copy this part in data_ingestion.py file created in components folder
import os
import urllib.request as request
import zipfile
from src.CNN_Classifier.logger import logging
from CNN_Classifier.utils.common import get_size

In [9]:
#For Modular code: Copy this part in data_ingestion.py file created in components folder
#Update the Components
class DataIngestion:
    """Downloads the dataset archive and unpacks it.

    Args:
        config: Object exposing ``source_URL``, ``local_data_file`` and
            ``unzip_dir`` (normally a DataIngestionConfig).
    """

    def __init__(self, config: "DataIngestionConfig"):
        self.config = config

    def download_file(self):
        """Download ``source_URL`` to ``local_data_file`` unless it already exists.

        The data is written to a sibling ``.part`` file and renamed only after
        the transfer has finished. An interrupted download therefore never
        leaves a truncated archive at ``local_data_file`` that a later run
        would mistake for a complete one.

        Raises:
            Any exception raised by ``urllib.request.urlretrieve`` or by the
            file system operations; the partial file is removed first.
        """
        target = self.config.local_data_file

        if os.path.exists(target):
            logging.info(f"File already exists of size: {get_size(Path(target))}")
            return

        # Do not rely on another component having created the folder.
        target_dir = os.path.dirname(target)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        partial = f"{target}.part"
        try:
            _, headers = request.urlretrieve(
                url = self.config.source_URL,
                filename = partial
            )
            # Publish the archive under its final name only once it is complete.
            os.replace(partial, target)
        finally:
            # Still present only if the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

        logging.info(f"{target} download! with following info: \n{headers}")

    def extract_zip_file(self):
        """Extract ``local_data_file`` into ``unzip_dir``.

        The destination directory is created when missing.
        Function returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)



In [10]:
##For Modular code: Copy this part in stage_01_data_ingestion.py file created in pipeline folder
#Update the pipeline
# Run the ingestion stage end to end: load the configuration, download the
# archive, then unpack it.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
except Exception:
    # Record the failure through the logger before propagating it; the bare
    # `raise` re-raises the active exception unchanged.
    logging.exception("Data ingestion stage failed")
    raise

[2024-09-03 14:53:51,435: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-09-03 14:53:51,452: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-03 14:53:51,457: INFO: common: created directory at: artifacts]
[2024-09-03 14:53:51,462: INFO: common: created directory at: artifacts/data_ingestion]
[2024-09-03 14:53:57,459: INFO: 4095441442: artifacts/data_ingestion/data.zip download! with following info: 
Connection: close
Content-Length: 9802300
Cache-Control: max-age=300
Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox
Content-Type: application/zip
ETag: "35ecddc750a09a7099a4b1573c1d475a37fc6628327a7b76f44fb66b74fd5849"
Strict-Transport-Security: max-age=31536000
X-Content-Type-Options: nosniff
X-Frame-Options: deny
X-XSS-Protection: 1; mode=block
X-GitHub-Request-Id: 6D47:39CB17:4AF21:4E6C4:66D78570
Accept-Ranges: bytes
Date: Tue, 03 Sep 2024 21:53:53 GMT
Via: 1.1 varnish
X-Served-By: cache-lax-kwhp1940130-LAX
X-Cac