In [13]:
import os

In [14]:
%pwd

'c:\\Users\\Jawad Z\\Desktop\\Chicken_Diesease_Prediction\\test\\chicken_Disease_Classification'

In [15]:
# Switch the working directory to the parent of the folder the kernel started
# in, so the relative paths used further down resolve from there.
#
# A plain os.chdir("../") is relative to the *current* directory, which means
# every re-run of this cell would climb one more level. Remembering the start
# directory once and always changing to its parent keeps the cell safe to
# re-run within the same kernel session.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [16]:
%pwd

'c:\\Users\\Jawad Z\\Desktop\\Chicken_Diesease_Prediction\\test'

In [None]:
# ============================================================
# 🧩 Step 4: Entity Definition — Data Ingestion Configuration
# ------------------------------------------------------------
# This dataclass represents the configuration settings for the
# Data Ingestion stage of the pipeline. It defines the paths
# and parameters that will be used to download and extract data.
# ============================================================

from dataclasses import dataclass
from pathlib import Path

# The `frozen=True` parameter makes the dataclass immutable,
# meaning its values cannot be changed after creation.
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings consumed by the data ingestion stage.

    Attributes:
        root_dir: Folder holding every artifact produced by data ingestion.
        source_URL: Address the raw dataset is downloaded from.
        local_data_file: Location on disk where the downloaded file is saved.
        unzip_dir: Folder the downloaded file is extracted into.
    """

    root_dir: Path
    source_URL: str
    local_data_file: Path
    unzip_dir: Path


In [18]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml
from cnnClassifier.utils.common import create_directories

In [None]:
# ============================================================
# 🧩 Step 5: Configuration Manager
# ------------------------------------------------------------
# This component reads configuration files (config.yaml,
# params.yaml), creates required directories, and returns
# configuration objects (entities) for use in each pipeline stage.
# ============================================================

from cnnClassifier.utils.common import read_yaml, create_directories
from cnnClassifier.entity.config_entity import DataIngestionConfig
from cnnClassifier.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH


class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config entities.

    Constructing the manager loads both YAML files and asks
    ``create_directories`` to create the ``artifacts_root`` location named in
    the main config.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """Load the config and params files and prepare the artifacts root.

        Args:
            config_filepath: Location of the main config YAML file.
            params_filepath: Location of the params YAML file.
        """
        # Parsed YAML content; attribute-style access is used on it below.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes beneath this directory, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data ingestion settings from the ``data_ingestion`` section.

        The section's ``root_dir`` is created as a side effect.

        Returns:
            A ``DataIngestionConfig`` whose path fields are ``pathlib.Path``
            objects, matching the entity's declared field types.
        """
        section = self.config.data_ingestion

        # Make sure the stage's artifact directory exists before it is used.
        create_directories([section.root_dir])

        # The YAML values are handed over as loaded, while the entity declares
        # these three fields as Path. Coerce them here so consumers can rely
        # on the annotated types. The URL stays as loaded.
        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_URL=section.source_URL,
            local_data_file=Path(section.local_data_file),
            unzip_dir=Path(section.unzip_dir),
        )


In [20]:
import os
import urllib.request as request
import zipfile
from cnnClassifier import logger
from cnnClassifier.utils.common import get_size

In [21]:
# import os
# import shutil
# import zipfile
# from urllib import request  # For potential URL handling
# # ... other imports like logger, Path, get_size, etc.

# class DataIngestion:
#     def __init__(self, config: DataIngestionConfig):
#         self.config = config

#     def ingest_data(self):
#         source = self.config.source_URL
#         target_dir = self.config.unzip_dir
#         os.makedirs(target_dir, exist_ok=True)

#         if os.path.isdir(source):
#             # Copy folder contents directly to target_dir
#             for item in os.listdir(source):
#                 s = os.path.join(source, item)
#                 d = os.path.join(target_dir, item)
#                 if os.path.isdir(s):
#                     shutil.copytree(s, d, dirs_exist_ok=True)
#                 else:
#                     shutil.copy2(s, d)
#             logger.info(f"Copied contents from local folder {source} to {target_dir}")
#         elif os.path.isfile(source):
#             # Copy file (assuming ZIP) and extract
#             local_zip = self.config.local_data_file
#             if not os.path.exists(local_zip):
#                 shutil.copyfile(source, local_zip)
#                 logger.info(f"Copied local file {source} to {local_zip}")
#             else:
#                 logger.info(f"File already exists: {local_zip}")
            
#             # Extract
#             with zipfile.ZipFile(local_zip, 'r') as zip_ref:
#                 zip_ref.extractall(target_dir)
#             logger.info(f"Extracted ZIP to {target_dir}")
#         else:
#             raise FileNotFoundError(f"Source not found or invalid: {source}")

# # In your main try block, change the calls to:
# try:
#     config = ConfigurationManager()
#     data_ingestion_config = config.get_data_ingestion_config()
#     data_ingestion = DataIngestion(config=data_ingestion_config)
#     data_ingestion.ingest_data()  # New method that handles both cases
# except Exception as e:
#     raise e

In [22]:
# try:
#     config = ConfigurationManager()
#     data_ingestion_config = config.get_data_ingestion_config()
#     data_ingestion = DataIngestion(config=data_ingestion_config)
#     data_ingestion.download_file()
#     data_ingestion.extract_zip_file()
# except Exception as e:
#     raise e

In [None]:
# ============================================================
# 🧩 Step 6: Component — Data Ingestion
# ------------------------------------------------------------
# This component handles the actual logic for downloading and
# extracting the dataset. It uses the configuration provided by
# the ConfigurationManager (via the DataIngestionConfig entity).
# ============================================================

import os
import zipfile
from pathlib import Path
from urllib import request
from cnnClassifier import logger
from cnnClassifier.utils.common import get_size
from cnnClassifier.entity.config_entity import DataIngestionConfig


class DataIngestion:
    """Downloads the dataset archive and unpacks it, driven by a DataIngestionConfig."""

    def __init__(self, config: DataIngestionConfig):
        """Keep the ingestion settings for use by the download/extract steps.

        Args:
            config: Settings providing ``source_URL``, ``local_data_file``
                and ``unzip_dir``.
        """
        self.config = config

    def download_file(self):
        """Fetch the dataset from ``source_URL`` unless it is already on disk.

        The payload is first written to a sibling ``<name>.part`` file and
        only renamed to ``local_data_file`` once the transfer has finished.
        An interrupted or failed download therefore never leaves a truncated
        file at the final location, where the next run would mistake it for a
        complete download and skip fetching it again.

        Raises:
            Whatever ``urllib.request.urlretrieve`` or the file-system calls
            raise; the partial file is removed before the error propagates.
        """
        target = Path(self.config.local_data_file)

        if os.path.exists(target):
            # Nothing to fetch; report the size of what is already there.
            logger.info(
                f"File already exists. Size: {get_size(Path(self.config.local_data_file))}"
            )
            return

        # The destination folder may not exist yet if this step runs on its own.
        target.parent.mkdir(parents=True, exist_ok=True)

        partial = target.with_name(target.name + ".part")
        try:
            _, headers = request.urlretrieve(
                url=self.config.source_URL,
                filename=str(partial)
            )
            # Publish the finished download under its final name.
            os.replace(partial, target)
        finally:
            # After a successful rename the partial file is gone, so this only
            # cleans up when the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

        logger.info(
            f"{self.config.local_data_file} downloaded successfully!\nInfo: {headers}"
        )

    def extract_zip_file(self):
        """Unpack ``local_data_file`` into ``unzip_dir``, creating it if needed.

        Raises:
            Whatever ``zipfile.ZipFile`` raises when the archive is missing
            or cannot be read as a zip file.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)

        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
        logger.info(f"Files extracted successfully to {unzip_path}")


In [None]:
# ============================================================
# 🧩 Step 7: Pipeline — Data Ingestion Stage
# ------------------------------------------------------------
# This pipeline script orchestrates the Data Ingestion process.
# It connects the ConfigurationManager (to get config data)
# with the DataIngestion component (that performs the actions).
# ============================================================

from cnnClassifier.config.configuration import ConfigurationManager
from cnnClassifier.components.data_ingestion import DataIngestion
from cnnClassifier import logger

# Run the data ingestion stage end to end: load the configuration, build the
# component from it, then download and unpack the dataset.
# ConfigurationManager and DataIngestion here are the packaged versions
# imported at the top of this cell.
try:
    # Reads the YAML configuration files.
    config = ConfigurationManager()

    # Settings entity for the data ingestion stage.
    data_ingestion_config = config.get_data_ingestion_config()

    # Component that performs the download and extraction.
    data_ingestion = DataIngestion(config=data_ingestion_config)

    data_ingestion.download_file()
    data_ingestion.extract_zip_file()

    logger.info("Data Ingestion stage completed successfully!")

except Exception as e:
    # Record the failure with its traceback, then let it propagate. A bare
    # `raise` re-raises the active exception as-is instead of raising the
    # bound name again.
    logger.exception(e)
    raise


[2025-10-21 21:33:08,835: INFO: common: YAML file: config\config.yaml loaded successfully]
[2025-10-21 21:33:08,839: INFO: common: YAML file: params.yaml loaded successfully]
[2025-10-21 21:33:08,841: INFO: common: Created directory at: artifacts]
[2025-10-21 21:33:08,843: INFO: common: Created directory at: artifacts/data_ingestion]
[2025-10-21 21:33:19,317: INFO: 4037744573: artifacts/data_ingestion/data.zip download! with following info: 
Content-type: application/x-zip-compressed
Content-length: 1719723149
Last-modified: Tue, 30 Sep 2025 15:23:56 GMT

]
