In [1]:
import os

In [2]:
%pwd #possitive writing directory

'd:\\project\\chicken-fecal-classification_project\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\project\\chicken-fecal-classification_project'

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir: Path
    source_url: str
    local_data_file: Path
    unzip_dir: Path



In [6]:
from cnn_classifier.constants import *
from cnn_classifier.utils.common import read_yaml,create_directories

In [7]:
from pathlib import Path

class configurationManager:
    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ) -> None:
        self.config = read_yaml(config_filepath)
        self.param = read_yaml(params_filepath)

        # Debug print to verify structure of self.config
        print("Loaded config:", self.config)  # Check if 'data_ingestion' and 'artifacts_root' keys exist

        # Convert `artifacts_root` to Path object before passing to `create_directories`
        create_directories([Path(self.config["artifacts_root"])])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        config = self.config["data_ingestion"]  # Use dictionary-style access
        return DataIngestionConfig(
             root_dir=Path(
                 config["root_dir"]),
                 source_url=config["source_url"],
                local_data_file=Path(config["local_data_file"]),
                unzip_dir=Path(config["unzip_dir"]),
           
            
            
        )
# root_dir=Path(config["root_dir"]),
#source_url=config["source_url"],
#local_data_file=Path(config["local_data_file"]),
#unzip_dir=Path(config["unzip_dir"]),

In [8]:
import os
import urllib.request as request
import zipfile 
from cnn_classifier import logger
from cnn_classifier.utils.common import get_size

In [9]:
import os
import zipfile
from pathlib import Path
import logging

# Assuming the logger is configured
logger = logging.getLogger(__name__)

class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self):
        if not os.path.exists(self.config.local_data_file):
            filename, headers = request.urlretrieve(self.config.source_url, self.config.local_data_file)
            logger.info(f"{filename} downloaded with the following info:\n{headers}")
        else:
            file_size = os.path.getsize(self.config.local_data_file)
            logger.info(f"File already exists of size: {file_size} bytes")

    def extract_zip_file(self):
        """
        Extract the zip file into the data directory.
        """
        # Ensure `unzip_path` is a Path object
        unzip_path = Path(self.config.unzip_dir)  # Explicitly cast to Path object
        print(f"Unzip path: {unzip_path}")  # Debug print
        print(f"Local data file: {self.config.local_data_file}")  # Debug print

        # Create the directory if it doesn't exist
        os.makedirs(unzip_path, exist_ok=True)
        print(f"Directory created or already exists at: {unzip_path}")  # Debug print

        # Check if the local data file exists before extracting
        if not os.path.exists(self.config.local_data_file):
            print(f"File does not exist: {self.config.local_data_file}")
            return

        try:
            with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
                zip_ref.extractall(unzip_path)  # Extract all files into the unzip path
                logger.info(f"Extracted zip file to: {unzip_path}")
        except Exception as e:
            print(f"Error during extraction: {e}")
            raise e  # Reraise exception to be caught elsewhere


In [10]:
try:
    config = configurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.extract_zip_file()

except Exception as e:
    raise e

[2024-11-13 19:16:30,970: common: YAML file: d:\project\chicken-fecal-classification_project\config\config.yaml loaded successfully]
[2024-11-13 19:16:30,972: common: Content of d:\project\chicken-fecal-classification_project\config\config.yaml: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_injection', 'source_url': 'https://github.com/itisha249/chicken-fecal-classification_project/raw/refs/heads/main/archive%20(1).zip', 'local_data_file': 'artifacts/data_injection/data.zip', 'unzip_dir': 'artifacts/data_injection'}}]
[2024-11-13 19:16:30,978: common: YAML file: d:\project\chicken-fecal-classification_project\param.yaml loaded successfully]
[2024-11-13 19:16:30,980: common: Content of d:\project\chicken-fecal-classification_project\param.yaml: {'parameters': {'key1': 'value1', 'key2': 'value2', 'key3': {'subkey1': 'subvalue1', 'subkey2': 'subvalue2'}}}]


Loaded config: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_injection', 'source_url': 'https://github.com/itisha249/chicken-fecal-classification_project/raw/refs/heads/main/archive%20(1).zip', 'local_data_file': 'artifacts/data_injection/data.zip', 'unzip_dir': 'artifacts/data_injection'}}
[2024-11-13 19:16:30,983: common: Created directory at: artifacts]
Unzip path: artifacts\data_injection
Local data file: artifacts\data_injection\data.zip
Directory created or already exists at: artifacts\data_injection
File does not exist: artifacts\data_injection\data.zip
