In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Sasu4\\SHIP_Classification_using_Resnet\\research'

In [5]:
os.chdir("../")

In [29]:
%pwd

'c:\\Users\\Sasu4\\SHIP_Classification_using_Resnet'

In [7]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir :Path
    source_url:str
    local_data_file:Path
    unzip_dir :Path

In [44]:
from Ship_Classifier.constants import *
from Ship_Classifier.utils.common  import read_yaml,create_directories

In [46]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        # Load configurations and parameters from YAML files
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
         # Print the paths to check if they are correct
        print(f"CONFIG_FILE_PATH: {config_filepath}")
        print(f"PARAMS_FILE_PATH: {params_filepath}")

        # Check if the config file exists
        if not os.path.exists(config_filepath):
            raise FileNotFoundError(f"Config file not found at: {config_filepath}")
        if not os.path.exists(params_filepath):
            raise FileNotFoundError(f"Params file not found at: {params_filepath}")
        
         # Load the YAML files
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Create the root directory for artifacts if it doesn't exist
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        # Extract the data ingestion configuration from the loaded config
        data_ingestion_config = self.config.data_ingestion

        # Create the root directory for data ingestion if it doesn't exist
        create_directories([data_ingestion_config.root_dir])

        # Initialize and return a DataIngestionConfig object with the necessary paths and URLs
        return DataIngestionConfig(
            root_dir=data_ingestion_config.root_dir,
            source_URL=data_ingestion_config.source_URL,
            local_data_file=data_ingestion_config.local_data_file,
            unzip_dir=data_ingestion_config.unzip_dir  # Renamed to match "extract_dir"
        )

In [39]:
import os
import urllib.request as request
import zipfile
from Ship_Classifier import logger
from Ship_Classifier.utils.common import get_size

In [35]:
class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config


    
    def download_file(self):
        if not os.path.exists(self.config.local_data_file):
            
            try:
                response = request.get(self.config.source_URL)
                response.raise_for_status()  # Raise an HTTPError for bad responses
                with open(self.config.local_data_file, 'wb') as f:
                    f.write(response.content)
                logger.info(f"{self.config.local_data_file} downloaded successfully!")
            except request.exceptions.RequestException as e:
                logger.error(f"Failed to download the file. Error: {str(e)}")
                raise e
        else:
            file_size = self.get_size(Path(self.config.local_data_file))
            logger.info(f"File already exists with size: {file_size}") 


    
    def extract_zip_file(self):
        # Ensure the file exists before attempting to extract
        if os.path.exists(self.config.local_data_file):
            with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
                zip_ref.extractall(self.config.extract_dir)
                logger.info(f"Extraction completed successfully into {self.config.extract_dir}")
            
            # Verifying the folders
            expected_folders = ['Cargo', 'Carrier', 'Cruise', 'Tanker', 'Military']
            for folder in expected_folders:
                folder_path = Path(self.config.extract_dir) / folder
                if folder_path.exists() and folder_path.is_dir():
                    logger.info(f"Folder '{folder}' exists.")
                else:
                    logger.warning(f"Folder '{folder}' is missing!")

        else:
            logger.error("Zip file does not exist. Please check the download step.")

In [47]:
# Pipeline
try:
    # Initialize the configuration manager
    config = ConfigurationManager()
    
    # Retrieve the data ingestion configuration
    data_ingestion_config = config.get_data_ingestion_config()
    
    # Initialize the DataIngestion process with the retrieved configuration
    data_ingestion = DataIngestion(config=data_ingestion_config)
    
    # Download the file if it doesn't already exist locally
    data_ingestion.download_file()
    
    # Extract the downloaded zip file to the specified directory
    data_ingestion.extract_zip_file()

except Exception as e:
    # Catch and raise any exceptions with additional context
    raise Exception(f"Error in the pipeline execution: {str(e)}") from e


[2024-08-20 08:43:21,368: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-20 08:43:21,370: INFO: common: yaml file: params.yaml loaded successfully]


Exception: Error in the pipeline execution: yaml file is empty