In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Sasu4\\SHIP_Classification_using_Resnet\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Sasu4\\SHIP_Classification_using_Resnet'

In [5]:
from dataclasses import dataclass
from pathlib import Path
from typing import List

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir :Path
    source_URL:str
    local_data_file:Path
    unzip_dir :Path
    class_dirs: List[str]

In [7]:

from Ship_Classifier.constants import *
from Ship_Classifier.utils.common  import read_yaml,create_directories

In [8]:

#def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
print( read_yaml(CONFIG_FILE_PATH))
print(f"CONFIG_FILE_PATH: {read_yaml(CONFIG_FILE_PATH)}")
    


[2024-08-28 10:51:42,451: INFO: common: yaml file: config\config.yaml loaded successfully]
{'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_URL': 'https://github.com/anishmaks/SHIP_Classification_using_Resnet/raw/main/Images.zip', 'local_data_file': 'artifacts/data_ingestion/Images.zip', 'unzip_dir': 'artifacts/data_ingestion/extracted_data', 'class_dirs': ['artifacts/data_ingestion/extracted_data/Carrier', 'artifacts/data_ingestion/extracted_data/Cruise', 'artifacts/data_ingestion/extracted_data/Cargo', 'artifacts/data_ingestion/extracted_data/Military', 'artifacts/data_ingestion/extracted_data/Tanker']}, 'prepare_base_model': {'root_dir': 'artifacts/prepare_base_model', 'base_model_path': 'artifacts/prepare_base_model/base_model.pth', 'updated_base_model_path': 'artifacts/prepare_base_model/base_model_updated.pth', 'params_classes': 5, 'freeze_all': True, 'freeze_till': 2, 'params_learning_rate': 0.001}, 'prepare_callbacks': {'root_di

In [9]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        # Load configurations and parameters from YAML files
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
         # Print the paths to check if they are correct
        print(f"CONFIG_FILE_PATH: {config_filepath}")
        print(f"PARAMS_FILE_PATH: {params_filepath}")

        # Check if the config file exists
        if not os.path.exists(config_filepath):
            raise FileNotFoundError(f"Config file not found at: {config_filepath}")
        if not os.path.exists(params_filepath):
            raise FileNotFoundError(f"Params file not found at: {params_filepath}")
        
         # Load the YAML files
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Create the root directory for artifacts if it doesn't exist
        create_directories([self.config['artifacts_root']])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        # Extract the data ingestion configuration from the loaded config
        data_ingestion_config = self.config['data_ingestion']
        
        
        # Check if 'class_dirs' is present
        if 'class_dirs' not in data_ingestion_config:
            raise KeyError("Key 'class_dirs' not found in the configuration")

        # Create the root directory for data ingestion if it doesn't exist
        create_directories([data_ingestion_config['root_dir']])

        # Initialize and return a DataIngestionConfig object with the necessary paths and URLs
        return DataIngestionConfig(
            root_dir=data_ingestion_config['root_dir'],
            source_URL=data_ingestion_config['source_URL'],
            local_data_file=data_ingestion_config['local_data_file'],
            unzip_dir=data_ingestion_config['unzip_dir'],
            class_dirs=data_ingestion_config['class_dirs'] # Renamed to match "extract_dir"
        )

In [10]:
import os
import urllib.request as request
import zipfile
from Ship_Classifier import logger
from Ship_Classifier.utils.common import get_size

In [11]:
import requests
class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config


    
    def download_file(self):
       if not os.path.exists(self.config.local_data_file):
            try:
                response = requests.get(self.config.source_URL)  # Corrected to 'requests.get'
                response.raise_for_status()  # Raise an HTTPError for bad responses
                with open(self.config.local_data_file, 'wb') as f:
                    f.write(response.content)
                logger.info(f"{self.config.local_data_file} downloaded successfully!")
                
            except requests.exceptions.RequestException as e:  # Corrected to 'requests.exceptions.RequestException'
                logger.error(f"Failed to download the file. Error: {str(e)}")
                raise e
       else:
            file_size = self.get_size(Path(self.config.local_data_file))
            logger.info(f"File already exists with size: {file_size}") 

    def extract_zip_file(self):
        # Ensure the file exists before attempting to extract
        if os.path.exists(self.config.local_data_file):
            with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
                zip_ref.extractall(self.config.unzip_dir)  # Changed to 'unzip_dir'
                logger.info(f"Extraction completed successfully into {self.config.unzip_dir}")
            
            # Verifying the folders
            expected_folders = ['Cargo', 'Carrier', 'Cruise', 'Tanker', 'Military']
            for folder in expected_folders:
                folder_path = Path(self.config.unzip_dir) / folder  # Changed to 'unzip_dir'
                if folder_path.exists() and folder_path.is_dir():
                    logger.info(f"Folder '{folder}' exists.")
                else:
                    logger.warning(f"Folder '{folder}' is missing!")
        else:
            logger.error("Zip file does not exist. Please check the download step.")

    def get_size(self, path):
        # Utility method to get the size of a file in bytes
        return os.path.getsize(path)

In [12]:
# Pipeline

import logging

logger = logging.getLogger(__name__)
try:
    # Initialize the configuration manager
    logger.info("Initializing ConfigurationManager")
    config = ConfigurationManager()
    
    # Retrieve the data ingestion configuration
    # Retrieve the data ingestion configuration
    logger.info("Retrieving data ingestion configuration")
    data_ingestion_config = config.get_data_ingestion_config()
    
    # Initialize the DataIngestion process with the retrieved configuration
    logger.info("Initializing DataIngestion")
    data_ingestion = DataIngestion(config=data_ingestion_config)
    
    # Download the file if it doesn't already exist locally
    logger.info("Downloading file")
    data_ingestion.download_file()
    
    # Extract the downloaded zip file to the specified directory
    logger.info("Extracting zip file")
    data_ingestion.extract_zip_file()

except FileNotFoundError as fnf_error:
    logger.error(f"File not found: {fnf_error}")
    raise
except ValueError as val_error:
    logger.error(f"Value error: {val_error}")
    raise
except AttributeError as attr_error:
    logger.error(f"Attribute error: {attr_error}")
    raise
except Exception as e:
    logger.error(f"Error in the pipeline execution: {str(e)}")
    raise Exception(f"Error in the pipeline execution: {str(e)}") from e


[2024-08-28 10:51:58,725: INFO: 2771052960: Initializing ConfigurationManager]
[2024-08-28 10:51:58,733: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-28 10:51:58,759: INFO: common: yaml file: params.yaml loaded successfully]
CONFIG_FILE_PATH: config\config.yaml
PARAMS_FILE_PATH: params.yaml
[2024-08-28 10:51:58,769: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-28 10:51:58,775: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-28 10:51:58,779: INFO: common: created directory at: artifacts]
[2024-08-28 10:51:58,781: INFO: 2771052960: Retrieving data ingestion configuration]
[2024-08-28 10:51:58,785: INFO: common: created directory at: artifacts/data_ingestion]
[2024-08-28 10:51:58,786: INFO: 2771052960: Initializing DataIngestion]
[2024-08-28 10:51:58,789: INFO: 2771052960: Downloading file]
[2024-08-28 10:52:23,702: INFO: 3862538793: artifacts/data_ingestion/Images.zip downloaded successfully!]
[2024-08-28 10:52: