In [1]:
import os


In [2]:
%pwd

'd:\\aakashyadav\\other_end_end_projects\\Store-Item-Demand-Forecasting-Challenge-main\\research'

In [3]:
os.chdir('../')

In [4]:
%pwd

'd:\\aakashyadav\\other_end_end_projects\\Store-Item-Demand-Forecasting-Challenge-main'

In [10]:
from dataclasses import dataclass
from pathlib import Path

from src.store_demand_forecasting.constants import *

@dataclass
class DataIngestionConfig:
    """Locations and source URL used by the data-ingestion stage.

    Every value is derived from the project constants ``ARTIFACTS_DIR``,
    ``ROOT_DIR``, ``SOURCE_URL`` and ``LOCAL_FILE_PATH`` when an instance is
    created.

    Because ``__init__`` is written by hand, ``@dataclass`` does not generate
    one, and since no fields are declared at class level the generated
    ``__repr__``/``__eq__`` do not take the attributes below into account.

    Attributes:
        data_ingestion_artifacts_dir: ``ARTIFACTS_DIR`` joined with ``ROOT_DIR``.
        source_url: Value of ``SOURCE_URL``.
        local_data_file: ``LOCAL_FILE_PATH`` joined onto the artifacts directory.
        unzip_dir: Same directory as ``data_ingestion_artifacts_dir``.
    """

    def __init__(self):
        # os.path.join returns a plain string, so the attributes are annotated
        # as ``str`` rather than ``Path``.
        self.data_ingestion_artifacts_dir: str = os.path.join(
            ARTIFACTS_DIR, ROOT_DIR
        )
        self.source_url: str = SOURCE_URL
        self.local_data_file: str = os.path.join(
            self.data_ingestion_artifacts_dir, LOCAL_FILE_PATH
        )
        # The archive is unpacked into the ingestion artifacts directory itself.
        self.unzip_dir: str = self.data_ingestion_artifacts_dir

In [6]:
# Data Ingestion Artifacts
@dataclass
class DataIngestionArtifacts:
    """Record of the file paths reported by the data-ingestion stage."""

    # Path of the zip archive.
    zip_data_file_path: str

    # Path of the CSV data.
    csv_data_file_path: str

In [7]:
import os
import pandas as pd
import urllib.request as request
import requests
import zipfile
from src.store_demand_forecasting.logger import logging
from pathlib import Path
from src.store_demand_forecasting.utils.common import MainUtils

In [8]:
class DataIngestion:
    """Download the dataset archive and unpack it into the artifacts directory.

    The class is driven by the configuration object given to the constructor,
    from which it reads ``data_ingestion_artifacts_dir``, ``source_url``,
    ``local_data_file`` and ``unzip_dir``.
    """

    # The annotations are quoted (forward references) so that defining this
    # class does not require the entity classes to be defined first.
    def __init__(self, data_ingestion_config: "DataIngestionConfig") -> None:
        """Keep a reference to the ingestion configuration.

        Args:
            data_ingestion_config: Object exposing the paths and the source URL
                used by the ingestion steps.
        """
        self.data_ingestion_config = data_ingestion_config

    def download_file(self) -> None:
        """Fetch the archive from ``source_url`` unless it is already on disk.

        When ``local_data_file`` already exists nothing is downloaded and only
        the size of the existing file is logged.
        """
        local_data_file = self.data_ingestion_config.local_data_file
        if not os.path.exists(local_data_file):
            filename, headers = request.urlretrieve(
                url=self.data_ingestion_config.source_url,
                filename=local_data_file,
            )
            logging.info(f"{filename} download! with following info: \n{headers}")
        else:
            logging.info(f"File already exists of size: {MainUtils.get_size(Path(local_data_file))}")

    def extract_zip_file(self) -> None:
        """Extract ``local_data_file`` into ``unzip_dir``.

        The target directory is created when missing. A corrupt archive is
        reported on stdout and otherwise ignored; no exception is raised for it.

        Returns:
            None
        """
        unzip_path = self.data_ingestion_config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        try:
            with zipfile.ZipFile(self.data_ingestion_config.local_data_file, 'r') as zip_ref:
                zip_ref.extractall(unzip_path)
        except zipfile.BadZipFile as e:
            print(f"BadZipFile Error:{e}")

    def initiate_data_ingestion(self) -> "DataIngestionArtifacts":
        """Run the full ingestion: create the directory, download, extract.

        Returns:
            DataIngestionArtifacts: ``zip_data_file_path`` is the downloaded
            archive; ``csv_data_file_path`` is the directory the archive was
            extracted into (see the note in the body).

        Raises:
            Exception: Any error raised by the individual steps propagates
                unchanged.
        """
        logging.info("Entered the initiate_data_ingestion method of the data ingestion class")
        config = self.data_ingestion_config

        os.makedirs(config.data_ingestion_artifacts_dir, exist_ok=True)
        logging.info(f"Creating {os.path.basename(config.data_ingestion_artifacts_dir)} directory")

        # Downloading data from given URL
        self.download_file()
        logging.info(f"Downloading data from given url {config.source_url}")

        # Extract the downloaded archive
        self.extract_zip_file()
        logging.info(f"Extracting file in directory {config.unzip_dir}")

        return DataIngestionArtifacts(
            zip_data_file_path=config.local_data_file,
            # NOTE(review): the configuration names no individual CSV file, so
            # the extraction directory is reported here - confirm with the
            # consumers of this artifact.
            csv_data_file_path=config.unzip_dir,
        )


In [13]:
# try:
#     data_ingestion_config = DataIngestionConfig()
#     data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
#     data_ingestion.initiate_data_ingestion()
# except Exception as e:
#     raise e

In [None]:
import sys
from src.store_demand_forecasting.constants import *

from src.store_demand_forecasting.entity.artifact_entity import (
    DataIngestionArtifacts
    )


from src.store_demand_forecasting.entity.config_entity import (
    DataIngestionConfig,
    
)


from src.store_demand_forecasting.exception import NerException
from src.store_demand_forecasting.logger import logging


class TrainPipeline:
    """Training pipeline; the only stage defined here is data ingestion."""

    def __init__(self):
        """Create the default configuration for the data-ingestion stage."""
        self.data_ingestion_config = DataIngestionConfig()

    def start_data_ingestion(self) -> DataIngestionArtifacts:
        """Run the data-ingestion stage and hand back its result.

        Returns:
            Whatever ``DataIngestion.initiate_data_ingestion`` returns.

        Raises:
            NerException: Wraps any exception raised while ingesting; the
                original exception is chained as the cause.
        """
        logging.info("Entered the start_data_ingestion method of TrainPipeline class")
        try:
            logging.info("Getting the data from Google cloud storage")
            ingestion_artifact = DataIngestion(
                data_ingestion_config=self.data_ingestion_config
            ).initiate_data_ingestion()
            logging.info("Got the data from Google cloud storage")
            logging.info("Exited the start_data_ingestion method of TrainPipeline class")
        except Exception as error:
            raise NerException(error, sys) from error
        return ingestion_artifact

In [14]:
def run_pipeline(self) -> None:
    """Run the pipeline; at present this is only the data-ingestion stage.

    NOTE(review): this function is defined at module level although it takes
    ``self``; it looks intended to be a method of the pipeline class - confirm.
    As written it has to be called as ``run_pipeline(pipeline)``.

    Args:
        self: Object providing a ``start_data_ingestion()`` method.

    Raises:
        Exception: Anything raised by ``start_data_ingestion`` propagates
            unchanged.
    """
    logging.info("Started Model training >>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
    # No later stage consumes the ingestion artifact yet, so it is not kept.
    self.start_data_ingestion()