In [1]:
import os

In [2]:
%pwd

'c:\\Users\\ayush\\OneDrive - Sujal Dhungana\\MBA Admission Classification Project\\notebooks'

In [3]:
# The notebook is started from the project's `notebooks` folder, but the cells below
# need the project root as the working directory. Move up one level instead of
# hard-coding a machine-specific absolute path; the guard makes re-running this
# cell a no-op once the working directory has already been changed.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir(os.path.dirname(os.getcwd()))

In [4]:
%pwd

'c:\\Users\\ayush\\OneDrive - Sujal Dhungana\\MBA Admission Classification Project'

In [1]:
import pandas as pd
import numpy as np
import mysql.connector as mysql
from dotenv.main import load_dotenv
import warnings
from dataclasses import dataclass
from pathlib import Path
from src import *
from src.utils.common import read_sql_data, read_yaml_file, create_directory
from src.logger import logging
from src.exception import CustomException
warnings.filterwarnings('ignore')
import sys
from sklearn.model_selection import train_test_split
load_dotenv()

True

In [6]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable set of file locations used by the data-ingestion step.

    The annotations are declarative only: the dataclass does not validate or
    convert the values it is given.
    """
    # Where the full, unsplit dataset is written as CSV.
    raw_data_path: Path
    # Where the training split is written as CSV.
    train_data_path: Path
    # Where the test split is written as CSV.
    test_data_path: Path

In [7]:
class ConfigurationManager:
    """Reads the project configuration and builds per-stage config objects.

    Constructing an instance reads the YAML configuration file and creates the
    top-level artifacts directory named in it.
    """

    def __init__(self,
                 config_file_path = CONFIG_FILE_PATH,
                 params_file_path = PARAMS_FILE_PATH):
        """Load the configuration file and create the artifacts directory.

        Args:
            config_file_path: Location of the YAML configuration file.
            params_file_path: Kept for interface compatibility. The parameters
                file is not read by this class.

        Raises:
            CustomException: If reading the configuration or creating the
                directory fails.
        """
        try:
            self.config = read_yaml_file(config_file_path)

            # Only the configuration file is loaded here, so the message says
            # exactly that rather than also claiming the parameters were read.
            logging.info("Configuration file has been read successfully")

            logging.info("Creating directories to store artifacts")
            create_directory([self.config.artifacts_directory])
            logging.info("Directories have been created successfully")
        except Exception as e:
            raise CustomException(e, sys) from e

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the configuration object for the data-ingestion step.

        Creates the stage's root directory and returns the raw/train/test file
        locations taken from the ``data_ingestion`` section of the configuration.

        Returns:
            DataIngestionConfig: The three locations as ``Path`` objects.

        Raises:
            CustomException: If the section cannot be read or the directory
                cannot be created.
        """
        try:
            config = self.config.data_ingestion

            logging.info("Creating root directory to store raw data, train data and test data")

            create_directory([config.root_dir])

            logging.info("Root directory has been created successfully")

            logging.info("Assigning paths to raw data, train data and test data")

            # DataIngestionConfig declares its fields as Path; convert the raw
            # configuration values so the objects match the declared types.
            data_ingestion_config = DataIngestionConfig(
                raw_data_path = Path(config.raw_data_path),
                train_data_path = Path(config.train_data_path),
                test_data_path = Path(config.test_data_path)
            )

            logging.info("Paths have been assigned successfully")

            return data_ingestion_config

        except Exception as e:
            raise CustomException(e, sys) from e
    
        

In [8]:
class DataIngestion:
    """Fetches the dataset, stores it as CSV and writes a train/test split."""

    def __init__(self, config: DataIngestionConfig):
        """Keep the file locations that the ingestion step writes to."""
        self.config = config

    def initiate_data_ingestion(self, test_size=0.3, random_state=42) -> tuple:
        """Run the ingestion step end to end.

        Loads the data with ``read_sql_data()``, writes it to the raw data
        location, splits it with ``train_test_split`` and writes both splits.

        Args:
            test_size: Forwarded to ``train_test_split``. Defaults to 0.3.
            random_state: Seed forwarded to ``train_test_split`` so the split
                is reproducible. Defaults to 42.

        Returns:
            tuple: ``(train_data_path, test_data_path)`` as stored in the config.

        Raises:
            CustomException: If loading, splitting or writing fails.
        """
        try:
            logging.info("Reading data from MySQL database")
            data = read_sql_data()
            logging.info("Data has been read successfully")

            logging.info("Saving data to raw data directory")
            data.to_csv(self.config.raw_data_path, index=False, header=True)
            logging.info(f"Raw data has been saved successfully at {self.config.raw_data_path}")

            logging.info("Splitting data into train and test data")
            train, test = train_test_split(
                data, test_size=test_size, random_state=random_state
            )
            logging.info("Data has been split successfully")

            logging.info("Saving train data to train data directory")
            train.to_csv(self.config.train_data_path, index=False, header=True)
            logging.info(f"Train data has been saved successfully at {self.config.train_data_path}")

            logging.info("Saving test data to test data directory")
            test.to_csv(self.config.test_data_path, index=False, header=True)
            logging.info(f"Test data has been saved successfully at {self.config.test_data_path}")

            return (
                self.config.train_data_path,
                self.config.test_data_path
            )

        except Exception as e:
            raise CustomException(e, sys) from e

    

In [9]:
# Run the data-ingestion step: load the configuration, build the stage config,
# then fetch, store and split the data.
if __name__ == "__main__":
    try:
        config_manager = ConfigurationManager()
        data_ingestion_config = config_manager.get_data_ingestion_config()
        data_ingestion = DataIngestion(data_ingestion_config)
        data_ingestion.initiate_data_ingestion()
    except CustomException:
        # The calls above already raise CustomException; re-raise it unchanged
        # instead of wrapping it a second time.
        raise
    except Exception as e:
        raise CustomException(e, sys) from e