In [1]:
import os

In [2]:
# Step up to the parent directory -- presumably the project root -- so that the
# `src.laptop_price_prediction` imports in the next cell can be resolved.
# The guard makes the cell idempotent: once the package directory is visible
# from the current working directory, re-running the cell no longer climbs
# another level up the tree.
if not os.path.isdir(os.path.join('src', 'laptop_price_prediction')):
    os.chdir('../')

In [3]:
import pandas as pd
import numpy as np
from dataclasses import dataclass
from typing import List, Tuple
from pathlib import Path
from src.laptop_price_prediction.constants.constant import *
from src.laptop_price_prediction.utils.common import read_yaml, create_directories, read_sql
from src.laptop_price_prediction.logger import logging
from sklearn.model_selection import train_test_split

In [4]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable set of output file locations used by the data-ingestion stage."""
    # Destination of the full, unsplit dataset written as CSV.
    raw_path: Path
    # Destination of the training split written as CSV.
    train_path: Path
    # Destination of the test split written as CSV.
    test_path: Path


In [5]:
class ConfigurationManager:
    """Loads the project configuration file and builds typed stage configs.

    Constructing the manager reads the YAML file with ``read_yaml`` and passes
    the location stored under its ``artifacts`` key to ``create_directories``.
    """

    def __init__(self, config_file_path = CONFIG_FILE_PATH):
        """Read the configuration file and create the artifacts directory.

        Args:
            config_file_path: Location of the YAML configuration file.
                Defaults to ``CONFIG_FILE_PATH``.

        Raises:
            Exception: Any error raised by ``read_yaml`` or
                ``create_directories`` is logged and re-raised unchanged.
        """
        try:
            self.config = read_yaml(config_file_path)
            logging.info("Configuration file loaded successfully")

            logging.info("Creating directories to store artifacts")
            create_directories([self.config.artifacts])

        except Exception as e:
            logging.error(f"Error loading configuration file: {e}")
            # Bare `raise` re-raises the active exception with its original traceback.
            raise

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the configuration object for the data-ingestion stage.

        Reads the ``data_ingestion`` section of the loaded configuration,
        passes its ``root_dir`` to ``create_directories`` and returns the
        raw/train/test file locations.

        Returns:
            DataIngestionConfig: The three output locations as ``Path`` objects.

        Raises:
            Exception: Any error while reading the section, creating the
                directory or building the paths is logged and re-raised.
        """
        try:
            config = self.config.data_ingestion
            logging.info("Data Ingestion Configuration loaded successfully")

            logging.info("Creating directories to store data")
            create_directories([config.root_dir])

            logging.info("Successfully created directories to store data")

            logging.info("Assigning paths to raw, train and test data")

            # DataIngestionConfig declares its fields as Path, so convert the
            # values read from the configuration instead of passing them through
            # as-is.
            data_ingestion_config = DataIngestionConfig(
                raw_path=Path(config.raw_path),
                train_path=Path(config.train_path),
                test_path=Path(config.test_path),
            )

            logging.info("Paths assigned successfully")

            return data_ingestion_config

        except Exception as e:
            logging.error(f"Error loading data ingestion configuration: {e}")
            raise

In [8]:
class DataIngestion:
    """Fetches the dataset via ``read_sql`` and writes raw/train/test CSV files."""

    def __init__(self, config: DataIngestionConfig):
        """Keep the ingestion configuration for later use.

        Args:
            config: Output locations for the raw, train and test CSV files.
        """
        self.config = config

    def initiate_data_ingestion(
        self,
        test_size: float = 0.2,
        random_state: int = 42,
    ) -> Tuple[Path, Path]:
        """Load the data, persist it, split it and persist both splits.

        Args:
            test_size: Forwarded to ``train_test_split`` as ``test_size``.
                Defaults to 0.2, the value previously hard-coded.
            random_state: Forwarded to ``train_test_split`` as
                ``random_state``. Defaults to 42, the value previously
                hard-coded.

        Returns:
            The configured ``(train_path, test_path)`` locations that the two
            splits were written to.

        Raises:
            Exception: Any error while loading, splitting or writing is
                logged and re-raised unchanged.
        """
        try:
            df = read_sql()

            logging.info("Data loaded successfully")

            # Ensure every destination folder exists so the writes below do not
            # depend on the directories having been created elsewhere.
            for destination in (
                self.config.raw_path,
                self.config.train_path,
                self.config.test_path,
            ):
                Path(destination).parent.mkdir(parents=True, exist_ok=True)

            df.to_csv(self.config.raw_path, index=False, header=True)

            logging.info("Data saved successfully")

            logging.info("Splitting data into train and test data")
            train_data, test_data = train_test_split(
                df, test_size=test_size, random_state=random_state
            )

            logging.info("Data split successfully")

            train_data.to_csv(self.config.train_path, index=False, header=True)
            test_data.to_csv(self.config.test_path, index=False, header=True)

            logging.info("Train and Test data saved successfully")

            return (
                self.config.train_path,
                self.config.test_path
            )

        except Exception as e:
            logging.error(f"Error in data ingestion: {e}")
            # Bare `raise` re-raises the active exception with its original traceback.
            raise
        

In [9]:
import traceback

# Run the ingestion stage end to end: load the configuration, build the
# data-ingestion config, then fetch the data and write the raw/train/test CSVs.
if __name__ == "__main__":
    try:
        config_manager = ConfigurationManager()
        data_ingestion_config = config_manager.get_data_ingestion_config()
        data_ingestion = DataIngestion(data_ingestion_config)
        data_ingestion.initiate_data_ingestion()
    except Exception as e:
        # Log the message and the full stack trace, then re-raise so the
        # failure is still reported by the cell.
        logging.error(f'Error: {e}')
        logging.error(traceback.format_exc())
        raise e

[ 2024-10-05 16:39:48,698 ] 20 root - INFO - Configuration file loaded successfully
[ 2024-10-05 16:39:48,699 ] 20 root - INFO - Creating directories to stor artifacts
[ 2024-10-05 16:39:48,700 ] 20 root - INFO - Creating directories
[ 2024-10-05 16:39:48,701 ] 20 root - INFO - Directories created successfully
[ 2024-10-05 16:39:48,701 ] 20 root - INFO - Data Ingestion Configuration loaded successfully
[ 2024-10-05 16:39:48,702 ] 20 root - INFO - Creating directories to store data
[ 2024-10-05 16:39:48,703 ] 20 root - INFO - Creating directories
[ 2024-10-05 16:39:48,704 ] 20 root - INFO - Directories created successfully
[ 2024-10-05 16:39:48,704 ] 20 root - INFO - Successfully created directories to store data
[ 2024-10-05 16:39:48,705 ] 20 root - INFO - Assigning paths to raw, train and test data
[ 2024-10-05 16:39:48,705 ] 20 root - INFO - Paths assigned successfully
[ 2024-10-05 16:39:48,706 ] 20 root - INFO - Creating connection to MySQL database
[ 2024-10-05 16:39:48,712 ] 20 ro

  df = pd.read_sql_query('SELECT * FROM laptop', conn)
