In [1]:
# Show the kernel's current working directory before it is changed below.
%pwd

'c:\\Users\\Dzuels Foundation\\mlproject\\notebook'

In [2]:
%cd ..\

c:\Users\Dzuels Foundation\mlproject


In [6]:
import requests
import os
import sys
import pandas as pd
from pathlib import Path
from dataclasses import dataclass
from sklearn.model_selection import train_test_split

from src.constant import *
from src.logger import logging
from src.exception import CustomException
from src.utils import read_yaml,create_directory


In [None]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """
    Immutable settings for the data transformation stage.

    Attributes:
        root_dir: Directory that receives the stage's output files
            (train.csv and test.csv are written here).
        input_file: CSV file that the stage reads as its input.
    """

    root_dir: Path
    input_file: Path
    

In [5]:
class ConfigurationManager:
    """
    Loads the project's YAML settings and hands out per-stage config objects.
    """

    def __init__(self,
                 config_file_path=CONFIG_FILE_PATH,
                 schema_file_path=SCHEMA_FILE_PATH
                 ):
        """
        Read the config and schema YAML files and prepare the artifact root.

        Args:
            config_file_path: Location of the main configuration YAML.
            schema_file_path: Location of the schema YAML.
        """
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)

        # Hand the top-level artifact directory to create_directory up front.
        artifact_root = self.config.artifact_root
        create_directory(artifact_root)

    def get_data_transformation_config(self):
        """
        Build the configuration for the data transformation stage.

        Passes the stage's ``root_dir`` to ``create_directory`` before
        building the config object.

        Returns:
            DataTransformationConfig populated from the
            ``data_transformation`` section of the loaded config.
        """
        stage_settings = self.config.data_transformation
        stage_root = stage_settings.root_dir
        create_directory(stage_root)

        return DataTransformationConfig(
            root_dir=stage_root,
            input_file=stage_settings.input_file,
        )

In [None]:
class DataTransformation:
    """
    Splits the configured input CSV into train and test sets and saves both.
    """

    def __init__(self, config: DataTransformationConfig):
        """
        Args:
            config: Supplies ``input_file`` (the CSV to read) and ``root_dir``
                (the directory the split files are written to).
        """
        self.config = config

    def transform_data(self, test_size=0.2, random_state=42):
        """
        Read the input CSV, split it, and write train.csv / test.csv.

        Args:
            test_size: Forwarded to ``train_test_split``. Defaults to 0.2,
                the value that used to be hard-coded.
            random_state: Seed forwarded to ``train_test_split``. Defaults to
                42, the value that used to be hard-coded.

        Returns:
            Tuple ``(train_df, test_df)`` holding the two resulting DataFrames.

        Raises:
            CustomException: Wraps any exception raised while reading,
                splitting or writing; the original exception is chained as
                ``__cause__``.
        """
        try:
            logging.info("Data Transformation started")
            df = pd.read_csv(self.config.input_file)
            train_df, test_df = train_test_split(
                df, random_state=random_state, test_size=test_size
            )
            train_df.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=False)
            test_df.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=False)
            logging.info("Data Transformation completed successfully.")
            return train_df, test_df
        except Exception as e:
            # Chain explicitly so the original traceback stays attached.
            raise CustomException(e, sys) from e
         
            
        

In [9]:
# Run the data transformation stage end to end: load the configuration, build
# the stage object, then split the input data and write the train/test CSVs.
try:
    config_manager = ConfigurationManager()
    data_transformation_config = config_manager.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    train_df, test_df = data_transformation.transform_data()

except CustomException as e:
    # transform_data already wraps its failures in CustomException; re-raise
    # it unchanged instead of nesting one CustomException inside another.
    logging.error(f"Error occurred during data transformation: {e}")
    raise
except Exception as e:
    logging.error(f"Error occurred during data transformation: {e}")
    raise CustomException(e, sys) from e

[2025-04-07 15:29:06,284] : root : INFO: yaml file: config\config.yaml loaded successfully
[2025-04-07 15:29:06,290] : root : INFO: yaml file: schema.yaml loaded successfully
[2025-04-07 15:29:06,290] : root : INFO: Directory 'artifact' already exists.
[2025-04-07 15:29:06,301] : root : INFO: Directory 'artifact/data_transformation' created.
[2025-04-07 15:29:06,303] : root : INFO: Data Transformation started
[2025-04-07 15:29:06,617] : root : INFO: Data Transformation completed successfully.
