In [89]:
import os


In [90]:
%pwd

'D:\\Machine-learning-project-with-Mlflow'

In [91]:
os.chdir('../')

In [92]:
%pwd

'D:\\'

In [93]:
import os
from pathlib import Path

# Set working directory to project root
project_root = r'D:\Machine-learning-project-with-Mlflow'
os.chdir(project_root)

print(f"‚úÖ Working directory set to: {os.getcwd()}")

‚úÖ Working directory set to: D:\Machine-learning-project-with-Mlflow


In [94]:
import pandas as pd

In [95]:
from dataclasses import dataclass
from pathlib import Path    

@dataclass(frozen=True)
class DataTransformationConfig:
    root_dir: Path
    data_path: Path

In [96]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [97]:
class ConfigurationManager:
    def __init__(
        self,
        config_file_path: Path = CONFIG_FILE_PATH,
        params_file_path: Path = PARAMS_FILE_PATH,    
        schema_file_path: Path = SCHEMA_FILE_PATH):
        
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_file_path)
        
        create_directories([self.config.artifacts_root])


    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir=Path(config.root_dir),
            data_path=Path(config.data_path)  # ‚úÖ Changed from transformed_data_dir
    )
        return data_transformation_config

In [98]:
import os
from mlProject import logger
from sklearn.model_selection import train_test_split as split_data
import pandas as pd 


In [99]:
class DataTransformation:
    def __init__(self, config: DataTransformationConfig):
        self.config = config

    def train_test_split(self):
        try:
            logger.info("Reading the data for transformation")
            data = pd.read_csv(self.config.data_path)
            
            logger.info(f"Data shape: {data.shape}")
            logger.info("Splitting the data into train and test sets")
            train_set, test_set = split_data(data, test_size=0.2, random_state=42)  # ‚úÖ Changed here
            
            logger.info(f"Train set shape: {train_set.shape}")
            logger.info(f"Test set shape: {test_set.shape}")
            
            train_file_path = os.path.join(self.config.root_dir, "train.csv")
            test_file_path = os.path.join(self.config.root_dir, "test.csv")
            
            logger.info(f"Saving train set to: {train_file_path}")
            train_set.to_csv(train_file_path, index=False)
            
            logger.info(f"Saving test set to: {test_file_path}")
            test_set.to_csv(test_file_path, index=False)
            
            logger.info("Train and test sets saved successfully")
            logger.info(f"Train size: {len(train_set)}, Test size: {len(test_set)}")
            
            print(f"‚úÖ Train set shape: {train_set.shape}")
            print(f"‚úÖ Test set shape: {test_set.shape}")
            
        except Exception as e:
            logger.error(f"Error during train-test split: {e}")
            raise e

In [100]:
import os
from pathlib import Path

# Search for config.yaml
for root, dirs, files in os.walk('.'):
    if 'config.yaml' in files:
        print(f"Found config.yaml at: {os.path.join(root, 'config.yaml')}")

Found config.yaml at: .\config\config.yaml


In [101]:
import os

# Change to the project directory
os.chdir(r'D:\Machine-learning-project-with-Mlflow')

print("‚úÖ Changed to:", os.getcwd())

# Verify config.yaml exists now
print("Config exists:", os.path.exists('config/config.yaml'))

‚úÖ Changed to: D:\Machine-learning-project-with-Mlflow
Config exists: True


In [102]:
from mlProject.utils.common import read_yaml
from pathlib import Path

config = read_yaml(Path("config/config.yaml"))
print("Sections in config:", list(config.keys()))
print("\nData transformation config:")
print(config.data_transformation)

[2025-12-26 23:34:24,868 : INFO : common : YAML file 'config\config.yaml' read successfully.]
Sections in config: ['artifacts_root', 'data_ingestion', 'data_validation', 'data_transformation']

Data transformation config:
{'root_dir': 'artifacts/data_transformation', 'data_path': 'artifacts/data_ingestion/winequality-red.csv'}


In [103]:
try:
    print("üöÄ Starting Data Transformation Pipeline...\n")
    
    config = ConfigurationManager()
    print("‚úÖ Configuration Manager initialized")
    
    data_transformation_config = config.get_data_transformation_config()
    print("‚úÖ Data Transformation config loaded\n")
    
    data_transformation = DataTransformation(config=data_transformation_config)
    print("‚úÖ Data Transformation object created\n")
    
    data_transformation.train_test_split()
    
    print("\nüéâ Data Transformation Pipeline completed successfully!")
    
except Exception as e:
    print(f"\n‚ùå Pipeline failed with error: {e}")
    raise e

üöÄ Starting Data Transformation Pipeline...

[2025-12-26 23:34:24,987 : INFO : common : YAML file 'config\config.yaml' read successfully.]
[2025-12-26 23:34:24,992 : INFO : common : YAML file 'params.yaml' read successfully.]
[2025-12-26 23:34:25,000 : INFO : common : YAML file 'config\schema.yaml' read successfully.]
[2025-12-26 23:34:25,004 : INFO : common : Created directory at: artifacts_root]
‚úÖ Configuration Manager initialized
[2025-12-26 23:34:25,007 : INFO : common : Created directory at: artifacts/data_transformation]
‚úÖ Data Transformation config loaded

‚úÖ Data Transformation object created

[2025-12-26 23:34:25,009 : INFO : 3099662609 : Reading the data for transformation]
[2025-12-26 23:34:25,182 : INFO : 3099662609 : Data shape: (1599, 12)]
[2025-12-26 23:34:25,184 : INFO : 3099662609 : Splitting the data into train and test sets]
[2025-12-26 23:34:25,433 : INFO : 3099662609 : Train set shape: (1279, 12)]
[2025-12-26 23:34:25,435 : INFO : 3099662609 : Test set shape