In [1]:
import os

In [2]:
%pwd

'd:\\Data Science\\Git-Upload-Projects\\End_to_End_winequality\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\Data Science\\Git-Upload-Projects\\End_to_End_winequality'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    root_dir: Path
    data_path: Path

In [6]:
from winequality.constants import *
from winequality.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
     
    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH,
            schemas_filepath = SCHEMA_FILE_PATH):
        
            self.config = read_yaml(config_filepath)
            self.params = read_yaml(params_filepath)
            self.schema = read_yaml(schemas_filepath)

            create_directories([self.config.dataStore_root])
        

    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation
        
        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir=config.root_dir,
            data_path=config.data_path,
           
        )

        return data_transformation_config

In [8]:
import os
from winequality.logging import logger
from sklearn.model_selection import train_test_split

import pandas as pd

In [9]:
class DataTransformation:
    def __init__(self, config: DataTransformationConfig):
        self.config = config

    def train_test_split_ratio(self):
        df = pd.read_csv(self.config.data_path)

        train, test = train_test_split(df)

        train.to_csv(os.path.join(self.config.root_dir, "train.csv"), index = False)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"), index = False)

        logger.info("Split data into train and test")
        logger.info(train.shape)
        logger.info(test.shape)
        logger.info(train.head)

In [10]:
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.train_test_split_ratio()
except Exception as e:
    raise e

[2024-04-10 00:07:15,303: INFO: common: YAML file loaded successfully: config\config.yaml]
[2024-04-10 00:07:15,306: INFO: common: YAML file loaded successfully: params.yaml]
[2024-04-10 00:07:15,310: INFO: common: YAML file loaded successfully: schema.yaml]
[2024-04-10 00:07:15,312: INFO: common: Created directory at: dataStore]
[2024-04-10 00:07:15,314: INFO: common: Created directory at: dataStore/data_transformation]


[2024-04-10 00:07:15,355: INFO: 3395035069: Split data into train and test]
[2024-04-10 00:07:15,358: INFO: 3395035069: (1199, 12)]
[2024-04-10 00:07:15,358: INFO: 3395035069: (400, 12)]
[2024-04-10 00:07:15,359: INFO: 3395035069: <bound method NDFrame.head of       fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
339            12.5             0.280         0.54             2.3      0.082   
424             7.7             0.960         0.20             2.0      0.047   
1083            8.7             0.420         0.45             2.4      0.072   
1523            6.8             0.480         0.25             2.0      0.076   
216             8.7             0.625         0.16             2.0      0.101   
...             ...               ...          ...             ...        ...   
1546            7.0             0.570         0.02             2.0      0.072   
1076            9.9             0.320         0.56             2.0      0.073   
437       