In [2]:
%pwd

'e:\\EndToEnd\\01_MLOps\\research'

In [3]:
import os
os.chdir('../')

In [4]:
%pwd

'e:\\EndToEnd\\01_MLOps'

## Entity

In [25]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataValidationConfig:
    root_dir: Path
    STATS_FILE: Path
    local_file_path: str
    all_schema: dict

## Configration

In [53]:
from mlops_wine_quality.constants import *
from mlops_wine_quality.utils.common import read_yaml, create_dirs

class ConfigManger:
    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

        create_dirs([self.config.artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        config = self.config.data_validation
        schema = self.schema.COLUMNS

        create_dirs([config.root_dir])
        
        return DataValidationConfig(
            root_dir= config.root_dir,
            STATS_FILE= config.STATS_FILE,
            local_file_path= config.local_file_path,
            all_schema= schema
            
        )

## Components

In [66]:
import os
import pandas as pd
from mlops_wine_quality import logger

class DataValidation:
    def __init__(self, config: DataValidationConfig):
        self.config = config

    def validate(self) -> bool:
        try:
            validation_status = None
            
            data = pd.read_csv(self.config.local_file_path)
            logger.info(f"Data read from {self.config.local_file_path}")

            # print(data.info())

            data_clos = list(data.columns)
            schema_cols = self.config.all_schema.keys()
            
            for col in data_clos:
                if col not in schema_cols:
                    
                    validation_status = False
                
                else:
                    validation_status = True

                logger.info(f"validation completed with status: {validation_status}")
                
            return validation_status
            
        except Exception as e:

            raise e


## Pipeline

In [69]:
try:
    config = ConfigManger()
    data_validation_config = config.get_data_validation_config()
    data_validation = DataValidation(data_validation_config)
    data_validation.validate()
except Exception as e:
    raise e

[2024-01-20 01:57:20,871: INFO: common: Yaml file loaded successfully]
[2024-01-20 01:57:20,877: INFO: common: Yaml file loaded successfully]
[2024-01-20 01:57:20,879: INFO: common: Created directory at artifacts]
[2024-01-20 01:57:20,881: INFO: common: Created directory at artifacts/data_validation]
[2024-01-20 01:57:20,891: INFO: 695780255: Data read from artifacts/data_ingestion/winequality-red.csv]
[2024-01-20 01:57:20,892: INFO: 695780255: validation completed with status: True]
[2024-01-20 01:57:20,894: INFO: 695780255: validation completed with status: True]
[2024-01-20 01:57:20,896: INFO: 695780255: validation completed with status: True]
[2024-01-20 01:57:20,897: INFO: 695780255: validation completed with status: True]
[2024-01-20 01:57:20,899: INFO: 695780255: validation completed with status: True]
[2024-01-20 01:57:20,899: INFO: 695780255: validation completed with status: True]
[2024-01-20 01:57:20,899: INFO: 695780255: validation completed with status: True]
[2024-01-20 0