In [1]:
import os

In [2]:
%pwd

'c:\\Users\\fifty\\OneDrive\\Desktop\\AI - Data Science\\Mlops\\Malicious_QR_Code_Detection\\research'

In [3]:
# Move up from the notebook folder to the project root; later cells import
# `src.…` packages, which need the project root as the working directory.
# Guarded on the folder name so that re-running this cell does not climb one
# more directory level each time.
# NOTE(review): assumes the notebook lives in a folder named 'research' —
# adjust the name if the notebook is moved.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
%pwd

'c:\\Users\\fifty\\OneDrive\\Desktop\\AI - Data Science\\Mlops\\Malicious_QR_Code_Detection'

In [13]:
from pathlib import Path
from dataclasses import dataclass
import pandas as pd
import sys

In [28]:
@dataclass
class DataValidationConfig:
    """Settings consumed by the data-validation stage.

    Attributes:
        root_dir: Directory created for this stage before validation runs.
        status_file: Path of the text file the validation status is written to.
        train_data: Path of the training CSV that is read for validation.
        test_data: Path of the test CSV that is read for validation.
        schema: Parsed schema; its 'columns' mapping holds the expected
            column names as keys.
    """
    root_dir: Path
    status_file: str
    train_data: str
    test_data: str
    schema: dict

In [6]:
from src.MaliciousQRCodeDetection.constants import *
from src.MaliciousQRCodeDetection.utils.common import read_yaml, create_directories

In [29]:
class ConfigurationManager:
    """Reads the configuration and schema YAML files and builds stage configs."""

    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            schema_filepath = SCHEMA_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Create the stage directory and return the data-validation settings.

        Returns:
            A DataValidationConfig populated from the `data_validation`
            section of the loaded configuration plus the loaded schema.
        """
        section = self.config.data_validation

        # The stage directory must exist before anything is written into it.
        create_directories([section.root_dir])

        return DataValidationConfig(
            root_dir=section.root_dir,
            status_file=section.status_file,
            schema=self.schema,
            train_data=section.train_data,
            test_data=section.test_data,
        )

In [9]:
from src.MaliciousQRCodeDetection.logging.logger import logger
from src.MaliciousQRCodeDetection.exception import MaliciousQRException

In [30]:
class DataValidation:
    """Checks the column names of the train/test CSVs against the schema."""

    def __init__(self,config: DataValidationConfig):
        self.config = config

    def validate_all_columns(self) -> bool:
        """Verify that every column in the train and test CSVs is in the schema.

        Reads both CSV files, collects their column names and compares them
        with the keys of ``schema['columns']``. The outcome is written to the
        configured status file as ``Validation Status: <bool>``.

        Returns:
            True when all data columns are declared in the schema, False when
            at least one column is not.

        Raises:
            MaliciousQRException: Wraps any exception raised while reading
                the data, consulting the schema or writing the status file.
        """
        try:
            train_data = pd.read_csv(self.config.train_data)
            test_data = pd.read_csv(self.config.test_data)

            # Ordered, de-duplicated union of both headers; only the column
            # names matter here, so the frames themselves are not concatenated.
            all_cols = list(dict.fromkeys([*train_data.columns, *test_data.columns]))
            schema_cols = set(self.config.schema['columns'].keys())

            unexpected_cols = [col for col in all_cols if col not in schema_cols]

            # The status is decided once, after all columns are inspected, so a
            # failure can no longer be overwritten by a later "success" write.
            validation_status = not unexpected_cols

            with open(self.config.status_file, 'w') as f:
                f.write(f"Validation Status: {validation_status}")

            if validation_status:
                logger.info('Data validation successfully done..')
            else:
                logger.warning(
                    f'Data validation failed; columns not in schema: {unexpected_cols}'
                )
            return validation_status
        except Exception as e:
            raise MaliciousQRException(e,sys) from e

In [31]:
# Run the data-validation stage end to end: load the configuration, build the
# stage settings, then check the CSV columns against the schema.
try:
    config = ConfigurationManager()
    data_validation_config = config.get_data_validation_config()
    data_validation = DataValidation(data_validation_config)
    # The returned boolean is not captured here; the method also writes the
    # status to the configured status file.
    data_validation.validate_all_columns()
except Exception as e:
    # Surface any failure as the project's own exception type.
    raise MaliciousQRException(e,sys)

[ 2024-12-26 13:50:05,802] 17 root - INFO - yaml file: config\config.yaml loaded successfully
[ 2024-12-26 13:50:05,805] 17 root - INFO - yaml file: schema.yaml loaded successfully
[ 2024-12-26 13:50:05,806] 31 root - INFO - Created directory at: artifacts
[ 2024-12-26 13:50:05,807] 31 root - INFO - Created directory at: artifacts/data_validation
[ 2024-12-26 13:50:08,981] 26 root - INFO - Data validation successfully done..
