In [5]:

# Cell 1: Import and setup
import os
import sys
from pathlib import Path
import importlib
import json

# Make the project package importable from this notebook.
# The root is taken to be the parent of the current working directory.
# NOTE(review): this assumes the notebook is launched from a folder directly
# below the repository root - confirm if the notebook is moved.
project_root = Path.cwd().parent

# Guard the append so re-running this cell does not add duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [6]:
# Cell 2: Force reload modules
# Import each project module, then reload it so that edits saved to disk after
# the kernel started are picked up without restarting the kernel.
config_module = importlib.import_module(
    "AerialObjectDetectionAndClassification.configuration.config"
)
importlib.reload(config_module)

data_validation_module = importlib.import_module(
    "AerialObjectDetectionAndClassification.components.data_validation"
)
# Left as the final bare expression so the cell still displays the reloaded module.
importlib.reload(data_validation_module)

<module 'AerialObjectDetectionAndClassification.components.data_validation' from 'c:\\Users\\Venks\\Desktop\\Project\\MLOPS_Bootcamp\\Deep Learning\\Computer_Vision\\Aerial_Object_Classification_And_Detection\\AerialObjectDetectionAndClassification\\components\\data_validation.py'>

In [7]:
# Cell 3: Import and initialize with schema
from AerialObjectDetectionAndClassification.configuration.config import ConfigurationManager
from AerialObjectDetectionAndClassification.components.data_validation import DataValidation

In [8]:
# Build the configuration manager, then pull out the two objects the later
# cells consume: the data-validation settings and the dataset schema.
# NOTE(review): the recorded run of this cell printed "Created directory at: ..."
# messages, so these calls appear to create artifact directories as a side
# effect - confirm in ConfigurationManager.
config_manager = ConfigurationManager()
data_validation_config = config_manager.get_data_validation_config()
schema_config = config_manager.get_schema_config()

Created directory at: artifacts
Created directory at: artifacts\data_validation


In [9]:
# Summarise what the loaded schema requires of each dataset.
print("=== SCHEMA CONFIGURATION ===")

print("Classification Requirements:")
classification_fields = (
    ("Splits", "required_splits"),
    ("Classes", "required_classes"),
    ("Image Extensions", "image_extensions"),
)
for field_label, field_name in classification_fields:
    # Attributes are read one at a time, right before each line is printed.
    print(f"  {field_label}: {getattr(schema_config.CLASSIFICATION_DATA, field_name)}")

print("\nDetection Requirements:")
detection_fields = (
    ("Directories", "required_directories"),
    ("Files", "required_files"),
    ("Expected Classes", "expected_classes"),
)
for field_label, field_name in detection_fields:
    print(f"  {field_label}: {getattr(schema_config.DETECTION_DATA, field_name)}")

=== SCHEMA CONFIGURATION ===
Classification Requirements:
  Splits: ['train', 'valid', 'test']
  Classes: ['bird', 'drone']
  Image Extensions: ['.jpg', '.jpeg', '.png', '.bmp']

Detection Requirements:
  Directories: ['train/images', 'train/labels', 'valid/images', 'valid/labels', 'test/images', 'test/labels']
  Files: ['data.yaml']
  Expected Classes: ['Bird', 'drone']


In [10]:
# Cell 4: Run schema-based validation
# Build the validator from the configuration and schema loaded above, run it,
# and print the verdict. Any exception is reported (message plus full
# traceback) instead of stopping the notebook, so `data_validation` and
# `is_valid` may be left undefined when this cell fails.
try:
    data_validation = DataValidation(config=data_validation_config, schema=schema_config)
    print("\n✅ DataValidation initialized with schema!")

    # A truthy result is reported as VALID, anything else as INVALID.
    is_valid = data_validation.initiate_data_validation()
    print(f"Schema-based validation result: {'VALID' if is_valid else 'INVALID'}")

except Exception as e:
    print(f"❌ Error during schema-based validation: {e}")
    # Imported here because it is only needed on the failure path.
    import traceback
    traceback.print_exc()

[ 2025-11-26 09:27:54,799 ] AerialObjectDetectionAndClassification.logger - INFO - Starting schema-based data validation process...
[ 2025-11-26 09:27:54,799 ] AerialObjectDetectionAndClassification.logger - INFO - Validating classification dataset against schema...



✅ DataValidation initialized with schema!


[ 2025-11-26 09:27:55,071 ] AerialObjectDetectionAndClassification.logger - INFO - Validating detection dataset against schema...
[ 2025-11-26 09:27:55,549 ] AerialObjectDetectionAndClassification.logger - INFO - Schema-based data validation completed. Status: VALID
[ 2025-11-26 09:27:55,550 ] AerialObjectDetectionAndClassification.logger - INFO - Both datasets meet the schema requirements.


Schema-based validation result: VALID


In [11]:
# Cell 5: Display comprehensive schema-based results
def print_validation_report(summary):
    """Print a schema-based validation report to stdout.

    Args:
        summary: Mapping that is read with these keys:
            - 'is_valid': truthy when the overall validation passed.
            - 'classification_report': split name -> {'exists', 'classes'},
              where 'classes' maps class name ->
              {'exists', 'image_count', 'images_found'}.
            - 'detection_report': directory or file name -> {'exists', ...};
              directory entries also carry 'file_count', and the 'data.yaml'
              entry may carry 'classes_match' and 'actual_classes'.
            - 'schema_used': holds 'classification_requirements'
              ('splits', 'classes') and 'detection_requirements'
              ('directories', 'files', 'expected_classes').

    Raises:
        KeyError: when a plain-dict summary lacks one of the keys that is
            indexed directly (everything above except 'classes_match' and
            'actual_classes', which are optional).
    """
    print("\n" + "=" * 70)
    print("SCHEMA-BASED VALIDATION REPORT")
    print("=" * 70)

    print(f"\n📋 OVERALL STATUS: {'✅ VALID' if summary['is_valid'] else '❌ INVALID'}")

    print("\n🔍 CLASSIFICATION DATASET (vs Schema):")
    class_report = summary['classification_report']
    classification_schema = summary['schema_used']['classification_requirements']
    schema_splits = classification_schema['splits']
    classification_classes = classification_schema['classes']

    for split in schema_splits:
        if class_report[split]['exists']:
            print(f"  {split.upper()}: ✅ Exists")
            for class_name in classification_classes:
                class_info = class_report[split]['classes'][class_name]
                if class_info['exists']:
                    count = class_info['image_count']
                    # Warning mark when the entry exists but 'images_found' is falsy.
                    status = "✅" if class_info['images_found'] else "⚠️"
                    print(f"    {class_name}: {status} {count} images")
                else:
                    print(f"    {class_name}: ❌ Missing (required by schema)")
        else:
            print(f"  {split.upper()}: ❌ Missing (required by schema)")

    print("\n🎯 DETECTION DATASET (vs Schema):")
    det_report = summary['detection_report']
    detection_schema = summary['schema_used']['detection_requirements']
    schema_dirs = detection_schema['directories']
    schema_files = detection_schema['files']
    # Kept under its own name: these are the detection class labels, which are
    # a different list from the classification classes used above.
    detection_classes = detection_schema['expected_classes']

    for dir_path in schema_dirs:
        if det_report[dir_path]['exists']:
            count = det_report[dir_path]['file_count']
            print(f"  {dir_path}: ✅ {count} files")
        else:
            print(f"  {dir_path}: ❌ Missing (required by schema)")

    for file_name in schema_files:
        if det_report[file_name]['exists']:
            print(f"  {file_name}: ✅ Found")
            # Only data.yaml gets the extra class-list comparison line.
            if file_name == 'data.yaml':
                if det_report[file_name].get('classes_match'):
                    print(f"    Classes: ✅ {detection_classes}")
                else:
                    actual = det_report[file_name].get('actual_classes', [])
                    print(f"    Classes: ❌ Expected {detection_classes}, got {actual}")
        else:
            print(f"  {file_name}: ❌ Missing (required by schema)")


# At notebook top level globals() is the namespace the validation cell writes to.
if 'data_validation' in globals():
    try:
        # `summary` stays bound at notebook level so it can still be inspected afterwards.
        summary = data_validation.get_validation_summary()
        print_validation_report(summary)
    except Exception as e:
        print(f"Error getting validation summary: {e}")
else:
    # Say why there is no report instead of silently printing nothing.
    print("No validation report available: 'data_validation' is not defined "
          "(run the validation cell first).")


SCHEMA-BASED VALIDATION REPORT

📋 OVERALL STATUS: ✅ VALID

🔍 CLASSIFICATION DATASET (vs Schema):
  TRAIN: ✅ Exists
    bird: ✅ 1414 images
    drone: ✅ 1248 images
  VALID: ✅ Exists
    bird: ✅ 217 images
    drone: ✅ 225 images
  TEST: ✅ Exists
    bird: ✅ 121 images
    drone: ✅ 94 images

🎯 DETECTION DATASET (vs Schema):
  train/images: ✅ 2728 files
  train/labels: ✅ 2728 files
  valid/images: ✅ 448 files
  valid/labels: ✅ 448 files
  test/images: ✅ 224 files
  test/labels: ✅ 224 files
  data.yaml: ✅ Found
    Classes: ✅ ['Bird', 'drone']
