In [None]:
# =============================================================================
# HR Attrition Analysis Pipeline - Main Runner Script
# =============================================================================

import os
import sys
from pathlib import Path

# Make the project packages importable: take the folder that contains this
# script, step one level up, and append that directory to the module search
# path so the `data`, `analysis`, `feature` and `modelling` imports below resolve.
script_dir = Path(__file__).resolve().parent
src_dir = script_dir.parent
sys.path.append(os.fspath(src_dir))

# Import the necessary modules for the pipeline
from data.hr_data_pipeline import HRDataProcessor
from analysis.data_analysis import AdvancedEDA
from feature.feature_engg import FeatureEngineer
from modelling.model_training import ModelTrainer

def run_full_pipeline():
    """Run the HR attrition pipeline end to end.

    Stages, in order: data processing (``HRDataProcessor``), exploratory
    analysis (``AdvancedEDA``), feature engineering (``FeatureEngineer``) and
    model training/comparison (``ModelTrainer``). Progress messages are
    printed to stdout.

    Any ``Exception`` raised by a stage is caught here rather than
    propagated: a one-line summary is printed to stdout and the full
    traceback is written to stderr so the failing stage can be located.

    Returns:
        bool: ``True`` when every stage completed. ``False`` when the data
        processing stage returned ``None`` or an empty DataFrame, or when any
        stage raised an exception.
    """
    try:
        # Step 1: Run the complete data processing pipeline
        print("Starting HR Attrition Data Processing Pipeline...")
        data_processor = HRDataProcessor()
        actual_data = data_processor.run_complete_pipeline()

        # Nothing downstream can work without data, so stop early.
        if actual_data is None or actual_data.empty:
            print("Error: The data processing pipeline returned an empty or invalid DataFrame.")
            return False

        print("\nData processing complete. Starting EDA...")

        # Step 2: Perform advanced EDA on the processed data
        eda = AdvancedEDA(df=actual_data)
        eda.run_full_eda()

        print("\nEDA complete. Starting Feature Engineering...")

        # Step 3: Run the complete feature engineering pipeline
        fe = FeatureEngineer(actual_data)
        pipeline_results = fe.run_all_steps()

        processed_data_with_target = pipeline_results["processed_data"]
        selected_features = pipeline_results["selected_features"]

        print("\nFeature Engineering complete. Starting Model Training...")

        # Step 4: Run the complete model training and comparison pipeline
        trainer = ModelTrainer(
            data=processed_data_with_target,
            features=selected_features,
        )
        trainer.compare_models()
        trainer.print_comparison_table()

        print("\nPipeline execution finished successfully.")
        return True

    except Exception as e:
        # Top-level boundary: report the failure instead of crashing, but keep
        # the traceback -- the message alone does not say which stage failed.
        import traceback  # local import: only needed on the failure path

        print(f"An error occurred during the pipeline execution: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Surface failure to the invoking shell/scheduler through the exit status.
    sys.exit(0 if run_full_pipeline() else 1)
