## Advanced Consistency Check with Hierarchical Data

**Description**: You have two datasets, `orders.csv` and `order_items.csv`. Perform a consistency check to ensure each order in `orders.csv` has corresponding items in `order_items.csv`.

In [3]:
import pandas as pd
import numpy as np  # Import numpy

def validate_order_consistency(orders_file, order_items_file, order_id_col="order_id"):
    """
    Report the orders that have no matching rows in the order-items data.

    Both inputs are loaded with ``pd.read_csv``. An order counts as "missing
    items" when its ID never appears in the ID column of the order-items data.

    Args:
        orders_file (str): Location of the CSV holding one row per order.
        order_items_file (str): Location of the CSV holding the order items.
        order_id_col (str, optional): Column that carries the order ID in both
            files. Defaults to "order_id".

    Returns:
        pandas.DataFrame: The rows of the orders data whose ID has no match in
                          the order-items data (empty when every order is
                          matched). None is returned, after printing a message,
                          when a file cannot be read or the ID column is absent
                          from either file.
    """
    # Load both inputs; any read failure is reported and turned into None.
    try:
        orders = pd.read_csv(orders_file)
        items = pd.read_csv(order_items_file)
    except FileNotFoundError as err:
        print(f"Error: {err}")
        return None
    except Exception as err:
        print(f"Error reading CSV file: {err}")
        return None

    # The ID column must be present on both sides; orders are checked first.
    column_checks = (
        (orders, f"Error: Order ID column '{order_id_col}' not found in orders file."),
        (items, f"Error: Order ID column '{order_id_col}' not found in order items file."),
    )
    for frame, message in column_checks:
        if order_id_col not in frame.columns:
            print(message)
            return None

    # Distinct IDs on each side
    known_ids = orders[order_id_col].unique()
    ids_with_items = items[order_id_col].unique()

    # IDs that appear among the orders but never among the order items
    lacks_items = np.isin(known_ids, ids_with_items, invert=True)
    unmatched_ids = known_ids[lacks_items]

    # Return the full order rows for those IDs
    return orders.loc[orders[order_id_col].isin(unmatched_ids)]



def main():
    """
    Run the order consistency demo end to end and print the outcome.

    Seeds two small demonstration CSV files in the current working directory
    (only when they do not already exist), runs validate_order_consistency on
    them, and prints either the orders that lack items, a confirmation that
    every order has items, or an error notice when validation returned None.
    """
    # Provide the paths to your CSV files
    orders_file = 'orders.csv'  # Replace with your actual file path
    order_items_file = 'order_items.csv'  # Replace with your actual file path

    # Demonstration data; order 4 deliberately has no rows in the items file.
    demo_files = (
        (orders_file, "order_id,customer_id,order_date\n1,101,2024-01-10\n2,102,2024-01-15\n3,103,2024-01-20\n4,104,2024-02-01"),
        (order_items_file, "order_id,item_id,product_id,quantity\n1,1001,P1,2\n1,1002,P2,1\n2,2001,P3,3\n2,2002,P4,1\n3,3001,P5,1"),
    )
    for path, content in demo_files:
        try:
            # Mode 'x' creates the file only when it is absent, so pointing the
            # paths above at real data never overwrites it. Mode 'w' would
            # truncate an existing file and can never raise FileExistsError.
            with open(path, 'x') as f:
                f.write(content)
        except FileExistsError:
            # The file is already present: leave it untouched and validate it as-is.
            pass

    # Validate order consistency
    missing_items_df = validate_order_consistency(orders_file, order_items_file)

    # Print the results
    if missing_items_df is None:
        print("Error occurred during order consistency validation. Please check the file paths and column names.")
    elif missing_items_df.empty:
        print("All orders have corresponding items.")
    else:
        print("Orders with missing items:")
        print(missing_items_df.to_string(index=False))



# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Orders with missing items:
 order_id  customer_id order_date
        4          104 2024-02-01
