## Advanced Consistency Check with Hierarchical Data

**Description**: You have two datasets, `orders.csv` and `order_items.csv`. Perform a consistency check to ensure each order in `orders.csv` has corresponding items in `order_items.csv`.

In [2]:
# Write your code from here
import pandas as pd
import os

# Function to load a CSV file with error handling
def load_csv(file_path, required_columns=('order_id',)):
    """Load a CSV file into a DataFrame, reporting failures on stdout.

    Args:
        file_path: Path of the CSV file to read.
        required_columns: Column names that must all be present in the
            loaded file. Defaults to ``('order_id',)``.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` if the file does not
        exist, is empty, lacks a required column, or cannot be read for
        any other reason. An error message is printed in every failure
        case; no exception propagates to the caller.
    """
    try:
        # Check if file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File '{file_path}' not found.")

        # Load the CSV file into a DataFrame
        df = pd.read_csv(file_path)

        # Check that every required column is present
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"The CSV file '{file_path}' is missing required columns.")

        return df

    except FileNotFoundError as e:
        print(f"Error: {e}")
        return None
    except pd.errors.EmptyDataError:
        # This handler must precede ``except ValueError``: EmptyDataError is
        # a ValueError subclass, so the broader clause would otherwise catch
        # it first and this message would never be printed.
        print(f"Error: The file '{file_path}' is empty.")
        return None
    except ValueError as e:
        print(f"Error: {e}")
        return None
    except Exception as e:
        # Last-resort boundary: report anything else (permissions, parse
        # failures, ...) instead of crashing the whole check.
        print(f"An unexpected error occurred while reading '{file_path}': {e}")
        return None

# Function to perform the consistency check
def check_consistency(orders_df, order_items_df, output_path='missing_order_items.csv'):
    """Report orders that have no matching rows in the order-items data.

    Both frames are compared on ``order_id`` after converting that column
    to ``str``. The conversion is done on copies, so the DataFrames passed
    in by the caller are left unmodified.

    Args:
        orders_df: DataFrame of orders with an ``order_id`` column, or
            ``None`` if loading failed.
        order_items_df: DataFrame of order items with an ``order_id``
            column, or ``None`` if loading failed.
        output_path: Where to write the unmatched rows as CSV. Pass
            ``None`` to skip writing. Defaults to
            ``'missing_order_items.csv'``.

    Returns:
        None. Results are printed, and the unmatched rows are written to
        ``output_path`` unless it is ``None``.
    """
    if orders_df is None or order_items_df is None:
        print("One or both datasets failed to load, exiting the check.")
        return

    # Normalise order_id to str so e.g. int and str ids compare equal.
    # assign() returns new frames, so the caller's data keeps its dtype.
    orders = orders_df.assign(order_id=orders_df['order_id'].astype(str))
    order_items = order_items_df.assign(order_id=order_items_df['order_id'].astype(str))

    # Left merge keeps every order; the indicator column marks those
    # that found no partner row in order_items.
    merged_df = pd.merge(orders, order_items, on='order_id', how='left', indicator=True)

    # Identify orders that have no corresponding items
    missing_items = merged_df[merged_df['_merge'] == 'left_only']

    # Display the missing order IDs
    if not missing_items.empty:
        print(f"Orders missing corresponding items: {missing_items['order_id'].tolist()}")
    else:
        print("All orders have corresponding items in order_items.csv.")

    # Persist the unmatched rows unless the caller opted out
    if output_path is not None:
        missing_items.to_csv(output_path, index=False)

# Main function to load data and check consistency
def main():
    """Load both CSV files and run the order/item consistency check.

    Reads 'orders.csv' and then 'order_items.csv' with load_csv, and
    passes the two results to check_consistency.
    """
    # Orders first, then items -- the same order the messages appear in.
    source_files = ('orders.csv', 'order_items.csv')
    orders_df, order_items_df = [load_csv(path) for path in source_files]

    check_consistency(orders_df, order_items_df)


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Error: File 'orders.csv' not found.
Error: File 'order_items.csv' not found.
One or both datasets failed to load, exiting the check.
