In [1]:
import pandas as pd

def clean_dataset(input_filepath, output_filepath):
    """
    Reads a dataset, identifies columns without empty cells,
    removes rows with any empty cells, and saves the cleaned data.

    Args:
        input_filepath (str): The path to the input CSV file.
        output_filepath (str): The path where the cleaned CSV file will be saved.
    """
    try:
        # Read the dataset from the specified CSV file
        # The separator has been changed to a comma (',') to handle CSV files.
        print(f"Reading data from '{input_filepath}'...")
        df = pd.read_csv(input_filepath, sep=',')

        # Get the original number of rows for comparison
        original_rows = len(df)
        print(f"Original dataset contains {original_rows} rows.")

        # --- New: Find and report columns with no empty cells ---
        # A list comprehension checks each column for any null values.
        columns_without_empty_cells = [col for col in df.columns if not df[col].isnull().any()]

        if columns_without_empty_cells:
            print("\nColumns with NO empty cells found:")
            for col in columns_without_empty_cells:
                print(f"- {col}")
        else:
            print("\nAll columns have at least one empty cell.")
        print("-" * 30) # Add a separator for better readability
        # --- End of new section ---

        # Drop rows where at least one element is missing.
        # The .dropna() method handles this automatically.
        cleaned_df = df.dropna()

        # Get the new number of rows
        cleaned_rows = len(cleaned_df)
        rows_removed = original_rows - cleaned_rows
        print(f"Removed {rows_removed} rows with empty cells.")
        print(f"Cleaned dataset now contains {cleaned_rows} rows.")

        # Save the cleaned dataframe to a new CSV file
        # The separator is also set to a comma for the output file.
        cleaned_df.to_csv(output_filepath, index=False, sep=',')
        print(f"Successfully saved the cleaned data to '{output_filepath}'.")

    except FileNotFoundError:
        print(f"Error: The file '{input_filepath}' was not found. Please make sure it's in the correct directory.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

if __name__ == "__main__":
    # --- Configuration ---
    # Define the name of your input data file.
    # Updated to match the filename from the error message.
    input_file = "..//Dam Incident Database Search  Association of State Dam Safety.csv"

    # Define the name for the output file that will contain the cleaned data.
    output_file = "cleaned_dam_data.csv"
    # --- End Configuration ---

    # Run the cleaning function
    clean_dataset(input_file, output_file)




Reading data from '..//Dam Incident Database Search  Association of State Dam Safety.csv'...
Original dataset contains 1344 rows.

Columns with NO empty cells found:
- Dam Namex
- StateSelect valueAKARAZCACOCTDEFLGAHIIAIDILINKYLAMAMDMEMIMNMOMSMTNCNDNENHNJNMNVNYOHOKORPARISCSDTNTXUTVAVTVT00000WAWIWVWYx
- Incident Datex
- Incident DriverSelect valueDeterioration or Poor ConditionHydrologic /FloodingHydrologic/FloodingMalfunction of Equipment/GateManmade ActionOtherSeepage/Internal ErosionSeismicStructural StabilityUnknownx
- Incident ID
- Surface Area (acres)
- NID Number
- Latitude
- Longitude
------------------------------
Removed 1344 rows with empty cells.
Cleaned dataset now contains 0 rows.
Successfully saved the cleaned data to 'cleaned_dam_data.csv'.
