<a href="https://colab.research.google.com/github/eeolga/article/blob/main/Logs_cleaning_26012025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

def _as_list(values):
    """Return *values* as a list, treating a lone string as a single item."""
    # Iterating a bare string would yield its characters, which is never
    # what a caller passing "TEACHER" or "Time" intends.
    if isinstance(values, str):
        return [values]
    return list(values)


def _drop_matching_rows(df, values):
    """Return *df* without the rows in which any cell contains any of *values*.

    Matching is a case-insensitive, literal substring test on the string form
    of each cell. Missing cells (NaN/None) never match.
    """
    needles = [str(value) for value in values]
    hit = pd.Series(False, index=df.index)

    # Walk the columns positionally so duplicate column labels are handled,
    # and stringify each column once rather than once per search value.
    for _, column in df.items():
        present = column.notna()
        text = column.astype(str)
        for needle in needles:
            # regex=False: the values are plain text, so "a.b" must not match
            # "axb" and "C++" must not raise a regex error.
            found = text.str.contains(needle, case=False, regex=False, na=False)
            # "present" stops a missing cell, stringified as "nan", from
            # matching values such as "an".
            hit = hit | (present & found.astype(bool))

    return df[~hit]


def clean_excel(file_path, output_path, drop_rows_containing=None, keep_columns=None):
    """
    Cleans an Excel file by removing rows containing specific data and keeping only specified columns.

    Parameters:
    - file_path (str): Path to the input Excel file.
    - output_path (str): Path to save the cleaned Excel file.
    - drop_rows_containing (list, optional): Values to search for; a row is removed when any of its
      cells contains one of them. Matching is case-insensitive and literal (not a regular
      expression); values are converted with str() and empty cells never match. A single string
      is treated as one value.
    - keep_columns (list, optional): List of column names to keep; all others will be removed.
      A single string is treated as one column name.

    Returns:
    - None: Saves the cleaned Excel file to the specified output path.

    Errors (unreadable input, unknown column names, write failures) are not raised to the
    caller; they are reported with a printed "An error occurred: ..." message and no
    success message is printed.
    """
    try:
        # Load the Excel file
        df = pd.read_excel(file_path)

        # Drop rows containing specific values
        if drop_rows_containing:
            df = _drop_matching_rows(df, _as_list(drop_rows_containing))

        # Keep only specified columns
        if keep_columns:
            wanted = _as_list(keep_columns)
            missing = [name for name in wanted if name not in df.columns]
            if missing:
                # Name the offending columns instead of surfacing a bare KeyError.
                raise ValueError(f"Columns not found in {file_path}: {missing}")
            df = df[wanted]

        # Save the cleaned DataFrame to a new Excel file
        df.to_excel(output_path, index=False)
        print(f"Cleaned Excel file saved to: {output_path}")

    except Exception as e:
        # Deliberate best-effort boundary: report the failure, do not raise.
        print(f"An error occurred: {e}")

# Demo run: executes only when this file is run as a script
if __name__ == "__main__":
    # Text markers identifying the rows to discard (replace with your own)
    rows_to_remove = ["TEACHER", "VISITOR", "EVALUATOR"]

    # Columns that should survive in the output (replace with your own)
    columns_to_keep = ["Time", "User_full_name", "Event_context", "Description"]

    # Where to read the raw workbook from and where to write the result
    input_file = "input.xlsx"
    output_file = "cleaned_file.xlsx"

    # Run the cleaning step
    clean_excel(
        input_file,
        output_file,
        drop_rows_containing=rows_to_remove,
        keep_columns=columns_to_keep,
    )

Cleaned Excel file saved to: cleaned_file.xlsx
