<a href="https://colab.research.google.com/github/djdunc/hercules/blob/main/data_processing/locations_single.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Single

In [None]:
import pandas as pd

def filter_patient_data(csv_file, patient_id):
    """
    Load a CSV file into a pandas DataFrame and keep only one patient's rows.

    Args:
        csv_file (str): Path to the CSV file.
        patient_id (str): The patient ID to filter by. It is compared for
            equality against the values of the 'Patient' column.

    Returns:
        pandas.DataFrame or str: On success, an independent copy of the rows
        whose 'Patient' value equals ``patient_id`` (an empty DataFrame when
        no row matches). On failure, a human-readable error message string:
        the file does not exist, the file cannot be parsed as CSV, any other
        exception raised while reading, or the 'Patient' column is missing.
        Callers must therefore check the type of the result before using it.
    """
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        return "Error: File not found."
    except pd.errors.ParserError:
        return "Error: Invalid CSV file format."
    except Exception as e:  # Any other read failure is reported, not raised
        return f"An error occurred while reading the file: {e}"

    if 'Patient' not in df.columns:
        return "Error: 'Patient' column not found in CSV."

    # Return a real copy rather than a slice of ``df``: callers add columns to
    # the result, and doing that on a slice triggers SettingWithCopyWarning.
    return df[df['Patient'] == patient_id].copy()




In [None]:
def is_location_within_rectangle(x, y, rect):
    """Check whether the point (x, y) lies inside an axis-aligned rectangle.

    The rectangle is given by two opposite corners. The corners may be listed
    in any order (top-left/bottom-right, bottom-left/top-right, ...); they are
    normalised before the comparison. Points on the border count as inside.

    Args:
        x: x-coordinate of the point.
        y: y-coordinate of the point.
        rect: A tuple or list (x1, y1, x2, y2) holding the coordinates of two
            opposite corners of the rectangle.

    Returns:
        bool: True if the point is within the rectangle, False otherwise.
    """
    x1, y1, x2, y2 = rect

    # Normalise so that (x1, y1) holds the lower bounds and (x2, y2) the upper
    # bounds. Explicit swaps (rather than min/max) keep every comparison False
    # when a coordinate is NaN.
    if x1 > x2:
        x1, x2 = x2, x1
    if y1 > y2:
        y1, y2 = y2, y1

    return bool(x1 <= x <= x2 and y1 <= y <= y2)


In [None]:
def process_data(csv_file_path, patient_id_to_filter, rectangles):
    """Print one patient's rows, labelled with the rectangles containing each location.

    The rows for the patient are loaded with ``filter_patient_data``. Each
    'Location' value (a string of the form "x,y", optionally wrapped in double
    quotes) is parsed into float 'xlocation' / 'ylocation' columns, and a
    'within_rectangle' column is added holding the comma-separated names of
    every rectangle that contains the point (an empty string when none does).
    The resulting table is printed.

    Args:
        csv_file_path: Path to CSV.
        patient_id_to_filter: Patient ID.
        rectangles: A dictionary where keys are rectangle names (strings) and
                    values are tuples/lists defining the rectangles (x1, y1, x2, y2).
                    Names are listed in the dictionary's iteration order.

    Returns:
        None in the normal case; load errors reported by
        ``filter_patient_data`` and the "no data" case are printed. If the
        'Location' column is missing or its values cannot be parsed into two
        floats, an error message string is returned instead of printed.
    """
    filtered_data = filter_patient_data(csv_file_path, patient_id_to_filter)

    # Guard clauses: handle every non-success outcome first.
    if isinstance(filtered_data, str):
        print(filtered_data)
        return None
    if not isinstance(filtered_data, pd.DataFrame):
        print("An unexpected error occurred.")
        return None
    if filtered_data.empty:
        print(f"No data found for Patient ID: {patient_id_to_filter}")
        return None

    # Work on an independent copy: adding columns to a filtered slice raises
    # SettingWithCopyWarning and may not behave as intended.
    data = filtered_data.copy()

    if 'Location' not in data.columns:
        return "Error: 'Location' column not found in CSV."

    try:
        coordinates = [
            tuple(map(float, loc.replace('"', '').split(',')))
            for loc in data['Location']
        ]
        # Fails with ValueError unless every entry has exactly two components.
        data[['xlocation', 'ylocation']] = coordinates
    except (ValueError, AttributeError):
        return "Error: Could not convert xlocation or ylocation to floats. Check data format."

    # One pass over the rows; join avoids the trailing-comma clean-up step.
    data['within_rectangle'] = [
        ",".join(
            rect_name
            for rect_name, rect_coords in rectangles.items()
            if is_location_within_rectangle(x, y, rect_coords)
        )
        for x, y in zip(data['xlocation'], data['ylocation'])
    ]

    print(data.to_string())
    return None



In [None]:
# Named regions to test each location against. Every value is an
# (x1, y1, x2, y2) tuple; in all entries below x1 < x2 and y1 > y2.
rectangles = {
    "WC":         (24.95,  3.33, 27.67, 1.66),
    "Research4":  ( 8.78, 11.28, 12.08, 9.41),
    "Glaucoma 3": (13.66, 11.88, 21.87, 9.18),
    "AF_RHS":     ( 4.56,  6.00,  6.80, 3.67),
}

# Print the rows of patient G0561 from P1_input.csv with their matching regions.
process_data(
    csv_file_path="P1_input.csv",
    patient_id_to_filter="G0561",
    rectangles=rectangles,
)

      Patient       Location            starttime              endtime  xlocation  ylocation      step_length within_rectangle
65950   G0561   10.697,9.221  2021-10-18 16:01:41  2021-10-18 16:11:37     10.697      9.221  0 days 00:09:56                 
65951   G0561    9.924,9.876  2021-10-18 16:01:26  2021-10-18 16:01:41      9.924      9.876  0 days 00:00:15        Research4
65954   G0561   10.211,8.836  2021-10-18 16:01:03  2021-10-18 16:01:26     10.211      8.836  0 days 00:00:23                 
65955   G0561    9.327,9.652  2021-10-18 16:01:01  2021-10-18 16:01:03      9.327      9.652  0 days 00:00:02        Research4
65956   G0561     9.695,8.91  2021-10-18 16:00:57  2021-10-18 16:01:01      9.695      8.910  0 days 00:00:04                 
65958   G0561    10.617,9.42  2021-10-18 16:00:48  2021-10-18 16:00:57     10.617      9.420  0 days 00:00:09        Research4
65960   G0561   12.537,8.669  2021-10-18 16:00:38  2021-10-18 16:00:48     12.537      8.669  0 days 00:00:10  