<a href="https://colab.research.google.com/github/djdunc/hercules/blob/main/data_processing/locations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Takes patient journey data and adds in machine location information based on a set of rectangles describing where machines are located.

In [1]:
import pandas as pd

def filter_patient_data(csv_file, patient_id):
    """
    Loads a CSV file into a pandas DataFrame and filters it by patient ID.

    Args:
        csv_file (str): Path to the CSV file.
        patient_id (str): The patient ID to filter by.

    Returns:
        pandas.DataFrame: A DataFrame containing only the data for the specified
                         patient ID, or an empty DataFrame if the file doesn't exist
                         or the patient ID is not found. Returns an error message if the file is not a valid CSV.
    """
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        return "Error: File not found."
    except pd.errors.ParserError:
        return "Error: Invalid CSV file format."
    except Exception as e: # Catch other potential pandas exceptions
        return f"An error occurred while reading the file: {e}"


    if 'Patient' not in df.columns:
        return "Error: 'Patient' column not found in CSV."

    filtered_df = df[df['Patient'] == patient_id]
    return filtered_df




In [2]:
def is_location_within_rectangle(x, y, rect):
    """Checks if (x, y) is within a given rectangle.

    Args:
        x: x-coordinate.
        y: y-coordinate.
        rect: A tuple or list defining the rectangle (x1, y1, x2, y2).

    Returns:
        bool: True if within, False otherwise.
    """
    x1, y1, x2, y2 = rect
    return x1 <= x <= x2 and y2 <= y <= y1  # Corrected y-coordinate comparison


In [3]:
rectangles1 = {
   "Reception": (0, 14.9, 8.38, 6.31),
    "WC": (24.95, 3.33, 27.67, 1.66),
    "MR 1": (25.95, 12.77, 28.24, 9.30),
    "MR 2": (23.51, 12.77, 25.80, 9.30),
    "MR 3": (17.28, 15.93, 20.95, 13.63),
    "MR 4": (13.46, 15.96, 17.13, 13.63),
    "AF LHS": (2.67, 6.08, 4.76, 3.87),
    "AF RHS": (4.94, 6.06, 7.03, 3.87),
    "HFA 1": (0.00, 3.96, 2.30, 2.55),
    "HFA 2": (0.14, 2.32, 1.66, 0.14),
    "HFA 3": (2.38, 2.41, 3.87, 0.17),
    "HFA 4": (4.10, 2.41, 5.60, 0.14),
    "HFA 5": (5.85, 2.41, 7.32, 0.14),
    "HFA 6": (7.61, 2.41, 9.10, 0.17),
    "Vision 1": (23.76, 7.43, 25.57, 3.67),
    "Vision 2": (25.74, 7.40, 27.52, 3.76),
    "Vision 3": (30.28, 7.38, 34.01, 5.63),
    "Vision 4": (30.28, 9.24, 34.01, 7.58),
    "Vision 5": (30.28, 11.08, 34.04, 9.44),
    "Vision 6": (30.25, 13.00, 34.01, 11.31),
    "Vision 7": (30.28, 14.92, 34.01, 13.14),
    "MR Dilation 1": (29.79, 16.02, 30.94, 15.04),
    "MR Dilation 2": (31.51, 16.02, 32.63, 15.07),
    "MR Dilation 3": (33.09, 15.99, 34.15, 15.10),
    "MR Dilation 4": (33.01, 17.77, 34.10, 16.68),
    "MR Dilation 5": (31.05, 17.77, 32.14, 16.68),
    "MR Dilation 6": (29.19, 17.77, 30.28, 16.62),
    "MR Dilation 7": (27.52, 17.71, 28.59, 16.68),
    "Research 1": (8.73, 5.74, 12.43, 3.82),
    "Research 2": (8.67, 8.44, 12.28, 6.63),
    "Research 3": (8.67, 10.45, 12.31, 8.64),
    "Research 4": (8.67, 12.46, 12.34, 10.62),
    "Research 5": (8.64, 14.49, 12.34, 12.63),
    "Research 6": (12.74, 18.20, 16.50, 16.36),
    "Glaucoma 1 ORA": (17.88, 6.40, 21.58, 3.93),
    "Glaucoma 1 OCT": (15.64, 6.40, 17.77, 3.93),
    "Glaucoma 1 WF": (13.60, 6.37, 15.50, 3.93),
    "Glaucoma 2 ORA": (17.88, 9.04, 21.61, 6.57),
    "Glaucoma 2 OCT": (15.64, 9.01, 17.77, 6.57),
    "Glaucoma 2 WF": (13.63, 9.07, 15.53, 6.60),
    "Glaucoma 3 ORA": (17.91, 11.62, 21.61, 9.21),
    "Glaucoma 3 OCT": (15.70, 11.65, 17.79, 9.24),
    "Glaucoma 3 WF": (13.63, 11.65, 15.53, 9.21)
    }

rectangles2 = {
    "Reception": (0.06, 15.35, 7.06, 8.21),
    "Waiting and WC": (20.69, 6.46, 27.67, 1.55),
    "AF LHS": (10.33, 5.94, 12.37, 3.76),
    "AF RHS": (10.27, 8.27, 12.31, 6.17),
    "Glaucoma 1 WF": (10.30, 9.87, 11.62, 8.50),
    "Glaucoma 2 WF": (10.33, 11.54, 11.62, 10.07),
    "Glaucoma 3 WF": (10.30, 13.17, 11.60, 11.71),
    "Glaucoma 4 WF": (9.93, 16.39, 11.25, 14.90),
    "Glaucoma 5 WF": (9.93, 18.11, 11.25, 16.62),
    "Glaucoma 6 WF": (12.69, 18.11, 14.15, 16.85),
    "Glaucoma 7 WF": (14.29, 18.14, 15.79, 16.82),
    "HFA 1": (15.35, 6.43, 17.25, 4.91),
    "HFA 2": (15.35, 4.71, 17.22, 3.27),
    "HFA 3": (15.38, 3.04, 17.25, 1.64),
    "HFA 4": (18.40, 3.04, 20.29, 1.61),
    "HFA 5": (18.40, 4.76, 20.26, 3.30),
    "HFA 6": (18.40, 6.40, 20.32, 4.91),
    "Vision 1": (30.48, 7.46, 34.30, 5.71),
    "Vision 2": (30.48, 9.30, 34.24, 7.69),
    "Vision 3": (30.48, 11.19, 34.24, 9.53),
    "Vision 4": (30.45, 13.06, 34.24, 11.39),
    "Vision 5": (30.45, 14.95, 34.24, 13.29),
    "Vision 6": (30.48, 16.85, 34.24, 15.15),
    "Vision 7": (6.11, 1.89, 9.87, 0.14),
    "MR Dilation 1": (26.03, 13.78, 28.24, 8.27),
    "Research 1": (2.50, 7.81, 4.39, 4.08),
    "Research 2": (4.76, 7.78, 6.63, 4.10),
    "Research 3": (8.29, 11.31, 8.29, 11.31),
    "Research 4": (8.29, 7.35, 10.13, 3.67),
    "Glaucoma 1 ORA": (23.65, 13.66, 25.92, 8.21),
    "Glaucoma 2 ORA": (21.47, 13.69, 23.53, 8.21),
    "Glaucoma 3 ORA": (19.11, 13.66, 21.21, 8.24),
    "Glaucoma 1 OCT": (13.23, 9.96, 15.35, 8.21),
    "Glaucoma 2 OCT": (13.26, 11.88, 15.38, 10.19),
    "Glaucoma 3 OCT": (13.23, 13.83, 15.38, 12.08),
    "Glaucoma 4 OCT": (18.63, 16.07, 20.75, 14.26),
    "Glaucoma 5 OCT": (16.85, 13.78, 19.03, 12.03),
    "Glaucoma 6 OCT": (16.93, 11.77, 19.00, 10.13),
    "Glaucoma 7 OCT": (16.93, 9.87, 19.00, 8.21)
}

rectangles3 = {
    "Reception": (0.17, 15.10, 7.29, 7.98),
    "WC": (24.95, 3.33, 27.67, 1.66),
    "MR 1": (0.11, 3.87, 2.47, 0.11),
    "MR 2": (4.71, 3.85, 6.49, 0.11),
    "MR 3": (6.63, 3.82, 8.09, 0.11),
    "MR 4": (8.27, 3.85, 9.70, 0.14),
    "HFA 1": (17.02, 8.06, 19.06, 6.11),
    "HFA 2": (17.05, 10.07, 19.03, 8.27),
    "HFA 3": (16.99, 12.26, 19.06, 10.39),
    "HFA 4": (19.20, 8.06, 21.21, 6.14),
    "HFA 5": (19.17, 10.16, 21.21, 8.32),
    "HFA 6": (19.20, 12.28, 21.24, 10.42),
    "WF 1": (31.54, 17.62, 33.75, 16.10),
    "WF 2": (31.57, 15.93, 33.78, 14.49),
    "WF 3": (31.57, 14.29, 33.75, 12.74),
    "WF 4": (31.57, 12.54, 33.78, 11.11),
    "WF 5": (31.60, 10.88, 33.75, 9.39),
    "WF 6": (31.54, 9.18, 33.78, 7.69),
    "WF 7": (31.57, 7.49, 33.78, 6.03),
    "Research 1": (2.96, 7.63, 6.06, 5.57),
    "Research 2": (2.67, 2.76, 4.56, 0.06),
    "Research 3": (17.65, 15.81, 20.69, 13.75),
    "Research 4": (12.92, 17.74, 15.99, 15.81),
    "Wait bottom": (14.64, 4.45, 22.44, 1.58),
    "Wait top left": (8.32, 14.84, 14.64, 13.63),
    "Wait top right": (24.74, 17.51, 31.28, 15.96),
    "Glaucoma 1": (8.15, 8.18, 14.49, 6.06),
    "Glaucoma 2": (8.06, 10.82, 14.52, 8.55),
    "Glaucoma 3": (8.12, 13.37, 14.49, 11.16),
    "Glaucoma 5 OCT": (27.95, 12.80, 30.48, 10.96),
    "Glaucoma 6 OCT": (25.37, 12.77, 27.81, 10.94),
    "Glaucoma 7 OCT": (22.73, 12.77, 25.23, 10.94),
    "MR 1 OCT": (22.76, 10.71, 25.26, 8.90),
    "MR 2 OCT": (25.37, 10.76, 27.81, 8.93),
    "MR 3 OCT": (27.98, 10.73, 30.45, 8.93),
    "MR 4 OCT": (24.91, 6.31, 26.84, 3.82)
 }

rectangles4 = {
    "Reception": (0.14, 15.18, 7.15, 9.56),
    "WC": (24.95, 3.33, 27.67, 1.66),
    "Wait 1": (2.84, 8.98, 6.03, 7.81),
    "Wait 2": (2.55, 5.37, 5.97, 4.25),
    "Wait Dilation 3": (14.64, 3.07, 21.96, 1.55),
    "Wait Dilation 4": (17.13, 5.48, 21.47, 4.62),
    "Wait 5": (24.88, 17.51, 30.11, 15.73),
    "Wait 6": (8.32, 18.05, 12.77, 15.99),
    "Glaucoma 1 ORA AF": (8.58, 14.18, 14.98, 11.94),
    "Glaucoma 1 ORA AF": (8.58, 11.65, 15.01, 9.56),
    "MR Lane 1": (8.61, 9.24, 15.04, 7.06),
    "MR Lane 2": (8.58, 6.74, 15.01, 4.53),
    "Cataract pre-op left": (2.38, 2.73, 5.63, 0.23),
    "Cataract pre-op right": (5.85, 2.70, 9.13, 0.20),
    "Cataract consult 1": (30.22, 17.51, 34.27, 15.93),
    "Cataract consult 2": (31.92, 15.67, 34.27, 12.71),
    "Cataract consult 3": (31.92, 11.82, 34.15, 8.70),
    "HFA 1": (17.22, 12.00, 19.26, 10.07),
    "HFA 2": (17.22, 9.79, 19.23, 7.86),
    "HFA 3": (19.34, 12.00, 21.38, 10.13),
    "HFA 4": (19.40, 9.87, 21.44, 7.92),
    "CAT ANT": (17.28, 7.58, 19.26, 5.74),
    "CAT OCT": (19.40, 7.61, 21.44, 5.74),
    "MR OCT top": (24.74, 7.06, 27.81, 5.17),
    "MR OCT bottom": (24.74, 5.02, 27.64, 3.27),
    "GL OCT WF top": (23.36, 12.31, 26.43, 10.56),
    "GL OCT WF bottom": (23.31, 10.36, 26.43, 8.55),
    "GL WF left": (26.58, 12.14, 28.24, 10.62),
    "GL WF right": (28.39, 12.11, 29.99, 10.59),
    "MR WF": (26.58, 10.48, 28.21, 8.87),
    "MR OCT": (28.44, 10.45, 30.05, 8.87),
    "Research 1": (2.53, 7.52, 5.63, 5.57),
    "Research 2": (17.91, 15.87, 21.07, 13.89),
    "Research 3": (13.23, 17.97, 16.33, 16.04)
}


In [7]:
def process_tasks(task_list_csv, rectangles, output_csv_path="combined_output.csv"):
    """
    Processes tasks from a task list CSV, appends results, and sorts by 'starttime'.

    Args:
        task_list_csv (str): Path to the CSV containing task information.
        rectangles: A dictionary where keys are rectangle names (strings) and
                    values are tuples/lists defining the rectangles (x1, y1, x2, y2).
        output_csv_path (str): The path to the output CSV file.
    """
    try:
        tasks_df = pd.read_csv(task_list_csv)
        #print("CSV Column Names:", tasks_df.columns.tolist())
    except FileNotFoundError:
        print(f"Error: Task list CSV not found at {task_list_csv}")
        return
    except Exception as e:
        print(f"Error reading task list CSV: {e}")
        return

    print(tasks_df)

    all_data = pd.DataFrame()  # Initialize an empty DataFrame

    for index, row in tasks_df.iterrows():
        file_path = row['csv_file_path']
        patient_id = row['patient_id_to_filter']
        unique_patient_id = row['Unique_Patient_ID'] # this is the one used by Rosica

        filtered_data = filter_patient_data(file_path, patient_id)

        print(index)

        if isinstance(filtered_data, pd.DataFrame):
            if not filtered_data.empty:
                try:
                    filtered_data[['xlocation', 'ylocation']] = filtered_data['Location'].apply(lambda loc: tuple(map(float, loc.replace('"', '').split(',')))).tolist()
                    filtered_data['starttime'] = pd.to_datetime(filtered_data['starttime'])

                except (ValueError, AttributeError):
                    print(f"Error processing {file_path}: Could not convert xlocation or ylocation to floats. Check data format.")
                    continue

                filtered_data['within_rectangle'] = ""

                for rect_name, rect_coords in rectangles.items():
                    filtered_data.loc[filtered_data.apply(lambda row: is_location_within_rectangle(row['xlocation'], row['ylocation'], rect_coords), axis=1), 'within_rectangle'] += rect_name + ","

                filtered_data['within_rectangle'] = filtered_data['within_rectangle'].str.rstrip(',')
                filtered_data['Unique_Patient_ID'] = unique_patient_id # Add Unique_Patient_ID


                all_data = pd.concat([all_data, filtered_data], ignore_index=True)

            else:
                print(f"No data found for Patient ID: {patient_id} in file: {file_path}")
        elif isinstance(filtered_data, str):
            print(f"Error processing {file_path}: {filtered_data}")
        else:
            print(f"An unexpected error occurred while processing {file_path}")

    if not all_data.empty:
        all_data = all_data.sort_values(by='starttime')
        all_data.to_csv(output_csv_path, index=False)
        print(f"Combined data saved to {output_csv_path}")
    else:
        print("No data was processed.")




In [9]:
task_list_csv = "ethno_patients1.csv"
process_tasks(task_list_csv, rectangles1, "combined_data1.csv")

   csv_file_path patient_id_to_filter Unique_Patient_ID
0   P1_input.csv                G0561             BX357
1   P1_input.csv                G0579            BX3727
2   P1_input.csv                G0589            BX3517
3   P1_input.csv                G0598            BX3520
4   P1_input.csv                G0605            BX3511
5   P1_input.csv                G0701            BX3555
6   P1_input.csv                G0727            BX3751
7   P1_input.csv                G0744            BX3812
8   P1_input.csv                G0748            BX3750
9   P1_input.csv                G0774            BX3930
10  P1_input.csv                G0808            BX3749
11  P1_input.csv                G1002            BX3729
12  P1_input.csv                G1039            BX3746
13  P1_input.csv                R0564             BX356
14  P1_input.csv                R0577            BX3516
15  P1_input.csv                R0597            BX3525
16  P1_input.csv                R0608           

In [8]:
task_list_csv = "ethno_patients2.csv"
process_tasks(task_list_csv, rectangles2, "combined_data2.csv")

  csv_file_path patient_id_to_filter Unique_Patient_ID
0  P2_input.csv                G1238            BX3911
1  P2_input.csv                G1289             BX389
2  P2_input.csv                G1293            BX3920
3  P2_input.csv                G1308            BX3527
4  P2_input.csv                R1020            BX3741
5  P2_input.csv                R1373             BX376
0
1
2
3
4
No data found for Patient ID: R1020 in file: P2_input.csv
5
No data found for Patient ID: R1373 in file: P2_input.csv
Combined data saved to combined_data2.csv


In [10]:
task_list_csv = "ethno_patients3.csv"
process_tasks(task_list_csv, rectangles3, "combined_data3.csv")

    csv_file_path patient_id_to_filter Unique_Patient_ID
0    P3_input.csv                G1838             BX218
1    P3_input.csv                G1839             BX219
2    P3_input.csv                G1840            BX2111
3    P3_input.csv                G1841            BX2112
4    P3_input.csv                G1842            BX2123
..            ...                  ...               ...
97   P3_input.csv                R1903            BX4520
98   P3_input.csv                R1904            BX4530
99   P3_input.csv                R1905            BX4544
100  P3_input.csv                R1906            BX4546
101  P3_input.csv                R4098             BX333

[102 rows x 3 columns]
0
No data found for Patient ID: G1838 in file: P3_input.csv
1
No data found for Patient ID: G1839 in file: P3_input.csv
2
No data found for Patient ID: G1840 in file: P3_input.csv
3
No data found for Patient ID: G1841 in file: P3_input.csv
4
No data found for Patient ID: G1842 in file: P3_in

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[['xlocation', 'ylocation']] = filtered_data['Location'].apply(lambda loc: tuple(map(float, loc.replace('"', '').split(',')))).tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['starttime'] = pd.to_datetime(filtered_data['starttime'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-

73
74
No data found for Patient ID: G4213 in file: P3_input.csv
75
76
77
78
No data found for Patient ID: R1427 in file: P3_input.csv
79
No data found for Patient ID: R1878 in file: P3_input.csv
80
No data found for Patient ID: R1879 in file: P3_input.csv
81
No data found for Patient ID: R1880 in file: P3_input.csv
82
No data found for Patient ID: R1881 in file: P3_input.csv
83
No data found for Patient ID: R1884 in file: P3_input.csv
84
No data found for Patient ID: R1885 in file: P3_input.csv
85
No data found for Patient ID: R1886 in file: P3_input.csv
86
No data found for Patient ID: R1887 in file: P3_input.csv
87
No data found for Patient ID: R1888 in file: P3_input.csv
88
No data found for Patient ID: R1889 in file: P3_input.csv
89
No data found for Patient ID: R1893 in file: P3_input.csv
90
No data found for Patient ID: R1894 in file: P3_input.csv
91
No data found for Patient ID: R1896 in file: P3_input.csv
92
No data found for Patient ID: R1897 in file: P3_input.csv
93
No data f

In [11]:
task_list_csv = "ethno_patients4.csv"
process_tasks(task_list_csv, rectangles4, "combined_data4.csv")

   csv_file_path patient_id_to_filter Unique_Patient_ID
0   P4_input.csv                G3008             BX489
1   P4_input.csv                G3014            BX4832
2   P4_input.csv                G3015            BX4834
3   P4_input.csv                G3016            BX4845
4   P4_input.csv                R2008            BX4811
5   P4_input.csv                R2009            BX4813
6   P4_input.csv                R2018            BX4833
7   P4_input.csv                R2021            BX4847
8   P4_input.csv                R2024            BX4852
9   P4_input.csv                R2025            BX4858
10  P4_input.csv                C0115             BX422
0
1
2
3
4


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[['xlocation', 'ylocation']] = filtered_data['Location'].apply(lambda loc: tuple(map(float, loc.replace('"', '').split(',')))).tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['starttime'] = pd.to_datetime(filtered_data['starttime'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-

5
6
7
8
9
10
Combined data saved to combined_data4.csv


# Extra

process_data was used when processing single files

In [None]:
def process_data(csv_file_path, patient_id_to_filter, rectangles, output_csv_path):
    """Processes data, checking against multiple rectangles.

    Args:
        csv_file_path: Path to CSV.
        patient_id_to_filter: Patient ID.
        rectangles: A dictionary where keys are rectangle names (strings) and
                    values are tuples/lists defining the rectangles (x1, y1, x2, y2).
        output_file_path: Path to output text file.
    """

    filtered_data = filter_patient_data(csv_file_path, patient_id_to_filter)

    if isinstance(filtered_data, pd.DataFrame):
        if not filtered_data.empty:
            try:
                filtered_data[['xlocation', 'ylocation']] = filtered_data['Location'].apply(lambda loc: tuple(map(float, loc.replace('"', '').split(',')))).tolist()
                #Convert starttime to datetime
                filtered_data['starttime'] = pd.to_datetime(filtered_data['starttime'])

            except (ValueError, AttributeError):
                return "Error: Could not convert xlocation or ylocation to floats. Check data format."

            filtered_data['within_rectangle'] = ""  # Initialize the column

            for rect_name, rect_coords in rectangles.items():
                filtered_data.loc[filtered_data.apply(lambda row: is_location_within_rectangle(row['xlocation'], row['ylocation'], rect_coords), axis=1), 'within_rectangle'] += rect_name + "," # Append the rectangle name, comma separated.

            #Remove the trailing comma
            filtered_data['within_rectangle'] = filtered_data['within_rectangle'].str.rstrip(',')

            # Sort by starttime
            filtered_data = filtered_data.sort_values(by='starttime')
            print(filtered_data.to_string())
            # Save to CSV
            filtered_data.to_csv(output_csv_path, index=False)  # index=False prevents writing the index column



        else:
            print(f"No data found for Patient ID: {patient_id_to_filter}")
    elif isinstance(filtered_data, str):
        print(filtered_data)
    else:
        print("An unexpected error occurred.")



Extra bit of code to loop through all the data to see which records are not in any of the input files - takes a few mins to run

In [None]:
import os

def check_patient_id_files(task_list_csv, input_files, output_file="patient_id_file_mapping.txt"):
    """
    Checks if patient IDs from a task list CSV are present in specified input CSV files.

    Args:
        task_list_csv (str): Path to the task list CSV.
        input_files (list): List of paths to the input CSV files.
        output_file (str): Path to the output text file.
    """

    try:
        tasks_df = pd.read_csv(task_list_csv)
    except FileNotFoundError:
        print(f"Error: Task list CSV not found at {task_list_csv}")
        return
    except Exception as e:
        print(f"Error reading task list CSV: {e}")
        return

    results = [] # list to store results

    for index, row in tasks_df.iterrows():
        patient_id = row['patient_id_to_filter']
        found_in_files = []

        for input_file in input_files:
            try:
                df = pd.read_csv(input_file)
                if patient_id in df['Patient'].values: # Check if patient_id is present
                    found_in_files.append(os.path.basename(input_file)) # Append file name to the list
            except FileNotFoundError:
                print(f"Warning: Input file not found: {input_file}")
            except Exception as e:
                print(f"Warning: Error reading input file {input_file}: {e}")

        if found_in_files:
            results.append(f"Patient ID: {patient_id} found in: {', '.join(found_in_files)}")
        else:
            results.append(f"Patient ID: {patient_id} not found in any specified files.")

    # Write results to output file
    with open(output_file, "w") as f:
        for result in results:
            f.write(result + "\n")

    print(f"Patient ID to file mapping written to {output_file}")

# Example Usage:
task_list_csv = "ethno_patients.csv"  # Replace with your task list CSV path
input_files = ["P1_input.csv", "P2_input.csv", "P3_input.csv", "P4_input.csv"] # list of input files

check_patient_id_files(task_list_csv, input_files, "patient_file_mapping.txt") # specify output file