### Training algorithm validation - sliding windows selection

This algorithm generates sliding windows over time-series-like patient data for model training, ensuring that all entries in a given window belong to the same patient.

In [5]:
import pandas as pd

# Example DataFrame with an additional 'iculos' column
data = {
    'file_path': ['patient_001', 'patient_002', 'patient_001', 'patient_003', 'patient_002',
                  'patient_001', 'patient_002', 'patient_003', 'patient_002', 'patient_001',
                  'patient_001', 'patient_002', 'patient_003', 'patient_002', 'patient_001',
                  'patient_003', 'patient_002', 'patient_003', 'patient_002', 'patient_001'],
    'iculos': [5, 3, 7, 2, 1, 6, 4, 8, 5, 2, 9, 7, 6, 8, 3, 1, 10, 4, 5, 6]  # Mock data for the 'iculos' column
}
df = pd.DataFrame(data)

# Step 1: Map file_path to unique integers and replace the column
unique_patients = df['file_path'].unique()
patient_to_int = {patient_id: idx for idx, patient_id in enumerate(unique_patients)}
df['file_path'] = df['file_path'].map(patient_to_int)

# Step 2: Sort the DataFrame by 'file_path' and 'iculos'
# reset_index(drop=True) makes the index labels equal to the row positions,
# which the window computation below relies on.
df_sorted = df.sort_values(by=['file_path', 'iculos']).reset_index(drop=True)

# Number of consecutive rows in every sliding window.
WINDOW_SIZE = 3


def build_window_indices(sorted_frame, window_size=WINDOW_SIZE, group_col='file_path'):
    """Pre-calculate the row ranges of all single-patient sliding windows.

    Windows advance one row at a time inside each group and are numbered
    consecutively from 0 in group order. Only full-length windows are
    produced: a group with fewer than ``window_size`` rows contributes no
    window, regardless of whether it is the last group in the frame.

    Args:
        sorted_frame: DataFrame whose rows are ordered so that each group
            occupies one contiguous run, and whose index labels equal the
            row positions (e.g. after ``reset_index(drop=True)``).
        window_size: Number of rows per window; must be at least 1.
        group_col: Column that identifies the patient / group.

    Returns:
        A ``(beginning, end)`` pair of dictionaries keyed by window number.
        ``beginning[n]`` is the first row position of window ``n`` and
        ``end[n]`` the exclusive stop position, so the window's rows are
        ``sorted_frame.iloc[beginning[n]:end[n]]``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    beginning = {}
    end = {}
    window_number = 0

    for _, group in sorted_frame.groupby(group_col):
        first_row = int(group.index[0])
        # A group of n rows holds n - window_size + 1 full windows; the
        # range is empty when the group is shorter than one window.
        last_start = first_row + len(group) - window_size
        for start in range(first_row, last_start + 1):
            beginning[window_number] = start
            end[window_number] = start + window_size
            window_number += 1

    return beginning, end


# Step 3: Pre-calculate beginning and end indices
beginning_index_dict, end_index_dict = build_window_indices(df_sorted)

# Display the dictionaries
print("Beginning Index Dictionary:")
print(beginning_index_dict)
print("\nEnd Index Dictionary:")
print(end_index_dict)

# Example usage function to get window indices
def get_window_indices(window_number):
    """Return the ``(start, end)`` row positions stored for a window.

    Both values are looked up in the pre-calculated index dictionaries;
    each one is ``None`` when ``window_number`` has no entry there.
    """
    return (
        beginning_index_dict.get(window_number),
        end_index_dict.get(window_number),
    )

# Example usage to select a window
def get_window_data(window_number):
    """Return the rows of ``df_sorted`` that make up the given window.

    Returns ``None`` when either boundary of the window is unknown.
    """
    first_row, stop_row = get_window_indices(window_number)
    if first_row is None or stop_row is None:
        return None
    return df_sorted.iloc[first_row:stop_row]

# Examples: look up the rows of window number 13 and of window number 5
window_number_example = 13
window_number_example_2 = 5

window_data = get_window_data(window_number_example)
window_data_2 = get_window_data(window_number_example_2)

# Show window 13 first, then window 5
print("\nExample window data for window number 13:")
print(window_data)

print("\nExample window data for window number 5:")
print(window_data_2)

Beginning Index Dictionary:
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 7, 6: 8, 7: 9, 8: 10, 9: 11, 10: 12, 11: 15, 12: 16, 13: 17}

End Index Dictionary:
{0: 3, 1: 4, 2: 5, 3: 6, 4: 7, 5: 10, 6: 11, 7: 12, 8: 13, 9: 14, 10: 15, 11: 18, 12: 19, 13: 20}

Example window data for window number 13:
    file_path  iculos
17          2       4
18          2       6
19          2       8

Example window data for window number 5:
   file_path  iculos
7          1       1
8          1       3
9          1       4
