In [40]:
import pandas as pd
import numpy as np

In [41]:
def perturb_data(df, seed=None, scale=0.03):
    """
    Add small, reproducible Gaussian noise to the numeric cells of a DataFrame.

    The first column is treated as the row label and is never modified; every
    other column is treated as numeric. For each row, one noise value per
    non-missing cell is drawn from a normal distribution with mean 0 and
    standard deviation `scale` and added to that cell. Rows are then rounded
    to two decimal places, except rows whose label contains
    'Glasgow Coma Score' or 'NEWS': those are rounded to the nearest whole
    number. Missing cells stay missing, and rows with no non-missing cells
    consume no random numbers.

    Parameters:
        df (pd.DataFrame): Input DataFrame. It is modified in place (pass a
            copy to keep the original untouched). After the call every column
            except the first holds floats.
        seed (int): Seed for reproducibility. When given, a private generator
            seeded with this value is used, so the global NumPy random state
            is left untouched. When None, noise is drawn from the global
            NumPy random state.
        scale (float): Standard deviation of the Gaussian noise.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, perturbed.
    """
    # Labels containing any of these substrings mark rows that must stay whole numbers.
    integer_score_markers = ('Glasgow Coma Score', 'NEWS')

    # RandomState(seed) produces exactly the stream that np.random.seed(seed)
    # followed by np.random.normal(...) would, without reseeding the global RNG.
    rng = np.random.RandomState(seed) if seed is not None else np.random

    value_cols = df.columns[1:]
    if len(df) == 0 or len(value_cols) == 0:
        return df

    labels = df.iloc[:, 0].tolist()
    # Work on a writable float copy addressed purely by position, so the
    # DataFrame's index labels (non-default, unsorted, duplicated) never matter.
    values = df.iloc[:, 1:].to_numpy(dtype=float, copy=True)

    for pos, label in enumerate(labels):
        present = ~np.isnan(values[pos])
        n_present = int(present.sum())

        # Nothing to perturb in an all-missing row; skip without drawing noise.
        if n_present == 0:
            continue

        # One draw per non-missing cell, applied left to right.
        values[pos, present] += rng.normal(0, scale, n_present)

        is_integer_score = isinstance(label, str) and any(
            marker in label for marker in integer_score_markers
        )
        if is_integer_score:
            # Round to the nearest whole number; truncating would turn
            # 15 - 0.03 into 14. NaN passes through rounding unchanged.
            values[pos] = np.round(values[pos])
        else:
            values[pos] = np.round(values[pos], 2)

        # Rounding a tiny negative number yields -0.0; adding 0.0 normalises it to 0.0.
        values[pos] += 0.0

    # Replace the value columns in one step (also upgrades integer columns to float).
    df[value_cols] = values

    return df

In [42]:
# Build patient_1_demo: a lightly perturbed version of patient_1's vitals.

# Load the recorded vitals for patient_1.
origional_df = pd.read_csv(
    filepath_or_buffer='/home/wb1115/VSCode_projects/cdss/cdss_1/csv/vitals/patient_1.csv'
)
# perturb_data modifies the frame it receives, so give it a separate deep copy.
origional_df_copy = origional_df.copy(deep=True)
# Apply seeded noise so the demo file can be regenerated identically.
perturbed_df = perturb_data(df=origional_df_copy, seed=0, scale=0.05)
# Show the untouched source table for comparison.
print(origional_df)
# Write the perturbed table without the row index, then show it.
perturbed_df.to_csv(
    path_or_buf='/home/wb1115/VSCode_projects/cdss/cdss_1/csv/vitals/patient_1_demo.csv',
    index=False,
)
print(perturbed_df)

        Metric / Time (hours)       0       6      12      18      24      30
0                 Temperature   37.78   38.47   39.28   39.28   39.28   39.28
1            Respiratory Rate   19.39   19.56   22.05   20.32   21.57   21.53
2                  Heart Rate   85.53   88.40  106.98  126.53  116.17  121.83
3    Diastolic Blood Pressure   79.62   78.53   74.54   77.25   78.36   75.99
4     Systolic Blood Pressure  144.54  143.46  127.32  134.64  135.35  126.09
5      Mean Arterial Pressure  112.09  111.00  100.93  105.95  106.85  101.04
6                        SpO2   96.40   96.18   94.35   96.22   94.39   98.12
7          Glasgow Coma Score   15.00   15.00   14.00   15.00   15.00   15.00
8  NEWS Conscious Level Score    0.00    0.00    0.00    0.00    0.00    0.00
9     NEWS Supplemental Oxgen    2.00    2.00    2.00    2.00    2.00    2.00
        Metric / Time (hours)       0       6      12      18      24      30
0                 Temperature   37.87   38.49   39.33   39.39   

In [43]:
# Build patient_2_demo: a lightly perturbed version of patient_2's vitals.

# Load the recorded vitals for patient_2.
origional_df = pd.read_csv(
    filepath_or_buffer='/home/wb1115/VSCode_projects/cdss/cdss_1/csv/vitals/patient_2.csv'
)
# perturb_data modifies the frame it receives, so give it a separate deep copy.
origional_df_copy = origional_df.copy(deep=True)
# Apply seeded noise so the demo file can be regenerated identically.
perturbed_df = perturb_data(df=origional_df_copy, seed=0, scale=0.05)
# Write the perturbed table without the row index.
perturbed_df.to_csv(
    path_or_buf='/home/wb1115/VSCode_projects/cdss/cdss_1/csv/vitals/patient_2_demo.csv',
    index=False,
)

In [44]:
# Build patient_3_demo: a lightly perturbed version of patient_9's vitals.

# Load the recorded vitals for patient_9 (the source for the third demo patient).
origional_df = pd.read_csv(
    filepath_or_buffer='/home/wb1115/VSCode_projects/cdss/cdss_1/csv/vitals/patient_9.csv'
)
# perturb_data modifies the frame it receives, so give it a separate deep copy.
origional_df_copy = origional_df.copy(deep=True)
# Apply seeded noise so the demo file can be regenerated identically.
perturbed_df = perturb_data(df=origional_df_copy, seed=0, scale=0.05)
# Write the perturbed table without the row index.
perturbed_df.to_csv(
    path_or_buf='/home/wb1115/VSCode_projects/cdss/cdss_1/csv/vitals/patient_3_demo.csv',
    index=False,
)