In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
# This script loads patient data from parquet files in a specified folder
def load_all_patient_data(folder_path):
    """
    Load all patient data from parquet files in the specified folder.

    Every directory entry whose name ends in '.parquet' is read with
    ``pd.read_parquet`` and the resulting frames are concatenated with a
    fresh integer index. Files are read in sorted filename order:
    ``os.listdir`` returns entries in arbitrary order, so sorting keeps the
    row order of the combined frame reproducible between runs.

    Any exception raised while listing, reading or concatenating is caught,
    reported on stdout, and turned into a ``None`` return.

    Parameters:
    folder_path (str): Path to the folder containing parquet files.

    Returns:
    pd.DataFrame or None: Combined DataFrame with data from all files, or
    None when no '.parquet' file is found or an error occurs.
    """
    try:
        parquet_names = sorted(
            name for name in os.listdir(folder_path)
            if name.endswith('.parquet')
        )
        if not parquet_names:
            print("No .parquet files found in the folder.")
            return None

        frames = [
            pd.read_parquet(os.path.join(folder_path, name))
            for name in parquet_names
        ]
        return pd.concat(frames, ignore_index=True)
    except Exception as e:
        # Deliberately broad: callers only test the result against None, so
        # every failure is reported here instead of propagating.
        print(f"Error loading data: {e}")
        return None

if __name__ == "__main__":
    folder_path = './Readmitted_patients'
    data = load_all_patient_data(folder_path)
    if data is None:
        # Nothing to plot; stop here instead of indexing into None below.
        print("Failed to load data.")
    else:
        print("Data loaded successfully.")

        # One figure per subject. sort=False keeps subjects in order of first
        # appearance, and groupby partitions the frame in a single pass
        # instead of re-scanning it with a boolean mask for every subject.
        for subject_id, subject_data in data.groupby('subject_id', sort=False):
            print(f"Processing data for subject_id: {subject_id}")

            # Work on an explicit copy so the column assignments below do not
            # target a slice of `data`.
            subject_data = subject_data.copy()

            # Coerce first, then drop: unparseable values become NaN and are
            # removed together with the values that were already missing.
            subject_data['valuenum'] = pd.to_numeric(
                subject_data['valuenum'], errors='coerce'
            )
            # Time from each reading to max_chart_ts, in hours (the division
            # by 3600 assumes both columns are in seconds -- TODO confirm).
            subject_data['time_diff_hours'] = (
                subject_data['max_chart_ts'] - subject_data['chart_ts']
            ) / 3600
            subject_data = subject_data.dropna(subset=['valuenum'])

            plt.figure(figsize=(12, 6))

            # One line per itemid, drawn in time order.
            for itemid, group in subject_data.groupby('itemid'):
                group = group.sort_values('time_diff_hours')
                plt.plot(
                    group['time_diff_hours'],
                    group['valuenum'],
                    label=f'ItemID {itemid}',
                )

            plt.xlabel('Hours before discharge')
            plt.ylabel('Value Number')
            plt.title(f"Data for subject_id: {subject_id}")
            plt.legend(title='ItemID', bbox_to_anchor=(1.05, 1), loc='upper left')
            plt.grid(True)
            # Reverse the x axis so the largest time difference is on the left
            # and the plot reads towards max_chart_ts.
            plt.gca().invert_xaxis()
            plt.tight_layout()
            plt.show()

            # Display the first few rows of the subject's data.
            print(subject_data.head())




In [None]:
import matplotlib.pyplot as plt

# Time between each reading and max_chart_ts, converted to hours
# (dividing by 3600 presumes the timestamps are in seconds -- TODO confirm).
# `subject_data` is not defined in this cell; it must already exist.
seconds_to_max = subject_data['max_chart_ts'] - subject_data['chart_ts']
subject_data['time_diff_hours'] = seconds_to_max / 3600

plt.figure(figsize=(12, 6))

# One line per itemid. Rows are ordered by valuenum rather than by time, so
# each line connects its points in order of increasing value.
for itemid, group in subject_data.groupby('itemid'):
    group = group.sort_values(by='valuenum')
    x_hours, y_values = group['time_diff_hours'], group['valuenum']
    plt.plot(x_hours, y_values, label=f'ItemID {itemid}')

plt.title('ValueNum vs Time Difference (Sorted by ValueNum within each ItemID)')
plt.xlabel('Time Difference (hours)')
plt.ylabel('ValueNum (sorted within group)')
plt.grid(True)
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), title='ItemID')
plt.tight_layout()
plt.show()