In [2]:
import pandas as pd
import numpy as np
import dask.dataframe as dd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def _load_patient_measurements(values_csv, labels_csv, label_id_col, value_col, case_ids):
    """Load one measurement table for the given cases, with label names attached.

    Args:
        values_csv: Path of the CSV holding the measurements. It must contain
            the columns 'case_id', ``label_id_col``, ``value_col`` and
            'record_time'.
        labels_csv: Path of the CSV mapping label 'id' to 'label_name'.
        label_id_col: Name of the column in ``values_csv`` that references
            the label table's 'id' column.
        value_col: Name of the measurement column in ``values_csv``.
        case_ids: Case identifiers whose rows should be kept.

    Returns:
        A DataFrame with columns 'case_id', ``value_col``, 'record_time' and
        'label_name'. It has zero rows when no measurement matches
        ``case_ids``.
    """
    values_df = pd.read_csv(values_csv, usecols=['case_id', label_id_col, value_col, 'record_time'])
    labels_df = pd.read_csv(labels_csv, usecols=['id', 'label_name'])

    # Filter first so the label join only touches the requested cases; a left
    # merge keeps row order, so the result matches merging first and then
    # filtering.
    values_df = values_df[values_df['case_id'].isin(case_ids)]
    values_df = values_df.merge(labels_df, left_on=label_id_col, right_on='id', how='left')
    return values_df[['case_id', value_col, 'record_time', 'label_name']]


def patient_data(patient_id):
    """Collect all vital-sign and lab records of one patient in a single DataFrame.

    Reads 'admission.csv', 'vital_value.csv', 'vital_value_label.csv',
    'lab_value.csv' and 'lab_value_label.csv' from the current working
    directory.

    Args:
        patient_id: Value looked up in the 'patient_id' column of
            'admission.csv'.

    Returns:
        None (after printing a notice) when ``patient_id`` does not occur in
        'admission.csv'. Otherwise a DataFrame with the vital rows followed by
        the lab rows, each joined with the admission columns of its case.
        Vital rows have no lab measurement and lab rows have no vital
        measurement; those gaps, like any other missing values, are filled
        with the column mean. A column with no values at all stays NaN.
    """
    # Read the admission table first so unknown patients return before the
    # measurement files are loaded.
    admission_df = pd.read_csv(
        'admission.csv',
        usecols=['patient_id', 'case_id', 'admission_time', 'discharge_time', 'patient_height', 'patient_weight'],
    )

    case_ids = admission_df.loc[admission_df['patient_id'] == patient_id, 'case_id']
    if case_ids.empty:
        print(f"Patient with patient_id '{patient_id}' not found.")
        return None

    patient_vital_data = _load_patient_measurements(
        'vital_value.csv', 'vital_value_label.csv', 'vital_value_label_id', 'vital_value', case_ids
    )
    patient_lab_data = _load_patient_measurements(
        'lab_value.csv', 'lab_value_label.csv', 'lab_value_label_id', 'lab_value', case_ids
    )

    # An empty frame is concatenated as-is. Substituting a placeholder row of
    # 'N/A' strings would turn the numeric columns into object dtype and make
    # the mean computation below fail.
    patient_data_df = pd.concat([patient_vital_data, patient_lab_data])

    # Attach the admission details of each case.
    patient_data_df = patient_data_df.merge(admission_df, on='case_id', how='left')

    # Assign the filled column back instead of calling fillna(inplace=True) on
    # a column selection, which may operate on a temporary copy.
    for value_col in ('vital_value', 'lab_value'):
        patient_data_df[value_col] = patient_data_df[value_col].fillna(patient_data_df[value_col].mean())

    return patient_data_df


# Fetch the combined vital/lab records for one example patient. The lookup
# yields None when the id is not present in the admission table.
patient_data_df = patient_data(patient_id=2144009)

# Display the frame only when the lookup produced one.
if patient_data_df is not None:
    print(patient_data_df)


      case_id  vital_value          record_time     label_name  lab_value  \
0    37097567   132.000000       4/6/2017 17:42  RR (systolic)  84.206161   
1    37097567    36.800000        4/7/2017 2:30              T  84.206161   
2    37097567    74.000000       4/6/2017 21:33     HF_Pulsoxy  84.206161   
3    37097567   120.000000       4/5/2017 16:00  RR (systolic)  84.206161   
4    37097567   148.000000       4/6/2017 18:10     HF_Pulsoxy  84.206161   
..        ...          ...                  ...            ...        ...   
971  37097567    67.551515  2017-04-05 05:19:00    TZ (CS5100)  18.000000   
972  37097567    67.551515  2017-04-06 05:27:00    TZ (CS5100)  16.000000   
973  37097567    67.551515  2017-04-07 04:20:00    TZ (CS5100)  15.000000   
974  37097567    67.551515  2017-04-04 12:32:00          TZ HA  27.000000   
975  37097567    67.551515  2017-04-04 13:57:00          TZ HA  25.000000   

     patient_id       admission_time       discharge_time  patient_height  