In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [4]:
# Load CSV files into DataFrames
lab_value_df = pd.read_csv('lab_value.csv')
vital_value_df = pd.read_csv('vital_value.csv')
admission_df = pd.read_csv('admission.csv')
lab_value_label_df = pd.read_csv('lab_value_label.csv')
vital_value_label_df = pd.read_csv('vital_value_label.csv')


In [5]:
#Remove null values:

info= ["lab_value.csv", "vital_value.csv", "admission.csv","lab_value_label.csv","vital_value_label.csv"]

# Data Information
data_info = []

# Load and clean each DataFrame
for data in info:
    df = pd.read_csv(data)
    if df is not None:
        df_cleaned = df.dropna()
        data_info.append(df_cleaned)

# Print information about cleaned DataFrames
for df_cleaned in data_info:
    print(df_cleaned.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6143467 entries, 0 to 6143466
Data columns (total 6 columns):
 #   Column              Dtype 
---  ------              ----- 
 0   case_id             int64 
 1   lab_value_label_id  int64 
 2   sample_type_id      int64 
 3   record_time         object
 4   lab_value           int64 
 5   unit_type_id        int64 
dtypes: int64(5), object(1)
memory usage: 328.1+ MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1048172 entries, 0 to 1048574
Data columns (total 4 columns):
 #   Column                Non-Null Count    Dtype  
---  ------                --------------    -----  
 0   case_id               1048172 non-null  int64  
 1   vital_value_label_id  1048172 non-null  int64  
 2   record_time           1048172 non-null  object 
 3   vital_value           1048172 non-null  float64
dtypes: float64(1), int64(2), object(1)
memory usage: 40.0+ MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5523 entries, 1 to 5523
Dat

In [6]:
top_rows = [pd.read_csv(data).head(1000) for data in info]
top_rows

[      case_id  lab_value_label_id  sample_type_id          record_time  \
 0    37033628                  15              79  2017-07-24 08:32:00   
 1    37033628                  42              79  2017-01-12 06:07:00   
 2    37033628                  43              79  2017-06-06 13:35:00   
 3    37033628                  43              79  2017-06-06 14:03:00   
 4    37033628                  44              79  2017-01-12 06:07:00   
 ..        ...                 ...             ...                  ...   
 995  37033628                 188              79  2017-03-28 07:15:00   
 996  37033628                 188              79  2017-04-04 06:53:00   
 997  37033628                 188              79  2017-04-11 05:18:00   
 998  37033628                 188              79  2017-04-25 09:18:00   
 999  37033628                 188              79  2017-05-09 06:51:00   
 
      lab_value  unit_type_id  
 0           25            47  
 1           10             1  
 2

In [7]:
# Merge lab_value and vital_value DataFrames based on lab_value and vital value
#Both_DFs contains lab_value_label_id and vital_value_label_id

df1= pd.merge(lab_value_df.head(1000),vital_value_df.head(1000), left_on=['lab_value'],right_on=['vital_value'])
print(df1.head())

   case_id_x  lab_value_label_id  sample_type_id        record_time_x  \
0   37033628                  15              79  2017-07-24 08:32:00   
1   37033628                  15              79  2017-07-24 08:32:00   
2   37033628                  15              79  2017-07-24 08:32:00   
3   37033628                  15              79  2017-07-24 08:32:00   
4   37033628                  15              79  2017-07-24 08:32:00   

   lab_value  unit_type_id  case_id_y  vital_value_label_id    record_time_y  \
0         25            47   39032093                     1   1/28/2019 9:00   
1         25            47   38375865                     1   1/7/2019 10:30   
2         25            47   38117889                     1  3/25/2018 18:30   
3         25            47   39051551                     1   3/3/2019 10:30   
4         25            47   37198001                     1   9/7/2017 10:30   

   vital_value  
0         25.0  
1         25.0  
2         25.0  
3         25



In [8]:
# Merge lab_value and vital_value DataFrames based on admission and Both_DFs
# final_df contains patient_id and case_id 

df2 = pd.merge(df1.head(1000),admission_df.head(1000), left_on=['case_id_x'],right_on=['case_id'])
print(df2.head())

   case_id_x  lab_value_label_id  sample_type_id        record_time_x  \
0   37033628                  15              79  2017-07-24 08:32:00   
1   37033628                  15              79  2017-07-24 08:32:00   
2   37033628                  15              79  2017-07-24 08:32:00   
3   37033628                  15              79  2017-07-24 08:32:00   
4   37033628                  15              79  2017-07-24 08:32:00   

   lab_value  unit_type_id  case_id_y  vital_value_label_id    record_time_y  \
0         25            47   39032093                     1   1/28/2019 9:00   
1         25            47   38375865                     1   1/7/2019 10:30   
2         25            47   38117889                     1  3/25/2018 18:30   
3         25            47   39051551                     1   3/3/2019 10:30   
4         25            47   37198001                     1   9/7/2017 10:30   

   vital_value  patient_id   case_id       admission_time  \
0         25.0     

In [9]:
patient_lab_data = df1[['case_id_x', 'lab_value', 'lab_value_label_id', 'vital_value', 'record_time_x', 'vital_value_label_id' ]]
print(patient_lab_data)


      case_id_x  lab_value  lab_value_label_id  vital_value  \
0      37033628         25                  15         25.0   
1      37033628         25                  15         25.0   
2      37033628         25                  15         25.0   
3      37033628         25                  15         25.0   
4      37033628         25                  15         25.0   
...         ...        ...                 ...          ...   
3032   37033628         94                 121         94.0   
3033   37033628         94                 121         94.0   
3034   37033628         94                 121         94.0   
3035   37033628        152                 158        152.0   
3036   37033628        115                 158        115.0   

            record_time_x  vital_value_label_id  
0     2017-07-24 08:32:00                     1  
1     2017-07-24 08:32:00                     1  
2     2017-07-24 08:32:00                     1  
3     2017-07-24 08:32:00                  

In [10]:
patient_vital_data = df2 [['patient_id', 'case_id', 'admission_time', 'discharge_time', 'patient_age', 'patient_height', 'patient_weight','sample_type_id']]
print(patient_vital_data)

     patient_id   case_id       admission_time       discharge_time  \
0       1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
1       1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
2       1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
3       1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
4       1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
..          ...       ...                  ...                  ...   
995     1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
996     1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
997     1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
998     1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   
999     1066797  37033628  2017-01-03 11:40:00  2017-07-31 14:00:00   

     patient_age  patient_height  patient_weight  sample_type_id  
0             61             171              54              79  
1            

In [11]:
#Feteching data
patient_data = pd.merge(patient_lab_data, patient_vital_data, left_on=['case_id_x'],right_on=['case_id'])
print(patient_data)

         case_id_x  lab_value  lab_value_label_id  vital_value  \
0         37033628         25                  15         25.0   
1         37033628         25                  15         25.0   
2         37033628         25                  15         25.0   
3         37033628         25                  15         25.0   
4         37033628         25                  15         25.0   
...            ...        ...                 ...          ...   
3036995   37033628        115                 158        115.0   
3036996   37033628        115                 158        115.0   
3036997   37033628        115                 158        115.0   
3036998   37033628        115                 158        115.0   
3036999   37033628        115                 158        115.0   

               record_time_x  vital_value_label_id  patient_id   case_id  \
0        2017-07-24 08:32:00                     1     1066797  37033628   
1        2017-07-24 08:32:00                     1     

In [12]:
patient_data

Unnamed: 0,case_id_x,lab_value,lab_value_label_id,vital_value,record_time_x,vital_value_label_id,patient_id,case_id,admission_time,discharge_time,patient_age,patient_height,patient_weight,sample_type_id
0,37033628,25,15,25.0,2017-07-24 08:32:00,1,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
1,37033628,25,15,25.0,2017-07-24 08:32:00,1,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
2,37033628,25,15,25.0,2017-07-24 08:32:00,1,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
3,37033628,25,15,25.0,2017-07-24 08:32:00,1,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
4,37033628,25,15,25.0,2017-07-24 08:32:00,1,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3036995,37033628,115,158,115.0,2017-06-26 23:55:00,7,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
3036996,37033628,115,158,115.0,2017-06-26 23:55:00,7,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
3036997,37033628,115,158,115.0,2017-06-26 23:55:00,7,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79
3036998,37033628,115,158,115.0,2017-06-26 23:55:00,7,1066797,37033628,2017-01-03 11:40:00,2017-07-31 14:00:00,61,171,54,79


In [13]:
null_counts = patient_data.isnull().sum()
print(null_counts)

case_id_x               0
lab_value               0
lab_value_label_id      0
vital_value             0
record_time_x           0
vital_value_label_id    0
patient_id              0
case_id                 0
admission_time          0
discharge_time          0
patient_age             0
patient_height          0
patient_weight          0
sample_type_id          0
dtype: int64


In [88]:
#patient_data['ConvertedDate']=patient_data['admission_time'].astype(str)
#patient_data['ConvertedDate']

In [89]:
# Filter the dataset for a single patient_id and single case_id

patient_id = 'patient_id'
case_id_x = 'case_id'
filtered_df = patient_data[(patient_data['patient_id'] == patient_id) & (patient_data['case_id_x'] == case_id)]


In [90]:
# Sort the dataset by admission_time
filtered_df = filtered_df.sort_values('admission_time')


In [108]:

selected_columns = patient_data[['lab_value', 'vital_value']].values
#filtered_df = filtered_df[selected_columns]
selected_columns 

array([[ 25.,  25.],
       [ 25.,  25.],
       [ 25.,  25.],
       ...,
       [115., 115.],
       [115., 115.],
       [115., 115.]])

In [111]:
# Normalize the data using MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
normalized_values = scaler.fit_transform(selected_columns)

#normalized_data = scaler.fit_transform(filtered_df)

In [112]:
# Function to create sequences of data
def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)


In [104]:
# Define the sequence length (e.g., 20 hours)
sequence_length = 20


In [114]:
# Create sequences
X, y = create_sequences(normalized_values, sequence_length)

In [115]:
# Split the data into training and testing sets
split_ratio = 0.8  # 80% for training, 20% for testing
split_index = int(split_ratio * len(X))

In [116]:
X_train = X[:split_index]
y_train = y[:split_index]
X_test = X[split_index:]
y_test = y[split_index:]

In [117]:
# Reshape the input data for LSTM
X_train = X_train.reshape((X_train.shape[0], sequence_length, X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], sequence_length, X_test.shape[2]))

In [118]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(2))  # 2 output features: lab_value and vital_value


In [119]:
# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam')


In [120]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2125a9cc580>

In [122]:
# Prepare the input data for prediction
last_20_hours_data = normalized_values[-sequence_length:]
input_data = last_20_hours_data.reshape((1, sequence_length, last_20_hours_data.shape[1]))

# Perform prediction


In [123]:
# Perform prediction
predicted_data = model.predict(input_data)





In [124]:
# Inverse transform the predicted data to get the actual lab_value and vital_value
predicted_data = scaler.inverse_transform(predicted_data)
predicted_data

array([[114.9788 , 114.98061]], dtype=float32)