In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab VM; the feather files loaded further
# down are read from paths underneath this mount point.
drive.mount(mountpoint='/content/drive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import pyarrow.feather as feather
from sklearn.metrics import mean_squared_error
from tensorflow import keras

In [None]:
# Common prefix of the per-patient feather files. The loading cell appends
# "<patient number>).feather" to this prefix to build each file path.
edf_path = "/content/drive/MyDrive/temp/signal_sync_EDF ("

In [None]:
# Load the recordings of patients 1..22, tag every row with its patient
# number and keep only every 100th sample of each recording.
#
# The frames are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies all previously loaded rows on
# every iteration, and starting from an empty DataFrame relies on pandas'
# special handling of empty frames when inferring the result dtypes.
patient_frames = []

for i in range(1, 23):
    path = edf_path + str(i) + ').feather'
    edf_temp = feather.read_table(path).to_pandas()
    edf_temp['Patient'] = i
    # Downsample: keep rows 0, 100, 200, ... of this patient's recording.
    patient_frames.append(edf_temp.iloc[::100, :])

# The original row index of every file is preserved (no ignore_index), which
# matches what the incremental concatenation produced.
edf = pd.concat(patient_frames)

In [None]:
# Keep only the rows whose SpO2 reading is strictly greater than 20.
spo2_above_20 = edf['SpO2'] > 20
edf = edf.loc[spo2_above_20]

In [None]:
# Distinct patient numbers that still have rows after filtering, in order of
# first appearance in the frame.
patient_column = edf['Patient']
patients = patient_column.unique()

In [None]:
# Split at patient level: an integer test_size holds out exactly 3 patients
# for testing; the fixed random_state makes the split repeatable.
train_patients, test_patients = train_test_split(
    patients,
    test_size=3,
    random_state=42,
)

In [None]:
def create_dataset(patient_data, look_back=1):
    """Turn a series into (window, next value) pairs for one-step forecasting.

    Args:
        patient_data: Sequence or array indexed along its first axis.
        look_back: Number of consecutive samples in every input window.

    Returns:
        A tuple ``(dataX, dataY)`` of NumPy arrays. ``dataX[k]`` is
        ``patient_data[k:k + look_back]`` and ``dataY[k]`` is the sample that
        directly follows that window, ``patient_data[k + look_back]``.

    Note:
        Exactly ``len(patient_data) - look_back - 1`` pairs are produced, so
        the final sample of the series is never used as a target. When that
        count is zero or negative both returned arrays are empty.
    """
    n_pairs = len(patient_data) - look_back - 1
    starts = range(n_pairs)  # empty when n_pairs <= 0
    windows = [patient_data[start:start + look_back] for start in starts]
    targets = [patient_data[start + look_back] for start in starts]
    return np.array(windows), np.array(targets)


In [None]:
# Window length: how many past samples the model receives per prediction.
look_back = 100

# Shared min-max scaler mapping values into [0, 1]. It is refit with
# fit_transform for every patient in the cells below, so it always holds the
# statistics of the most recently processed patient.
scaler = MinMaxScaler(
    feature_range=(0, 1),
)

In [None]:
# Build the training windows one patient at a time and stack them afterwards.
#
# Windowing the concatenation of all patients would create windows that start
# in one patient's recording and end in the next one, mixing two unrelated
# (and independently scaled) series in a single sample. Windowing per patient
# keeps every (window, target) pair inside a single recording.
train_windows, train_targets, scaled_series = [], [], []
for p in train_patients:
    patient_spo2 = edf.loc[edf['Patient'] == p, 'SpO2'].values.reshape(-1, 1)
    # The scaler is refit here, so each patient is normalised by their own
    # minimum and maximum SpO2.
    scaled_data = scaler.fit_transform(patient_spo2)
    patient_series = scaled_data.flatten()
    scaled_series.append(patient_series)

    patient_X, patient_Y = create_dataset(patient_series, look_back)
    if len(patient_X) == 0:
        # Recording too short to yield a single window; nothing to add.
        continue
    train_windows.append(patient_X)
    train_targets.append(patient_Y)

# Flat concatenation of all scaled series, kept under its original name for
# any later cell that still refers to it.
train_data = np.concatenate(scaled_series)

# trainX has shape (n_samples, look_back); trainY has shape (n_samples,).
trainX = np.concatenate(train_windows)
trainY = np.concatenate(train_targets)

In [None]:
# Reshape to the (samples, timesteps, features) layout the LSTM layer is
# declared with: one timestep carrying `look_back` features.
# Using -1 with the explicit look_back makes this cell safe to re-run: the
# previous form derived the target shape from trainX itself and raised on a
# second execution, once trainX was already three-dimensional.
trainX = trainX.reshape(-1, 1, look_back)

In [None]:
# Fix the random seed before the model is created and trained so weight
# initialisation and training are repeatable between runs.
keras.utils.set_random_seed(seed=42)

In [None]:
# One LSTM layer with 64 units followed by a single linear output neuron
# that produces the next (scaled) value. The input is declared as one
# timestep with `look_back` features, matching the reshaped training array.
model = Sequential([
    LSTM(64, input_shape=(1, look_back)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train for 30 epochs; validation_split holds out the last 20% of the
# samples (taken before shuffling) to report a validation loss each epoch.
model.fit(
    trainX,
    trainY,
    epochs=30,
    batch_size=2048,
    verbose=2,
    validation_split=0.2,
)

In [None]:
import pickle

# Persist the trained model to the notebook's working directory with pickle.
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras/TensorFlow version - confirm, or consider the native model.save().
filename = 'lstm_model_ML_0909BIGGER2.pkl'
with open(filename, mode='wb') as file:
    pickle.dump(model, file)

In [None]:
# Chunked multi-step forecast on the held-out patients.
#
# For every test patient the last 25% of the windows are forecast in chunks
# of up to 100 steps. Each chunk starts from a window of real data; inside
# the chunk the model is fed its own previous predictions. The forecast is
# plotted over the real series and scored with the mean squared error.
for p in test_patients:
    test_data = edf[edf['Patient'] == p]['SpO2'].values.astype('float32')
    # NOTE(review): the scaler is refit on the whole test series, including
    # the part that is forecast below, so its min/max see future values.
    scaled_test_data = scaler.fit_transform(test_data.reshape(-1, 1))
    testX, _ = create_dataset(scaled_test_data, look_back)
    if len(testX) == 0:
        # create_dataset returns a 1-D empty array here; the reshape below
        # would fail, so skip recordings that cannot fill a single window.
        print(f'Patient {p}: too few samples for look_back={look_back}, skipped')
        continue

    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    threshold = int(0.75 * len(testX))
    real_values = scaler.inverse_transform(scaled_test_data)

    predictions = []
    print(len(testX)-threshold)
    for i in range(threshold, len(testX), 100):
        # Seed the chunk with a window of real observations.
        last_observations = testX[i:i+1]
        chunk_predictions = []
        for j in range(100):
            if i + j >= len(testX):
                break
            # verbose=0 is the documented way to silence predict().
            pred = model.predict(last_observations, verbose=0)
            pred_inv = scaler.inverse_transform(pred)
            chunk_predictions.append(pred_inv[0][0])

            # Slide the window one step: np.roll returns a new array (testX
            # is left untouched), then the newest slot takes the prediction.
            last_observations = np.roll(last_observations, -1)
            last_observations[0, 0, -1] = pred[0, 0]

        predictions.extend(chunk_predictions)

    # Window k covers samples k .. k+look_back-1 and predicts sample
    # k+look_back, so the first forecast belongs to this index of the series.
    forecast_start = threshold + look_back
    forecast_end = forecast_start + len(predictions)

    plt.figure(figsize=(100, 60))
    plt.plot(real_values, label='Real Values', color='grey', alpha=0.7)
    plt.plot(np.arange(forecast_start, forecast_end), predictions, label='Predictions', color='lime', alpha=0.7)

    plt.title(f'Patient {p}')
    plt.legend()

    plt.show()
    # Score against the same samples the forecast is plotted over. Slicing
    # from forecast_start + 1 would compare every prediction with the value
    # one step after the one it was made for.
    actual_values = real_values[forecast_start:forecast_end].ravel()
    mse = mean_squared_error(actual_values, predictions)
    print(f'MSE: {mse}')

In [None]:
# Free-running forecast on the held-out patients.
#
# For every test patient the model is seeded with one window of real data at
# the 80% mark and then fed only its own predictions until the end of the
# series. The forecast is plotted over the real series.
for p in test_patients:
    test_data = edf[edf['Patient'] == p]['SpO2'].values.astype('float32')
    # NOTE(review): the scaler is refit on the whole test series, including
    # the part that is forecast below, so its min/max see future values.
    scaled_test_data = scaler.fit_transform(test_data.reshape(-1, 1))
    testX, _ = create_dataset(scaled_test_data, look_back)
    if len(testX) == 0:
        # create_dataset returns a 1-D empty array here; the reshape below
        # would fail, so skip recordings that cannot fill a single window.
        print(f'Patient {p}: too few samples for look_back={look_back}, skipped')
        continue

    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    threshold = int(0.8 * len(testX))
    real_values = scaler.inverse_transform(scaled_test_data)

    predictions = []
    # Seed with window `threshold`: it covers samples threshold ..
    # threshold+look_back-1, so the first prediction is for sample
    # threshold+look_back, which is exactly where the plot below starts.
    # Seeding with the last window *before* the threshold would shift every
    # prediction one sample to the right of its true position.
    last_observations = testX[threshold:threshold + 1]

    for i in range(threshold, len(testX)):
        # verbose=0 is the documented way to silence predict().
        pred = model.predict(last_observations, verbose=0)
        pred_inv = scaler.inverse_transform(pred)
        predictions.append(pred_inv[0][0])

        # Slide the window one step: np.roll returns a new array (testX is
        # left untouched), then the newest slot takes the prediction. The
        # scalar pred[0, 0] is assigned rather than the (1, 1) array.
        last_observations = np.roll(last_observations, -1)
        last_observations[0, 0, -1] = pred[0, 0]

    # Plotting
    plt.figure()
    plt.plot(real_values, label='Real Values')
    plt.plot(np.arange(threshold + look_back, len(testX) + look_back), predictions, label='Predictions')
    plt.title(f'Patient {p}')
    plt.legend()
    plt.show()
