In [1]:
# ✅ Import Required Libraries
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import joblib
import matplotlib.pyplot as plt

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# ✅ Enable Mixed Precision for Faster Computation
tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [4]:
# ✅ Define Paths
reference_file = "/content/drive/MyDrive/Jan 4/Reference_Output_Until_Jan02_2025 (4).csv"
predictor_folder = "/content/drive/MyDrive/Jan 4/5000_X_12_Until_Jan02_2025"

In [5]:
reference_df = pd.read_csv(reference_file)  # Columns: Reading_File_Name, A, B, reading_id, gender, etc.

In [6]:
# ✅ Extract Target (Omron Systolic)
y = reference_df[['Omron Systolic']].to_numpy(dtype=np.float32)

In [7]:
# ✅ Normalize Target Values
from sklearn.preprocessing import StandardScaler
target_scaler = StandardScaler()
y_scaled = target_scaler.fit_transform(y)

In [8]:
# Save target scaler
joblib.dump(target_scaler, "/content/drive/MyDrive/pkl files/target_scaler19.pkl")


['/content/drive/MyDrive/pkl files/target_scaler19.pkl']

In [9]:
# ✅ Load and Process Predictor Files in a Single Pass
aggregated_data = []
predictor_files = sorted([f for f in os.listdir(predictor_folder) if f.endswith(".csv")])



In [10]:
for file in predictor_files:
    df = pd.read_csv(os.path.join(predictor_folder, file), dtype=np.float32)

    # Handle missing values
    df.fillna(df.median(numeric_only=True), inplace=True)

    aggregated_data.append(df.to_numpy(dtype=np.float32))



In [11]:
# ✅ Convert to NumPy Array
X = np.array(aggregated_data, dtype=np.float32)


In [12]:
# ✅ Ensure Proper Shape for GRU (samples, timesteps, features)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2])


In [13]:
X_2D = X.reshape(-1,X.shape[2])

In [14]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_2D)


In [15]:
X_3D = X_scaled.reshape(X.shape[0], X.shape[1], X.shape[2])

In [16]:
# ✅ Split Data (20% Test, 80% Train)
X_3Dtrain, X_3Dtest, y_train, y_test = train_test_split(X_3D, y_scaled, test_size=0.2, random_state=42)




In [17]:
X_3Dtest.shape

(108, 5000, 12)

In [18]:
y_train

array([[ 0.26008847],
       [ 0.95997643],
       [ 0.26008847],
       [ 0.01012849],
       [-0.1898395 ],
       [ 0.61003244],
       [ 0.61003244],
       [-0.58977544],
       [ 1.7098564 ],
       [-1.6895994 ],
       [ 0.66002446],
       [-0.83973545],
       [-1.2396713 ],
       [-0.9897114 ],
       [-0.8897274 ],
       [-0.68975943],
       [-1.3396554 ],
       [ 0.41006446],
       [-0.23983148],
       [-1.0397034 ],
       [-0.8897274 ],
       [ 0.11011248],
       [ 1.3599124 ],
       [-1.4396393 ],
       [ 0.36007246],
       [ 2.3597524 ],
       [ 0.51004845],
       [-0.38980746],
       [-0.58977544],
       [-0.73975146],
       [ 1.5598804 ],
       [-1.0896955 ],
       [ 0.26008847],
       [ 0.11011248],
       [-0.33981547],
       [-0.33981547],
       [-0.48979145],
       [ 0.85999244],
       [ 0.46005645],
       [ 0.26008847],
       [-0.03986351],
       [-1.0896955 ],
       [ 0.8100004 ],
       [-0.0898555 ],
       [-0.38980746],
       [ 0

In [19]:
# ✅ Define a Lightweight GRU Model for Faster Training
model = Sequential([
    Input(shape=(X.shape[1], X.shape[2])),
    GRU(128, return_sequences=True, activation='tanh'),
    Dropout(0.2),
    BatchNormalization(),
    GRU(64, return_sequences=False, activation='tanh'),
    Dropout(0.2),
    BatchNormalization(),
    Dense(1, dtype=tf.float32)  # Output layer (1 target)
])



In [20]:
# ✅ Compile Model with Adam Optimizer
optimizer = Adam(learning_rate=0.0005, clipvalue=0.5)
model.compile(optimizer=optimizer, loss= 'mae', metrics=['mse', 'mae'])


In [21]:
from tensorflow.keras.utils import plot_model

In [22]:
history = model.fit(
    X_3Dtrain, y_train,
    epochs=100,
    batch_size=128,  # Larger batch for speed
    validation_data=(X_3Dtest, y_test),
    verbose=1,

)



Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 777ms/step - loss: 1.8221 - mae: 1.8221 - mse: 4.9841 - val_loss: 0.6162 - val_mae: 0.6162 - val_mse: 0.6344
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 367ms/step - loss: 1.1131 - mae: 1.1131 - mse: 2.1511 - val_loss: 0.6346 - val_mae: 0.6346 - val_mse: 0.6655
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 358ms/step - loss: 1.0462 - mae: 1.0462 - mse: 1.9027 - val_loss: 0.6339 - val_mae: 0.6339 - val_mse: 0.6655
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 359ms/step - loss: 1.0513 - mae: 1.0513 - mse: 1.8762 - val_loss: 0.6225 - val_mae: 0.6225 - val_mse: 0.6489
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 502ms/step - loss: 0.9568 - mae: 0.9568 - mse: 1.6349 - val_loss: 0.6187 - val_mae: 0.6187 - val_mse: 0.6425
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 360ms/step

In [23]:
# ✅ Predict on Test Data
y_pred = model.predict(X_3Dtest)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 194ms/step


In [24]:
# ✅ Inverse Transform Predictions
y_pred_original = target_scaler.inverse_transform(y_pred)
y_test_original = target_scaler.inverse_transform(y_test)


In [25]:
# ✅ Compute MSE and MAE (Original Scale)
final_mse = mean_squared_error(y_test_original, y_pred_original)
final_mae = mean_absolute_error(y_test_original, y_pred_original)


In [26]:
print(f"✅ Final MSE: {final_mse:.4f}")
print(f"✅ Final MAE: {final_mae:.4f}")


✅ Final MSE: 313.6974
✅ Final MAE: 13.7143


In [27]:
y_pred_original

array([[118.70688 ],
       [132.41824 ],
       [118.04439 ],
       [129.7855  ],
       [124.89556 ],
       [128.50716 ],
       [121.20411 ],
       [129.48796 ],
       [126.94549 ],
       [136.07481 ],
       [123.81502 ],
       [108.40961 ],
       [120.167046],
       [102.686966],
       [109.80446 ],
       [124.287575],
       [114.06066 ],
       [111.99482 ],
       [140.04596 ],
       [115.24819 ],
       [120.37448 ],
       [131.73383 ],
       [124.4822  ],
       [131.62526 ],
       [135.38388 ],
       [127.25468 ],
       [120.987045],
       [123.450226],
       [132.65494 ],
       [121.55973 ],
       [109.81707 ],
       [119.438065],
       [134.11374 ],
       [116.38183 ],
       [103.055275],
       [121.3205  ],
       [125.78784 ],
       [102.31219 ],
       [160.1311  ],
       [114.64172 ],
       [120.90366 ],
       [126.58481 ],
       [141.99794 ],
       [121.81184 ],
       [117.98112 ],
       [122.389015],
       [119.86964 ],
       [143.2

In [28]:
y_test_original

array([[141.],
       [124.],
       [137.],
       [115.],
       [113.],
       [111.],
       [135.],
       [151.],
       [115.],
       [129.],
       [112.],
       [179.],
       [118.],
       [113.],
       [140.],
       [119.],
       [122.],
       [ 96.],
       [105.],
       [127.],
       [147.],
       [125.],
       [126.],
       [126.],
       [111.],
       [122.],
       [132.],
       [118.],
       [122.],
       [125.],
       [112.],
       [115.],
       [142.],
       [132.],
       [129.],
       [117.],
       [106.],
       [122.],
       [159.],
       [124.],
       [118.],
       [124.],
       [143.],
       [119.],
       [140.],
       [134.],
       [101.],
       [147.],
       [116.],
       [125.],
       [114.],
       [118.],
       [115.],
       [126.],
       [135.],
       [112.],
       [124.],
       [125.],
       [130.],
       [147.],
       [142.],
       [147.],
       [105.],
       [ 95.],
       [ 96.],
       [112.],
       [10

In [37]:
correlation = np.corrcoef(y_test_original[:, 0], y_pred_original[:, 0])[0, 1]  # Assuming Omron Systolic is in the first column


In [38]:
Correlation = correlation*100

In [39]:
print(f"Correlation between y_test_original and y_pred_original: {Correlation}")

Correlation between y_test_original and y_pred_original: 11.315882688408335


In [None]:
# ✅ Graphical Representation
plt.figure(figsize=(12, 5))

# Plot Omron Diastolic
plt.subplot(1, 2, 1)
plt.plot(y_test_original[:, 0], label="Actual Diastolic", marker='o')
plt.plot(y_pred_original[:, 0], label="Predicted Diastolic", linestyle='dashed', marker='s')
plt.xlabel("Sample Index")
plt.ylabel("Omron Systolic Value")
plt.legend()
plt.title("Omron Systolic Prediction")
