In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('./deeplearning.mplstyle')
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

RANDOM_STATE = 42

In [4]:
# Load the dataset using pandas
df = pd.read_csv("data.csv")

print(df.head())
df = df.drop('I', axis=1)

# 1. Diferença de Pressões
df['P_diff'] = df['P_sist'] - df['P_dist']

# 2. Razão de Pressão para Frequência Cardíaca
df['Pressure_per_Pulse'] = df['P_sist'] / df['Pulse']

# 3. Índice Respiratório
df['Resp_Index'] = df['qPA'] / df['BreathFreq']

## Removing our target variable

selected_features = ["qPA", "Pulse", "BreathFreq" ,"P_diff", "Pressure_per_Pulse", "Resp_Index"]
X = df[selected_features].values
y_gravity = df["Gravity"].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


print(f"Temperature Max, Min post normalization: {np.max(X_scaled[0]):0.2f}, {np.min(X_scaled[0]):0.2f}")

   I     P_sist     P_dist       qPA       Pulse  BreathFreq    Gravity  Class
0  1  13.592433  12.220855  8.416754   75.921057   21.635259  40.000000      2
1  2  15.775386  13.586879  8.725890   63.813564   19.718734  41.530427      2
2  3   3.649369   1.904802  0.000000  197.210213   19.045471  52.730745      3
3  4  17.264362  13.700638  8.733333  143.636181   17.621141  34.679911      2
4  5  12.705183   9.485389  1.747626   82.636672   12.209535  69.375882      3
Temperature Max, Min post normalization: 1.72, -1.23


In [5]:
cat_variables = ['Class']

# This will replace the columns with the one-hot encoded ones and keep the columns outside 'columns' argument as it is.
df = pd.get_dummies(data = df,
                         prefix = "class",
                         columns = cat_variables)

print(df.head())

      P_sist     P_dist       qPA       Pulse  BreathFreq    Gravity  \
0  13.592433  12.220855  8.416754   75.921057   21.635259  40.000000   
1  15.775386  13.586879  8.725890   63.813564   19.718734  41.530427   
2   3.649369   1.904802  0.000000  197.210213   19.045471  52.730745   
3  17.264362  13.700638  8.733333  143.636181   17.621141  34.679911   
4  12.705183   9.485389  1.747626   82.636672   12.209535  69.375882   

     P_diff  Pressure_per_Pulse  Resp_Index  class_1  class_2  class_3  \
0  1.371578            0.179034    0.389030    False     True    False   
1  2.188507            0.247211    0.442518    False     True    False   
2  1.744567            0.018505    0.000000    False    False     True   
3  3.563724            0.120195    0.495617    False     True    False   
4  3.219794            0.153748    0.143136    False    False     True   

   class_4  
0    False  
1    False  
2    False  
3    False  
4    False  


In [6]:
# Define X (features), y_class e y_gravity
y_class = df[["class_1", "class_2", "class_3", "class_4"]].values

# Divide em treino e teste
X_train, X_test, y_train, y_test, y_class_train, y_class_test = train_test_split(X_scaled, y_gravity, y_class, train_size = 0.7, random_state = RANDOM_STATE)

print(f'train samples: {len(X_train)}\ntest samples: {len(X_test)}')

train samples: 1050
test samples: 450


In [7]:
print(X_train.shape, y_class_train.shape)
print(X.shape[1])

(1050, 6) (1050, 4)
6


usando K-fold

In [16]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
cv_scores = []

for train_idx, val_idx in kf.split(X_scaled):
    X_train_cv, X_val_cv = X_scaled[train_idx], X_scaled[val_idx]
    y_train_cv, y_val_cv = y_gravity[train_idx], y_gravity[val_idx]
    
    # Build a new model for each fold
    model_cv = tf.keras.Sequential([
        tf.keras.Input(X_train_cv.shape[1]),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='linear')
    ])
    model_cv.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    model_cv.fit(X_train_cv, y_train_cv, epochs=800, verbose=0)
    
    # Evaluate on validation fold
    loss, mae = model_cv.evaluate(X_val_cv, y_val_cv, verbose=0)
    cv_scores.append(mae)

print(f"Mean MAE across folds: {np.mean(cv_scores):.4f}")

Mean MAE across folds: 2.0863


Usando K-fold

In [23]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
clf_cv_scores = []

for train_idx, val_idx in kf.split(X_scaled):
    X_train_cv, X_val_cv = X_scaled[train_idx], X_scaled[val_idx]
    y_train_cv, y_val_cv = y_gravity[train_idx], y_gravity[val_idx]
    y_class_train_cv, y_class_val_cv = y_class[train_idx], y_class[val_idx]

    # 1. Train regression model
    model_cv = tf.keras.Sequential([
        tf.keras.Input(X_train_cv.shape[1]),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='linear')
    ])
    model_cv.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    model_cv.fit(X_train_cv, y_train_cv, epochs=800, verbose=0)

    # 2. Predict gravity for train and validation sets
    y_train_pred_cv = model_cv.predict(X_train_cv)
    y_val_pred_cv = model_cv.predict(X_val_cv)

    # 3. Train classifier on regression predictions (train set)
    clf_model_cv = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(1,)),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(4, activation='softmax')
    ])
    clf_model_cv.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    clf_model_cv.fit(y_train_pred_cv, y_class_train_cv, epochs=800, verbose=0)

    # 4. Evaluate classifier on validation set
    val_acc = clf_model_cv.evaluate(y_val_pred_cv, y_class_val_cv, verbose=0)[1]
    print(f"Fold classifier accuracy: {val_acc*100:.2f}%")
    clf_cv_scores.append(val_acc)

print(f"Mean classifier accuracy across folds: {np.mean(clf_cv_scores)*100:.2f}%")

Fold classifier accuracy: 88.67%
Fold classifier accuracy: 91.00%
Fold classifier accuracy: 92.00%
Fold classifier accuracy: 93.67%
Fold classifier accuracy: 90.00%
Mean classifier accuracy across folds: 91.07%


In [17]:
def regression_accuracy(model, X_test, y_test, tolerance=5.6): #tolerance of 7.5%
    """
    Calculates the percentage of predictions within a tolerance of the true value.
    Args:
        model: Trained Keras model
        X_test: Test features
        y_test: True values
        tolerance: Acceptable error (absolute difference)
    Returns:
        accuracy: Percentage of predictions within tolerance
    """
    y_pred = model.predict(X_test).flatten()
    correct = np.abs(y_pred - y_test) <= tolerance
    accuracy = np.mean(correct)
    print(f"Regression accuracy (within ±{tolerance}): {accuracy*100:.2f}%")
    return accuracy

# Example usage:
print("Train fold accuracy:")
regression_accuracy(model_cv, X_train_cv, y_train_cv)
print("Validation fold accuracy:")
regression_accuracy(model_cv, X_val_cv, y_val_cv)

Train fold accuracy:
Regression accuracy (within ±5.6): 98.25%
Validation fold accuracy:
Regression accuracy (within ±5.6): 97.67%


0.9766666666666667

In [24]:
def evaluate_classifier_accuracy(clf_model, X, Y):
    """
    Evaluates the classifier model accuracy.
    Args:
        clf_model: Trained classifier model
        X: array-like, regression outputs for test set (shape: [n_samples, 1])
        Y: array-like, one-hot encoded true class labels (shape: [n_samples, n_classes])
    Returns:
        accuracy: float, classification accuracy
    """
    print(f"X shape: {X.shape}, Y shape: {Y.shape}")
    # Predict class probabilities
    y_pred_probs = clf_model.predict(X)
    # Get predicted class indices
    y_pred_classes = np.argmax(y_pred_probs, axis=1)
    # Get true class indices
    y_true_classes = np.argmax(Y, axis=1)
    # Calculate accuracy
    accuracy = np.mean(y_pred_classes == y_true_classes)
    print(f"Classifier accuracy: {accuracy*100:.2f}%")
    return accuracy


#print(y_test.shape, y_class_test.shape, y_pred.shape)
y_test_reshaped = y_test.reshape(-1, 1)
#print(y_test_reshaped.shape)  # Should print (450, 1)
evaluate_classifier_accuracy(clf_model_cv, y_test_reshaped, y_class_test)



y_pred_cv = model_cv.predict(X_test)  # shape: (num_samples, 1)
evaluate_classifier_accuracy(clf_model_cv, y_pred_cv, y_class_test)



X shape: (450, 1), Y shape: (450, 4)
Classifier accuracy: 97.11%
X shape: (450, 1), Y shape: (450, 4)
Classifier accuracy: 91.33%


0.9133333333333333

96.67, 92 #datqset
95.78 91.56 #dataset with new features
95.11 90.89 #dataset
95.11 92.44 #dataset with new features
98.67 91.78 

In [26]:
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
import numpy as np

# --- RMSE para o modelo de regressão (model_cv) ---
y_pred_reg = model_cv.predict(X_test).flatten()
mse = mean_squared_error(y_test, y_pred_reg)
rmse = np.sqrt(mse)
print(f"RMSE (regression model, test set): {rmse:.4f}")

# --- Métricas de classificação para o classificador (clf_model_cv) ---
y_pred_probs = clf_model_cv.predict(y_pred_reg.reshape(-1, 1))
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true_classes = np.argmax(y_class_test, axis=1)

acc = accuracy_score(y_true_classes, y_pred_classes)
precision = precision_score(y_true_classes, y_pred_classes, average='weighted')
recall = recall_score(y_true_classes, y_pred_classes, average='weighted')
f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')

print(f"Classificação (test set):")
print(f"  Acurácia: {acc:.4f}")
print(f"  Precision: {precision:.4f}")
print(f"  Recall: {recall:.4f}")
print(f"  F1-score: {f1:.4f}")

print("Matriz de confusão (test set):")
print(confusion_matrix(y_true_classes, y_pred_classes))

RMSE (regression model, test set): 2.0916
Classificação (test set):
  Acurácia: 0.9133
  Precision: 0.9186
  Recall: 0.9133
  F1-score: 0.9135
Matriz de confusão (test set):
[[ 59   7   0   0]
 [  7 231  19   0]
 [  0   3 115   0]
 [  0   0   3   6]]
