In [None]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, entropy
from scipy.fft import fft, fftfreq
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import joblib  # for loading scaler
from tensorflow.keras.models import load_model

# Load the CSV file with the correct separator and decimal
df = pd.read_csv("dataset/PMCD/PMPDB/raw-data/P01_1/P01_1.csv", sep=";", decimal=",")  # shape: (timepoints, 6)

# Ensure we only select the columns related to the six sensors
sensor_names = ["Bvp", "Eda_E4", "Resp", "Eda_RB", "Ecg", "Emg"]

# Keep only the relevant columns
df = df[sensor_names]

# If ECG is missing (i.e., it's NaN), fill it with zeros
if 'Ecg' not in df.columns or df['Ecg'].isnull().all():
    df['Ecg'] = 0  # Fill all missing ECG values with zeros

# Ensure it's a numpy array
data = df.to_numpy()  # shape: (timepoints, 6)

# Reshape to match your model input: (1 sample, 6 sensors, timepoints)
data = data.T[np.newaxis, ...]  # shape: (1, 6, timepoints)

# Now you can proceed with the rest of your prediction pipeline


KeyError: "['Ecg'] not in index"

In [7]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, entropy
from scipy.fft import fft, fftfreq
from sklearn.preprocessing import StandardScaler
import joblib
from tensorflow.keras.models import load_model

# === Load your CSV file ===
df = pd.read_csv("dataset/PMCD/PMPDB/raw-data/P01_1/P01_1.csv", sep=";", decimal=",")

# === Ensure only 6 known sensor columns ===
sensor_names = ["Bvp", "Eda_E4", "Resp", "Eda_RB", "Ecg", "Emg"]
df = df[[col for col in sensor_names if col in df.columns]]

# Fill missing columns (e.g., ECG) with zeros
for sensor in sensor_names:
    if sensor not in df.columns:
        df[sensor] = 0

# === Convert to array ===
data = df[sensor_names].to_numpy().T[np.newaxis, ...]  # shape: (1, 6, timepoints)

# === Feature extraction ===
def extract_time_features(sample):
    features = []
    for channel in sample.T:
        features.extend([
            np.mean(channel), np.std(channel), np.var(channel),
            np.min(channel), np.max(channel), np.ptp(channel),
            np.percentile(channel, 25), np.percentile(channel, 50), np.percentile(channel, 75),
            skew(channel, nan_policy="omit"), kurtosis(channel, nan_policy="omit"),
            entropy(np.histogram(channel, bins=10)[0] + 1),
            np.sum(channel), np.sqrt(np.mean(channel ** 2)),
            np.mean(np.abs(channel)), np.mean(np.diff(channel)),
            np.std(np.diff(channel)), np.min(np.diff(channel)),
            np.max(np.diff(channel)), np.mean(np.abs(np.diff(channel)))
        ])
    return np.array(features)

def extract_freq_features(sample, sample_rate=1.0):
    features = []
    for channel in sample.T:
        N = len(channel)
        freqs = fftfreq(N, 1 / sample_rate)
        fft_vals = np.abs(fft(channel))
        features.extend([
            np.sum(fft_vals), np.mean(fft_vals), np.std(fft_vals),
            np.max(fft_vals), freqs[np.argmax(fft_vals)],
            np.sum(freqs * fft_vals) / np.sum(fft_vals),
            np.sum((freqs ** 2) * fft_vals) / np.sum(fft_vals),
        ])
    return np.array(features)

# Apply extraction
time_feat = extract_time_features(data[0])  # shape: (120,)
freq_feat = extract_freq_features(data[0])  # shape: (42,)
combined_feat = np.hstack((time_feat, freq_feat)).reshape(1, -1)  # shape: (1, 162)

# === Load feature selector and scaler ===
selector = joblib.load("feature_selector.pkl")  # expects 162 input features
scaler = joblib.load("scaler.pkl")

# === Transform features ===
selected_feat = selector.transform(combined_feat)
normalized_feat = scaler.transform(selected_feat)

# === Load model and predict ===
model = load_model("pain_model.h5")
pred = model.predict(normalized_feat)
pain_level = np.argmax(pred)

print(f"Predicted Pain Level: {pain_level}")


ValueError: X has 1186326 features, but SelectKBest is expecting 162 features as input.

In [4]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, entropy
from scipy.fft import fft, fftfreq
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import joblib  # for loading scaler
from tensorflow.keras.models import load_model

# Load the CSV file with the correct separator and decimal
df = pd.read_csv("dataset/PMCD/PMPDB/raw-data/P01_1/P01_1.csv", sep=";", decimal=",")  # shape: (timepoints, 6)

# Print the column names to check what they are
print(f"Columns in the data: {df.columns.tolist()}")

# Ensure we only select the columns related to the six sensors
sensor_names = ["Bvp", "Eda_E4", "Resp", "Eda_RB", "Ecg", "Emg"]

# Check for missing columns and add them as needed
for sensor in sensor_names:
    if sensor not in df.columns:
        print(f"Warning: '{sensor}' column is missing from the data.")
        df[sensor] = 0  # Fill missing sensor data with zeros

# Now, keep only the relevant columns
df = df[sensor_names]

# If ECG is missing (i.e., it's NaN), fill it with zeros
if 'Ecg' not in df.columns or df['Ecg'].isnull().all():
    df['Ecg'] = 0  # Fill all missing ECG values with zeros

# Ensure it's a numpy array
data = df.to_numpy()  # shape: (timepoints, 6)

# Reshape to match your model input: (1 sample, 6 sensors, timepoints)
data = data.T[np.newaxis, ...]  # shape: (1, 6, timepoints)

# Now you can proceed with the rest of your prediction pipeline


Columns in the data: ['Seconds', 'Bvp', 'Eda_E4', 'Tmp', 'Resp', 'Eda_RB', 'Bvp_RB', 'Emg', 'Grip', 'Pain rates', 'Pain labels']


In [None]:
def extract_time_features(sample):
    features = []
    for channel in sample.T:
        features.extend([
            np.mean(channel), np.std(channel), np.var(channel),
            np.min(channel), np.max(channel), np.ptp(channel),
            np.percentile(channel, 25), np.percentile(channel, 50), np.percentile(channel, 75),
            skew(channel, nan_policy="omit"), kurtosis(channel, nan_policy="omit"),
            entropy(np.histogram(channel, bins=10)[0] + 1),
            np.sum(channel), np.sqrt(np.mean(channel ** 2)),
            np.mean(np.abs(channel)), np.mean(np.diff(channel)),
            np.std(np.diff(channel)), np.min(np.diff(channel)),
            np.max(np.diff(channel)), np.mean(np.abs(np.diff(channel)))
        ])
    return np.array(features)

def extract_freq_features(sample, sample_rate=1.0):
    features = []
    for channel in sample.T:
        N = len(channel)
        freqs = fftfreq(N, 1 / sample_rate)
        fft_vals = np.abs(fft(channel))
        features.extend([
            np.sum(fft_vals), np.mean(fft_vals), np.std(fft_vals),
            np.max(fft_vals), freqs[np.argmax(fft_vals)],
            np.sum(freqs * fft_vals) / np.sum(fft_vals),
            np.sum((freqs ** 2) * fft_vals) / np.sum(fft_vals),
        ])
    return np.array(features)

# Extract features
time_feat = extract_time_features(data[0])  # shape: (6×20,)
freq_feat = extract_freq_features(data[0])  # shape: (6×7,)

combined_feat = np.hstack((time_feat, freq_feat)).reshape(1, -1)  # shape: (1, total_features)





In [6]:
# Handle missing values (same as training)
imputer = SimpleImputer(strategy="mean")
combined_feat = imputer.fit_transform(combined_feat)

# Load the selector you used for training (if saved), or re-apply the selection logic
selector = joblib.load("feature_selector.pkl")  # Assuming you saved it
selected_feat = selector.transform(combined_feat)  # shape: (1, 100)


ValueError: X has 1186326 features, but SelectKBest is expecting 162 features as input.

In [None]:
scaler = joblib.load("scaler.pkl")
normalized_feat = scaler.transform(selected_feat)


In [None]:
model = load_model("pain_model.h5")

# Predict
prediction = model.predict(normalized_feat)
predicted_class = np.argmax(prediction)
print(f"Predicted Pain Class: {predicted_class}")


In [8]:
import pandas as pd
import numpy as np

# Read the CSV correctly
df = pd.read_csv("dataset/PMCD/PMPDB/raw-data/P01_1/P01_1.csv", sep=";", decimal=",")

# Keep only these 6 sensors
sensor_names = ["Bvp", "Eda_E4", "Resp", "Eda_RB", "Ecg", "Emg"]
df = df[[col for col in sensor_names if col in df.columns]]

# If any sensor is missing, add it and fill with zeros
for sensor in sensor_names:
    if sensor not in df.columns:
        df[sensor] = 0.0

# Ensure order is correct
df = df[sensor_names]

# Convert to numpy format and reshape
data = df.to_numpy().T[np.newaxis, ...]  # shape: (1, 6, timepoints)


In [12]:
from scipy.stats import skew, kurtosis, entropy
from scipy.fft import fft, fftfreq

def extract_time_features(sample):
    features = []
    for channel in sample.T:
        features.extend([
            np.mean(channel), np.std(channel), np.var(channel),
            np.min(channel), np.max(channel), np.ptp(channel),
            np.percentile(channel, 25), np.percentile(channel, 50), np.percentile(channel, 75),
            skew(channel, nan_policy="omit"), kurtosis(channel, nan_policy="omit"),
            entropy(np.histogram(channel, bins=10)[0] + 1),
            np.sum(channel), np.sqrt(np.mean(channel ** 2)),
            np.mean(np.abs(channel)), np.mean(np.diff(channel)),
            np.std(np.diff(channel)), np.min(np.diff(channel)),
            np.max(np.diff(channel)), np.mean(np.abs(np.diff(channel)))
        ])
    return np.array(features)

def extract_freq_features(sample, sample_rate=1.0):
    features = []
    for channel in sample.T:
        N = len(channel)
        freqs = fftfreq(N, 1 / sample_rate)
        fft_vals = np.abs(fft(channel))
        features.extend([
            np.sum(fft_vals), np.mean(fft_vals), np.std(fft_vals),
            np.max(fft_vals), freqs[np.argmax(fft_vals)],
            np.sum(freqs * fft_vals) / np.sum(fft_vals),
            np.sum((freqs ** 2) * fft_vals) / np.sum(fft_vals),
        ])
    return np.array(features)

# Extract features
time_feat = extract_time_features(data[0])
freq_feat = extract_freq_features(data[0])
combined_feat = np.hstack((time_feat, freq_feat)).reshape(1, -1)
print(f"X_train shape: {combined_feat.shape}")


X_train shape: (1, 1186326)


In [13]:
import joblib

# Load selector and scaler
selector = joblib.load("feature_selector.pkl")
scaler = joblib.load("scaler.pkl")

# Apply same transformations
selected_feat = selector.transform(combined_feat)
normalized_feat = scaler.transform(selected_feat)


ValueError: X has 1186326 features, but SelectKBest is expecting 162 features as input.

In [None]:
from tensorflow.keras.models import load_model

# Load model
model = load_model("pain_model.h5")

# Predict
prediction = model.predict(normalized_feat)
predicted_class = np.argmax(prediction, axis=1)[0]
print("Predicted Pain Level:", predicted_class)
