In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the thermal dataset and keep only observations with time > 0.
# `.copy()` makes the filtered frame independent of the frame read from disk,
# so the column assignment below does not raise SettingWithCopyWarning.
df = pd.read_csv('./dataset/230322_OlderPredictTc_data_thermal.csv')
df = df[df.time > 0].copy()

# Lagged per-participant inputs (previous tre_int / mtsk_int obtained with
# groupby('id_all').shift(1)) are currently disabled.

# Sequence key: one value per study / condition / participant combination
df['unique_id'] = (
    df['study'].astype(str) + '_'
    + df['condition'].astype(str) + '_'
    + df['id_all'].astype(str)
)

# Model inputs and outputs; every other column except the bookkeeping columns is dropped
features = ['female', 'age', 'height', 'mass', 'ta_set', 'rh_set']
output = ['tre_int', 'mtsk_int']
df = df[features + output + ['id_all', 'unique_id', 'study', 'condition', 'time']]

# Participants (id_all) assigned to the training set
train_ids = [46, 34, 68, 30, 40, 98, 89, 65, 24, 58, 85, 67, 28, 39, 35, 77, 26,
             80, 70, 37, 52, 56, 74, 78, 71, 60, 86, 43, 91, 82, 22, 59, 21, 87,
             95, 66, 44, 25, 76, 94, 53, 32, 73, 23, 49]
# Independent copy: later cells modify train_df and must not touch (or warn about) df
train_df = df[df['id_all'].isin(train_ids)].copy()

# Check data: number of missing values per column
train_df.isnull().sum()

female       0
age          0
height       0
mass         0
ta_set       0
rh_set       0
tre_int      0
mtsk_int     0
id_all       0
unique_id    0
study        0
condition    0
time         0
dtype: int64

In [49]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import TimeDistributed

# Renumber rows 0..n-1. Reassigning (instead of inplace=True) keeps this cell
# re-runnable: a second in-place reset fails because an 'index' column already
# exists, and it would also mutate a frame that was sliced from `df`.
train_df = train_df.reset_index(drop=True)

# Scalers map every feature / target column onto [0, 1]. They are fitted on
# the training rows only and reused unchanged at prediction time.
features_scaler = MinMaxScaler(feature_range=(0, 1))
output_scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = features_scaler.fit_transform(train_df[features])
y_scaled = output_scaler.fit_transform(train_df[output])

# Group the scaled rows into one sequence per unique_id. Rows are selected by
# position, so the result does not depend on the frame's index labels.
# NOTE(review): assumes rows are already in chronological order within each
# unique_id -- confirm, or sort by 'time' before this step.
uid_values = train_df['unique_id'].to_numpy()
unique_ids = train_df['unique_id'].unique()
X_seq, y_seq = [], []
for uid in unique_ids:
    rows = np.flatnonzero(uid_values == uid)
    X_seq.append(X_scaled[rows])
    y_seq.append(y_scaled[rows])

# Zero-pad every sequence at the end so they all share the longest length
seq_lens = np.array([len(seq) for seq in X_seq])
max_len = max(len(seq) for seq in X_seq)
X_padded = np.array([np.pad(seq, ((0, max_len - len(seq)), (0, 0)), mode='constant') for seq in X_seq])
y_padded = np.array([np.pad(seq, ((0, max_len - len(seq)), (0, 0)), mode='constant') for seq in y_seq])

# Per-timestep weights: 1 for observed steps, 0 for padding. Without them the
# loss would also reward predicting 0 on the padded tail of short sequences.
step_weights = (np.arange(max_len)[None, :] < seq_lens[:, None]).astype('float32')

# Sequence-to-sequence model: an LSTM that emits one hidden state per timestep,
# followed by a dense layer applied to each timestep independently.
model = Sequential()
model.add(LSTM(64, input_shape=(None, X_padded.shape[-1]), return_sequences=True))
model.add(TimeDistributed(Dense(y_padded.shape[-1])))
model.compile(loss='mse', optimizer='adam')

# Train the model; the 2D sample_weight array applies one weight per timestep
model.fit(X_padded, y_padded, sample_weight=step_weights,
          epochs=50, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/50


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - loss: 0.4806 - val_loss: 0.2586
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 204ms/step - loss: 0.2190 - val_loss: 0.0747
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 214ms/step - loss: 0.0695 - val_loss: 0.0230
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 202ms/step - loss: 0.0422 - val_loss: 0.0491
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 206ms/step - loss: 0.0611 - val_loss: 0.0378
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 209ms/step - loss: 0.0473 - val_loss: 0.0199
Epoch 7/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 203ms/step - loss: 0.0259 - val_loss: 0.0193
Epoch 8/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 209ms/step - loss: 0.0192 - val_loss: 0.0270
Epoch 9/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

<keras.src.callbacks.history.History at 0x3facaebb0>

In [135]:
# SIMULATE
from helpers import get_sample

def run_and_save_trial(study, condition):
    """Predict core and skin temperature for one study/condition and save them to CSV.

    The frame returned by ``get_sample(study, condition)`` is split into one
    sequence per study / condition / participant, scaled with the scaler that
    was fitted on the training data, and passed through the trained model.
    The predictions are written to ``results/regression-{study}-{condition}.csv``
    with one row per sample row, in the sample's own row order.

    Parameters
    ----------
    study
        Study label; forwarded to ``get_sample`` and used in the file name.
    condition
        Condition label; forwarded to ``get_sample`` and used in the file name.

    Raises
    ------
    ValueError
        If the sample contains no rows.
    """
    import os

    sample = get_sample(study, condition)
    if len(sample) == 0:
        raise ValueError('No rows returned for study={!r}, condition={!r}'.format(study, condition))

    # Sequence key, kept as a separate array so the frame from get_sample is not modified
    sequence_ids = (
        sample['study'].astype(str) + '_'
        + sample['condition'].astype(str) + '_'
        + sample['id_all'].astype(str)
    ).to_numpy()

    # Apply the scaling learned on the training data. Refitting here would
    # rescale each sample by its own min/max (columns that are constant within
    # one sample collapse to 0) and would overwrite the fitted global scaler.
    all_X_scaled = features_scaler.transform(sample[features])

    # Row positions of every sequence, in order of first appearance
    row_positions = [np.flatnonzero(sequence_ids == uid) for uid in pd.unique(sequence_ids)]
    seq_lengths = [len(rows) for rows in row_positions]

    # Pad to the longest sequence of this sample. The model accepts any
    # sequence length and padding is appended after the real steps, so
    # sequences longer than those seen in training do not break padding.
    pad_to = max(seq_lengths)
    all_X_padded = np.array([
        np.pad(all_X_scaled[rows], ((0, pad_to - len(rows)), (0, 0)), mode='constant')
        for rows in row_positions
    ])

    # Make predictions
    predictions = model.predict(all_X_padded)

    # Drop the padded steps and write each sequence back to the rows it came
    # from, so row k of the output corresponds to row k of the sample even if
    # the sample is not grouped by sequence.
    scaled_predictions = np.empty((len(sample), predictions.shape[-1]), dtype=predictions.dtype)
    for i, rows in enumerate(row_positions):
        scaled_predictions[rows] = predictions[i, :len(rows)]

    # Back to the original units of the output columns
    unscaled_predictions = output_scaler.inverse_transform(scaled_predictions)

    # Column order follows `output`: core temperature first, then skin temperature
    all_core_temps = unscaled_predictions[:, 0]
    all_skin_temps = unscaled_predictions[:, 1]

    # Sanity check: all three counts should match
    print(all_core_temps.shape[0])
    print(all_skin_temps.shape[0])
    print(sample.shape[0])

    # Save to csv (create the output directory on first use)
    results_df = pd.DataFrame({"tre_predicted": all_core_temps, "mtsk_predicted": all_skin_temps})
    os.makedirs('results', exist_ok=True)
    results_df.to_csv('results/regression-{}-{}.csv'.format(study, condition), index=False)

In [136]:
# Every (study, condition) pair to simulate, in execution order
TRIALS = [
    ('heatwave 1 (prolonged)', 'hot'),
    ('heatwave 2 (indoor)', 'cool'),
    ('heatwave 2 (indoor)', 'temp'),
    ('heatwave 2 (indoor)', 'warm'),
    ('heatwave 2 (indoor)', 'hot'),
    ('heatwave 3 (cooling)', 'hot'),
]

# Predict and save one CSV per trial
for trial_study, trial_condition in TRIALS:
    run_and_save_trial(trial_study, trial_condition)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
31860
31860
31860
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
7680
7680
7680
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
7680
7680
7680
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
17760
17760
17760
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
16800
16800
16800
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
10260
10260
10260
