In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Import dataset
df = pd.read_csv('./dataset/230322_OlderPredictTc_data_thermal.csv')

# Only use previous values from same individual
# df['previous_tre_int'] = df.groupby('id_all')['tre_int'].shift(1)
# df['previous_mtsk_int'] = df.groupby('id_all')['mtsk_int'].shift(1)

# Select only time > 0
df = df[df.time > 0]

# Unique ID to identify an individual
df['unique_id'] = df['study'].astype(str) + '_' + df['condition'].astype(str) + '_' + df['id_all'].astype(str)

# Select only features and output
features = ['female', 'age', 'height', 'mass', 'ta_set', 'rh_set']
output = ['tre_int', 'mtsk_int']
df = df[features + output + ['id_all', 'unique_id', 'study', 'condition', 'time']]

# Create train_df based on participants assigned to training set
train_ids = [46, 34, 68, 30, 40, 98, 89, 65, 24, 58, 85, 67, 28, 39, 35, 77, 26,
             80, 70, 37, 52, 56, 74, 78, 71, 60, 86, 43, 91, 82, 22, 59, 21, 87,
             95, 66, 44, 25, 76, 94, 53, 32, 73, 23, 49]
train_df = df[df['id_all'].isin(train_ids)]

# Check data
train_df.isnull().sum()

female       0
age          0
height       0
mass         0
ta_set       0
rh_set       0
tre_int      0
mtsk_int     0
id_all       0
unique_id    0
study        0
condition    0
time         0
dtype: int64

In [14]:
def generate_extra_data(input_df):
    # Create extra data for each participant
    # This simulates sitting in air conditioning for 60 minutes before the trial
    extra_data = pd.DataFrame()
    for uid in input_df['unique_id'].unique():
        participant_data = input_df[input_df['unique_id'] == uid].iloc[0]
        new_data = pd.DataFrame({
            'female': [participant_data['female']] * 60,
            'age': [participant_data['age']] * 60,
            'height': [participant_data['height']] * 60,
            'mass': [participant_data['mass']] * 60,
            'ta_set': [23] * 60,
            'rh_set': [9] * 60,
            'tre_int': [participant_data['tre_int']] * 60,
            'mtsk_int': [participant_data['mtsk_int']] * 60,
            'id_all': [participant_data['id_all']] * 60,
            'unique_id': [participant_data['unique_id']] * 60,
            'study': [participant_data['study']] * 60,
            'condition': [participant_data['condition']] * 60,
            'time': list(range(-60, 0))
        })
        extra_data = extra_data.append(new_data, ignore_index=True)
    return extra_data

In [15]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Reset index
train_df.reset_index(inplace=True)

# 60 mins of data in aircon
extra_data = generate_extra_data(train_df)
# Concatenate extra data with train_df
train_df = pd.concat([extra_data, train_df], ignore_index=True)

# Scalars
features_scaler = MinMaxScaler(feature_range=(0,1))
output_scaler = MinMaxScaler(feature_range=(0,1))
# Fit scalers
X_scaled = features_scaler.fit_transform(train_df[features])
y_scaled = output_scaler.fit_transform(train_df[output])

# Create sequences based on unique_id
unique_ids = train_df['unique_id'].unique()
X_seq, y_seq = [], []
for uid in unique_ids:
    seq_data = train_df[train_df['unique_id'] == uid]
    X_seq.append(X_scaled[seq_data.index])
    y_seq.append(y_scaled[seq_data.index])

# Pad sequences to have the same length
max_len = max(len(seq) for seq in X_seq)
print("Max sequence length:", max_len)
X_padded = np.array([np.pad(seq, ((0, max_len - len(seq)), (0, 0)), mode='constant') for seq in X_seq])
y_padded = np.array([np.pad(seq, ((0, max_len - len(seq)), (0, 0)), mode='constant') for seq in y_seq])

  extra_data = extra_data.append(new_data, ignore_index=True)


Max sequence length: 600


In [16]:
import tensorflow as tf
# Disable GPU training, comment out to enable
tf.config.set_visible_devices([], 'GPU')

from keras.layers import Input, LSTM, Dense, TimeDistributed, Dropout
from keras.models import Sequential
from keras.regularizers import l2

# Define the input shape
input_shape = (None, X_padded.shape[-1])

# Define the improved RNN model
model = Sequential()
model.add(Input(shape=input_shape))
model.add(LSTM(16, return_sequences=True, dropout=0.2, recurrent_dropout=0.2,
                    kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01)))
model.add(TimeDistributed(Dense(8, activation='relu', kernel_regularizer=l2(0.01))))
model.add(Dropout(0.2))
model.add(TimeDistributed(Dense(y_padded.shape[-1])))

# Compile the model
model.compile(loss='mse', optimizer='adam')

# Train the model with early stopping
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.fit(X_padded, y_padded, validation_split=0.2, epochs=100, batch_size=32, callbacks=[early_stopping])

Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - loss: 0.6452 - val_loss: 0.6243
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - loss: 0.5901 - val_loss: 0.5748
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - loss: 0.5729 - val_loss: 0.5290
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step - loss: 0.5260 - val_loss: 0.4852
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step - loss: 0.4709 - val_loss: 0.4418
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - loss: 0.4386 - val_loss: 0.4047
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - loss: 0.4235 - val_loss: 0.3759
Epoch 8/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - loss: 0.4085 - val_loss: 0.3572
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x124064790>

In [19]:
# Save the model weights
model.save('model_weights/rnn.h5')



In [17]:
# SIMULATE
from helpers import get_sample

def run_and_save_trial(study, condition):
    # Get sample
    sample = get_sample(study, condition)

    # Unique ID to identify an individual
    sample['unique_id'] = sample['study'].astype(str) + '_' + sample['condition'].astype(str) + '_' + sample['id_all'].astype(str)

    extra_data = generate_extra_data(sample)
    # Concatenate extra data with train_df
    sample_extra_data = pd.concat([extra_data, sample], ignore_index=True)

    # Fit scalers
    all_X_scaled = features_scaler.fit_transform(sample_extra_data[features])

    # Create sequences based on unique_id
    all_unique_ids = sample_extra_data['unique_id'].unique()
    all_X_seq = []
    seq_lengths = []  # Store the original sequence lengths

    for uid in all_unique_ids:
        seq_data = sample_extra_data['unique_id'] == uid
        data_for_uid = all_X_scaled[seq_data]
        all_X_seq.append(data_for_uid)
        seq_lengths.append(len(data_for_uid))  # Store the original sequence length

    # Pad sequences to have the same length
    all_X_padded = np.array([np.pad(seq, ((0, max_len - len(seq)), (0, 0)), mode='constant') for seq in all_X_seq])

    # Make predictions
    predictions = model.predict(all_X_padded)

    # Remove predictions corresponding to padded inputs and extra data
    unpadded_predictions = []
    for i, length in enumerate(seq_lengths):
        unpadded_predictions.append(predictions[i, 60:length])  # Slice to remove 60 mins of extra data

    # Flatten the unpadded predictions
    unpadded_predictions = np.concatenate(unpadded_predictions, axis=0)

    # Inverse transform the predictions
    unpadded_predictions = output_scaler.inverse_transform(unpadded_predictions)

    all_core_temps = unpadded_predictions[:, 0]
    all_skin_temps = unpadded_predictions[:, 1]

    print(all_core_temps.shape[0])
    print(all_skin_temps.shape[0])
    print(sample.shape[0])

    # Save to csv
    df = pd.DataFrame(all_core_temps, columns=["tre_predicted"])
    df["mtsk_predicted"] = all_skin_temps
    df.to_csv('results/rnn-{}-{}.csv'.format(study, condition), index=False)

In [18]:
run_and_save_trial('heatwave 1 (prolonged)', 'hot')
run_and_save_trial('heatwave 2 (indoor)', 'cool')
run_and_save_trial('heatwave 2 (indoor)', 'temp')
run_and_save_trial('heatwave 2 (indoor)', 'warm')
run_and_save_trial('heatwave 2 (indoor)', 'hot')
run_and_save_trial('heatwave 3 (cooling)', 'hot')

  extra_data = extra_data.append(new_data, ignore_index=True)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4s/step
31860
31860
31860


  extra_data = extra_data.append(new_data, ignore_index=True)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
7680
7680
7680


  extra_data = extra_data.append(new_data, ignore_index=True)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
7680
7680
7680


  extra_data = extra_data.append(new_data, ignore_index=True)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
17760
17760
17760


  extra_data = extra_data.append(new_data, ignore_index=True)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
16800
16800
16800


  extra_data = extra_data.append(new_data, ignore_index=True)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
10260
10260
10260
