# Import libraries and model hyperparameters

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import argparse
import random
random.seed(1)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dropout, Flatten, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler

# Show every column and row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# scikit-learn's train_test_split draws from NumPy's global RNG when it is
# called without a random_state, so seeding only Python's `random` module
# leaves the train/validation splits different on every run.
# NOTE(review): TensorFlow's own RNG is not seeded in the cells visible here,
# so the training itself may still vary between runs.
np.random.seed(1)

# Define model hyperparameters
parser = argparse.ArgumentParser(description='TL-CNN-LSTM Fine Tuning Model')
parser.add_argument('--input_size', type=int, default=7)
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--num_epochs', type=int, default=100)
parser.add_argument('--learning_rate', type=float, default=0.000001)
# nargs='+' collects one string per feature name. The former type=list would
# have turned a value given on the command line into a list of single
# characters ("Mode" -> ['M', 'o', 'd', 'e']).
parser.add_argument('--input_features', type=str, nargs='+', default=['Mode',
                                                                      'Indoor Temp',
                                                                      'Indoor Humidity',
                                                                      'Air Velocity',
                                                                      'Globe Temperature',
                                                                      'Outdoor Temp',
                                                                      'Outdoor Humidity'])
parser.add_argument('--experiment', type=str, default='mode_al',
                    help="choose either 'condition_random', 'mode_random', 'mode_al', 'all'")
# An explicit empty argv makes argparse ignore the command line of the process
# hosting the notebook, so every option takes its default.
args = parser.parse_args(args=[])

# String category -> integer code used by the models.
mode_mapping = {'AC':0, 'NV':1}
thermalpref_mapping = {'No Change':0, 'Warmer':1, 'Cooler':2}
thermalacc_mapping = {'Acceptable':0, 'Unacceptable':1}
airpref_mapping = {'No Change':0, 'More':1, 'Less':2}

2023-12-05 23:30:32.341268: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Load data

In [2]:
# Every CSV read by this notebook lives in this directory (relative path).
DATA_DIR = '../data'

# File-name tokens of the three prediction targets. The order matters: the
# tuple assignments below unpack as (thermalpref, thermalacc, airpref).
TARGET_KEYS = ('thermalpref', 'thermalacc', 'airpref')

# Experiment name -> token inserted into the BCA training file names.
# 'all' maps to the training files whose names carry no experiment token.
BCA_TRAIN_TOKENS = {
    'condition_random': '_condition_random',
    'mode_random': '_mode_random',
    'mode_al': '_mode_al',
    'all': '',
}

# Reject an unknown experiment before any file is read.
if args.experiment not in BCA_TRAIN_TOKENS:
    raise ValueError(f"Experiment {args.experiment} is not supported.")


def load_per_target(filename_template):
    """
    Reads one CSV per prediction target from DATA_DIR.

    Parameters:
        filename_template: File name containing a '{target}' placeholder that is
            filled with each entry of TARGET_KEYS in turn.

    Returns:
        frames: List of DataFrames in the same order as TARGET_KEYS.
    """
    return [pd.read_csv(f"{DATA_DIR}/{filename_template.format(target=key)}")
            for key in TARGET_KEYS]


# load training data from ASHRAE dataset
(ashrae_thermalpref_train,
 ashrae_thermalacc_train,
 ashrae_airpref_train) = load_per_target('ashrae_{target}_sampled_data.csv')

# load training data from BCA dataset (file set chosen by args.experiment)
(bca_thermalpref_train,
 bca_thermalacc_train,
 bca_airpref_train) = load_per_target(
    'bca_{target}_train' + BCA_TRAIN_TOKENS[args.experiment] + '_data.csv')

# load test data from BCA dataset
(bca_thermalpref_test,
 bca_thermalacc_test,
 bca_airpref_test) = load_per_target('bca_{target}_test_data.csv')

In [3]:
# Encode the categorical 'Mode' column as the integer codes of mode_mapping.
# This is integer (label) encoding: one column in, one column out. It is not
# one-hot encoding.
mode_code_values = list(mode_mapping.values())
for frame in (
    ashrae_thermalpref_train, bca_thermalpref_train, bca_thermalpref_test,
    ashrae_thermalacc_train, bca_thermalacc_train, bca_thermalacc_test,
    ashrae_airpref_train, bca_airpref_train, bca_airpref_test,
):
    # The frames are modified in place, so running this cell a second time
    # used to fail with a KeyError on the already-encoded values. Skip any
    # frame whose column holds nothing but codes.
    if frame['Mode'].isin(mode_code_values).all():
        continue
    # Indexing the dict keeps the KeyError for an unexpected label.
    frame['Mode'] = frame['Mode'].apply(lambda x: mode_mapping[x])

In [4]:
# perform label mapping: replace the string labels of each target column with
# the integer class ids defined in the corresponding mapping
label_encodings = (
    ('Thermal Preference', thermalpref_mapping,
     (ashrae_thermalpref_train, bca_thermalpref_train, bca_thermalpref_test)),
    ('Thermal Acceptability', thermalacc_mapping,
     (ashrae_thermalacc_train, bca_thermalacc_train, bca_thermalacc_test)),
    ('Air Movement Preference', airpref_mapping,
     (ashrae_airpref_train, bca_airpref_train, bca_airpref_test)),
)

for label_col, label_mapping, frames in label_encodings:
    class_ids = list(label_mapping.values())
    for frame in frames:
        # The frames are modified in place, so running this cell a second time
        # used to fail with a KeyError on the already-mapped values. Skip any
        # frame whose column holds nothing but class ids.
        if frame[label_col].isin(class_ids).all():
            continue
        # Indexing the dict keeps the KeyError for an unexpected label.
        frame[label_col] = frame[label_col].apply(lambda x: label_mapping[x])

# Load pretrained model

In [5]:
def load_pretrained_model(model_name):
    """
    Loads the CNN LSTM model that was pretrained on the ASHRAE dataset.

    Parameters:
        model_name: Path of the saved model without the '.h5' extension.

    Return:
        model: The Keras model loaded from model_name + '.h5'.
    """
    # Load the pre-trained CNN LSTM model
    pretrained_model = tf.keras.models.load_model(model_name + '.h5')

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the cell output.
    pretrained_model.summary()

    return pretrained_model

def generate_ashrae_scaler(ashrae_data, target_col, random_state=None):
    """
    Generates a feature scaler based on the ASHRAE dataset.

    The features listed in args.input_features are split 80/20 and a MinMax
    scaler is fitted on the 80% portion only.

    NOTE(review): the scaler is re-derived here from a fresh split instead of
    being loaded from the pretraining run. Confirm this matches how the
    pretrained model's inputs were scaled.

    Parameters:
        ashrae_data: The ASHRAE dataset that the model is pretrained on.
        target_col: The target column in the ASHRAE dataset. It must exist, but
            its values do not influence the fitted scaler.
        random_state: Optional seed forwarded to train_test_split. The default
            (None) keeps the previous behaviour of using NumPy's global RNG.

    Returns:
        scaler: The MinMax scaler fitted on the ASHRAE dataset.

    Raises:
        KeyError: If target_col is not a column of ashrae_data.
    """
    # The labels are not needed to fit a feature scaler, but a wrong column
    # name should still fail here as it did before.
    if target_col not in ashrae_data.columns:
        raise KeyError(target_col)

    X = np.array(ashrae_data[args.input_features])

    # Only the training portion of the split is used for fitting.
    X_train, _ = train_test_split(X, test_size=0.2, random_state=random_state)

    # Create and fit a Min-Max scaler (the transformed data is not needed).
    scaler = MinMaxScaler()
    scaler.fit(X_train)

    return scaler
    

# Model training and evaluation

In [6]:
def train_model(model, train_df, scaler, model_name, target_col, num_classes,
                random_state=None, checkpoint_path=None):
    """
    Continues training a model on train_df and saves it to model_name + '.h5'.

    The rows of train_df are split 80/20 into training and validation data,
    scaled with the already-fitted scaler, and the labels are one-hot encoded
    before model.fit is called.

    NOTE(review): args.learning_rate is not applied anywhere in this function;
    the model is fitted as it was passed in. Confirm that is intended.

    Parameters:
        model: The Keras model to train (trained in place).
        train_df: DataFrame holding the args.input_features columns and target_col.
        scaler: A fitted scaler exposing transform(); it is not refitted here.
        model_name: Path prefix; the trained model is saved to model_name + '.h5'.
        target_col: Name of the label column holding integer class ids.
        num_classes: Number of classes used for the one-hot label encoding.
        random_state: Optional seed forwarded to train_test_split. The default
            (None) uses NumPy's global RNG.
        checkpoint_path: Optional file for the ModelCheckpoint callback.
            Defaults to model_name + '_checkpoint.h5'.

    Returns:
        model: The same model object after training.
    """
    X = np.array(train_df[args.input_features])
    y = np.array(train_df[target_col])

    # Split the data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=random_state)

    # Apply the scaler that was fitted beforehand; nothing is fitted here.
    X_train_scaled = scaler.transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Convert labels to one-hot encoding
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes=num_classes)

    # Every task used to checkpoint into the same hard-coded 'model.h5', so
    # each training run overwrote the previous task's checkpoint. Derive the
    # file from model_name unless the caller names one explicitly.
    # NOTE(review): the checkpoint keeps Keras' default monitor while early
    # stopping below watches val_accuracy, so the two may disagree on "best".
    if checkpoint_path is None:
        checkpoint_path = model_name + '_checkpoint.h5'
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True)

    # Early stopping callback
    early_stopping_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',    # Stop on validation accuracy
        patience=10,               # Epochs without improvement before stopping
        restore_best_weights=True  # Restore the best model weights when training stops
    )

    # Train the model
    history = model.fit(X_train_scaled,
                        y_train,
                        epochs=args.num_epochs,
                        batch_size=args.batch_size,
                        validation_data=(X_val_scaled, y_val),
                        callbacks=[checkpoint_callback, early_stopping_callback])

    # Save the final trained model
    model.save(model_name + '.h5')

    # Printing the History object only shows its repr; report the values
    # recorded for the last epoch instead.
    final_metrics = {name: values[-1] for name, values in history.history.items() if values}
    print(f"Final-epoch metrics for {model_name}: {final_metrics}")

    return model

def evaluate_model(model, test_df, scaler, model_name, target_col, metrics=('accuracy', 'weighted_f1')):
    """
    Evaluates the model saved at model_name + '.h5' on test_df.

    Parameters:
        model: Not used. The model is reloaded from disk so that the saved
            artifact is what gets evaluated.
        test_df: DataFrame holding the args.input_features columns and target_col.
        scaler: A fitted scaler exposing transform(); it is not refitted here.
        model_name: Path prefix; the model is loaded from model_name + '.h5'.
        target_col: Name of the label column holding integer class ids.
        metrics: Names of the metrics to compute. 'accuracy' and 'weighted_f1'
            are recognised; any other name is ignored.

    Returns:
        evaluation_results: Dict mapping each computed metric name to its value.
    """
    X_test = np.array(test_df[args.input_features])
    y_true = np.array(test_df[target_col])

    # Load the model that was saved under model_name
    loaded_model = tf.keras.models.load_model(model_name + '.h5')

    # Apply numerical scaler on X_test
    X_test_scaled = scaler.transform(X_test)

    # The model outputs one score per class; the predicted class is the
    # index of the highest score.
    y_pred = loaded_model.predict(X_test_scaled)
    y_pred_classes = np.argmax(y_pred, axis=1)

    evaluation_results = {}

    if 'accuracy' in metrics:
        evaluation_results['accuracy'] = accuracy_score(y_true, y_pred_classes)

    if 'weighted_f1' in metrics:
        evaluation_results['weighted_f1'] = f1_score(y_true, y_pred_classes, average='weighted')

    return evaluation_results


In [7]:
# Thermal preference: start from the model pretrained on ASHRAE, continue
# training on the BCA training data, then score it on the BCA test data.
thermalpref_target = 'Thermal Preference'
thermalpref_tl_name = 'tl_cnnlstm_fe_thermalpref_model'

print("Training Thermal Preference Model")
thermalpref_model = load_pretrained_model('cnnlstm_lower_thermalpref_model')

# The feature scaler is fitted on the ASHRAE data and reused for BCA inputs.
thermalpref_scaler = generate_ashrae_scaler(ashrae_thermalpref_train,
                                            target_col=thermalpref_target)

thermalpref_model = train_model(
    model=thermalpref_model,
    train_df=bca_thermalpref_train,
    scaler=thermalpref_scaler,
    model_name=thermalpref_tl_name,
    target_col=thermalpref_target,
    num_classes=3,
)

thermalpref_eval = evaluate_model(
    model=thermalpref_model,
    test_df=bca_thermalpref_test,
    scaler=thermalpref_scaler,
    model_name=thermalpref_tl_name,
    target_col=thermalpref_target,
)

print("Thermal Preference Accuracy:", thermalpref_eval['accuracy'])
print("Thermal Preference Weighted F1 Score:", thermalpref_eval['weighted_f1'])

Training Thermal Preference Model


2023-12-05 23:30:35.699041: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 7, 128)            768       
                                                                 
 dropout (Dropout)           (None, 7, 128)            0         
                                                                 
 lstm (LSTM)                 (None, 7, 256)            394240    
                                                                 
 lstm_1 (LSTM)               (None, 256)               525312    
                                                                 
 flatten (Flatten)           (None, 256)               0         
                                                                 
 dense (Dense)               (None, 64)                16448     
                                                                 
 dense_1 (Dense)             (None, 16)                1

In [8]:
# Thermal acceptability: start from the model pretrained on ASHRAE, continue
# training on the BCA training data, then score it on the BCA test data.
thermalacc_target = 'Thermal Acceptability'
thermalacc_tl_name = 'tl_cnnlstm_fe_thermalacc_model'

print("Training Thermal Acceptability Model")
thermalacc_model = load_pretrained_model('cnnlstm_lower_thermalacc_model')

# The feature scaler is fitted on the ASHRAE data and reused for BCA inputs.
thermalacc_scaler = generate_ashrae_scaler(ashrae_thermalacc_train,
                                           target_col=thermalacc_target)

thermalacc_model = train_model(
    model=thermalacc_model,
    train_df=bca_thermalacc_train,
    scaler=thermalacc_scaler,
    model_name=thermalacc_tl_name,
    target_col=thermalacc_target,
    num_classes=2,
)

thermalacc_eval = evaluate_model(
    model=thermalacc_model,
    test_df=bca_thermalacc_test,
    scaler=thermalacc_scaler,
    model_name=thermalacc_tl_name,
    target_col=thermalacc_target,
)

print("Thermal Acceptability Accuracy:", thermalacc_eval['accuracy'])
print("Thermal Acceptability Weighted F1 Score:", thermalacc_eval['weighted_f1'])

Training Thermal Acceptability Model
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 7, 128)            768       
                                                                 
 dropout_1 (Dropout)         (None, 7, 128)            0         
                                                                 
 lstm_2 (LSTM)               (None, 7, 256)            394240    
                                                                 
 lstm_3 (LSTM)               (None, 256)               525312    
                                                                 
 flatten_1 (Flatten)         (None, 256)               0         
                                                                 
 dense_3 (Dense)             (None, 64)                16448     
                                                                 
 dense_4 (Dense) 

In [9]:
# Air movement preference: start from the model pretrained on ASHRAE, continue
# training on the BCA training data, then score it on the BCA test data.
airpref_target = 'Air Movement Preference'
airpref_tl_name = 'tl_cnnlstm_fe_airpref_model'

print("Training Air Movement Preference Model")
airpref_model = load_pretrained_model('cnnlstm_lower_airpref_model')

# The feature scaler is fitted on the ASHRAE data and reused for BCA inputs.
airpref_scaler = generate_ashrae_scaler(ashrae_airpref_train,
                                        target_col=airpref_target)

airpref_model = train_model(
    model=airpref_model,
    train_df=bca_airpref_train,
    scaler=airpref_scaler,
    model_name=airpref_tl_name,
    target_col=airpref_target,
    num_classes=3,
)

airpref_eval = evaluate_model(
    model=airpref_model,
    test_df=bca_airpref_test,
    scaler=airpref_scaler,
    model_name=airpref_tl_name,
    target_col=airpref_target,
)

print("Air Movement Preference Accuracy:", airpref_eval['accuracy'])
print("Air Movement Preference Weighted F1 Score:", airpref_eval['weighted_f1'])

Training Air Movement Preference Model
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_2 (Conv1D)           (None, 7, 128)            768       
                                                                 
 dropout_2 (Dropout)         (None, 7, 128)            0         
                                                                 
 lstm_4 (LSTM)               (None, 7, 256)            394240    
                                                                 
 lstm_5 (LSTM)               (None, 256)               525312    
                                                                 
 flatten_2 (Flatten)         (None, 256)               0         
                                                                 
 dense_6 (Dense)             (None, 64)                16448     
                                                                 
 dense_7 (Dense