In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load datasets
train_data = pd.read_csv('TrainData_A.csv')
aggregated_load = pd.read_csv('AggregatedLoad_A.csv')
test_data = pd.read_csv('TestData_A.csv')
template = pd.read_csv('DisaggregatedLoad_Template.csv')

# Rename columns for convenience
train_data.columns = ['index', 'aggregated_load'] + [f'appliance_{i}' for i in range(1, 22)]
aggregated_load.columns = ['index', 'aggregated_load']
test_data.columns = ['index', 'aggregated_load'] + [f'appliance_{i}' for i in range(1, 22)]
template.columns = ['index'] + [f'appliance_{i}_pred' for i in range(1, 22)]


In [2]:
from sklearn.preprocessing import StandardScaler

# Normalize the data for aggregated load and appliance loads separately
scaler_agg = StandardScaler()
scaler_appliances = StandardScaler()

# Fit scaler on the aggregated load from training data
train_agg_normalized = scaler_agg.fit_transform(train_data[['aggregated_load']])
# Fit scaler on the appliance loads from training data
train_appliances_normalized = scaler_appliances.fit_transform(train_data.iloc[:, 2:])

# Split data into training and test sets (80% training, 20% test)
X_train, X_test, y_train, y_test = train_test_split(train_agg_normalized, train_appliances_normalized, test_size=0.1, random_state=42)

# Reshape the training and test data
X_train = X_train.reshape(-1, 1, 1)
X_test = X_test.reshape(-1, 1, 1)

# Prepare features for aggregated load data
X_aggregated = scaler_agg.transform(aggregated_load[['aggregated_load']]).reshape(-1, 1, 1)


In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten
from tensorflow.keras.optimizers import Adam

# CNN Model
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(1, 1)))
cnn_model.add(Flatten())
cnn_model.add(Dense(21, activation='linear'))

cnn_model.compile(optimizer=Adam(), loss='mean_squared_error')
cnn_model.summary()

# CNN Training
cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Predicting with CNN
y_pred_cnn_test = cnn_model.predict(X_test)
y_pred_cnn_full = cnn_model.predict(X_aggregated)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 868us/step - loss: 0.9309 - val_loss: 0.7190
Epoch 2/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 876us/step - loss: 0.8529 - val_loss: 0.7013
Epoch 3/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 794us/step - loss: 0.8717 - val_loss: 0.6890
Epoch 4/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 883us/step - loss: 0.8495 - val_loss: 0.6827
Epoch 5/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 949us/step - loss: 0.7993 - val_loss: 0.6776
Epoch 6/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 0.8473 - val_loss: 0.6711
Epoch 7/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - loss: 0.7958 - val_loss: 0.6654
Epoch 8/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 4ms/step - loss: 0.7906 - val_loss: 0.6611
Epoch 9/10


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

# LSTM Model
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(1, 1)))
lstm_model.add(Dense(21))

lstm_model.compile(optimizer=Adam(), loss='mean_squared_error')
lstm_model.summary()

# LSTM Training
lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Predicting with LSTM
y_pred_lstm_test = lstm_model.predict(X_test)
y_pred_lstm_full = lstm_model.predict(X_aggregated)


  super().__init__(**kwargs)


Epoch 1/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 800us/step - loss: 0.9517 - val_loss: 0.7274
Epoch 2/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 792us/step - loss: 0.8757 - val_loss: 0.7027
Epoch 3/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 741us/step - loss: 0.8362 - val_loss: 0.6978
Epoch 4/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.8677 - val_loss: 0.6931
Epoch 5/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.8439 - val_loss: 0.6896
Epoch 6/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 942us/step - loss: 0.8244 - val_loss: 0.6868
Epoch 7/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 905us/step - loss: 0.8805 - val_loss: 0.6814
Epoch 8/10
[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 859us/step - loss: 0.8057 - val_loss: 0.6782
Epoch 9/10


In [5]:
from sklearn.ensemble import RandomForestRegressor

# Random Forest Model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train.reshape(-1, 1), y_train)

# Predicting with Random Forest
y_pred_rf_test = rf_model.predict(X_test.reshape(-1, 1))
y_pred_rf_full = rf_model.predict(X_aggregated.reshape(-1, 1))


In [6]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Function to calculate RMSE as in the provided notebook
def calculate_rmse(true_values, pred_values):
    true_values_scaled = true_values / true_values.max(axis=0)
    pred_values_scaled = pred_values / true_values.max(axis=0)
    rmse = np.sqrt(mean_squared_error(true_values_scaled, pred_values_scaled, multioutput='raw_values'))
    return rmse

# Calculate RMSE for each appliance
rmse_cnn = calculate_rmse(y_test, y_pred_cnn_test)
rmse_lstm = calculate_rmse(y_test, y_pred_lstm_test)
rmse_rf = calculate_rmse(y_test, y_pred_rf_test)

# Combine RMSE into a DataFrame for comparison
rmse_df = pd.DataFrame({
    'appliance': [f'appliance_{i}' for i in range(1, 22)],
    'rmse_cnn': rmse_cnn,
    'rmse_lstm': rmse_lstm,
    'rmse_rf': rmse_rf
})

print(rmse_df)


       appliance  rmse_cnn  rmse_lstm   rmse_rf
0    appliance_1  0.121922   0.123207  0.100819
1    appliance_2  0.059460   0.060390  0.030179
2    appliance_3  0.123851   0.145791  0.060395
3    appliance_4  0.082174   0.083106  0.045308
4    appliance_5  0.022384   0.022986  0.020715
5    appliance_6  0.138541   0.139157  0.102564
6    appliance_7  0.039206   0.039159  0.035869
7    appliance_8  0.098930   0.113304  0.043282
8    appliance_9  0.025902   0.026438  0.018887
9   appliance_10  0.486315   0.486168  0.109288
10  appliance_11  0.011254   0.011310  0.020275
11  appliance_12  0.214283   0.219933  0.151422
12  appliance_13  0.061655   0.061329  0.184744
13  appliance_14  0.051349   0.051459  0.044044
14  appliance_15  0.091433   0.091854  0.085714
15  appliance_16  0.131690   0.131799  0.121844
16  appliance_17  0.126685   0.126734  0.086728
17  appliance_18  0.164742   0.164339  0.124723
18  appliance_19  0.043803   0.043585  0.038451
19  appliance_20  0.040694   0.040608  0

In [7]:
# Determine the best model for each appliance
best_models = rmse_df[['rmse_cnn', 'rmse_lstm', 'rmse_rf']].idxmin(axis=1)
best_models = best_models.replace({'rmse_cnn': 'rmse_cnn', 'rmse_lstm': 'rmse_lstm', 'rmse_rf': 'rmse_rf'})
rmse_df['best_model'] = best_models

print(rmse_df)


       appliance  rmse_cnn  rmse_lstm   rmse_rf best_model
0    appliance_1  0.121922   0.123207  0.100819    rmse_rf
1    appliance_2  0.059460   0.060390  0.030179    rmse_rf
2    appliance_3  0.123851   0.145791  0.060395    rmse_rf
3    appliance_4  0.082174   0.083106  0.045308    rmse_rf
4    appliance_5  0.022384   0.022986  0.020715    rmse_rf
5    appliance_6  0.138541   0.139157  0.102564    rmse_rf
6    appliance_7  0.039206   0.039159  0.035869    rmse_rf
7    appliance_8  0.098930   0.113304  0.043282    rmse_rf
8    appliance_9  0.025902   0.026438  0.018887    rmse_rf
9   appliance_10  0.486315   0.486168  0.109288    rmse_rf
10  appliance_11  0.011254   0.011310  0.020275   rmse_cnn
11  appliance_12  0.214283   0.219933  0.151422    rmse_rf
12  appliance_13  0.061655   0.061329  0.184744  rmse_lstm
13  appliance_14  0.051349   0.051459  0.044044    rmse_rf
14  appliance_15  0.091433   0.091854  0.085714    rmse_rf
15  appliance_16  0.131690   0.131799  0.121844    rmse_

In [8]:
# Initialize the hybrid predictions array for test data
hybrid_predictions_test = np.zeros_like(y_pred_cnn_test)

# Assign the best model's predictions to the hybrid model for test data
for i in range(21):
    if rmse_df.loc[i, 'best_model'] == 'CNN':
        hybrid_predictions_test[:, i] = y_pred_cnn_test[:, i]
    elif rmse_df.loc[i, 'best_model'] == 'LSTM':
        hybrid_predictions_test[:, i] = y_pred_lstm_test[:, i]
    else:
        hybrid_predictions_test[:, i] = y_pred_rf_test[:, i]

# Calculate and show summed RMSE for the hybrid model on test data
rmse_hybrid_test = calculate_rmse(y_test, hybrid_predictions_test)
print("Hybrid Model RMSE Sum on Test Data:", rmse_hybrid_test)

# Initialize the hybrid predictions array for AggregatedLoad_A
hybrid_predictions_full = np.zeros_like(y_pred_cnn_full)

# Assign the best model's predictions to the hybrid model for AggregatedLoad_A
for i in range(21):
    if rmse_df.loc[i, 'best_model'] == 'CNN':
        hybrid_predictions_full[:, i] = y_pred_cnn_full[:, i]
    elif rmse_df.loc[i, 'best_model'] == 'LSTM':
        hybrid_predictions_full[:, i] = y_pred_lstm_full[:, i]
    else:
        hybrid_predictions_full[:, i] = y_pred_rf_full[:, i]

# Save predictions to CSV
def save_predictions(predictions, filename):
    predictions_scaled = scaler_appliances.inverse_transform(predictions)
    template_copy = template.copy()
    template_copy.iloc[:, 1:] = predictions_scaled
    template_copy.to_csv(filename, index=False)
    return predictions_scaled


save_predictions(y_pred_cnn_full, 'Predicted_CNN.csv')
save_predictions(y_pred_lstm_full, 'Predicted_LSTM.csv')
save_predictions(y_pred_rf_full, 'Predicted_RF.csv')
hybrid_predictions_full=save_predictions(hybrid_predictions_full, 'Predicted_Hybrid.csv')


# Load ground truth data for AggregatedLoad_A
true_A = pd.read_csv("TestData_A.csv", index_col=0)
true_A = true_A.iloc[:, 1:]  # Ignore aggregate load column
true_A_values_scaled = true_A.values / true_A.max(axis=0).values

# Calculate and show summed RMSE for the hybrid model on AggregatedLoad_A
pred_A_values_scaled = hybrid_predictions_full / true_A.max(axis=0).values
rmse_sum_hybrid = np.sum(mean_squared_error(true_A_values_scaled, pred_A_values_scaled, multioutput="raw_values"))
print("Hybrid Model RMSE Sum on AggregatedLoad_A:", rmse_sum_hybrid)



Hybrid Model RMSE Sum on Test Data: [0.10081894 0.03017883 0.06039494 0.04530753 0.02071481 0.10256443
 0.03586897 0.04328206 0.01888676 0.10928818 0.02027536 0.15142191
 0.18474366 0.04404356 0.08571352 0.1218444  0.0867279  0.12472304
 0.03845139 0.0168115  0.027491  ]
Hybrid Model RMSE Sum on AggregatedLoad_A: 0.196449121884341
