In [18]:
import pandas as pd
import numpy as np

import tensorflow

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error

import warnings
warnings.simplefilter("ignore", UserWarning)

# Load the dataset
file_path = "Energy_Consumption_Dataset.csv"  # Replace with your dataset file path
try:
    df = pd.read_csv(file_path)
    print("Dataset loaded successfully!")
except FileNotFoundError:
    print(f"File not found: {file_path}")
   # exit()

# Step 1: Data Preprocessing
df_reduced = df.replace({'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 
                         'Thursday': 4, 'Friday': 5, 'Saturday': 6, 'Sunday': 7, 
                         'Yes': 1, 'No': 0, 'On': 0, 'Off': 1}).infer_objects(copy=False)

# Step 2: Verify Data and remove outliers
print(df_reduced)

# Use IQR to detect and remove outliers
Q1 = df_reduced.quantile(0.25)
Q3 = df_reduced.quantile(0.75)

IQR = Q3 - Q1

# Filter the dataset
df_cleaned = df_reduced[~((df_reduced < (Q1 - 1.5 * IQR)) | (df_reduced > (Q3 + 1.5 * IQR))).any(axis=1)]

print(f"Removed {len(df_reduced) - len(df_cleaned)} outliers.")

# Step 3: Model Processing

df_reduced = df_cleaned

# Define input features (X) and target variable (y)
input_data = df_reduced.drop(columns=["EnergyConsumption"])  # Features
output_data = df_reduced["EnergyConsumption"]  # Target variable

# Split the data into training and testing sets
input_data_train, input_data_test, output_data_train, output_data_test = train_test_split(input_data, output_data, test_size=0.2, random_state=42)

# Standardize the numerical features for better performance
scaler = StandardScaler()

input_data_train = scaler.fit_transform(input_data_train)
input_data_test = scaler.transform(input_data_test)

# Step 2: Build the TensorFlow Model
model = Sequential([
    Dense(400, input_dim=input_data_train.shape[1], activation="relu"),  # Input layer with 4 neurons
    Dense(1280, activation="relu"),                             # Hidden layer with 128 neurons
    Dense(1, activation="linear")                              # Output layer (1 neuron for regression)
])

# Compile the model
model.compile(optimizer="adam", loss="mse", metrics=["mae"])  # Loss: Mean Squared Error, Metric: Mean Absolute Error

# Step 3: Train the Model
history = model.fit(input_data_train, output_data_train, epochs=50, batch_size=16, validation_data=(input_data_test, output_data_test), verbose=1)

# Step 4: Evaluate the Model
loss, mae = model.evaluate(input_data_test, output_data_test, verbose=0)

print(f"Test Loss (MSE): {loss:.4f}")
print(f"Test Mean Absolute Error (MAE): {mae:.4f}")

# Step 5: Make Predictions
predictions = model.predict(input_data_test)

# Flatten the predictions to make them 1D
predictions = predictions.flatten()

# Print first 50 predictions
print("Predictions for EnergyConsumption:")
print(predictions[:50]*2.5*85.0*365.0*24.0*1.0E-6) # Terawatt 

# Calculate Mean Absolute Error manually (for comparison)
mae_manual = mean_absolute_error(output_data_test, predictions)
print(f"Manual MAE: {mae_manual:.4f}")

mre = np.mean(np.abs((output_data_test - predictions) / output_data_test))
print(mre*100.0, 'per cent')

print(predictions.mean()*2.5*85.0*365.00*24.0*1.0E-6)

Dataset loaded successfully!
     Month  Hour  DayOfWeek  Holiday  HVACUsage  LightingUsage  Temperature  \
0        7    15          3        0          0              0    31.133301   
1        4     3          5        1          1              0    32.160465   
2       11    10          5        0          1              1    34.952566   
3        8    23          6        1          1              1    19.829930   
4        5    22          7        1          1              0    15.807017   
..     ...   ...        ...      ...        ...            ...          ...   
995      6    23          2        1          0              1    17.465676   
996      5     8          6        1          0              0    25.371234   
997      6     2          4        1          1              0    19.916949   
998      6     9          1        0          1              0    22.162794   
999      7    18          6        0          1              0    34.790107   

      Humidity  Square

  df_reduced = df.replace({'Monday': 1, 'Tuesday': 2, 'Wednesday': 3,


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 78832.3906 - mae: 274.7935 - val_loss: 12078.9609 - val_mae: 97.6144
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5983.1860 - mae: 63.4519 - val_loss: 2982.1260 - val_mae: 43.0983
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3608.1733 - mae: 47.1467 - val_loss: 2826.0120 - val_mae: 42.3877
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3023.7046 - mae: 43.8165 - val_loss: 2814.9319 - val_mae: 42.3688
Epoch 5/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3118.9343 - mae: 43.5054 - val_loss: 2744.5833 - val_mae: 42.1096
Epoch 6/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3111.1245 - mae: 44.2365 - val_loss: 2706.9570 - val_mae: 42.1363
Epoch 7/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m