In [19]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Step 1: Load the dataset
# Replace 'architectural_features.csv' with your dataset path
data = pd.read_csv('ENB2012data.csv')

# Step 2: Explore the dataset
print("Dataset Overview:")
print(data.head())
print(data.info())
print(data.describe())

# Step 3: Separate features and target variables
X = data[['Heating Load', 'Cooling Load']]
y_heating = data['Heating Load']
y_cooling = data['Cooling Load']

# Step 4: Split the dataset into training and testing sets
X_train, X_test, y_train_heating, y_test_heating = train_test_split(X, y_heating, test_size=0.2, random_state=42)
_, _, y_train_cooling, y_test_cooling = train_test_split(X, y_cooling, test_size=0.2, random_state=42)

# Step 5: Normalize the feature data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 6: Train models for heating and cooling load
# Heating Load Model
heating_model = RandomForestRegressor(random_state=42)
heating_model.fit(X_train_scaled, y_train_heating)

# Cooling Load Model
cooling_model = RandomForestRegressor(random_state=42)
cooling_model.fit(X_train_scaled, y_train_cooling)

# Step 7: Evaluate the models
# Heating Load Predictions
y_pred_heating = heating_model.predict(X_test_scaled)
print("\nHeating Load Model Evaluation:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_heating, y_pred_heating))
print("Mean Squared Error (MSE):", mean_squared_error(y_test_heating, y_pred_heating))
print("R² Score:", r2_score(y_test_heating, y_pred_heating))

# Cooling Load Predictions
y_pred_cooling = cooling_model.predict(X_test_scaled)
print("\nCooling Load Model Evaluation:")
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test_cooling, y_pred_cooling))
print("Mean Squared Error (MSE):", mean_squared_error(y_test_cooling, y_pred_cooling))
print("R² Score:", r2_score(y_test_cooling, y_pred_cooling))

# Step 8: Optional - Save the models for future use
import joblib
joblib.dump(heating_model, 'heating_model.pkl')
joblib.dump(cooling_model, 'cooling_model.pkl')


Dataset Overview:
     X1     X2     X3      X4   X5  X6   X7  X8     Y1     Y2
0  0.98  514.5  294.0  110.25  7.0   2  0.0   0  15.55  21.33
1  0.98  514.5  294.0  110.25  7.0   3  0.0   0  15.55  21.33
2  0.98  514.5  294.0  110.25  7.0   4  0.0   0  15.55  21.33
3  0.98  514.5  294.0  110.25  7.0   5  0.0   0  15.55  21.33
4  0.90  563.5  318.5  122.50  7.0   2  0.0   0  20.84  28.28
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 765 entries, 0 to 764
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X1      765 non-null    float64
 1   X2      765 non-null    float64
 2   X3      765 non-null    float64
 3   X4      765 non-null    float64
 4   X5      765 non-null    float64
 5   X6      765 non-null    int64  
 6   X7      765 non-null    float64
 7   X8      765 non-null    int64  
 8   Y1      765 non-null    float64
 9   Y2      765 non-null    float64
dtypes: float64(8), int64(2)
memory usage: 59.9 KB
None
     

KeyError: "None of [Index(['Heating Load', 'Cooling Load'], dtype='object')] are in the [columns]"

In [11]:
import pandas as pd

# Manually create the dataset from the extracted table
data = pd.DataFrame({
    "X1": [0.62, 0.76, 0.66],
    "X2": [808.5, 660.5, 795.3],
    "X3": [367.5, 416.5, 318.5],
    "X4": [220.5, 122.5, 220.5],
    "X5": [3.5, 7.0, 3.5],
    "X6": [3, 4, 4],
    "X7": [0.1, 0.1, 0.1],
    "X8": [1, 2, 5]  # This might represent an identifier or target variable
})

# Display the dataset to confirm it is correct
data


Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8
0,0.62,808.5,367.5,220.5,3.5,3,0.1,1
1,0.76,660.5,416.5,122.5,7.0,4,0.1,2
2,0.66,795.3,318.5,220.5,3.5,4,0.1,5


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Adding placeholder target variables for Heating Load (HL) and Cooling Load (CL)
# In a real scenario, these would be provided in the dataset.
data["Heating Load"] = [15.5, 20.3, 18.7]  # Example values for demonstration
data["Cooling Load"] = [10.1, 12.5, 11.4]  # Example values for demonstration

# Separate features and targets
X = data[["X1", "X2", "X3", "X4", "X5", "X6", "X7"]]  # Features
y_heating = data["Heating Load"]  # Target for Heating Load
y_cooling = data["Cooling Load"]  # Target for Cooling Load

# Split into training and testing sets
X_train, X_test, y_train_heating, y_test_heating = train_test_split(
    X, y_heating, test_size=0.33, random_state=42
)
X_train, X_test, y_train_cooling, y_test_cooling = train_test_split(
    X, y_cooling, test_size=0.33, random_state=42
)

# Train models for heating and cooling loads
heating_model = RandomForestRegressor(random_state=42)
heating_model.fit(X_train, y_train_heating)

cooling_model = RandomForestRegressor(random_state=42)
cooling_model.fit(X_train, y_train_cooling)

# Predictions
y_pred_heating = heating_model.predict(X_test)
y_pred_cooling = cooling_model.predict(X_test)

# Evaluate the models
heating_metrics = {
    "MAE": mean_absolute_error(y_test_heating, y_pred_heating),
    "MSE": mean_squared_error(y_test_heating, y_pred_heating),
    "R2": r2_score(y_test_heating, y_pred_heating),
}

cooling_metrics = {
    "MAE": mean_absolute_error(y_test_cooling, y_pred_cooling),
    "MSE": mean_squared_error(y_test_cooling, y_pred_cooling),
    "R2": r2_score(y_test_cooling, y_pred_cooling),
}

heating_metrics, cooling_metrics




({'MAE': 3.616000000000014, 'MSE': 13.0754560000001, 'R2': nan},
 {'MAE': 1.5860000000000003, 'MSE': 2.515396000000001, 'R2': nan})

In [15]:
def predict_loads(features):
    """
    Predict heating and cooling loads based on input features.

    Parameters:
    - features: dict with keys ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7']

    Returns:
    - dict with predicted 'Heating Load' and 'Cooling Load'
    """
    # Convert input dictionary to a DataFrame for prediction
    feature_values = pd.DataFrame([features])
    heating_load = heating_model.predict(feature_values)[0]
    cooling_load = cooling_model.predict(feature_values)[0]
    
    return {"Heating Load": heating_load, "Cooling Load": cooling_load}


# Example usage with the 3 rows from the table
predictions = []
feature_combinations = [
    {"X1": 0.62, "X2": 808.5, "X3": 367.5, "X4": 220.5, "X5": 3.5, "X6": 3, "X7": 0.1},
    {"X1": 0.76, "X2": 660.5, "X3": 416.5, "X4": 122.5, "X5": 7.0, "X6": 4, "X7": 0.1},
    {"X1": 0.66, "X2": 795.3, "X3": 318.5, "X4": 220.5, "X5": 3.5, "X6": 4, "X7": 0.1},
]

for features in feature_combinations:
    predictions.append(predict_loads(features))

predictions  # Display predicted heating and cooling loads for the given combinations


[{'Heating Load': 19.116000000000014, 'Cooling Load': 11.686},
 {'Heating Load': 19.947999999999983, 'Cooling Load': 12.258},
 {'Heating Load': 19.116000000000014, 'Cooling Load': 11.686}]