In [12]:
import pandas as pd

# List of column names with numbers removed
columns = [
    "BA_climate", "IECC_climate_code", "HDD65", "CDD65", "HDD30YR_PUB", "CDD30YR_PUB", 
    "TYPEHUQ", "STORIES", "BEDROOMS", "NCOMBATH", "OTHROOMS", "TOTROOMS", "WINDOWS", 
    "ADQINSUL", "NUMFRIG", "RCOOKUSE", "ROVENUSE", "NUMMEAL", "DWASHUSE", "WASHLOAD", 
    "DRYRUSE", "EQUIPM", "NUMPORTEL", "NUMPORTHUM", "ACEQUIPM_PUB", "NUMPORTAC", 
    "NUMCFAN", "NUMFLOORFAN", "USECFAN", "LGTIN1TO4", "LGTIN4TO8", "LGTINMORE8", "HHAGE", 
    "NHSLDMEM", "NUMCHILD", "ATHOME", "MONEYPY", "SQFTRANGE", "TOTSQFT_EN", "TOTHSQFT", 
    "TOTCSQFT", "KWH"
]

# Create a DataFrame with the column names
df = pd.read_csv("Final_data.csv")
df = df[columns]
# Display the empty DataFrame with the specified columns
print(df)

       BA_climate  IECC_climate_code   HDD65   CDD65  HDD30YR_PUB  \
0             4.0                7.0  3844.0  1679.0       4451.0   
1             5.0                6.0  3766.0  1458.0       4429.0   
2             4.0                7.0  3819.0  1696.0       4500.0   
3             5.0                3.0  2614.0  1718.0       3229.0   
4             5.0                6.0  4219.0  1363.0       4896.0   
...           ...                ...     ...     ...          ...   
18491         5.0                6.0  4572.0  1037.0       4547.0   
18492         5.5               13.0  9861.0   283.0       9862.0   
18493         2.0                2.0   405.0  4725.0        672.0   
18494         2.0                3.0  1245.0  3038.0       1752.0   
18495         5.0                6.0  4423.0  1424.0       4225.0   

       CDD30YR_PUB  TYPEHUQ  STORIES  BEDROOMS  NCOMBATH  ...  HHAGE  \
0           1027.0    2.000    2.000     4.000     3.000  ...   65.0   
1           1305.0    3.000

In [13]:
X = df.drop("KWH",axis=1)
y = df["KWH"]

In [14]:
from sklearn.model_selection import train_test_split

X_train , X_test , y_train,y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
# default model we plan to use
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
import joblib

# Step 1: Train the Random Forest Regressor with the best parameters
rf = RandomForestRegressor(n_estimators=300, 
                           min_samples_split=5, 
                           min_samples_leaf=1, 
                           max_features='sqrt', 
                           max_depth=30, 
                           random_state=42)
rf.fit(X_train, y_train)

# Step 2: Train the Neural Network with the given architecture
nn = Sequential()
nn.add(Dense(units=256, activation='relu', input_dim=X_train.shape[1]))  # First hidden layer
nn.add(Dropout(0.3))
nn.add(Dense(units=128, activation='relu'))
nn.add(Dropout(0.3))
nn.add(Dense(units=128, activation='relu'))
nn.add(Dense(units=128, activation='relu'))
nn.add(Dense(units=32, activation='relu'))
nn.add(Dense(1, activation='linear'))  # Output layer
nn.compile(optimizer='adam', loss='mean_squared_error')

# Train the neural network
nn.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

# Step 3: Get predictions from both models
rf_preds_train = rf.predict(X_train)
nn_preds_train = nn.predict(X_train)

rf_preds_test = rf.predict(X_test)
nn_preds_test = nn.predict(X_test)

# Step 4: Train the Gradient Boosting Regressor with the given parameters
gb = GradientBoostingRegressor(n_estimators=500,  # Updated to match the best parameter
                               learning_rate=0.1, 
                               max_depth=6, 
                               subsample=1.0,  # Use 100% of the samples
                               random_state=42)
gb.fit(X_train, y_train)

gb_preds_train = gb.predict(X_train)
gb_preds_test = gb.predict(X_test)

# Step 5: Train XGBoost with the provided parameters
xgb_model = xgb.XGBRegressor(colsample_bytree=0.9, 
                             learning_rate=0.1, 
                             max_depth=6, 
                             n_estimators=500, 
                             subsample=1.0, 
                             random_state=42)
xgb_model.fit(X_train, y_train)

xgb_preds_train = xgb_model.predict(X_train)
xgb_preds_test = xgb_model.predict(X_test)

# Step 6: Combine predictions from all models
X_meta_train = np.column_stack((rf_preds_train, nn_preds_train.flatten(), gb_preds_train,xgb_preds_train))
X_meta_test = np.column_stack((rf_preds_test, nn_preds_test.flatten(), gb_preds_test,xgb_preds_test))

# Step 7: Train the meta-model (Linear Regression)
meta_model = LinearRegression()
meta_model.fit(X_meta_train, y_train)

# # Step 8: Make final predictions with the meta-model
final_preds = meta_model.predict(X_meta_test)

# # # Step 9: Evaluate the model
mse = mean_squared_error(y_test, final_preds)
print(f'Mean Squared Error of the stacked model: {mse}')

mae = mean_absolute_error(y_test, final_preds)
r2 = r2_score(y_test, final_preds)
print(f'Mean Absolute Error of the stacked model: {mae}')
print(f'R-squared of the stacked model: {r2}')
joblib.dump(rf, "random_forest.pkl")
nn.save("neural_network.h5")
joblib.dump(gb, "gradient_boosting.pkl")
joblib.dump(xgb_model, "xgboost_model.pkl")
joblib.dump(meta_model, 'meta_model.pkl')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step




In [16]:
# model using knn model (k nearest neighbors)
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from sklearn.neighbors import KNeighborsRegressor

# Step 1: Train the Random Forest Regressor with the best parameters
rf = RandomForestRegressor(n_estimators=300, 
                           min_samples_split=5, 
                           min_samples_leaf=1, 
                           max_features='sqrt', 
                           max_depth=30, 
                           random_state=42)
rf.fit(X_train, y_train)

# Step 2: Train the Neural Network with the given architecture
model = Sequential()
model.add(Dense(units=256, activation='relu', input_dim=X_train.shape[1]))  # First hidden layer
model.add(Dropout(0.3))
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(1, activation='linear'))  # Output layer
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the neural network
model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

# Step 3: Get predictions from both models
rf_preds_train = rf.predict(X_train)
nn_preds_train = model.predict(X_train)

rf_preds_test = rf.predict(X_test)
nn_preds_test = model.predict(X_test)
'''
# Step 4: Train the Gradient Boosting Regressor with the given parameters
gb = GradientBoostingRegressor(n_estimators=500,  # Updated to match the best parameter
                               learning_rate=0.1, 
                               max_depth=6, 
                               subsample=1.0,  # Use 100% of the samples
                               random_state=42)
gb.fit(X_train, y_train)

gb_preds_train = gb.predict(X_train)
gb_preds_test = gb.predict(X_test)
'''
# Step 5: Train XGBoost with the provided parameters
xgb_model = xgb.XGBRegressor(colsample_bytree=0.9, 
                             learning_rate=0.1, 
                             max_depth=6, 
                             n_estimators=500, 
                             subsample=1.0, 
                             random_state=42)
xgb_model.fit(X_train, y_train)

xgb_preds_train = xgb_model.predict(X_train)
xgb_preds_test = xgb_model.predict(X_test)

# Step 6: Train the K-Nearest Neighbors model
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)

knn_preds_train = knn.predict(X_train)
knn_preds_test = knn.predict(X_test)

# Step 7: Combine predictions from all models
X_meta_train = np.column_stack((rf_preds_train, nn_preds_train.flatten(), xgb_preds_train, knn_preds_train))
X_meta_test = np.column_stack((rf_preds_test, nn_preds_test.flatten(), xgb_preds_test, knn_preds_test))

# Step 8: Train the meta-model (Linear Regression)
meta_model = LinearRegression()
meta_model.fit(X_meta_train, y_train)

# Step 9: Make final predictions with the meta-model
final_preds = meta_model.predict(X_meta_test)

# Step 10: Evaluate the model
mse = mean_squared_error(y_test, final_preds)
print(f'Mean Squared Error of the stacked model: {mse}')

mae = mean_absolute_error(y_test, final_preds)
r2 = r2_score(y_test, final_preds)
print(f'Mean Absolute Error of the stacked model: {mae}')
print(f'R-squared of the stacked model: {r2}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m191/191[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 729us/step
Mean Squared Error of the stacked model: 12637104.098853076
Mean Absolute Error of the stacked model: 2547.599091374098
R-squared of the stacked model: 0.6657705648019081


In [17]:
# model using knn and svm (support vector machine) model
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

# Step 1: Train the Random Forest Regressor with the best parameters
rf = RandomForestRegressor(n_estimators=300, 
                           min_samples_split=5, 
                           min_samples_leaf=1, 
                           max_features='sqrt', 
                           max_depth=30, 
                           random_state=42)
rf.fit(X_train, y_train)

# Step 2: Train the Neural Network with the given architecture
model = Sequential()
model.add(Dense(units=256, activation='relu', input_dim=X_train.shape[1]))  # First hidden layer
model.add(Dropout(0.3))
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(1, activation='linear'))  # Output layer
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the neural network
model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

# Step 3: Get predictions from both models
rf_preds_train = rf.predict(X_train)
nn_preds_train = model.predict(X_train)

rf_preds_test = rf.predict(X_test)
nn_preds_test = model.predict(X_test)

# Step 4: Train the Gradient Boosting Regressor with the given parameters
gb = GradientBoostingRegressor(n_estimators=500,  # Updated to match the best parameter
                               learning_rate=0.1, 
                               max_depth=6, 
                               subsample=1.0,  # Use 100% of the samples
                               random_state=42)
gb.fit(X_train, y_train)

gb_preds_train = gb.predict(X_train)
gb_preds_test = gb.predict(X_test)

# Step 5: Train XGBoost with the provided parameters
xgb_model = xgb.XGBRegressor(colsample_bytree=0.9, 
                             learning_rate=0.1, 
                             max_depth=6, 
                             n_estimators=500, 
                             subsample=1.0, 
                             random_state=42)
xgb_model.fit(X_train, y_train)

xgb_preds_train = xgb_model.predict(X_train)
xgb_preds_test = xgb_model.predict(X_test)

# Step 6: Train the K-Nearest Neighbors model
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)

knn_preds_train = knn.predict(X_train)
knn_preds_test = knn.predict(X_test)

# Step 7: Train the Support Vector Machine (SVM) model
svm = SVR(kernel='rbf')
svm.fit(X_train, y_train)

svm_preds_train = svm.predict(X_train)
svm_preds_test = svm.predict(X_test)

# Step 8: Combine predictions from all models
X_meta_train = np.column_stack((rf_preds_train, nn_preds_train.flatten(), gb_preds_train, xgb_preds_train, knn_preds_train, svm_preds_train))
X_meta_test = np.column_stack((rf_preds_test, nn_preds_test.flatten(), gb_preds_test, xgb_preds_test, knn_preds_test, svm_preds_test))

# Step 9: Train the meta-model (Linear Regression)
meta_model = LinearRegression()
meta_model.fit(X_meta_train, y_train)

# Step 10: Make final predictions with the meta-model
final_preds = meta_model.predict(X_meta_test)

# Step 11: Evaluate the model
mse = mean_squared_error(y_test, final_preds)
print(f'Mean Squared Error of the stacked model: {mse}')

mae = mean_absolute_error(y_test, final_preds)
r2 = r2_score(y_test, final_preds)
print(f'Mean Absolute Error of the stacked model: {mae}')
print(f'R-squared of the stacked model: {r2}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


KeyboardInterrupt: 