In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Load the normalized project database from an Excel workbook.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on a machine where this exact file exists.
DATA_PATH = r"C:\Users\abhis\Downloads\NewNormalized_DB.xlsx"
df = pd.read_excel(DATA_PATH)


In [None]:
# Keep only the rows whose typology is 'Business', then remove the two columns
# that are not used afterwards: the roof-window ratio and the typology flag
# itself (constant after the filter).
filtered_df = df.loc[df['ProjectTypology'] == 'Business'].drop(
    columns=['Roof-Window-Area/Roof-Area', 'ProjectTypology']
)


In [None]:
# Discard every row that has a missing value in any remaining column.
filtered_df = filtered_df.dropna(axis=0, how='any')

In [None]:
# Numeric feature columns that are min-max scaled before modelling.
cols_to_scale = [
    'Above-Grade/Below-Grade',
    'Conditioned-Area/UnConditioned-Area',
    'Roof-Area/Total-AG-Floor-Area',
    'Total-Above-Grade-Ext-Wall-Area/Total-AG-FloorArea',
    'Power-Lighting(W/SQFT)',
    'Equipment-Tot(W/SQFT)',
    'ROOF-U-Value(BTU/HR-SQFT-F)',
    'ALL WALLS-Wall-U-Value(BTU/HR-SQFT-F)',
    'UNDERGRND-Wall-U-Value(BTU/HR-SQFT-F)',
    'ROOF-Window-U-Value(BTU/HR-SQFT-F)',
    'ALL WALLS-Window-U-Value(BTU/HR-SQFT-F)',
    'WWR',
    'Total-LSC(KW/SQFT)',
    'Total-LOAD(KW/SQFT)',
    'Total-LOAD/Conditioned-Area(KW/SQFT)',
]

# Categorical feature columns that are one-hot encoded.
categorical_features = ['Climate']

In [None]:
# Separate the regression target from the feature columns.
TARGET_COLUMN = 'Energy_Outcome(KWH/SQFT)'
y = filtered_df[TARGET_COLUMN]
X = filtered_df.drop(columns=[TARGET_COLUMN])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit the min-max scaler on the training rows only and apply the same fitted
# ranges to the test rows.
#
# The unscaled values are always read from `X`, and the splits are copied
# before being written to. Re-running this cell therefore re-fits the scaler on
# raw data instead of on already-scaled columns (which would silently turn the
# scaler that is later saved into an identity mapping), and the assignments no
# longer write into frames derived from `X` (avoids SettingWithCopyWarning).
assert X.index.is_unique, "row labels must be unique to look up the raw split rows in X"

scaler = MinMaxScaler()
X_train = X_train.copy()
X_test = X_test.copy()
X_train[cols_to_scale] = scaler.fit_transform(X.loc[X_train.index, cols_to_scale])
X_test[cols_to_scale] = scaler.transform(X.loc[X_test.index, cols_to_scale])


In [None]:
categorical_features = ['Climate']

# One-hot encode the categorical column(s); every other column is passed
# through unchanged and placed after the one-hot columns.
#
# handle_unknown='ignore' encodes a category not seen during fit as all zeros.
# No `drop` is used: with drop='first' the dropped baseline category is also
# all zeros, so an unseen category would be indistinguishable from it (and
# older scikit-learn releases reject the drop + ignore combination). A tree
# ensemble does not need a reference category to be dropped.
#
# sparse_threshold=0 makes the transformer always return a dense array, which
# the later pd.DataFrame(...) wrapping relies on.
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

In [None]:
# Learn the encoding from the training split only, then reuse the fitted
# transformer on the test split.
X_train_encoded = preprocessor.fit_transform(X=X_train)
X_test_encoded = preprocessor.transform(X=X_test)


In [None]:
# Rebuild labelled DataFrames from the transformer output: the one-hot columns
# come first, followed by the passed-through columns in their original order.
fitted_encoder = preprocessor.named_transformers_['onehot']
onehot_columns = fitted_encoder.get_feature_names_out(categorical_features)

passthrough_columns = [col for col in X_train.columns if col not in categorical_features]
encoded_columns = list(onehot_columns) + passthrough_columns

X_train_encoded_df = pd.DataFrame(X_train_encoded, columns=encoded_columns)
X_test_encoded_df = pd.DataFrame(X_test_encoded, columns=encoded_columns)

In [None]:
# Train the random forest on the encoded training features.
# random_state is fixed (same seed as the train/test split) so that the fitted
# forest, its metrics and the saved model are reproducible on a fresh run.
main_model = RandomForestRegressor(random_state=42)
main_model.fit(X_train_encoded_df, y_train)

In [None]:
# Predict the target for the held-out test rows.
y_pred = main_model.predict(X=X_test_encoded_df)


In [None]:
# Score the test-set predictions: mean absolute error, mean squared error and
# coefficient of determination.
MAE, MSE, R2 = (
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print(f"MAE: {MAE}", f"MSE: {MSE}", f"R2 Score: {R2}", sep="\n")


In [None]:
# Function to preprocess new data for prediction
def preprocess_new_data(new_data, scaler, preprocessor):
    """Scale and one-hot encode new rows the same way as the training data.

    Parameters
    ----------
    new_data : DataFrame or anything accepted by ``pd.DataFrame``
        Raw feature rows. Must contain every column the scaler and the
        preprocessor were fitted on; column order does not matter.
    scaler : fitted scaler
        Object with ``transform``; its ``feature_names_in_`` (when present)
        names the columns to scale, otherwise the notebook-level
        ``cols_to_scale`` list is used.
    preprocessor : fitted ColumnTransformer
        Its first transformer must be the one-hot encoder and the remaining
        columns must be passed through (as configured in this notebook).

    Returns
    -------
    DataFrame
        One-hot columns followed by the passed-through columns, labelled in
        the order the preprocessor was fitted with.

    Raises
    ------
    KeyError
        If a column to scale is missing from ``new_data``.
    ValueError
        If any other column seen by the preprocessor at fit time is missing.
    """
    # pd.DataFrame(df) can share data with `df`; copy so the caller's object is
    # never scaled in place (calling this twice must not double-scale).
    new_data = pd.DataFrame(new_data).copy()

    # Apply MinMax scaling to the same columns the scaler was fitted on.
    if hasattr(scaler, "feature_names_in_"):
        scaled_cols = list(scaler.feature_names_in_)
    else:
        scaled_cols = cols_to_scale
    new_data[scaled_cols] = scaler.transform(new_data[scaled_cols])

    # Arrange the columns in the order seen at fit time so that the output
    # values and the labels attached below cannot get out of step when the
    # caller supplies the columns in a different order.
    fit_columns = list(getattr(preprocessor, "feature_names_in_", new_data.columns))
    missing = [col for col in fit_columns if col not in new_data.columns]
    if missing:
        raise ValueError(f"new_data is missing required columns: {missing}")
    new_data = new_data[fit_columns]

    # Apply One-Hot Encoding (remaining columns pass through).
    new_data_encoded = preprocessor.transform(new_data)

    # Derive the output labels from the fitted preprocessor itself rather than
    # from notebook globals, so the function also works with objects that were
    # loaded from disk.
    encoder = preprocessor.transformers_[0][1]
    encoded_cols = list(preprocessor.transformers_[0][2])
    onehot_names = list(encoder.get_feature_names_out(encoded_cols))
    passthrough_names = [col for col in fit_columns if col not in encoded_cols]

    return pd.DataFrame(new_data_encoded, columns=onehot_names + passthrough_names)

# Example input: a single project described with the same feature names as the
# training data (each value wrapped in a one-element list, i.e. one row).
new_data = {
    # Numeric features
    'Above-Grade/Below-Grade': [1.740841133],
    'Conditioned-Area/UnConditioned-Area': [0.8152585082],
    'Roof-Area/Total-AG-Floor-Area': [0.630872462],
    'Total-Above-Grade-Ext-Wall-Area/Total-AG-FloorArea': [0.5938106128],
    'Power-Lighting(W/SQFT)': [1.063847852],
    'Equipment-Tot(W/SQFT)': [0.9882261944],
    'ROOF-U-Value(BTU/HR-SQFT-F)': [0.051],
    'ALL WALLS-Wall-U-Value(BTU/HR-SQFT-F)': [0.118],
    'UNDERGRND-Wall-U-Value(BTU/HR-SQFT-F)': [0.024],
    'ROOF-Window-U-Value(BTU/HR-SQFT-F)': [0],
    'ALL WALLS-Window-U-Value(BTU/HR-SQFT-F)': [0.918],
    'WWR': [0.265731904],
    'Total-LSC(KW/SQFT)': [0.003720438671],
    'Total-LOAD(KW/SQFT)': [0.01641817803],
    'Total-LOAD/Conditioned-Area(KW/SQFT)': [0.033800463],
    # Categorical feature
    'Climate': ['Composite'],
}

# Tabular form of the example row
new_data_df = pd.DataFrame.from_dict(new_data)

# Scale + encode with the already-fitted objects
new_data_encoded_df = preprocess_new_data(
    new_data=new_data_df,
    scaler=scaler,
    preprocessor=preprocessor,
)

# Run the trained model on the prepared row
predicted_energy_outcome = main_model.predict(X=new_data_encoded_df)

# Report the single prediction
print(f"Predicted Energy Outcome (KWH/SQFT): {predicted_energy_outcome[0]}")


In [None]:
import pickle

# Bundle everything needed to score new data later: the trained model plus the
# fitted scaler and preprocessor that must be applied to inputs first.
artifacts = {
    'model': main_model,
    'scaler': scaler,
    'preprocessor': preprocessor,
}

# Write the bundle to a pickle file in the current working directory.
with open('energy_prediction_model.pkl', 'wb') as file:
    pickle.dump(artifacts, file)

print("Model saved successfully!")
