In [1]:
cd ..

c:\Users\Usuario\OneDrive\Documents\IE\3. Trimestre\Venture Lab & Capstone\Capstone\Tech side\vl_optimizer


# **Chiller:** 
### The model quantifies how different operational factors influence the adjusted energy consumption of the chiller system.

$$
C = \beta_0 + \beta_1 \cdot \text{HISPD} + \beta_2 \cdot \text{EAP} + \beta_3 \cdot \text{NAC}
$$

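- $C$ is the adjusted energy consumption of the chiller group and $\beta_0$ is the intercept, i.e. the predicted value of $C$ when all three predictors are zero.
- $\beta_1$ multiplies $\text{HISPD}$ (Heat Index Set Point Difference), adjusting energy consumption based on the difference between the heat index and the set point, which might reflect external thermal load or environmental conditions.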
- $\beta_2$ is associated with $\text{EAP}$ (Efficiency Adjusted Power), reflecting the adjusted power consumption based on the efficiency of the chiller.
- $\beta_3$ corresponds to $\text{NAC}$ (Number of Active Chillers), indicating the effect of the operational capacity of chillers on energy consumption.

## Import required libraries

In [2]:
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from joblib import dump, load

## Import Synthesized data

In [3]:
# Synthesized chiller-group dataset, addressed relative to the current
# working directory.
file_path = 'data/synthesized_data/chiller_group_synthesized.csv'

# index_col=False tells pandas not to use the first column as the index.
df = pd.read_csv(
    file_path,
    index_col=False,
)

# Display the column labels as the cell output.
df.columns

Index(['Time', '03 Chiller Group_Electric_Active Energy (kWh)', 'outdoor_temp',
       'outdoor_humidity', 'heat_index',
       'Chiller Group_Electric_Power Factor (real)(efficiency)', 'set_point',
       'num_active_chiller'],
      dtype='object')

## Get Coefficients

In [4]:
# Build the missing-value mask once and derive every summary from it.
missing_mask = df.isna()

# Missing cells per column, and their grand total over the whole frame.
nans_per_column = missing_mask.sum()
total_nans = nans_per_column.sum()
print(f'Total NaN values in the DataFrame: {total_nans}')

print('NaN values in each column:')
print(nans_per_column)

# Rows that have at least one missing cell.
rows_with_nans_count = missing_mask.any(axis=1).sum()
print(f'Number of rows with at least one NaN value: {rows_with_nans_count}')

# Replace NaN values with the column mean in a specific column.
column_to_fill = 'Chiller Group_Electric_Power Factor (real)(efficiency)'  # replace with your specific column name

# Assign the filled Series back to the frame. Calling
# df[column].fillna(..., inplace=True) operates on an intermediate selection:
# pandas 2.x warns about it, and with Copy-on-Write enabled the original
# DataFrame is left unchanged, so the NaN would silently survive.
df[column_to_fill] = df[column_to_fill].fillna(df[column_to_fill].mean())

# Verify that no NaNs are left in that column.
nan_counts_after_filling = df[column_to_fill].isna().sum()
print(f'Total NaN values in the column "{column_to_fill}" after filling: {nan_counts_after_filling}')



Total NaN values in the DataFrame: 1
NaN values in each column:
Time                                                      0
03 Chiller Group_Electric_Active Energy (kWh)             0
outdoor_temp                                              0
outdoor_humidity                                          0
heat_index                                                0
Chiller Group_Electric_Power Factor (real)(efficiency)    1
set_point                                                 0
num_active_chiller                                        0
dtype: int64
Number of rows with at least one NaN value: 1
Total NaN values in the column "Chiller Group_Electric_Power Factor (real)(efficiency)" after filling: 0


In [5]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from joblib import dump

# --- Feature engineering ---------------------------------------------------
# Raw measured-energy column, and the assumed COP used to scale it.
target = '03 Chiller Group_Electric_Active Energy (kWh)'
assumed_COP = 3.0  # Assumed value for COP

# HISPD: heat index minus set point.
df['heat_index_set_point_diff'] = df['heat_index'].sub(df['set_point'])

# EAP: a straight copy of the power-factor column under a shorter name.
df['efficiency_adjusted_power'] = df['Chiller Group_Electric_Power Factor (real)(efficiency)']

# Regression target: measured energy * EAP * assumed COP.
# NOTE(review): this target is computed from 'efficiency_adjusted_power',
# which is also used as a predictor below - confirm this is intended.
df['adjusted_energy'] = df[target].mul(df['efficiency_adjusted_power']).mul(assumed_COP)

# Predictor columns, in the order the coefficients will be reported.
features = [
    'heat_index_set_point_diff',
    'efficiency_adjusted_power',
    'num_active_chiller',
]

# Design matrix (X) and response (y).
X = df.loc[:, features]
y = df['adjusted_energy']

# Ordinary least-squares estimator used for both validation and the final fit.
model = LinearRegression()

# Shuffled 5-fold split with a fixed seed, so both metrics below are scored
# on the same folds.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# One cross_val_score run per metric: negated MSE first, then R^2.
cv_results_mse, cv_results_r2 = (
    cross_val_score(model, X, y, cv=kfold, scoring=metric)
    for metric in ('neg_mean_squared_error', 'r2')
)

# Final fit on every available row.
model.fit(X, y)

# Pull the fitted parameters off the estimator.
intercept, coefficients = model.intercept_, model.coef_

# Persist only the plain parameters (not the estimator object), together with
# the feature names the coefficients belong to.
model_params = dict(
    intercept=intercept,
    coefficients=coefficients,
    features=features,
)
model_filename = 'equation_modeling/models/chiller_consumption_model.joblib'
dump(model_params, model_filename)

# In-sample predictions and scores from the model fitted on all rows.
# `mse` is computed here but only the R^2 is printed below.
y_pred = model.predict(X)
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)

# Coefficient table, one row per feature.
coefficients_df = pd.DataFrame(coefficients, index=features, columns=['Coefficient'])

# Cross-validated metrics; the MSE scores are stored negated, so the sign is
# flipped for the mean.
print(f'Mean Squared Error (Cross-Validation): {-cv_results_mse.mean()}')
print(f'Standard Deviation (Cross-Validation): {cv_results_mse.std()}')
print(f'R^2 Score (Cross-Validation): {cv_results_r2.mean()}')
print(f'Overall R^2 Score: {r2}')

# Fitted equation parameters.
print(f'Intercept: {intercept}')
print(coefficients_df)

# Function to make predictions using the saved model
def predict(X, model_params):
    """Evaluate the saved linear equation ``intercept + X . coefficients``.

    Parameters
    ----------
    X : pandas.DataFrame or array-like exposing ``.dot``
        Predictor values, one row per observation.
    model_params : dict
        Must contain ``'intercept'`` and ``'coefficients'``. If it also
        contains ``'features'`` and ``X`` has all of those column labels,
        ``X`` is reduced to exactly those columns, in that order, before the
        dot product. This keeps extra or reordered columns from being paired
        with the wrong coefficient.

    Returns
    -------
    The result of ``intercept + X.dot(coefficients)``.

    Notes
    -----
    When ``X`` has no ``columns`` attribute, or the stored feature names are
    absent from it, the columns of ``X`` are used positionally as given.
    """
    intercept = model_params['intercept']
    coefficients = model_params['coefficients']

    # Align by name when both sides carry names; otherwise stay positional.
    features = model_params.get('features')
    columns = getattr(X, 'columns', None)
    if features is not None and columns is not None and all(name in columns for name in features):
        X = X[list(features)]

    return intercept + X.dot(coefficients)

# Optional: Load the model and make predictions
# loaded_model_params = load(model_filename)
# predictions = predict(X, loaded_model_params)
# print(f'Model predictions: {predictions}')

Mean Squared Error (Cross-Validation): 8633.52303439887
Standard Deviation (Cross-Validation): 672.519465870454
R^2 Score (Cross-Validation): 0.2974779516221756
Overall R^2 Score: 0.2986972230758401
Intercept: -3.342928666864431
                           Coefficient
heat_index_set_point_diff     1.511416
efficiency_adjusted_power    22.876512
num_active_chiller           22.343848


# Equations

1. Total Consumption (`total_consumption_chiller_group`):

$$
\text{total\_consumption\_chiller\_group} = \beta_1 \cdot \text{number\_active\_chillers} + \beta_2 \cdot \text{outdoor\_temp} + \beta_3 \cdot \text{outdoor\_humidity} + \beta_4 \cdot \text{chiller\_set\_point} + \beta_5 \cdot \text{Chiller Efficiency}
$$

2. Chiller Set Point (`chiller_set_point`):

$$
\text{chiller\_set\_point} = \gamma_1 \cdot \text{number\_active\_chillers} + \gamma_2 \cdot \text{outdoor\_temp} + \gamma_3 \cdot \text{outdoor\_humidity} + \gamma_4 \cdot \text{Chiller Efficiency}
$$

3. Number of Active Chillers (`number_active_chillers`):

$$
\text{number\_active\_chillers} = \delta_1 \cdot \text{outdoor\_temp} + \delta_2 \cdot \text{outdoor\_humidity} + \delta_3 \cdot \text{Chiller Efficiency}
$$

