In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from influxdb import InfluxDBClient
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

class PredictionModel:
    """XGBoost regressor for building energy data loaded from InfluxDB.

    Intended call order: ``load_from_influx`` -> ``split`` -> ``train`` ->
    ``test_and_evaluate`` / ``plot_feature_importance``.

    Every method that takes ``mode`` treats ``'multi'`` as "one regressor per
    target column" (wrapped in ``MultiOutputRegressor``) and any other value
    (the notebook uses ``'total'``) as "a single regressor on the combined
    ``Total_Primary_Energy`` column". Use the same ``mode`` for all calls on
    one instance.
    """

    # Target columns, in the order used for the multi-output model.
    TARGET_COLUMNS = ('Electricity', 'DistrictHeating', 'DistrictCooling')
    # Input feature columns expected in the dataset passed to ``split``.
    FEATURE_COLUMNS = ('T_out', 'DNI', 'Tin_Zone1', 'Tin_Zone2', 'Tin_Zone4',
                       'hour', 'is_weekend')

    def __init__(self):
        self.X_train, self.y_train = [], []
        self.X_test, self.y_test = [], []
        self.model = None
        # Configuration: coefficients used by split() in 'total' mode.
        # Heating is divided by COP_H and multiplied by PE_GAS; cooling is
        # divided by EER_C and multiplied by PE_ELEC; electricity is
        # multiplied by PE_ELEC.
        self.PE_GAS = 1.00
        self.PE_ELEC = 2.17
        self.COP_H = 0.98
        self.EER_C = 5.4

    def load_from_influx(self, client, measurement, time_range):
        """Retrieve data from InfluxDB and prepare it for the ML model.

        Args:
            client: Object whose ``query(str)`` result exposes
                ``get_points()`` (e.g. ``influxdb.InfluxDBClient``).
            measurement: Measurement name, interpolated into the query.
            time_range: Duration text placed after ``now() -`` in the query
                (the notebook passes ``"365d"``).

        Returns:
            DataFrame indexed by the parsed ``time`` column, with the energy
            columns divided by 3,600,000 and ``hour`` / ``is_weekend``
            columns added.

        Raises:
            ValueError: If the query returns no points.
        """
        # NOTE(review): measurement and time_range are inserted verbatim into
        # the query text, so only pass trusted values.
        query = f'SELECT * FROM "{measurement}" WHERE time > now() - {time_range}'
        points = list(client.query(query).get_points())
        if not points:
            # Without this check the failure is an opaque KeyError on 'time'.
            raise ValueError(
                f'No data returned for measurement "{measurement}" '
                f'in the last {time_range}'
            )

        df = pd.DataFrame(points)
        df['time'] = pd.to_datetime(df['time'])
        df.set_index('time', inplace=True)

        # Unit conversion J -> kWh (J / 3,600,000). Applied unconditionally to
        # every energy column present; assumes the stored values are Joules
        # -- TODO confirm against the BMS export.
        for field in self.TARGET_COLUMNS:
            if field in df.columns:
                df[field] = df[field] / 3600000

        # Temporal features derived from the timestamp index.
        df['hour'] = df.index.hour
        df['is_weekend'] = df.index.dayofweek.isin([5, 6]).astype(int)

        return df

    def split(self, dataset, train_ratio, mode='multi'):
        """Build features/targets and split them by row order.

        The first ``int(len(dataset) * train_ratio)`` rows become the training
        set and the remaining rows the test set (no shuffling).

        Args:
            dataset: DataFrame containing ``FEATURE_COLUMNS`` and
                ``TARGET_COLUMNS``.
            train_ratio: Fraction of rows assigned to the training set.
            mode: ``'multi'`` keeps the three target columns separate; any
                other value targets ``Total_Primary_Energy``.

        Note:
            In non-``'multi'`` mode a ``Total_Primary_Energy`` column is added
            to ``dataset`` in place.
        """
        X = dataset[list(self.FEATURE_COLUMNS)]

        if mode == 'multi':
            # Target: individual components [Electricity, Heating, Cooling]
            y = dataset[list(self.TARGET_COLUMNS)]
        else:
            # Target: total primary energy using the configured coefficients
            dataset['Total_Primary_Energy'] = (
                (dataset['DistrictHeating'] / self.COP_H * self.PE_GAS) +
                (dataset['DistrictCooling'] / self.EER_C * self.PE_ELEC) +
                (dataset['Electricity'] * self.PE_ELEC)
            )
            y = dataset['Total_Primary_Energy']

        split_point = int(len(X) * train_ratio)
        self.X_train, self.y_train = X.iloc[:split_point], y.iloc[:split_point]
        self.X_test, self.y_test = X.iloc[split_point:], y.iloc[split_point:]

    def train(self, mode='multi'):
        """Train the XGBoost model on the data prepared by ``split``.

        Args:
            mode: ``'multi'`` wraps the regressor in ``MultiOutputRegressor``
                (one fitted copy per target column); any other value fits the
                single regressor directly.
        """
        model = xgb.XGBRegressor(
            n_estimators=1000,
            learning_rate=0.05,
            max_depth=5,
            subsample=0.8,
            colsample_bytree=0.8,
            objective='reg:squarederror'
        )

        if mode == 'multi':
            self.model = MultiOutputRegressor(model)
        else:
            # Use the configured regressor as-is. Passing it to
            # XGBRegressor(...) positionally would not copy its settings.
            self.model = model

        self.model.fit(self.X_train, self.y_train)
        print(f"Model trained on InfluxDB data (Mode: {mode})")

    def test_and_evaluate(self, mode='multi'):
        """Print MSE and R² of the trained model on the test set.

        Prints an error and returns early if ``train`` has not been called.

        Args:
            mode: ``'multi'`` reports one line per target column; any other
                value reports a single line for Total Primary Energy.
        """
        if self.model is None:
            print("ERROR: Model is not trained.")
            return

        predictions = self.model.predict(self.X_test)

        print(f"\n--- Evaluation Results ({mode.upper()} mode) ---")

        if mode == 'multi':
            # predictions and y_test have one column per target, in
            # TARGET_COLUMNS order.
            for i, target_name in enumerate(self.TARGET_COLUMNS):
                actual = self.y_test.iloc[:, i]
                predicted = predictions[:, i]
                mse = mean_squared_error(actual, predicted)
                r2 = r2_score(actual, predicted)
                print(f"{target_name} -> MSE: {mse:.2f}, R²: {r2:.4f}")
        else:
            # Single output: Total Primary Energy
            mse = mean_squared_error(self.y_test, predictions)
            r2 = r2_score(self.y_test, predictions)
            print(f"Total Primary Energy -> MSE: {mse:.2f}, R²: {r2:.4f}")

    def plot_feature_importance(self, mode='multi'):
        """Plot gain-based feature importance for the trained model.

        Prints an error and returns early if ``train`` has not been called.

        Args:
            mode: ``'multi'`` draws one figure per internal estimator; any
                other value draws a single figure for the one regressor.
        """
        if self.model is None:
            print("ERROR: Model is not trained.")
            return

        if mode == 'multi':
            # One figure per per-target estimator held by MultiOutputRegressor.
            to_plot = [
                (f"Feature Importance: {target_name}", estimator)
                for target_name, estimator in zip(self.TARGET_COLUMNS,
                                                  self.model.estimators_)
            ]
        else:
            to_plot = [("Feature Importance: Total Primary Energy", self.model)]

        for title, estimator in to_plot:
            # Pass our own axes: without ``ax`` plot_importance opens a new
            # figure and the sized one created here would be shown empty.
            _, ax = plt.subplots(figsize=(10, 6))
            xgb.plot_importance(estimator, importance_type='gain', ax=ax)
            ax.set_title(title)
            plt.show()

In [None]:
# InfluxDB Connection Details
# NOTE(review): credentials are hard-coded here; consider loading them from
# environment variables or a secrets store before sharing this notebook.
INFLUX_HOST = 'localhost'       # The host where InfluxDB is running
INFLUX_PORT = 8086              # The port
INFLUX_USER = 'admin'           # Username
INFLUX_PASS = 'admin123'        # Password
INFLUX_DB   = 'base_building' # Name of the BMS database

# Configuration
MEASUREMENT = "bms_data" # The measurement containing these fields
TIME_RANGE  = "365d" # The time period to analyze

# Initialize the Client
# Building the client object does not contact the server by itself, so
# ping() is called to force a real round trip; an unreachable host or a bad
# port is then reported here instead of on the first query.
try:
    client = InfluxDBClient(host=INFLUX_HOST, port=INFLUX_PORT, 
                            username=INFLUX_USER, password=INFLUX_PASS, 
                            database=INFLUX_DB)
    client.ping()
    print("Connected to InfluxDB v1.")
except Exception as e:
    print(f"Error connecting to InfluxDB v1: {e}")
    exit(1)

In [None]:
# Multi-output run: one regressor per energy target
# (Electricity, DistrictHeating, DistrictCooling).
model = PredictionModel()

# Load the measurement and keep the first 80% of rows for training;
# the remaining rows form the test set.
df_influx = model.load_from_influx(client, MEASUREMENT, TIME_RANGE)
model.split(df_influx, train_ratio=0.8, mode='multi')

# Fit, then report per-target metrics and feature importance.
model.train(mode='multi')
model.test_and_evaluate(mode='multi')
model.plot_feature_importance(mode='multi')

In [None]:
# Total primary energy run: a single regressor on the combined
# Total_Primary_Energy target.
model = PredictionModel()

# Load the measurement and keep the first 80% of rows for training;
# the remaining rows form the test set.
df_influx = model.load_from_influx(client, MEASUREMENT, TIME_RANGE)
model.split(df_influx, train_ratio=0.8, mode='total')

# Fit, then report metrics and feature importance for the single target.
model.train(mode='total')
model.test_and_evaluate(mode='total')
model.plot_feature_importance(mode='total')