# Energy Consumption Prediction Model

Train a machine learning model to predict energy consumption (kWh) based on building parameters.

**Model**: Random Forest Regressor
**Features**: 5 (Temperature, Humidity, Time of Day, Building Load, Usage Factor)
**Target**: Energy Consumption in kWh (nominal range 0-500; the synthetic data generated in this notebook stays within roughly 0-300)

## 1. Import Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import joblib
import os

# Display settings.
# 'seaborn-v0_8-darkgrid' is the style-sheet name on Matplotlib >= 3.6; older
# releases ship the same sheet as 'seaborn-darkgrid'. Fall back to the legacy
# name only when the new one is missing and the legacy one is present, so an
# environment with neither still raises exactly as before.
style_name = 'seaborn-v0_8-darkgrid'
if style_name not in plt.style.available and 'seaborn-darkgrid' in plt.style.available:
    style_name = 'seaborn-darkgrid'
plt.style.use(style_name)

# Seed NumPy's global RNG so every np.random draw that follows is repeatable.
np.random.seed(42)

print('✓ Libraries imported successfully')

## 2. Create Synthetic Energy Consumption Dataset

In [None]:
# Generate synthetic energy consumption data.
# Re-seed the global RNG here (same seed value the notebook uses at import
# time) so that re-running this cell on its own reproduces the exact same
# dataset instead of continuing from wherever the random stream was left.
np.random.seed(42)
n_samples = 1000

# Create features.
# NOTE: the draws consume the global random stream in dictionary order;
# reordering the entries would change every generated value.
data = {
    'temperature': np.random.uniform(5, 35, n_samples),      # °C
    'humidity': np.random.uniform(20, 80, n_samples),         # %
    'time_of_day': np.random.randint(0, 24, n_samples),       # hours (0-23)
    'building_load': np.random.uniform(50, 200, n_samples),  # kW
    'usage_factor': np.random.uniform(0.3, 1.0, n_samples),  # 0.3-1.0
}

# Calculate energy consumption based on features
# High temp -> More AC (more consumption)
# Low temp -> More heating (more consumption)
# Peak hours (9-17) -> More consumption

# Distance from the 22 °C comfort point, weighted x2, in either direction.
temp_effect = np.abs(data['temperature'] - 22) * 2

# Vectorised peak-hour lookup: 15 inside 9..17 (inclusive), 5 otherwise.
is_peak_hour = (data['time_of_day'] >= 9) & (data['time_of_day'] <= 17)
time_effect = np.where(is_peak_hour, 15, 5)

base_consumption = (data['building_load'] * data['usage_factor'] +
                    temp_effect +
                    time_effect * data['usage_factor'])

# Add Gaussian noise (mean 0, std 10) on top of the deterministic part.
consumption = base_consumption + np.random.normal(0, 10, n_samples)

# Noise can push a value below zero; clamp so consumption is never negative.
data['consumption_kwh'] = np.maximum(consumption, 0)

# Create DataFrame
df = pd.DataFrame(data)

print(f'Dataset shape: {df.shape}')
print('\nFirst 5 rows:')
print(df.head())
print('\nDataset statistics:')
print(df.describe())

## 3. Data Preparation

In [None]:
# Count the missing entries in every column and report them
null_counts = df.isnull().sum()
print('Missing values:')
print(null_counts)

# Feature matrix = every column except the target; target = consumption
X = df.drop(columns='consumption_kwh')
y = df['consumption_kwh']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Summarise the result of the split
print(f'\nTraining set size: {X_train.shape[0]}')
print(f'Test set size: {X_test.shape[0]}')
print(f'\nFeatures: {X_train.columns.tolist()}')
print(f'\nTarget range: {y.min():.2f} - {y.max():.2f} kWh')

## 4. Train Random Forest Model

In [None]:
# Create and train Random Forest Regressor
model = RandomForestRegressor(
    n_estimators=100,       # number of trees in the forest
    max_depth=12,           # cap on the depth of each tree
    min_samples_split=5,    # minimum samples needed to split an internal node
    min_samples_leaf=2,     # minimum samples required at a leaf
    random_state=42,        # fixed seed so training is repeatable
    n_jobs=-1,              # use all available processors
)

print('Training model...')
model.fit(X_train, y_train)
print('✓ Model training completed')

## 5. Evaluate Model Performance

In [None]:
def _regression_metrics(y_true, y_pred):
    """Return ``(rmse, mae, r2)`` for true values against predictions."""
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return rmse, mae, r2


def _print_metrics(title, rmse, mae, r2):
    """Print one titled block of the evaluation report."""
    print(f'\n{title}:')
    print(f'  RMSE: {rmse:.4f} kWh')
    print(f'  MAE:  {mae:.4f} kWh')
    print(f'  R²:   {r2:.4f}')


# Make predictions on both splits so training and test error can be compared
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# Calculate metrics
train_rmse, train_mae, train_r2 = _regression_metrics(y_train, y_pred_train)
test_rmse, test_mae, test_r2 = _regression_metrics(y_test, y_pred_test)

print('='*50)
print('MODEL EVALUATION METRICS')
print('='*50)
_print_metrics('Training Set', train_rmse, train_mae, train_r2)
_print_metrics('Test Set', test_rmse, test_mae, test_r2)
print('='*50)

## 6. Feature Importance Analysis

In [None]:
# Pair each feature name with the importance the fitted forest assigned to
# it, then order the table from most to least important
importance_table = pd.DataFrame({
    'Feature': X_train.columns,
    'Importance': model.feature_importances_
})
feature_importance = importance_table.sort_values(by='Importance', ascending=False)

print('\nFeature Importance:')
print(feature_importance)

# Horizontal bar chart of the importances, one viridis colour per bar
fig, ax = plt.subplots(figsize=(10, 6))
n_bars = len(feature_importance)
colors = plt.cm.viridis(np.linspace(0, 1, n_bars))
ax.barh(
    feature_importance['Feature'],
    feature_importance['Importance'],
    color=colors,
)
ax.set_xlabel('Importance Score')
ax.set_ylabel('Feature')
ax.set_title('Feature Importance - Energy Consumption Model')
fig.tight_layout()
plt.show()

## 7. Save Trained Model

In [None]:
# Create models directory if it doesn't exist.
# The path is relative, so it resolves against the current working directory.
models_dir = '../models'
os.makedirs(models_dir, exist_ok=True)

# Save model
model_path = f'{models_dir}/energy_model.pkl'
joblib.dump(model, model_path)

# Save feature names (in training column order) for later use
features_path = f'{models_dir}/energy_features.pkl'
joblib.dump(X_train.columns.tolist(), features_path)

print(f'✓ Model saved to {model_path}')
print(f'✓ Features saved to {features_path}')
print(f'\nModel file size: {os.path.getsize(model_path) / 1024:.2f} KB')

## 8. Test Predictions with Sample Data

In [None]:
# Create sample input data for testing.
# Columns are listed in the same order as the training features.
sample_data = pd.DataFrame({
    'temperature': [25],       # 25°C
    'humidity': [50],          # 50%
    'time_of_day': [14],       # 2 PM
    'building_load': [150],    # 150 kW
    'usage_factor': [0.8],     # 80% utilization
})

# Make prediction (predict returns one value per input row; take the only one)
prediction = model.predict(sample_data)[0]

print('Sample Input Data:')
print(sample_data)
print(f'\nPredicted Energy Consumption: {prediction:.2f} kWh')

# Interpret consumption level: bands are [0, 100), [100, 200), [200, 300), 300+
if prediction < 100:
    level = '🟢 Low (Excellent Efficiency)'
elif prediction < 200:
    level = '🟡 Moderate (Good Efficiency)'
elif prediction < 300:
    level = '🟠 High (Average)'
else:
    level = '🔴 Very High (Needs Optimization)'

print(f'Consumption Level: {level}')

# Cost estimate inputs.
# NOTE(review): multiplying by 30 treats the prediction as one day's
# consumption - confirm the period the model output is meant to cover.
tariff_per_kwh = 20
days_per_month = 30
monthly_cost = prediction * days_per_month * tariff_per_kwh
print(f'\nEstimated Monthly Cost (₹{tariff_per_kwh}/kWh): ₹{monthly_cost:,.2f}')