#### This notebook focuses on creating an interactive dashboard to visualize model results and data.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import joblib

In [2]:
# Read the feature-engineered dataset from the processed-data folder.
# The path is relative, so it resolves against the notebook's working directory.
feature_engineered_data_path = "../data/processed/feature_engineered_energy_data.csv"
df = pd.read_csv(filepath_or_buffer=feature_engineered_data_path)

In [3]:
# Restore the saved model artifact from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
model_save_path = "../data/models/trained_models.pkl"
best_model = joblib.load(filename=model_save_path)

In [4]:
# Model predictions
# The feature matrix is every column except the target and the country label.
# The prediction column written by this very cell is excluded as well (when it
# already exists), so re-running the cell does not feed the previous
# predictions back into the model as an extra feature.
model_inputs = (
    df.drop(columns=['TotalRenewableEnergy', 'Country'])
      .drop(columns=['PredictedTotalRenewableEnergy'], errors='ignore')
)
df['PredictedTotalRenewableEnergy'] = best_model.predict(model_inputs)

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [5]:
# Interactive scatter of actual vs. predicted values with an OLS trendline.
axis_labels = {
    'TotalRenewableEnergy': 'Actual Values',
    'PredictedTotalRenewableEnergy': 'Predictions',
}
fig = px.scatter(
    df,
    x='TotalRenewableEnergy',
    y='PredictedTotalRenewableEnergy',
    title='Predictions vs Actual Values',
    labels=axis_labels,
    trendline='ols',
)
fig.show()

In [6]:
# Residuals are defined as actual minus predicted; they are plotted against
# the predictions, with a dashed red reference line at zero.
actual_values = df['TotalRenewableEnergy']
predicted_values = df['PredictedTotalRenewableEnergy']
residuals = actual_values - predicted_values

fig = px.scatter(
    x=predicted_values,
    y=residuals,
    title='Residual Analysis',
    labels={'x': 'Predicted Values', 'y': 'Residuals'},
)
fig.add_hline(y=0, line_dash='dash', line_color='red')
fig.show()

In [7]:
# Histogram of the residuals, binned into 30 buckets.
histogram_labels = {'value': 'Residuals', 'count': 'Frequency'}
fig = px.histogram(
    residuals,
    nbins=30,
    title='Residual Distribution',
    labels=histogram_labels,
)
fig.show()

In [8]:
# Feature importance chart; only drawn when the loaded model exposes a
# `feature_importances_` attribute (e.g. XGBoost or LightGBM estimators).
if hasattr(best_model, 'feature_importances_'):
    feature_importances = best_model.feature_importances_

    # Feature names are the dataframe columns left after removing the target,
    # the country label and the prediction column added earlier.
    non_feature_columns = ['TotalRenewableEnergy', 'Country', 'PredictedTotalRenewableEnergy']
    feature_names = df.drop(columns=non_feature_columns).columns

    # Pair each feature with its importance, then order from most to least important.
    importance_df = pd.DataFrame({
        'Feature': feature_names,
        'Importance': feature_importances,
    })
    importance_df = importance_df.sort_values(by='Importance', ascending=False)

    fig = px.bar(
        importance_df,
        x='Importance',
        y='Feature',
        title='Feature Importance',
        labels={'Importance': 'Importance', 'Feature': 'Feature'},
    )
    fig.show()