<a href="https://colab.research.google.com/github/ctrivino1/YLearn/blob/main/Feature_Importance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import matplotlib.pyplot as plt

# Fetch the California housing data and wrap the feature matrix in a labelled DataFrame
california_housing = fetch_california_housing()
X, y = (
    pd.DataFrame(data=california_housing.data, columns=california_housing.feature_names),
    california_housing.target,
)

# Hold out 25% of the rows for testing; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Train an XGBoost regressor (n_estimators=100) on the training split
xgb = XGBRegressor(n_estimators=100).fit(X_train, y_train)

# Importance scores exposed by the fitted model
feature_importances = xgb.feature_importances_

# Horizontal bar chart with the features ordered by their importance score
sorted_idx = np.argsort(feature_importances)
plt.barh(y=X.columns[sorted_idx], width=feature_importances[sorted_idx])
plt.xlabel("XGBoost Feature Importance")
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def correlation_heatmap(data):
    """Draw an annotated heatmap of the pairwise column correlations of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose columns are correlated with ``DataFrame.corr``.
        Non-numeric columns are left out of the correlation matrix instead of
        raising (``numeric_only=True``, available in pandas >= 1.5).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # numeric_only=True keeps the result identical for all-numeric input and
    # avoids the conversion error pandas 2.x raises on string columns.
    correlations = data.corr(numeric_only=True)

    fig, ax = plt.subplots(figsize=(10, 10))
    # Draw on the axes created above explicitly rather than relying on
    # pyplot's implicit "current axes" state.
    sns.heatmap(correlations, vmax=1.0, center=0, fmt='.2f', cmap="YlGnBu",
                square=True, linewidths=.5, annot=True, cbar_kws={"shrink": .70},
                ax=ax)
    plt.show()

# X_train is not defined in this cell: it is the training split created by the
# train_test_split call in an earlier cell, which must have been run first.
correlation_heatmap(X_train)


In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

# Fetch the California housing data and wrap the feature matrix in a labelled DataFrame
california_housing = fetch_california_housing()
X, y = (
    pd.DataFrame(data=california_housing.data, columns=california_housing.feature_names),
    california_housing.target,
)

# Hold out 25% of the rows for testing; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Train an XGBoost regressor (n_estimators=100) on the training split
xgb = XGBRegressor(n_estimators=100).fit(X_train, y_train)

# Permutation importance on the held-out split, 100 repeats per feature, seeded
perm_importance = permutation_importance(
    xgb, X_test, y_test, n_repeats=100, random_state=42
)

# Horizontal bar chart of the mean importance, features ordered by that mean
sorted_idx = np.argsort(perm_importance.importances_mean)
plt.barh(y=X.columns[sorted_idx], width=perm_importance.importances_mean[sorted_idx])
plt.xlabel("Permutation Importance")
plt.show()


In [None]:
!pip install shap

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import shap

# Fetch the California housing data and wrap the feature matrix in a labelled DataFrame
california_housing = fetch_california_housing()
X, y = (
    pd.DataFrame(data=california_housing.data, columns=california_housing.feature_names),
    california_housing.target,
)

# Hold out 25% of the rows for testing; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Train an XGBoost regressor (n_estimators=100) on the training split
xgb = XGBRegressor(n_estimators=100).fit(X_train, y_train)

# Wrap the fitted regressor in a SHAP tree explainer
explainer = shap.TreeExplainer(xgb)

# SHAP values for every row of the held-out split
shap_values = explainer.shap_values(X_test)

# Bar-style summary plot of the SHAP values per feature
shap.summary_plot(
    shap_values,
    X_test,
    plot_type="bar",
)
