<h2 style='background:blue; border:0; color:white'><center>Dtreeviz is a Python library that facilitates decision tree visualization and model interpretation, which is pivotal for understanding gradient boosting machines and random forests. It helps in comprehending how a model operates and in interpreting its results, both of which are crucial for learning and explaining machine learning models based on decision trees.</center></h2>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Install All Important Libraries</center></h1>

In [None]:
!pip install dtreeviz
%config InlineBackend.figure_format = 'retina'
%config InlineBackend.figure_format = 'svg' 
%matplotlib inline

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Import Packages</center></h1>

In [None]:
import graphviz.backend as be
from sklearn.datasets import *
from dtreeviz.trees import *
from IPython.display import Image, display_svg, SVG
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

In [None]:
# Read the Iris table from the Kaggle input mount and preview its first rows.
IRIS_CSV_PATH = '/kaggle/input/iris/Iris.csv'
dataset = pd.read_csv(IRIS_CSV_PATH)
dataset.head()

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Exploring the data a bit</center></h1>

In [None]:
# Count how many distinct values the Species column holds.
dataset['Species'].nunique()

In [None]:
# List the distinct Species values themselves.
dataset['Species'].unique()

In [None]:
# For each column, flag whether it contains at least one missing value.
missing_mask = dataset.isna()
null_columns = missing_mask.any()
null_columns

In [None]:
# Replace the Species values with integer category codes so the column can be
# used as a numeric target (and, further down, as a numeric feature), then
# preview the updated frame.
species_codes = dataset["Species"].astype("category").cat.codes
dataset["Species"] = species_codes
dataset.head()

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Visualization of Classification tasks</center></h1>

In [None]:
# Fit a depth-3 decision tree that predicts the encoded Species from the four
# flower measurements.
features = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
]
target = "Species"
random_state = 1234  # fixed seed passed to every tree in this notebook

X_values = dataset[features].values
y_values = dataset[target].values
tree_classifier = DecisionTreeClassifier(max_depth=3, random_state=random_state)
tree_classifier.fit(X_values, y_values)

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Initialize the dtreeviz model, providing the fitted model, X, y, feature names, target name, and target class names</center></h1>

In [None]:
import dtreeviz

# Bundle the fitted classifier with its training data and the human-readable
# names (features, target, classes) handed to dtreeviz.
species_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
viz_model = dtreeviz.model(
    tree_classifier,
    X_train=dataset[features],
    y_train=dataset[target],
    feature_names=features,
    target_name=target,
    class_names=species_labels,
)

# Default visualization of the decision tree

In [None]:
# Default view of the classifier tree; only the scale factor (2) is overridden.
viz_model.view(scale=2)

# Horizontal Decision Tree by changing the orientation to left-to-right

In [None]:
# Same tree with orientation="LR" (left-to-right layout) and a scale of 1.2.
viz_model.view(orientation="LR",scale=1.2)

# In this case the decision tree is small, but that will not be true for every problem or dataset, so we can reduce the amount of detail by turning off the fancy view

In [None]:
# fancy=False switches off the detailed ("fancy") view to reduce visual detail.
viz_model.view(fancy=False,scale=2)

# We can also explore the tree level by level using the depth_range_to_display=(x, y) parameter, where x is the shallowest depth level to display and y is the deepest one

In [None]:
# Restrict the drawing to the depth range (1, 2).
viz_model.view(depth_range_to_display=(1, 2)) 

In [None]:
# Same restriction with a wider depth range, (1, 3).
viz_model.view(depth_range_to_display=(1, 3))

# For some tasks, we want to understand how the tree behaves for a specific row, i.e. look at the decision path that row follows. Let's pick a specific row and explore

In [None]:
# Select the row at positional index 10 (i.e. the 11th row), restricted to the columns in `features`.
# `.iloc` with a single integer returns a pandas Series, not a DataFrame.
x = dataset[features].iloc[10]
x

In [None]:
# Draw the tree for the specific row x so its path through the tree can be inspected.
viz_model.view(x=x)

In [None]:
# show_just_path=True asks for only the path followed by x rather than the whole tree.
viz_model.view(x=x, show_just_path=True)

# We can also get a string representation explaining all the comparisons made on that row

In [None]:
# Print the textual explanation of the comparisons made on row x.
print(viz_model.explain_prediction_path(x))

# Now we can also explore each feature's importance for that specific row

In [None]:
# Feature importance for the single row x, drawn in a small (3.5 x 2) figure.
viz_model.instance_feature_importance(x, figsize=(3.5,2))

In [None]:
# Pick a second sample, the row at positional index 110, restricted to the feature columns.
c = dataset[features].iloc[110]
c

In [None]:
# Feature importance for the second sample c.
viz_model.instance_feature_importance(c, figsize=(3.5,2))

In [None]:
# Path-only view for the second sample c.
viz_model.view(x=c, show_just_path=True)

# We also have a few functions for getting information about the leaves of the tree

In [None]:
# Leaf information: leaf sizes of the classifier tree.
viz_model.leaf_sizes(figsize=(3.5,2))

In [None]:
# Leaf information: leaf distributions of the classification tree.
viz_model.ctree_leaf_distributions(figsize=(3.5,2))

In [None]:
# Leaf information: leaf purity of the classifier tree.
viz_model.leaf_purity(figsize=(3.5,2))

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Similarly, the same functions are available for regression tasks</center></h1>

In [None]:
# Regression variant: predict petal width from the remaining measurements plus
# the integer-encoded Species column.
features_reg = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "Species",
]
target_reg = "PetalWidthCm"

tree_regressor = DecisionTreeRegressor(
    max_depth=3,
    random_state=random_state,
    criterion="absolute_error",
)
reg_inputs = dataset[features_reg].values
reg_targets = dataset[target_reg].values
tree_regressor.fit(reg_inputs, reg_targets)

In [None]:
# dtreeviz wrapper for the regressor; unlike the classifier wrapper, no
# class_names are passed.
reg_X_train = dataset[features_reg]
reg_y_train = dataset[target_reg]
viz_rmodel = dtreeviz.model(
    model=tree_regressor,
    X_train=reg_X_train,
    y_train=reg_y_train,
    feature_names=features_reg,
    target_name=target_reg,
)

In [None]:
# Default view of the regression tree.
viz_rmodel.view()

In [None]:
# Regression tree with orientation="LR" (left-to-right layout).
viz_rmodel.view(orientation="LR")

In [None]:
# Regression tree with the detailed ("fancy") view switched off.
viz_rmodel.view(fancy=False)

In [None]:
# Restrict the drawing to the depth range (0, 1).
viz_rmodel.view(depth_range_to_display=(0, 1))

In [None]:
# Depth range widened to (0, 2).
viz_rmodel.view(depth_range_to_display=(0, 2))

In [None]:
# Depth range widened to (0, 3); the regressor was built with max_depth=3.
viz_rmodel.view(depth_range_to_display=(0, 3))

In [None]:
# Rebind x to the row at positional index 10 using the regression feature columns
# (this replaces the x selected earlier from the classification features).
x = dataset[features_reg].iloc[10]
x

In [None]:
# Draw the regression tree for the specific row x.
viz_rmodel.view(x = x)

In [None]:
# show_just_path=True asks for only the path followed by x in the regression tree.
viz_rmodel.view(show_just_path=True, x = x)

In [None]:
# Print the textual explanation of the prediction path for row x in the regression tree.
print(viz_rmodel.explain_prediction_path(x))

In [None]:
# Feature importance for the single row x under the regression tree.
viz_rmodel.instance_feature_importance(x, figsize=(3.5,2))

In [None]:
# Rebind c to the row at positional index 101 using the regression feature columns.
c = dataset[features_reg].iloc[101]
c

In [None]:
# Feature importance for the second regression sample c.
viz_rmodel.instance_feature_importance(c, figsize=(3.5,2))

In [None]:
# Draw the full regression tree for the second sample c.
viz_rmodel.view(x = c)

In [None]:
# Leaf information: leaf sizes of the regression tree.
viz_rmodel.leaf_sizes(figsize=(3.5,2))

In [None]:
# Leaf information: leaf distributions of the regression tree (default figure size).
viz_rmodel.rtree_leaf_distributions()

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Feature Space Partitioning</center></h1>

# Decision trees partition the feature space by splitting it into regions that maximize the purity of target values within each node. Visualizing this partitioning helps understand how the model makes decisions, but it becomes challenging with more than a few dimensions due to the complexity of representing higher-dimensional spaces visually

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Classification</center></h1>

In [None]:
# Classifier for the feature-space plots: same feature list, target, seed and
# max_depth as the classifier fitted earlier, under a separate name.
features = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
]
target = "Species"
random_state = 1234

iris_inputs = dataset[features].values
iris_labels = dataset[target].values
dtc_iris = DecisionTreeClassifier(max_depth=3, random_state=random_state)
dtc_iris.fit(iris_inputs, iris_labels)

In [None]:
import dtreeviz

# Rebind viz_model to a dtreeviz wrapper around dtc_iris; the feature-space
# cells that follow use this wrapper.
iris_class_names = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
viz_model = dtreeviz.model(
    dtc_iris,
    X_train=dataset[features],
    y_train=dataset[target],
    feature_names=features,
    target_name=target,
    class_names=iris_class_names,
)

# The following diagram indicates that the decision tree splits the petal width feature into three mostly-pure regions

In [None]:
# Feature-space plot for the single feature PetalWidthCm, with the tree's
# splits and a title requested via `show`.
viz_model.ctree_feature_space(show={'splits','title'}, features=['PetalWidthCm'],
                             figsize=(5,1))

In [None]:
# Same single-feature plot, this time with gtype='barstacked' and nbins=40.
viz_model.ctree_feature_space(nbins=40, gtype='barstacked', show={'splits','title'}, features=['PetalWidthCm'],
                             figsize=(5,1.5))

# Now let's look at how a decision tree partitions a two-dimensional feature space

In [None]:
# Two-feature version: PetalWidthCm together with PetalLengthCm.
viz_model.ctree_feature_space(show={'splits','title'}, features=['PetalWidthCm', 'PetalLengthCm'])

<a id="1"></a>
<h1 style='background:brown; border:0; color:white'><center>Regression</center></h1>

In [None]:
# Regressor for the feature-space plots: petal width predicted from the other
# measurements plus the encoded Species.
# NOTE(review): the name `dtr_cars` is a misnomer -- the model is fitted on the
# Iris frame. It is kept because the next cell refers to it by this name.
features_reg = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "Species",
]
target_reg = "PetalWidthCm"

dtr_cars = DecisionTreeRegressor(
    max_depth=3,
    random_state=random_state,
    criterion="absolute_error",
)
dtr_cars.fit(dataset[features_reg].values, dataset[target_reg].values)


In [None]:
# Rebind viz_rmodel to a dtreeviz wrapper around dtr_cars for the
# feature-space cells that follow.
space_X_train = dataset[features_reg]
space_y_train = dataset[target_reg]
viz_rmodel = dtreeviz.model(
    model=dtr_cars,
    X_train=space_X_train,
    y_train=space_y_train,
    feature_names=features_reg,
    target_name=target_reg,
)

# The following visualization illustrates how the decision tree breaks up the SepalWidthCm in order to get relatively pure PetalWidthCm target values.

In [None]:
# Single-feature regression feature space: SepalWidthCm against the PetalWidthCm target.
viz_rmodel.rtree_feature_space(features=['SepalWidthCm'])

# In order to visualize two-dimensional feature space, we can draw in three dimensions

In [None]:
# Three-dimensional feature-space plot for the pair SepalWidthCm / SepalLengthCm.
# elev and azim are passed to set the viewing angle -- presumably the usual
# elevation/azimuth of a 3D axes; confirm against the dtreeviz docs.
viz_rmodel.rtree_feature_space3D(features=['SepalWidthCm','SepalLengthCm'],
                                 fontsize=10,
                                 elev=30, azim=20,
                                 show={'splits', 'title'},
                                 colors={'tessellation_alpha': .5})

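# The next cell draws the same kind of three-dimensional plot for a different feature pair (SepalWidthCm and PetalLengthCm). Equivalently, the cell after it shows a heat map of that pair, as if we were looking at the three-dimensional plot from the top down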

In [None]:
# Three-dimensional feature-space plot again, now for the pair
# SepalWidthCm / PetalLengthCm, with the same display options as the previous 3D call.
viz_rmodel.rtree_feature_space3D(features=['SepalWidthCm','PetalLengthCm'],
                                 fontsize=10,
                                 elev=30, azim=20,
                                 show={'splits', 'title'},
                                 colors={'tessellation_alpha': .5})

In [None]:
# Two-feature call to rtree_feature_space for PetalLengthCm and SepalWidthCm.
# NOTE(review): presumably this is the top-down heat map mentioned in the notebook text -- confirm.
viz_rmodel.rtree_feature_space(features=['PetalLengthCm','SepalWidthCm'])