In [4]:
from ucimlrepo import fetch_ucirepo 
  
# Download the UCI "Wine" dataset (repository id 109).
wine = fetch_ucirepo(id=109)

# Features and targets (as pandas dataframes).
X, y = wine.data.features, wine.data.targets

# Dump the dataset metadata, then the per-variable information, one per line.
print(wine.metadata, wine.variables, sep="\n")

{'uci_id': 109, 'name': 'Wine', 'repository_url': 'https://archive.ics.uci.edu/dataset/109/wine', 'data_url': 'https://archive.ics.uci.edu/static/public/109/data.csv', 'abstract': 'Using chemical analysis to determine the origin of wines', 'area': 'Physics and Chemistry', 'tasks': ['Classification'], 'characteristics': ['Tabular'], 'num_instances': 178, 'num_features': 13, 'feature_types': ['Integer', 'Real'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1992, 'last_updated': 'Mon Aug 28 2023', 'dataset_doi': '10.24432/C5PC7J', 'creators': ['Stefan Aeberhard', 'M. Forina'], 'intro_paper': {'title': 'Comparative analysis of statistical pattern recognition methods in high dimensional settings', 'authors': 'S. Aeberhard, D. Coomans, O. Vel', 'published_in': 'Pattern Recognition', 'year': 1994, 'url': 'https://www.semanticscholar.org/paper/83dc3e4030d7b9fbdbb4bde03ce12ab70ca10528', 'do

In [8]:
# Algorithm 1: DTEC algorithm
# Require: Dataset X, number of clusters K (optional).
# Ensure: An unsupervised evidential decision tree T.
# Initialize the root node of decision tree T using dataset X;
# while there is an unevaluated single-cluster node do
#     Evaluate all possible cutting points at that node with the evidential silhouette metric using Eqs. (4)-(7);
#     Select the cutting point with the largest average silhouette value;
#     if the average silhouette value after splitting is larger than before then
#         Split this single-cluster node using Eqs. (8)-(10);
#         Determine the boundaries of the generated child nodes;
#         Use these boundaries to split the meta-cluster node that includes the above single cluster;
#     else
#         Go to the next node;
#     end if
# end while
# while K is given and the number of generated clusters is not equal to K do
#     if the number of generated clusters is larger than K then
#         Evaluate the quality of each single cluster with the evidential silhouette metric using Eq. (11);
#         Merge the lowest-quality cluster with its nearest cluster;
#     else
#         Continue splitting at the leaf node that has the largest average evidential silhouette value after splitting;
#     end if
# end while

In [11]:
# Initialize the root node of decision tree T using dataset X;
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import itertools



In [None]:
# Initializing root node of decision tree T using dataset X

class DecisionNode:
    """A single node of a binary decision tree.

    A node with a ``class_label`` answers with that label directly; any other
    node uses its ``decision_function`` to pick which child answers.
    """

    def __init__(self, left, right, decision_function, class_label=None):
        """Store the two children, the routing function and the optional label.

        Args:
            left (DecisionNode) : child consulted when the decision function is truthy
            right (DecisionNode) : child consulted when the decision function is falsy
            decision_function (function) : callable applied to a feature vector
            class_label (int) : optional label returned directly by this node
        """
        self.left, self.right = left, right
        self.decision_function = decision_function
        self.class_label = class_label

    def decide(self, feature):
        """Return the class label this (sub)tree assigns to ``feature``."""
        label = self.class_label
        if label is not None:
            # A labelled node answers immediately; its children are never consulted.
            return label

        # Route to exactly one child and let it produce the answer.
        branch = self.left if self.decision_function(feature) else self.right
        return branch.decide(feature)
        

# Pignistic probability: BetP(A) = sum over B of (|A ∩ B| / |B|) * m(B),
# where the sum runs over the non-empty focal sets B and m(B) is the mass assigned to B.
# It converts the masses assigned to sets of outcomes into a probability for the proposition A.
class DecisonTree:
    """Decision tree model for classification (work in progress).

    Only the depth-limit stopping rule of the tree builder is implemented:
    the search for a split is still missing, so ``fit`` produces a usable
    tree only when the depth limit is already reached at the root
    (``max_depth <= 0``).  The class also carries two helpers,
    ``pignistic_probability`` and ``lenSet``.
    """

    # Placeholder: no value has been chosen for this attribute yet.
    # TODO: decide what it should hold, or remove it.
    Dataset = None

    def __init__(self, max_depth=None):
        """Create a decision tree model.

        Args:
            max_depth (int) : maximum depth of the tree; ``None`` means no limit
        """
        self.max_depth = max_depth
        # Defined up front so reading ``root`` before ``fit`` does not raise.
        self.root = None

    def fit(self, X, y):
        """Build the decision tree model by fitting to the data.

        The result is stored in ``self.root``.  It is ``None`` whenever
        ``_build_tree`` could not produce a node (see its docstring).

        Args:
            X (array-like) : feature vectors
            y (array-like) : class labels
        """
        self.root = self._build_tree(X, y, depth=0)

    def _build_tree(self, X, y, depth):
        """Recursively build the decision tree model.

        Args:
            X (array-like) : feature vectors
            y (array-like) : class labels
            depth (int) : current depth of the tree

        Returns:
            A leaf ``DecisionNode`` labelled with the majority class of ``y``
            when the depth limit is reached, otherwise ``None``.
        """
        if self.max_depth is not None and depth >= self.max_depth:
            return DecisionNode(None, None, None, class_label=self._majority_class(y))

        # TODO: choose a split and recurse into the two children.  Until that
        # exists there is nothing to return below the depth limit.
        return None

    def _majority_class(self, y):
        """Return the most frequent label in ``y``.

        Labels with equal counts resolve to the one encountered first.

        Args:
            y (array-like) : class labels; pandas and numpy containers are flattened

        Raises:
            ValueError: if ``y`` contains no labels.
        """
        from collections import Counter

        labels = y
        if hasattr(labels, "to_numpy"):
            # pandas objects: iterating a DataFrame yields column names, not values.
            labels = labels.to_numpy()
        if hasattr(labels, "ravel"):
            # numpy arrays: flatten (n, 1) columns into plain hashable scalars.
            labels = labels.ravel().tolist()

        ranked = Counter(labels).most_common(1)
        if not ranked:
            raise ValueError("cannot determine the majority class of an empty label set")
        return ranked[0][0]

    def pignistic_probability(self, A, B, m):
        """Calculate the pignistic probability of A.

        Computes ``sum(|A ∩ F| / |F| * m[F])`` over every focal set ``F`` in ``B``.

        Args:
            A (array-like) : proposition, an iterable of hashable outcomes
            B (array-like) : iterable of focal sets; each must be sized,
                iterable and usable as a key of ``m`` (e.g. ``frozenset``)
            m (mapping) : mass assigned to each focal set of ``B``

        Returns:
            The accumulated probability; ``0`` when ``B`` has no focal sets.
        """
        proposition = set(A)
        return sum(
            len(proposition & set(focal)) / len(focal) * m[focal]
            for focal in B
            # An empty focal set would divide by zero; it is skipped.
            # NOTE(review): the result is not renormalised by 1 - m(empty set).
            if len(focal) > 0
        )

    def lenSet(self, feature, l, r):
        """Count the values of ``feature`` that lie in the closed range [l, r].

        Args:
            feature (iterable) : values to test
            l : lower bound (inclusive)
            r : upper bound (inclusive)
        """
        return sum(1 for value in feature if l <= value <= r)