In [30]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from ucimlrepo import fetch_ucirepo

In [8]:
# Algorithm 1: DTEC algorithm
# Require: dataset X; number of clusters K (optional).
# Ensure: an unsupervised evidential decision tree T.
#
# Initialize the root node of decision tree T using dataset X;
# while there is an unevaluated single-cluster node do
#     Evaluate all possible cutting points at that node with the evidential silhouette metric using Eqs. (4)-(7);
#     Select the cutting point with the largest average silhouette value;
#     if the average silhouette value after splitting is larger than before then
#         Split this single-cluster node using Eqs. (8)-(10);
#         Determine the boundaries of the generated child nodes;
#         Use these boundaries to split the meta-cluster node that includes the above single cluster;
#     else
#         Go to the next node;
#     end if
# end while
# while K is available and the number of generated clusters is not equal to K do
#     if the number of generated clusters is larger than K then
#         Evaluate the quality of each single cluster with the evidential silhouette metric using Eq. (11);
#         Merge the cluster having the lowest quality with its nearest cluster;
#     else
#         Continue splitting at the leaf node that has the largest average evidential silhouette value after splitting;
#     end if
# end while

In [31]:

class DecisionNode:
    """A single node of a binary decision tree.

    A node whose ``class_label`` is not ``None`` answers with that label.
    Any other node evaluates ``decision_function`` on the instance and
    forwards the query to its left child (truthy result) or its right child
    (falsy result).
    """

    def __init__(self, left, right, mass_functions, decision_function, instance_indices, class_label=None):
        """Store the children, the routing function and the node payload.

        Args:
            left (DecisionNode) : child queried when the decision function is truthy
            right (DecisionNode) : child queried when the decision function is falsy
            mass_functions : mass-function data attached to this node (stored as given)
            decision_function (function) : callable applied to a feature vector
            instance_indices : indices of the instances attached to this node (stored as given)
            class_label (int) : optional label; when set, ``decide`` returns it directly
        """
        self.class_label = class_label
        self.decision_function = decision_function
        self.instance_indices = instance_indices
        self.mass_functions = mass_functions
        self.left, self.right = left, right

    def decide(self, feature):
        """Return the label reached by routing ``feature`` down from this node."""
        if self.class_label is not None:
            return self.class_label
        # Truthy decision -> left subtree, otherwise right subtree.
        chosen_child = self.left if self.decision_function(feature) else self.right
        return chosen_child.decide(feature)
        

# Pignistic probability: BetP(A) = sum over B of (|A ∩ B| / |B|) * m(B),
# where the sum runs over the sets B that contain A (A is a subset of B) and m(B) is the mass function of B.
# The pignistic probability is a measure of the belief in the proposition A given the evidence B.
class DecisonTree:
    """Decision tree model with evidential (mass-function based) split helpers.

    The constructor loads UCI repository dataset id 109 through
    ``fetch_ucirepo`` and keeps its features in ``self.X`` and its targets in
    ``self.y``. Every helper below that takes instance indices reads rows from
    ``self.X``.

    NOTE: the class name keeps its original spelling ("DecisonTree") so that
    existing callers keep working.
    """

    def __init__(self, max_depth=None):
        """Create a decision tree model.

        Args:
            max_depth (int) : maximum depth of the tree
        """
        self.max_depth = max_depth
        self.Dataset = fetch_ucirepo(id=109)
        # Features and targets are kept as NumPy arrays.
        self.X = np.array(self.Dataset.data.features)
        self.y = np.array(self.Dataset.data.targets)
        self.mass_functions = {}
        self.metadata = self.Dataset.metadata
        self.variables = self.Dataset.variables
        self.root = None

    def fit(self, X, y):
        """Build the decision tree model by fitting to the data.

        The result of ``_build_tree`` is stored in ``self.root``. Note that
        the split helpers of this class read ``self.X`` (set in ``__init__``),
        not the ``X`` passed here.

        Args:
            X (array-like) : feature vectors
            y (array-like) : class labels
        """
        self.root = self._build_tree(X, y, depth=0)

    def _build_tree(self, X, y, depth):
        """Recursively build the decision tree model.

        Args:
            X (array-like) : feature vectors
            y (array-like) : class labels
            depth (int) : current depth of the tree

        Returns:
            A leaf ``DecisionNode`` labelled with the majority class of ``y``
            once ``max_depth`` is reached, otherwise ``None``.
        """
        if self.max_depth is not None and depth >= self.max_depth:
            # DecisionNode requires all five structural arguments; a leaf
            # carries only a class label.
            return DecisionNode(
                left=None,
                right=None,
                mass_functions=None,
                decision_function=None,
                instance_indices=None,
                class_label=self._majority_class(y),
            )
        # TODO: the recursive splitting step is not implemented yet, so no
        # node is produced below the depth limit.
        return None

    def _majority_class(self, y):
        """Return the most frequent label in ``y`` (``None`` when ``y`` is empty).

        ``y`` is flattened first, so column-shaped label arrays are accepted.
        Ties are resolved in favour of the smallest label.
        """
        labels = np.ravel(y)
        if labels.size == 0:
            return None
        values, counts = np.unique(labels, return_counts=True)
        return values[np.argmax(counts)]

    def feature_distance(self, xi, xj):
        """Return the squared Euclidean distance between two feature vectors."""
        return np.sum((xi - xj) ** 2)

    def pignistic_probability_unit(self, A, B):
        """Return the pignistic coefficient ``|A ∩ B| / |B|``.

        Args:
            A (array-like) : first collection of hashable items
            B (array-like) : second collection of hashable items (the evidence)

        Returns:
            The fraction of distinct common items over ``len(B)``, or ``0.0``
            when ``B`` is empty (an empty set shares nothing with ``A``).
        """
        if len(B) == 0:
            return 0.0
        return len(set(A) & set(B)) / len(B)

    def cutting_points(self, feature):
        """Return the sorted distinct values of column ``feature`` of ``self.X``."""
        return np.unique(self.X[:, feature])

    def cut_feature(self, instance_indices, feature, cutting_point):
        """Split instances on one feature and return the two index arrays.

        Args:
            instance_indices (array-like) : row indices of ``self.X`` to split
            feature (int) : column of ``self.X`` used for the split
            cutting_point (float) : threshold value

        Returns:
            ``(L, R)``: sorted, de-duplicated indices with feature value
            ``<= cutting_point`` and ``> cutting_point`` respectively.
            Instances lying exactly on the cutting point go to the left child,
            so that no instance is lost by the split.
        """
        idx = np.unique(np.asarray(instance_indices, dtype=int))
        goes_left = self.X[idx, feature] <= cutting_point
        return idx[goes_left], idx[~goes_left]

    def _weighted_center(self, members, parent_mass):
        """Return the ``parent_mass``-weighted mean of the rows ``members`` of ``self.X``."""
        weights = np.array([parent_mass[i] for i in members], dtype=float)
        points = self.X[np.asarray(members, dtype=int)]
        return (points * weights[:, None]).sum(axis=0) / weights.sum()

    def calculate_centers(self, l, r, parent_mass):
        """Calculate the centers of the child nodes.

        Args:
            l (array-like) : instance indices of the left child
            r (array-like) : instance indices of the right child
            parent_mass : per-instance weights, looked up by instance index

        Returns:
            ``(c_l, c_r)``: the weighted mean feature vector of each child.
            A child with zero total weight (e.g. no instances) yields NaNs.
        """
        return self._weighted_center(l, parent_mass), self._weighted_center(r, parent_mass)

    def calculate_mass_functions(self, instance_indices, cutting_point, c_l, c_r, gamma):
        """Calculate the mass function for the child nodes.

        For every instance three normalised squared distances are computed:
        to ``c_l``, to ``c_r`` and to ``cutting_point``. They are then rescaled
        so that the three masses of an instance sum to one.

        NOTE(review): each mass grows with the distance to its own reference
        point. Confirm against Eqs. (8)-(10) that this is intended and that
        the masses should not decrease with distance instead.

        Args:
            instance_indices (array-like) : row indices of ``self.X``
            cutting_point : scalar or vector broadcastable against a feature row
            c_l, c_r : centers of the left and right child
            gamma (float) : scales the distance-to-cutting-point term

        Returns:
            ``(m_l, m_r, m_m)``: arrays ordered like ``instance_indices``.
        """
        points = self.X[np.asarray(instance_indices, dtype=int)]

        def sq_dist_to(reference):
            return np.sum((points - reference) ** 2, axis=1)

        d_l = sq_dist_to(c_l) / self.feature_distance(c_l, cutting_point)
        d_r = sq_dist_to(c_r) / self.feature_distance(c_r, cutting_point)
        d_m = sq_dist_to(cutting_point) / (self.feature_distance(c_l, c_r) / gamma)

        total = d_l + d_r + d_m
        return d_l / total, d_r / total, d_m / total

    def assign_clusters(self, instance_indices, m_l, m_r, m_m):
        """Assign instances to clusters based on the mass functions.

        Each instance goes to the cluster with the largest mass (ties resolve
        as left, then right, then meta). ``instance_indices`` may be a list or
        an array; the masses must be ordered like ``instance_indices``.

        Returns:
            ``(l, r, m)``: arrays of the instance indices in each cluster.
        """
        indices = np.asarray(instance_indices)
        winner = np.argmax(np.vstack((m_l, m_r, m_m)), axis=0)
        l, r, m = (indices[winner == k] for k in range(3))
        return l, r, m

    def _mean_weighted_distances(self, members, weights):
        """Return, per member, the weighted mean squared distance to all members.

        ``weights`` holds one weight per entry of ``members``. The result is
        NaN where the total weight is zero.
        """
        points = self.X[members]
        deltas = points[:, None, :] - points[None, :, :]
        sq_dist = np.sum(deltas ** 2, axis=-1)
        with np.errstate(divide="ignore", invalid="ignore"):
            return (sq_dist @ weights) / np.sum(weights)

    def calculate_silhouette(self, l, r, m, m_l, m_r, m_m):
        """Return the mass-weighted average silhouette of a three-way split.

        For each cluster, ``a`` is the mean squared distance between members
        weighted by the cluster's own mass, and ``b`` is the same mean weighted
        by the cross-cluster masses. Each instance scores
        ``(b - a) / max(a, b)``. The scores are averaged with, as weight, the
        largest of the three combined masses of that instance.

        The masses are looked up by instance index (``m_l[j]`` for ``j`` in
        ``l``), so they must be indexable by the values stored in ``l``, ``r``
        and ``m``.

        NOTE(review): ``calculate_mass_functions`` returns arrays ordered like
        its ``instance_indices`` argument, which matches this lookup only when
        those indices are ``0..n-1`` - confirm for non-root nodes.

        NOTE(review): when ``l``, ``r`` and ``m`` are disjoint (as produced by
        ``assign_clusters``) the cross-cluster coefficients are 0, every ``b``
        is 0/0 and the result is NaN. The coefficients probably should be
        computed on the focal sets of the frame of discernment rather than on
        instance index sets - confirm against Eqs. (4)-(7).

        Args:
            l, r, m (array-like) : instance indices of the left, right and meta cluster
            m_l, m_r, m_m (array-like) : masses, indexed by instance index

        Returns:
            The weighted average silhouette (NaN when it is undefined).
        """
        l, r, m = (np.asarray(c, dtype=int) for c in (l, r, m))
        m_l, m_r, m_m = (np.asarray(v, dtype=float) for v in (m_l, m_r, m_m))

        unit = self.pignistic_probability_unit
        p_ll, p_rr, p_mm = unit(l, l), unit(r, r), unit(m, m)
        p_lr, p_lm, p_rm = unit(l, r), unit(l, m), unit(r, m)

        # Per-instance weight of the own cluster and of the two other clusters.
        own_l, other_l = p_ll * m_l, p_lr * m_r + p_lm * m_m
        own_r, other_r = p_rr * m_r, p_lr * m_l + p_rm * m_m
        own_m, other_m = p_mm * m_m, p_lm * m_l + p_rm * m_r

        scores = []
        for members, own, other in ((l, own_l, other_l), (r, own_r, other_r), (m, own_m, other_m)):
            a = self._mean_weighted_distances(members, own[members])
            b = self._mean_weighted_distances(members, other[members])
            with np.errstate(divide="ignore", invalid="ignore"):
                scores.append((b - a) / np.maximum(a, b))
        combined_es = np.concatenate(scores)

        # Look the averaging weights up by instance index, in the same order
        # as the scores (left, right, meta), not by position in the score array.
        order = np.concatenate((l, r, m))
        weights = np.maximum.reduce([own_l + other_l, own_r + other_r, own_m + other_m])[order]

        with np.errstate(divide="ignore", invalid="ignore"):
            return np.sum(weights * combined_es) / np.sum(weights)

        

In [22]:
# Fetch UCI repository dataset id 109 and keep its feature table as a NumPy array.
Dataset = fetch_ucirepo(id=109)
X = np.array(Dataset.data.features)
# Show the matrix followed by its shape, one per line.
print(X, X.shape, sep="\n")

[[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]]
(178, 13)


In [23]:
# Show the sorted distinct values found in the first feature column of X.
print(np.unique(X[:, 0]))

[11.03 11.41 11.45 11.46 11.56 11.61 11.62 11.64 11.65 11.66 11.76 11.79
 11.81 11.82 11.84 11.87 11.96 12.   12.04 12.07 12.08 12.16 12.17 12.2
 12.21 12.22 12.25 12.29 12.33 12.34 12.36 12.37 12.42 12.43 12.45 12.47
 12.51 12.52 12.53 12.58 12.6  12.64 12.67 12.69 12.7  12.72 12.77 12.79
 12.81 12.82 12.84 12.85 12.86 12.87 12.88 12.93 12.96 12.99 13.03 13.05
 13.07 13.08 13.11 13.16 13.17 13.2  13.23 13.24 13.27 13.28 13.29 13.3
 13.32 13.34 13.36 13.39 13.4  13.41 13.45 13.48 13.49 13.5  13.51 13.52
 13.56 13.58 13.62 13.63 13.64 13.67 13.68 13.69 13.71 13.72 13.73 13.74
 13.75 13.76 13.77 13.78 13.82 13.83 13.84 13.86 13.87 13.88 13.9  13.94
 14.02 14.06 14.1  14.12 14.13 14.16 14.19 14.2  14.21 14.22 14.23 14.3
 14.34 14.37 14.38 14.39 14.75 14.83]


In [27]:
# Row indices whose first feature is below 15.67, then the first sample itself.
print(np.flatnonzero(X[:, 0] < 15.67))
print(X[0])

[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177]
[1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00 3.060e+00
 2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03]
