In [1]:
import pandas as pd
import numpy as np 
import time
from collections import Counter
import re
import math
from gensim.models import KeyedVectors
from pprint import pprint


MAX_LEAFS = 256



class Tree(object):
    
    def __init__(self, tree_hierarchy, clusters_predict=[], mode="sklearn", sentences_all_classes=None, true_classes_all=None):
        """
        @param mode : Two possible values
            - "FBE" : Tree object for Feedback Explorer output
            - "sklearn" : Tree object for scikit learn output
        
        @param sentences_all_classes : List of all possible classes occuring in the sentences file (only for mode FBE)
        @param true_labels_all : All occuring true labels (mesh codes) of the documents/abstracts
        """
        self.tree = None
        if mode in ["sklearn", "FBE"]:
            self.mode = mode
        else:
            raise ValueError("Provided mode '{}' is not supported".format(mode))
        self.tree_hierarchy = tree_hierarchy # pandas dataframe with tree structure coming from hierarchical clustering
        self.n_nodes = 0 # updated by calling self.count_nodes()
        self.n_leafs = 0 # updated by calling self.count_leafs()
        self.temp_n_leafs = 1 # In mode 'FBE' helps to construct the tree with the right number of nodes
        self.clusters_predict = clusters_predict # predicted cluster for each document
        self.unique_cluster_predict = list(set(clusters_predict)) # list of all classes to calculate performance metrices
        self.leaf_nodes = [] # list of all leaf nodes
        self.sentences_all_classes = sentences_all_classes # List of all classes occuring in sentences (phrases.parquet)
        self.true_classes_documents = true_classes_all.values.tolist() # list of true labels (mesh codes) in the abstracts
        self.true_classes_documents_unique = list(set(true_classes_all)) # all possible occuring true labels (mesh codes) in the abstracts
        self.precision_all_nodes = [] # macro
        self.precision_all_nodes_weighted = []
        self.precision_all_nodes_weights = 0
        self.precision_macro = None 
        self.precision_micro = None
        self.recall_all_classes = []
        self.recall_all_classes_weighted = []
        self.recall_macro = None
        self.recall_micro = None
        self.F1_macro = None
        self.F1_micro = None
        self.maxDepth = 0
        self.temp_max_occ_class_in_cluster = 0
        self.temp_max_doc_perClass_inCluster = 0
        
        
    def _build_tree(self, node, current_depth=None):
        if self.mode == "sklearn":
            if node.node_id in self.tree_hierarchy["node_id"].values: # if node not leaf
                treeChildren = self.tree_hierarchy[self.tree_hierarchy["node_id"] == node.node_id]
                node.add_child(Node(Id=treeChildren["left"].values[0], depth=node.depth + 1, parent=node))
                node.add_child(Node(Id=treeChildren["right"].values[0], depth=node.depth + 1, parent=node))
                self._build_tree(node.children[0])
                self._build_tree(node.children[1])
            else:
                return node
            return node
            #print(node)
#            #print("\tcurrent_depth: {}; temp_nleafs: {}".format(current_depth, self.temp_n_leafs))
#            if node.depth == current_depth and self.temp_n_leafs < MAX_LEAFS:
#                if node.node_id in self.tree_hierarchy["node_id"].values: # if node not leaf
#                    treeChildren = self.tree_hierarchy[self.tree_hierarchy["node_id"] == node.node_id]
#                    self.temp_n_leafs -= 1 # lose one leaf because it is split into two new leafs
#                    node.add_child(Node(Id=treeChildren["left"].values[0], depth=node.depth + 1, parent=node))
#                    self.temp_n_leafs += 1 # creates new child
#                    node.add_child(Node(Id=treeChildren["right"].values[0], depth=node.depth + 1, parent=node))
#                    self.temp_n_leafs += 1 # creates new child
#            else:
#                if len(node.children) == 2 and self.temp_n_leafs < MAX_LEAFS:
#                    self._build_tree(node.children[0], current_depth)
#                    self._build_tree(node.children[1], current_depth)
#            return node
        elif self.mode == "FBE": 
            # Only create node if node is in current depth level
            if node.depth == current_depth and self.temp_n_leafs < MAX_LEAFS:
                treeChildren = self.tree_hierarchy.iloc[node.node_id].children
                #print("\t{}".format(node))
                #print("tree children:")
                #print(treeChildren)
                #print()
                # FBE tree is not a perfect binary tree, some nodes don't create children any more
                if len(treeChildren) > 0:
                    cluster_child_one = self.tree_hierarchy.iloc[treeChildren[0]].filterValue[0]
                    cluster_child_two = self.tree_hierarchy.iloc[treeChildren[1]].filterValue[0]
                #    print("c1: {}, c2: {}".format(cluster_child_one, cluster_child_two))
                    # Some nodes from nodes.json are empty: no sentences is going through them
                    # Only create node in tree when there is a sentence running through it
                    if cluster_child_one in self.sentences_all_classes:
                #        print("\c1 in class")
                        self.temp_n_leafs -= 1 # lose one leaf because it is split into two new leafs
                        node.add_child(Node(Id=treeChildren[0], depth=node.depth + 1, parent=node, cluster_label=cluster_child_one))
                        self.temp_n_leafs += 1
                        if cluster_child_two in sentences_all_classes:
                #            print("\tc1 and c2 in class")
                            node.add_child(Node(Id=treeChildren[1], depth=node.depth + 1, parent=node, cluster_label=cluster_child_two))
                            self.temp_n_leafs += 1
                    elif cluster_child_two in sentences_all_classes:
                #        print("\tc2 in class")
                        self.temp_n_leafs -= 1 # lose one leaf because it is split into two new leafs
                        node.add_child(Node(Id=treeChildren[1], depth=node.depth + 1, parent=node, cluster_label=cluster_child_two))
                        self.temp_n_leafs += 1          
                #    else:
                #        print("\tno class for c1 and c2")
            else:
                if len(node.children) == 1 and self.temp_n_leafs < MAX_LEAFS:
                #    print("one child deeper: {}".format(node.children[0]))
                    self._build_tree(node.children[0], current_depth)
                elif len(node.children) == 2 and self.temp_n_leafs < MAX_LEAFS:
                #    print("two childs deeper:")
                #    print(node.children[0])
                #    print(node.children[1])
                    self._build_tree(node.children[0], current_depth)
                    self._build_tree(node.children[1], current_depth)
                #else:
                #    print("no deeper")
            return node

    def _update_leaf_to_root(self, node, abstract_id, class_predict):
        """ Updates node and all its ancestors up to the root with the abstract's id and the predicted class"""
        node.update_node(abstract_id, class_predict)
        if node.parent != None: # Root has no parent
            self._update_leaf_to_root(node.parent, abstract_id, class_predict)
    

    def set_build_tree(self,node):
        """ Builds the tree and sets the variable tree."""  

        # tree with MAX_LEAFS leafs is constructed. 
        # For sklearn add to each leaf its cluster label based on the children in the tree object from sklearn AgglomerativeClustering
        self.leaf_nodes = []
        if self.mode == "sklearn":
            tree = self._build_tree(node) # construct whole tree
            tree = self._get_cluster_labels_for_leafs(tree) # get labels for leafs
            tree = self._cut_nodes_from_leafs(tree) # cut nodes from bottom of the tree until only leafs with a unique cluster_label exist (Number leaves = MAX_LEAFS)
        elif self.mode == "FBE":
            self.temp_n_leafs = 1
            self.maxDepth = 0
            self._get_maxDepth(0, 0)
            depth = 0
            #print("maxDepth: {}".format(self.maxDepth))
            # build tree by level: create first all children for level 1, then level 2... 
            # Prevents that a tree creates children just in one branch and always goes deeper in case of a max number of leavese
            while self.temp_n_leafs < MAX_LEAFS and depth <= self.maxDepth:
            #    print("\n\ndepth: {}, temp_n_leafs: {}".format(depth, self.temp_n_leafs))
                tree = self._build_tree(node, depth)
                depth += 1

        assert isinstance(tree, Node)
        self.tree = tree
        print("Count nodes: {}; leafs: {}".format(self.count_nodes(), self.count_leafs()))


    def _get_maxDepth(self, i, depth):
        """ get max depth of tree"""
        if depth > self.maxDepth:
            self.maxDepth = depth        
        node = self.tree_hierarchy.iloc[i]
        if len(node.children) == 1:
            self._get_maxDepth(node.children[0], depth+1)
        elif len(node.children) == 2:
            self._get_maxDepth(node.children[0], depth+1)
            self._get_maxDepth(node.children[1], depth+1)


        
    def _get_cluster_labels_for_leafs(self, node):
        """ 
            Get's the cluster labels for each leafs using the cluster labels assigned by
            the output of the sklearn agglomerative clustering algorithm.
        """        
        if len(node.children) == 0: #leaf
            cluster_label = self.clusters_predict[node.node_id]
            node.set_clusterLabel(cluster_label)
        else: # no leaf
            self._get_cluster_labels_for_leafs(node.children[0])
            self._get_cluster_labels_for_leafs(node.children[1])
        return node
    
    def _cut_nodes_from_leafs(self, node):
        """ 
            self.mode == sklearn:
            Children of nodes, who are leafs and have the same cluster_label, are cut off
            and the parent node takes the cluster label of its children.
            This is done recursively until there are only leafs with unique cluster_labels 
            Number of leaves = MAX_LEAFS
        
            self.mode == FBE:
            Towards the bottom of the tree, it may happen that a node has only child, which has only one child, 
            and this child also has only one child, etc. Several nodes following of each other with only one child.
            In this case keep only the child C whose parent has two children and cut the child of C.
        """
        if self.mode == "sklearn":
            if len(node.children) > 0: 
                left_child = node.children[0]
                right_child = node.children[1]
                if left_child.cluster_label is None: # left child is not leaf 
                    self._cut_nodes_from_leafs(left_child)
                if right_child.cluster_label is None: # right child is not leaf 
                    self._cut_nodes_from_leafs(right_child)

                # should be updated now
                left_child = node.children[0]
                right_child = node.children[1]
                if left_child.cluster_label == right_child.cluster_label and left_child.cluster_label is not None:
                    node.children = []
                    node.cluster_label = left_child.cluster_label
                    return node
        elif self.mode == "FBE":
            if len(node.children) == 1: # node has only one child
                temp = node
                while len(temp.children) == 1: # check if several nodes following of each other have only one child two
                    temp = temp.children[0]
                if len(temp.children) == 2: # if at some point a node has two children, continue to search
                    self._cut_nodes_from_leafs(temp.children[0])    
                    self._cut_nodes_from_leafs(temp.children[1])
                else: # if we reached a leaf, cut the node's children
                    node.children = []
            elif len(node.children) == 2:
                self._cut_nodes_from_leafs(node.children[0])    
                self._cut_nodes_from_leafs(node.children[1])             
        return node    

    
        
    def fitTree(self, node, data):
        """ Updates all the nodes of the tree according to the clustering from bottom to top """

        assert isinstance(node, Node)
        if len(node.children) > 0: # no leaf
            for child in node.children:
                self.fitTree(child, data)
        else: # leaf
            if self.mode == "sklearn": 
                leaf_cluster_label = node.cluster_label
                abstract_hits = data[data["class_predict"] == leaf_cluster_label]
                for i, row in abstract_hits.iterrows():
                    leaf_abstract_id = row.name
                    leaf_abstract_class_true = row.mesh_ui_diab # true class 
                    self._update_leaf_to_root(node, leaf_abstract_id, leaf_abstract_class_true)
            elif self.mode == "FBE": # several documents per leaf
                leaf_cluster_label = node.cluster_label
                abstract_hits = data[data["uniqueCluster"] == leaf_cluster_label]
                for i, row in abstract_hits.iterrows():
                    leaf_abstract_id = row["id"]
                    leaf_abstract_class_true = row["mesh_ui_diab"]
                    self._update_leaf_to_root(node, leaf_abstract_id, leaf_abstract_class_true)
            else: 
                print("ERROR: mode should be one of ['sklearn', 'FBE']")
        return node
         
            
    def count_nodes(self, tree=None):
        self.n_nodes = 0

        def _walk_count_nodes(node):
            self.n_nodes += 1

            for child in node.children:
                _walk_count_nodes(child)   
                
        if tree == None:
            _walk_count_nodes(self.tree)
        else:
            _walk_count_nodes(tree)
        return self.n_nodes
    

                
    def count_leafs(self, tree=None):

        def _walk_count_leafs(node):
            if node.children == []:
                self.n_leafs += 1
                self.leaf_nodes.append(node)
            else:
                for child in node.children:
                    _walk_count_leafs(child)
        
        self.n_leafs = 0
        self.leaf_nodes = []
        if tree == None:
            _walk_count_leafs(self.tree)
        else:
            _walk_count_leafs(tree)
        return self.n_leafs
    
    
    def get_leaf_nodes(self):
        def _walk_leaf_nodes(node):
            if node.children == []:
                self.leaf_nodes.append(node)
            else:
                for child in node.children:
                    _walk_leaf_nodes(child)
        
        self.leaf_nodes = []
        _walk_leaf_nodes(self.tree)
        return self.leaf_nodes
    
    def _walk_precision(self, node):
        node_precision = node.get_precision()
        self.precision_all_nodes.append(node_precision)
        self.precision_all_nodes_weighted.append(node_precision * node.counts)
        self.precision_all_nodes_weights += node.counts
        for child in node.children:
            self._walk_precision(child)
            
    def get_precision(self):
        self.precision_all_nodes = []
        self.precision_all_nodes_weighted = []
        self.precision_all_nodes_weights = 0
        self._walk_precision(self.tree)
        self.precision_macro = np.mean(self.precision_all_nodes)
        self.precision_micro = np.sum(self.precision_all_nodes_weighted) / self.precision_all_nodes_weights
        return {"prec_macro" : self.precision_macro
                , "prec_micro" : self.precision_micro}

        
    def get_recall(self):
        
        self.recall_all_classes = []
        self.recall_all_classes_weighted = []
        def _walk_recall(node, c):
            """ Get cluster with max documents of class c in which class c is the majority class """
            class_counts = Counter(node.true_classes).most_common()
            majority_classes = [c for c, occ in class_counts  if occ == class_counts[0][1]] # there can be several majority classes in a node
            #majority_class = Counter(node.true_classes).most_common()[0][0]
            occ = node.true_classes.count(c)
            #print()
            #print(node)
            #print("\t{}".format(node.true_classes))
            #print("\tmajority_classe: {}, occ({}): {}".format(majority_classes, c, occ))
            if c in majority_classes and occ > self.temp_max_occ_class_in_cluster:
                self.temp_max_occ_class_in_cluster = occ
            #    print("\t updatetemp_max_occ_class_in_cluster: {}".format(self.temp_max_occ_class_in_cluster))
            #if (occ > self.temp_max_occ_class_in_cluster 
            #    and (c in majority_classes or node.children == [])
            #   ): # if we found a cluster with higher occ of documents for class c and the class c is the majority class in the cluster or leaf node
            #    self.temp_max_occ_class_in_cluster = occ
            #    print("\tupdatetemp_max_occ_class_in_cluster: {}".format(self.temp_max_occ_class_in_cluster))
            
            #if (occ > self.temp_max_occ_class_in_cluster and c in majority_classes):
            # self.temp_max_occ_class_in_cluster = occ
            #    print("\MAJ: tupdatetemp_max_occ_class_in_cluster: {}".format(self.temp_max_occ_class_in_cluster))
            #elif (occ > self.temp_max_occ_class_in_cluster and node.children == []):
            #    self.temp_max_occ_class_in_cluster = occ
            #    print("\tLEAF: updatetemp_max_occ_class_in_cluster: {}".format(self.temp_max_occ_class_in_cluster))
    
            for child in node.children:
                _walk_recall(child, c)
        
        weights_sum = 0
        for c in self.true_classes_documents_unique:
            N_c = self.true_classes_documents.count(c)
            #print("\nc: {}, N_c: {}".format(c, N_c))
            self.temp_max_occ_class_in_cluster = 0
#            _walk_recall(self.tree, c)
            # TODO: check if it is right!
            # # start with children; otherwise recalls for all classes will be highest in root
            _walk_recall(self.tree.children[0], c) 
            _walk_recall(self.tree.children[1], c)
            recall = self.temp_max_occ_class_in_cluster / N_c
            #print("c: {}, recall: {}".format(c, recall))

            self.recall_all_classes.append(recall) #len(self.unique_cluster_predict))
            self.recall_all_classes_weighted.append(recall * N_c)
            weights_sum += N_c
        self.recall_macro = np.mean(self.recall_all_classes)
        self.recall_micro = np.sum(self.recall_all_classes_weighted) / weights_sum
        return {"recall_macro" : self.recall_macro
                ,"recall_micro" : self.recall_micro}
    
    def get_F1(self):
        precision = self.get_precision()
        recall = self.get_recall()        
        
        self.F1_macro = 2*precision["prec_macro"]*recall["recall_macro"] / (precision["prec_macro"] + recall["recall_macro"])
        self.F1_micro = 2*precision["prec_micro"]*recall["recall_micro"] / (precision["prec_micro"] + recall["recall_micro"])
        return {"F1_macro":self.F1_macro
               ,"F1_micro":self.F1_micro}
    
    def F1_zhao(self):
        """ F1 score like in Evaluation of Hierarchical Clustering Algorithms forDocument Datasets from Zhao & Karypis """
        
        def _walk_F1_zhao(node, c, N_c):
            """ Calculates F1 Score for a given class c (N_c = total number of documents of class c) """
            class_count = node.true_classes.count(c)
            prec =  class_count / node.counts
            recall = class_count / N_c
            #print(node)
            #print("\t class_count: {}, self.count: {}, p: {:.2f}, r: {:.2f}".format(class_count, node.counts, prec, recall))
            if prec > 1e-10 or recall > 1e-10: # if prec or recall == 0 
                F1 = 2 * prec * recall / (prec+recall)
            else:
                F1 = 0
            
            if F1 > self.temp_max_doc_perClass_inCluster:
                self.temp_max_doc_perClass_inCluster = F1
                #print("\t\t UPDATE F1: {}!!! ".format(F1))

            for child in node.children:
                _walk_F1_zhao(child, c, N_c)        
        
        FScore_sum = 0
        for c in self.true_classes_documents_unique:
            N_c = self.true_classes_documents.count(c)
            N = len(self.true_classes_documents)
            #print("\nc: {}, N_c: {}, N: {}, N_c/N : {}".format(c, N_c, N, N_c/N))
            self.temp_max_doc_perClass_inCluster = 0
            _walk_F1_zhao(self.tree.children[0], c, N_c) 
            _walk_F1_zhao(self.tree.children[1], c, N_c)
            #print("Best F1: {}".format(self.temp_max_doc_perClass_inCluster))
            #print("Nc/N + F1 : {}".format((N_c / N ) * self.temp_max_doc_perClass_inCluster))
            FScore_sum += (N_c / N ) * self.temp_max_doc_perClass_inCluster
            
        return FScore_sum
    
    def get_isim(self, data):
        """ Internal similarity """
        
        I_sum = 0 
        def _walk_isim(node):
            
            print("Node: {}".format(node))
            print("abstracts: {}".node.abstracts)
            for child in node.children:
                _walk_isim(child)
            

    
    def get_performances(self):
        precision = self.get_precision()
        recall = self.get_recall()
        F1 = self.get_F1()
        return({
            "prec_micro" : precision["prec_micro"]
            ,"prec_macro" : precision["prec_macro"]            
            ,"recall_micro" : recall["recall_micro"]
            ,"recall_macro" : recall["recall_macro"]
            ,"F1_micro" : F1["F1_micro"]
            ,"F1_macro" : F1["F1_macro"]
            ,"F1_zhao" : self.F1_zhao()
        })
 


class Node(object):
    "Generic tree node."
    def __init__(self, Id, depth, parent=None, cluster_label=None, children=[]):
        self.node_id = Id
        self.parent = parent
        self.children = []
        self.depth = depth
        self.cluster_label = cluster_label # In case FBE: this is the filterValue in the leafs
        self.abstracts = [] # PMID's of abstracts 
        self.true_classes = [] # True classes for each abstract
        self.counts = 0
        self.recall = None
        self.precision = None 
        self.F1 = None
        if children is not None:
            for child in children:
                self.add_child(child)
                
    def __repr__(self):
        return "Node id: {} (depth: {}, cluster_label: {}, children: {})".format(self.node_id
                                                                                 , self.depth
                                                                                 , self.cluster_label
                                                                                , [child.node_id for child in self.children])
    
    def add_child(self, node):
        assert isinstance(node, Node)
        self.children.append(node)
        
    def set_clusterLabel(self, clusterLabel):
        self.cluster_label = clusterLabel
        
    def pretty_print(self, depth=0):
        
        if self.depth == depth: 
            print("Node: {}, Parent: {} (Depth: {}, counts: {}, cluster_label: {}) | Children: {}".format(self.node_id, self.parent, self.depth, self.counts, self.cluster_label, self.children))
            print("\tAbstracts: {}".format(Counter(self.abstracts)))
            print("\ttrue_classes: {}".format(Counter(self.true_classes)))
        else:
            for child in self.children:
                child.pretty_print(depth)
            
            
    def update_node(self, abstract_id, true_class):
        """ Updates the abstracts and its true class label running through this node """
        self.abstracts.append(abstract_id)
        self.true_classes.append(true_class)
        self.counts += 1
        
        
    def get_precision(self):
        count = Counter(self.true_classes)
        mostFrequent = max(self.true_classes, key=count.get)
        prec = self.true_classes.count(mostFrequent) / self.counts
        return prec

    def count_class_occurrence(self, c):
        return self.true_classes.count(c)
    

#

In [2]:
data = pd.read_parquet("/home/adrian/workspace/Hierarchical-Clustering-Active-Learning-Text/outputs_03082020/diabetes_abstracts_HC_output.parquet")
data.index = data.index.get_level_values("PMID")
data = data.reset_index()
data["PMID"] = pd.to_numeric(data["PMID"])

#data = pd.read_parquet("/home/adrian/workspace/Hierarchical-Clustering-Active-Learning-Text/diabetes_abstracts_HC_output_10Examples.parquet")
#data = pd.read_parquet("/home/adrian/workspace/Hierarchical-Clustering-Active-Learning-Text/diabetes_abstracts_HC_output_30Examples.parquet")

HC_tree = pd.read_parquet('/home/adrian/workspace/Hierarchical-Clustering-Active-Learning-Text/outputs_03082020/diabetes_abstracts_tree_output.parquet')
#HC_tree = pd.read_parquet('/home/adrian/workspace/Hierarchical-Clustering-Active-Learning-Text/diabetes_abstracts_tree_output_10Examples.parquet')
#HC_tree = pd.read_parquet('/home/adrian/workspace/Hierarchical-Clustering-Active-Learning-Text/diabetes_abstracts_tree_output_30Examples.parquet')

## TEST TREE
#data = pd.DataFrame({"PMID": [0, 1, 2, 3, 4, 5]
#                    , "class_predict": [3, 0, 0, 0, 1, 2]}
#                   , columns=["PMID", "class_predict"]).set_index("PMID")

#HC_tree = pd.DataFrame({"node_id":[6, 7, 8, 9, 10]
#                    , "left" : [1, 2, 0, 5, 8]
#                    , "right" :[3, 6, 4, 7, 9]}
#                   , columns=["node_id", "left", "right"])

print("Tree nodes: {}".format(HC_tree.shape))
print(HC_tree.head())
print("data size: {}".format(data.shape))
print(list(set(data["class_predict"])))
#print(list(set(data["mesh_ui_diab"])))
#print(data["mesh_ui_diab"])

#df_vec = (data.title + " " + data.abstract).map(lambda abstract: avg_feature_vector(abstract))
#print(type(df_vec))
#df_vec = np.stack(df_vec.values, axis = 0)
#print(df_vec.shape)
#print(type(df_vec))

data.head(2)



Tree nodes: (55910, 3)
   node_id   left  right
0    55911  12234  21376
1    55912   2561  48188
2    55913   1107  14060
3    55914  37499  49945
4    55915  20246  51857
data size: (55911, 10)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182

Unnamed: 0,PMID,title,abstract,date,mesh_ui,mesh_mh,mesh_ui_diab,mesh_mh_diab,title_abstract_prep,class_predict
0,28800712,Outcomes Achieved With Use of a Prefabricated ...,BACKGROUND\nThe total contact cast (TCC) is co...,2017-10,"D000328,D000367,D000368,D000369,D002370,D01533...","Adult,Age Factors,Aged,Aged, 80 and over,Casts...",D017719,Diabetic Foot,outcomes achieved with use of a prefabricated ...,34
1,6989594,Investigation of insulin sensitivity in early ...,Twenty-three normal weight subjects without an...,1980-01,"D001786,D001835,D005230,D005951,D006801,D00732...","Blood Glucose,Body Weight,Fatty Acids, Noneste...",D011236,Prediabetic State,investigation of insulin sensitivity in early ...,34


In [94]:
# ADD vectors to abstracts

#model = KeyedVectors.load_word2vec_format("/home/adrian/PhD/Data/Word2Vec/BioASQvectors2018/pubmed2018_w2v_200D/pubmed2018_w2v_200D.bin", binary=True)
#index2word_set = set(model.wv.index2word)
# clean for BioASQ
#bioclean = lambda t: re.sub('[.,?;*!%^&_+():-\[\]{}]', '', t.replace('"', '').replace('/', '').replace('\\', '').replace("'",'').strip().lower()).split()

def hasNumbers(inputString):
    return any(char.isdigit() for char in inputString)

def avg_feature_vector(sentence, model=model, num_features=200, index2word_set=index2word_set):
    #words = sentence.split()
    try:
        words = bioclean(sentence)
    except:
        print("bioclean did not work for: {}".format(sentence))
        print(type(sentence))
        print(math.isnan(sentence))
    feature_vec = np.zeros((num_features, ), dtype='float32')
    n_words = 0
    for word in words:
        if word in index2word_set:
            n_words += 1
            feature_vec = np.add(feature_vec, model[word])
        else:
            if hasNumbers(word):
                print("word not in vocabulary: {}".format(word))
    if (n_words > 0):
        feature_vec = np.divide(feature_vec, n_words)
    return feature_vec

#data["vec"] = (data.title + " " + data.abstract).map(lambda abstract: avg_feature_vector(abstract))
#data.head()

In [3]:
# initialise
#MAX_LEAFS=8
treeClass = Tree(HC_tree, data["class_predict"], mode="sklearn", true_classes_all=data["mesh_ui_diab"])

# define root node
root = Node(Id=HC_tree["node_id"].max() # In scikit learn, the root node is the one with maximum node id
          , depth=0
          , parent=None
          , children=[])

# build tree
treeClass.set_build_tree(root)

print("N nodes: {}".format(treeClass.count_nodes()))
print("N leafs: {}".format(treeClass.count_leafs()))

treeClass.leaf_nodes

Count nodes: 511; leafs: 256
N nodes: 511
N leafs: 256


[Node id: 99614 (depth: 1, cluster_label: 132, children: []),
 Node id: 9920 (depth: 3, cluster_label: 143, children: []),
 Node id: 90245 (depth: 3, cluster_label: 123, children: []),
 Node id: 111266 (depth: 3, cluster_label: 68, children: []),
 Node id: 42262 (depth: 4, cluster_label: 163, children: []),
 Node id: 110858 (depth: 6, cluster_label: 165, children: []),
 Node id: 598 (depth: 7, cluster_label: 146, children: []),
 Node id: 54801 (depth: 7, cluster_label: 126, children: []),
 Node id: 8842 (depth: 6, cluster_label: 223, children: []),
 Node id: 110904 (depth: 7, cluster_label: 148, children: []),
 Node id: 48600 (depth: 9, cluster_label: 184, children: []),
 Node id: 53994 (depth: 9, cluster_label: 177, children: []),
 Node id: 5257 (depth: 11, cluster_label: 135, children: []),
 Node id: 110592 (depth: 12, cluster_label: 218, children: []),
 Node id: 23650 (depth: 13, cluster_label: 200, children: []),
 Node id: 39586 (depth: 13, cluster_label: 149, children: []),
 Node 

In [4]:
# fit tree with abstracts 
tree_fit = treeClass.fitTree(treeClass.tree, data)

In [5]:
#treeClass.tree.pretty_print(depth=8)

In [6]:
#print(treeClass.get_precision())
#print(treeClass.get_recall())
#print(treeClass.get_F1())
pprint(treeClass.get_performances())
print("F1 score zhao: {}".format(treeClass.F1_zhao()))


{'F1_macro': 0.4441254035533422,
 'F1_micro': 0.19230689421131783,
 'F1_zhao': 0.23525969824232465,
 'prec_macro': 0.5810909145747698,
 'prec_micro': 0.11243403384883459,
 'recall_macro': 0.3594108035763631,
 'recall_micro': 0.6640374881508111}
F1 score zhao: 0.23525969824232465


# load FeedbackExplorer output

In [7]:
import pyspark
#fbe_path = "/home/adrian/tmp/Test_FBE"
fbe_path = "/home/adrian/workspace/FBE output/maxTopwords_6_maxClasses1024_Nall"

spark = pyspark.sql.SparkSession.builder.getOrCreate()
sentences = spark.read.load(fbe_path+"/phrases/")
print(len(sentences.columns))
print(sentences.count())

df_short = sentences.select("id", "tokens", "index")
#df_short.printSchema()
df_short.show(2)

1027
55911
+--------+--------------------+--------------------+
|      id|              tokens|               index|
+--------+--------------------+--------------------+
|28800712|[outcomes,  , ach...|[191 -> [218 -> 2...|
| 6989594|[investigation,  ...|[107 -> [92 -> 92...|
+--------+--------------------+--------------------+
only showing top 2 rows



In [8]:
nodes = pd.read_json(fbe_path+"/nodes.json", orient="records")
print(nodes.shape)
nodes.head(5)

(512, 21)


Unnamed: 0,name,tagId,color,annotations,algo,strLinks,strClassPath,names,filterMode,filterValue,...,windowSize,classCenters,cError,childSplitSize,children,hits,metrics,rocCurve,externalClassesFreq,purity
0,In Scope,0.0,,"[{'tokens': ['aggregate'], 'tag': 1, 'from': N...",{'value': 'supervised'},{'0': [1]},{'1': [0]},{},{'value': 'allIn'},[0],...,0.0,,,,[1],55911,{},{},{},{}
1,Explorer,1.0,,"[{'tokens': ['diabetic'], 'tag': 2, 'from': No...",{'value': 'clustering'},"{'1': [2, 3]}","{'2': [0, 1], '3': [0, 1]}",{},{'value': 'anyIn'},[1],...,,"{'2': 0, '3': 1}","[NaN, NaN]",50.0,"[2, 187]",55911,{},{},{},{}
2,Explorer,,,"[{'tokens': ['patients'], 'tag': 4, 'from': No...",{'value': 'clustering'},"{'1': [4, 5]}","{'4': [0, 1, 2], '5': [0, 1, 2]}",{},{'value': 'anyIn'},[2],...,,"{'4': 0, '5': 1}","[NaN, NaN]",50.0,"[3, 88]",25654,{},{},{},{}
3,Explorer,,,"[{'tokens': ['extragenital'], 'tag': 32, 'from...",{'value': 'clustering'},"{'1': [32, 33]}","{'32': [0, 1, 2, 4], '33': [0, 1, 2, 4]}",{},{'value': 'anyIn'},[4],...,,"{'32': 0, '33': 1}","[NaN, NaN]",50.0,"[4, 29]",20825,{},{},{},{}
4,Explorer,,,"[{'tokens': ['modifable'], 'tag': 80, 'from': ...",{'value': 'clustering'},"{'1': [80, 81]}","{'80': [0, 1, 2, 32, 4], '81': [0, 1, 2, 32, 4]}",{},{'value': 'anyIn'},[32],...,,"{'80': 0, '81': 1}","[NaN, NaN]",50.0,"[5, 24]",1582,{},{},{},{}


In [9]:
# Get list with all possible classes in the sentences file
import pyspark
from pyspark.sql.functions import udf, col
from pyspark.sql.types import *


def get_list_all_possible_classes(sentences ):
    """ Get the list of all possible occuring classes in the sentences file """
    join_udf = udf(lambda x: ";".join(x))
    sentences_classes_udf = udf(lambda x: ";".join([str(v) for v in x.keys()]))

    sentences_transformed = sentences.select("id"
                                            , "tokens"
                                            , sentences_classes_udf('index').alias("all_classes")) \
                                    .withColumn("tokens", join_udf(col("tokens"))) 
                    
    sentences_pdf = sentences_transformed.toPandas()
    sentences_pdf["id"] = pd.to_numeric(sentences_pdf["id"])

    # add true class labels to sentences from data by merge/join 
    sentences_pdf["PMID"] = sentences_pdf["id"]
    sentences_pdf["PMID"] = pd.to_numeric(sentences_pdf["PMID"])
    meshDiab = data[["PMID", "mesh_ui_diab"]]
    meshDiab["PMID"] = pd.to_numeric(meshDiab["PMID"])
    sentences_pd_with_classes = pd.merge(sentences_pdf, meshDiab, on='PMID', how="left")

    print("\tsentences_pdf: {}".format(sentences_pdf.shape))
    print("\tmeshDiab: {}".format(meshDiab.shape))
    print("\tmerged: {}".format(sentences_pd_with_classes.shape))

    
    # list of all classes in the sentences file
    return (set(pd.to_numeric(sentences_pdf["all_classes"].map(lambda sentence: sentence.split(";")).explode()).values)
            , sentences_pd_with_classes)

sentences_all_classes, sentences_pd_with_classes = get_list_all_possible_classes(sentences)
print("Number of classes in sentences file: {}".format(len(sentences_all_classes)))
print("Merged dataset with true classes: {}".format(sentences_pd_with_classes.shape))
sentences_pd_with_classes.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


	sentences_pdf: (55911, 4)
	meshDiab: (55911, 2)
	merged: (55911, 5)
Number of classes in sentences file: 913
Merged dataset with true classes: (55911, 5)


Unnamed: 0,id,tokens,all_classes,PMID,mesh_ui_diab
0,28800712,outcomes; ;achieved; ;with; ;use; ;of; ;a; ;pr...,0;1;67;195;3;7;10;13;847;850;723;27;61;30;191,28800712,D017719
1,6989594,investigation; ;of; ;insulin; ;sensitivity; ;i...,0;1;3;103;7;10;107;27;13;31;63,6989594,D011236
2,524360,ultrastructural; ;pathology; ;of; ;peripheral;...,0;1;33;2;83;595;4;935;91;157;94,524360,D003929
3,21199315,evidence;-;based; ;interventional; ;pain; ;med...,0;1;994;3;70;7;328;712;10;12;724;917;25,21199315,D003929
4,24607755,delivery; ;timing; ;and; ;cesarean; ;delivery;...,0;1;33;2;82;4;709;214;89;730;252,24607755,D016640


In [10]:
# initialise
treeFBE = Tree(nodes
            #, list(set(data["class_predict"]))
            , mode="FBE"
            , sentences_all_classes=sentences_all_classes
            , true_classes_all=sentences_pd_with_classes["mesh_ui_diab"])

# define root node
root = Node(Id=1, depth=0, parent=None, children=[]) # Id = 1 because start at Explorer 

# build tree
#maxDepth = 10
treeFBE.set_build_tree(root)

print("Number leafs: {}".format(treeFBE.count_leafs()))

Count nodes: 484; leafs: 236
Number leafs: 236


In [12]:
# Associate cluster to each sentence

from pyspark.sql.functions import udf, col
from pyspark.sql.types import *

def matchCluster(index_map, cluster): 
    """ gets for each abstract its unique cluster (filterValue) from the index"""
    return list(set(list(index_map.keys())).intersection(set(cluster)))[0]

def associate_unique_cluster_to_documents(sentences, tree):
    """ Associates unique cluster to each document """
    leafs = tree.get_leaf_nodes()
    print("N leafs: {}".format(len(leafs)))
    cluster = [leaf.cluster_label for leaf in leafs]
    print("N clusters: {}".format(len(set(cluster))))

    matchCluster_udf = udf(lambda y: matchCluster(y, cluster))
    join_udf = udf(lambda x: ";".join(x))

    sentences_transformed = sentences.select("id", "tokens", matchCluster_udf('index').alias("uniqueCluster")) \
                        .withColumn("tokens", join_udf(col("tokens"))) 

    #sentences.select('index', matchClass_udf('index').atlias("uniqueCluster")).groupby("uniqueCluster").count().show()
    sentences_pd = sentences_transformed.toPandas()
    sentences_pd["id"] = pd.to_numeric(sentences_pd["id"])
    sentences_pd["uniqueCluster"] = pd.to_numeric(sentences_pd["uniqueCluster"])
    
    # add true class labels to data by merge/join 
    sentences_pd["PMID"] = sentences_pd["id"]
    sentences_pd["PMID"] = pd.to_numeric(sentences_pd["PMID"])
    meshDiab = data[["PMID", "mesh_ui_diab"]]
    meshDiab["PMID"] = pd.to_numeric(meshDiab["PMID"])
    sentences_pd_with_classes_uniqueCluster = pd.merge(sentences_pd, meshDiab, on='PMID', how="left")
    print("sentences_pd: {}".format(sentences_pd.shape))
    print("meshDiab: {}".format(meshDiab.shape))
    print("sentences_pd_with_classes_uniqueCluster: {}".format(sentences_pd_with_classes.shape))
    
    return sentences_pd_with_classes_uniqueCluster 

sentences_pd_with_classes_uniqueCluster = associate_unique_cluster_to_documents(sentences, treeFBE)
print("Unique clusters in sentences: {}".format(sentences_pd_with_classes_uniqueCluster["uniqueCluster"].nunique())) #####

sentences_pd_with_classes_uniqueCluster.head()

N leafs: 236
N clusters: 236
sentences_pd: (55911, 4)
meshDiab: (55911, 2)
sentences_pd_with_classes_uniqueCluster: (55911, 5)
Unique clusters in sentences: 236


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,id,tokens,uniqueCluster,PMID,mesh_ui_diab
0,28800712,outcomes; ;achieved; ;with; ;use; ;of; ;a; ;pr...,847,28800712,D017719
1,6989594,investigation; ;of; ;insulin; ;sensitivity; ;i...,103,6989594,D011236
2,524360,ultrastructural; ;pathology; ;of; ;peripheral;...,595,524360,D003929
3,21199315,evidence;-;based; ;interventional; ;pain; ;med...,917,21199315,D003929
4,24607755,delivery; ;timing; ;and; ;cesarean; ;delivery;...,709,24607755,D016640


In [13]:
treeFBE.fitTree(treeFBE.tree, sentences_pd_with_classes_uniqueCluster)

leaf_abstract_id: 2648659, leaf_abstract_class_true: D003928
leaf_abstract_id: 8588876, leaf_abstract_class_true: D003930
leaf_abstract_id: 8164551, leaf_abstract_class_true: D016640
leaf_abstract_id: 21816498, leaf_abstract_class_true: D016640
leaf_abstract_id: 10827618, leaf_abstract_class_true: D003924
leaf_abstract_id: 17487113, leaf_abstract_class_true: D016640
leaf_abstract_id: 22101483, leaf_abstract_class_true: D017719
leaf_abstract_id: 26756260, leaf_abstract_class_true: D016640
leaf_abstract_id: 16522427, leaf_abstract_class_true: D016640
leaf_abstract_id: 28400430, leaf_abstract_class_true: D011236
leaf_abstract_id: 21441441, leaf_abstract_class_true: D003924
leaf_abstract_id: 30226486, leaf_abstract_class_true: D003920
leaf_abstract_id: 20204997, leaf_abstract_class_true: D003920
leaf_abstract_id: 11787436, leaf_abstract_class_true: D003928
leaf_abstract_id: 24620558, leaf_abstract_class_true: D005320
leaf_abstract_id: 25757637, leaf_abstract_class_true: D016640
leaf_abstra

leaf_abstract_id: 25660138, leaf_abstract_class_true: D003925
leaf_abstract_id: 2078874, leaf_abstract_class_true: D003922
leaf_abstract_id: 16098925, leaf_abstract_class_true: D016640
leaf_abstract_id: 15172859, leaf_abstract_class_true: D016640
leaf_abstract_id: 27227919, leaf_abstract_class_true: D003924
leaf_abstract_id: 30260983, leaf_abstract_class_true: D003920
leaf_abstract_id: 22926361, leaf_abstract_class_true: D016640
leaf_abstract_id: 24146865, leaf_abstract_class_true: D003930
leaf_abstract_id: 29203507, leaf_abstract_class_true: D016640
leaf_abstract_id: 3984732, leaf_abstract_class_true: D003922
leaf_abstract_id: 23080236, leaf_abstract_class_true: D017719
leaf_abstract_id: 30500542, leaf_abstract_class_true: D016640
leaf_abstract_id: 15735191, leaf_abstract_class_true: D016640
leaf_abstract_id: 16317371, leaf_abstract_class_true: D003930
leaf_abstract_id: 17934394, leaf_abstract_class_true: D016640
leaf_abstract_id: 27601018, leaf_abstract_class_true: D016640
leaf_abstr

leaf_abstract_id: 2951460, leaf_abstract_class_true: D003925
leaf_abstract_id: 3883000, leaf_abstract_class_true: D048909
leaf_abstract_id: 15785022, leaf_abstract_class_true: D003925
leaf_abstract_id: 19347224, leaf_abstract_class_true: D005320
leaf_abstract_id: 9255040, leaf_abstract_class_true: D016640
leaf_abstract_id: 25082340, leaf_abstract_class_true: D017719
leaf_abstract_id: 21925081, leaf_abstract_class_true: D003920
leaf_abstract_id: 25870695, leaf_abstract_class_true: D016640
leaf_abstract_id: 21957592, leaf_abstract_class_true: D003929
leaf_abstract_id: 26813594, leaf_abstract_class_true: D048909
leaf_abstract_id: 12694990, leaf_abstract_class_true: D003929
leaf_abstract_id: 1456792, leaf_abstract_class_true: D003928
leaf_abstract_id: 27004003, leaf_abstract_class_true: D048909
leaf_abstract_id: 27924661, leaf_abstract_class_true: D016640
leaf_abstract_id: 8865924, leaf_abstract_class_true: D003928
leaf_abstract_id: 26048000, leaf_abstract_class_true: D048909
leaf_abstract

leaf_abstract_id: 20222331, leaf_abstract_class_true: D048909
leaf_abstract_id: 10395336, leaf_abstract_class_true: D017719
leaf_abstract_id: 29424871, leaf_abstract_class_true: D005320
leaf_abstract_id: 30805100, leaf_abstract_class_true: D016640
leaf_abstract_id: 19484867, leaf_abstract_class_true: D003928
leaf_abstract_id: 10960718, leaf_abstract_class_true: D017719
leaf_abstract_id: 12076235, leaf_abstract_class_true: D048909
leaf_abstract_id: 24642738, leaf_abstract_class_true: D017719
leaf_abstract_id: 7198388, leaf_abstract_class_true: D003930
leaf_abstract_id: 20697004, leaf_abstract_class_true: D048909
leaf_abstract_id: 30385355, leaf_abstract_class_true: D005320
leaf_abstract_id: 30630990, leaf_abstract_class_true: D003920
leaf_abstract_id: 25016791, leaf_abstract_class_true: D003930
leaf_abstract_id: 25672881, leaf_abstract_class_true: D003920
leaf_abstract_id: 18351194, leaf_abstract_class_true: D003920
leaf_abstract_id: 31471505, leaf_abstract_class_true: D003920
leaf_abst

leaf_abstract_id: 29636823, leaf_abstract_class_true: D017719
leaf_abstract_id: 24564044, leaf_abstract_class_true: D003920
leaf_abstract_id: 18386800, leaf_abstract_class_true: D005320
leaf_abstract_id: 17426042, leaf_abstract_class_true: D003922
leaf_abstract_id: 23781894, leaf_abstract_class_true: D003924
leaf_abstract_id: 28764665, leaf_abstract_class_true: D016640
leaf_abstract_id: 19390341, leaf_abstract_class_true: D048909
leaf_abstract_id: 20423440, leaf_abstract_class_true: D003922
leaf_abstract_id: 9638541, leaf_abstract_class_true: D017719
leaf_abstract_id: 28452722, leaf_abstract_class_true: D005320
leaf_abstract_id: 22534170, leaf_abstract_class_true: D003924
leaf_abstract_id: 507535, leaf_abstract_class_true: D048909
leaf_abstract_id: 22142620, leaf_abstract_class_true: D003925
leaf_abstract_id: 31045693, leaf_abstract_class_true: D003920
leaf_abstract_id: 1196554, leaf_abstract_class_true: D048909
leaf_abstract_id: 7814784, leaf_abstract_class_true: D048909
leaf_abstract

leaf_abstract_id: 15953040, leaf_abstract_class_true: D017719
leaf_abstract_id: 3561950, leaf_abstract_class_true: D014929
leaf_abstract_id: 15040155, leaf_abstract_class_true: D003925
leaf_abstract_id: 30591810, leaf_abstract_class_true: D003928
leaf_abstract_id: 24880140, leaf_abstract_class_true: D048909
leaf_abstract_id: 22971940, leaf_abstract_class_true: D048909
leaf_abstract_id: 29182429, leaf_abstract_class_true: D003930
leaf_abstract_id: 9882860, leaf_abstract_class_true: D003928
leaf_abstract_id: 9847658, leaf_abstract_class_true: D003929
leaf_abstract_id: 18672190, leaf_abstract_class_true: D003924
leaf_abstract_id: 18194629, leaf_abstract_class_true: D014929
leaf_abstract_id: 6348369, leaf_abstract_class_true: D048909
leaf_abstract_id: 1131653, leaf_abstract_class_true: D003929
leaf_abstract_id: 3201971, leaf_abstract_class_true: D005320
leaf_abstract_id: 18093210, leaf_abstract_class_true: D003928
leaf_abstract_id: 16586192, leaf_abstract_class_true: D017719
leaf_abstract_

leaf_abstract_id: 19952879, leaf_abstract_class_true: D003929
leaf_abstract_id: 29439130, leaf_abstract_class_true: D017719
leaf_abstract_id: 20860554, leaf_abstract_class_true: D017719
leaf_abstract_id: 55889, leaf_abstract_class_true: D003929
leaf_abstract_id: 9151203, leaf_abstract_class_true: D003928
leaf_abstract_id: 19000538, leaf_abstract_class_true: D003928
leaf_abstract_id: 12502660, leaf_abstract_class_true: D003925
leaf_abstract_id: 30529577, leaf_abstract_class_true: D048909
leaf_abstract_id: 16270812, leaf_abstract_class_true: D003928
leaf_abstract_id: 8803638, leaf_abstract_class_true: D017719
leaf_abstract_id: 19026199, leaf_abstract_class_true: D017719
leaf_abstract_id: 28944725, leaf_abstract_class_true: D048909
leaf_abstract_id: 29316133, leaf_abstract_class_true: D016883
leaf_abstract_id: 23079827, leaf_abstract_class_true: D048909
leaf_abstract_id: 20446593, leaf_abstract_class_true: D003925
leaf_abstract_id: 29145522, leaf_abstract_class_true: D048909
leaf_abstract

leaf_abstract_id: 28120427, leaf_abstract_class_true: D016640
leaf_abstract_id: 22732378, leaf_abstract_class_true: D017719
leaf_abstract_id: 4031955, leaf_abstract_class_true: D048909
leaf_abstract_id: 2104404, leaf_abstract_class_true: D003925
leaf_abstract_id: 30203363, leaf_abstract_class_true: D003929
leaf_abstract_id: 30579024, leaf_abstract_class_true: D048909
leaf_abstract_id: 22184988, leaf_abstract_class_true: D003930
leaf_abstract_id: 7271401, leaf_abstract_class_true: D003928
leaf_abstract_id: 18222304, leaf_abstract_class_true: D003930
leaf_abstract_id: 24508179, leaf_abstract_class_true: D003930
leaf_abstract_id: 19449758, leaf_abstract_class_true: D003920
leaf_abstract_id: 10051779, leaf_abstract_class_true: D003928
leaf_abstract_id: 1992657, leaf_abstract_class_true: D003928
leaf_abstract_id: 11886320, leaf_abstract_class_true: D003930
leaf_abstract_id: 29695169, leaf_abstract_class_true: D011236
leaf_abstract_id: 27865313, leaf_abstract_class_true: D017719
leaf_abstrac

leaf_abstract_id: 17418687, leaf_abstract_class_true: D003928
leaf_abstract_id: 12560879, leaf_abstract_class_true: D003928
leaf_abstract_id: 9297131, leaf_abstract_class_true: D003928
leaf_abstract_id: 23717464, leaf_abstract_class_true: D048909
leaf_abstract_id: 22477719, leaf_abstract_class_true: D003924
leaf_abstract_id: 19339356, leaf_abstract_class_true: D003920
leaf_abstract_id: 7230683, leaf_abstract_class_true: D048909
leaf_abstract_id: 29804166, leaf_abstract_class_true: D003924
leaf_abstract_id: 17017195, leaf_abstract_class_true: D003930
leaf_abstract_id: 16059569, leaf_abstract_class_true: D003930
leaf_abstract_id: 20465104, leaf_abstract_class_true: D003925
leaf_abstract_id: 3336599, leaf_abstract_class_true: D048909
leaf_abstract_id: 14595983, leaf_abstract_class_true: D017719
leaf_abstract_id: 16860024, leaf_abstract_class_true: D003920
leaf_abstract_id: 22977274, leaf_abstract_class_true: D048909
leaf_abstract_id: 11007394, leaf_abstract_class_true: D003929
leaf_abstra

leaf_abstract_id: 28532776, leaf_abstract_class_true: D003920
leaf_abstract_id: 17052791, leaf_abstract_class_true: D003925
leaf_abstract_id: 9887607, leaf_abstract_class_true: D048909
leaf_abstract_id: 26865800, leaf_abstract_class_true: D003930
leaf_abstract_id: 3485900, leaf_abstract_class_true: D003929
leaf_abstract_id: 3323491, leaf_abstract_class_true: D003928
leaf_abstract_id: 9240935, leaf_abstract_class_true: D003928
leaf_abstract_id: 17391152, leaf_abstract_class_true: D048909
leaf_abstract_id: 25355264, leaf_abstract_class_true: D003928
leaf_abstract_id: 16329060, leaf_abstract_class_true: D003930
leaf_abstract_id: 17950788, leaf_abstract_class_true: D003925
leaf_abstract_id: 9692282, leaf_abstract_class_true: D003920
leaf_abstract_id: 28204913, leaf_abstract_class_true: D003928
leaf_abstract_id: 18552389, leaf_abstract_class_true: D003930
leaf_abstract_id: 23537336, leaf_abstract_class_true: D003930
leaf_abstract_id: 1495506, leaf_abstract_class_true: D003929
leaf_abstract_

leaf_abstract_id: 15800558, leaf_abstract_class_true: D003920
leaf_abstract_id: 7094299, leaf_abstract_class_true: D003920
leaf_abstract_id: 27355980, leaf_abstract_class_true: D016640
leaf_abstract_id: 24220476, leaf_abstract_class_true: D003922
leaf_abstract_id: 17318092, leaf_abstract_class_true: D003930
leaf_abstract_id: 21450564, leaf_abstract_class_true: D011236
leaf_abstract_id: 25783330, leaf_abstract_class_true: D014929
leaf_abstract_id: 30419893, leaf_abstract_class_true: D003924
leaf_abstract_id: 26810129, leaf_abstract_class_true: D017719
leaf_abstract_id: 1737284, leaf_abstract_class_true: D003925
leaf_abstract_id: 7146506, leaf_abstract_class_true: D003929
leaf_abstract_id: 22405662, leaf_abstract_class_true: D003925
leaf_abstract_id: 11140076, leaf_abstract_class_true: D003920
leaf_abstract_id: 22284167, leaf_abstract_class_true: D003920
leaf_abstract_id: 12390424, leaf_abstract_class_true: D048909
leaf_abstract_id: 22381342, leaf_abstract_class_true: D017719
leaf_abstra

leaf_abstract_id: 28803331, leaf_abstract_class_true: D003922
leaf_abstract_id: 3896667, leaf_abstract_class_true: D048909
leaf_abstract_id: 8874374, leaf_abstract_class_true: D016640
leaf_abstract_id: 20488402, leaf_abstract_class_true: D048909
leaf_abstract_id: 20336998, leaf_abstract_class_true: D003920
leaf_abstract_id: 20123288, leaf_abstract_class_true: D017719
leaf_abstract_id: 23008987, leaf_abstract_class_true: D003925
leaf_abstract_id: 24470558, leaf_abstract_class_true: D003922
leaf_abstract_id: 11211756, leaf_abstract_class_true: D003930
leaf_abstract_id: 9850485, leaf_abstract_class_true: D003920
leaf_abstract_id: 29902542, leaf_abstract_class_true: D017719
leaf_abstract_id: 18781934, leaf_abstract_class_true: D003925
leaf_abstract_id: 27824912, leaf_abstract_class_true: D003929
leaf_abstract_id: 11604790, leaf_abstract_class_true: D003920
leaf_abstract_id: 20875048, leaf_abstract_class_true: D017719
leaf_abstract_id: 21527381, leaf_abstract_class_true: D003930
leaf_abstra

leaf_abstract_id: 23787544, leaf_abstract_class_true: D003928
leaf_abstract_id: 23826777, leaf_abstract_class_true: D005320
leaf_abstract_id: 26021548, leaf_abstract_class_true: D016640
leaf_abstract_id: 14510863, leaf_abstract_class_true: D003923
leaf_abstract_id: 22525909, leaf_abstract_class_true: D016640
leaf_abstract_id: 12147144, leaf_abstract_class_true: D003922
leaf_abstract_id: 27475905, leaf_abstract_class_true: D016640
leaf_abstract_id: 21993190, leaf_abstract_class_true: D048909
leaf_abstract_id: 11872692, leaf_abstract_class_true: D003924
leaf_abstract_id: 9171256, leaf_abstract_class_true: D003922
leaf_abstract_id: 22998993, leaf_abstract_class_true: D016883
leaf_abstract_id: 15855572, leaf_abstract_class_true: D003924
leaf_abstract_id: 15989984, leaf_abstract_class_true: D016640
leaf_abstract_id: 23139349, leaf_abstract_class_true: D003922
leaf_abstract_id: 24268212, leaf_abstract_class_true: D003924
leaf_abstract_id: 14692373, leaf_abstract_class_true: D003924
leaf_abst

leaf_abstract_id: 20842885, leaf_abstract_class_true: D003924
leaf_abstract_id: 25970163, leaf_abstract_class_true: D003920
leaf_abstract_id: 29940126, leaf_abstract_class_true: D003924
leaf_abstract_id: 18538098, leaf_abstract_class_true: D003928
leaf_abstract_id: 21067267, leaf_abstract_class_true: D003924
leaf_abstract_id: 30818773, leaf_abstract_class_true: D003924
leaf_abstract_id: 26687713, leaf_abstract_class_true: D005320
leaf_abstract_id: 1511579, leaf_abstract_class_true: D003920
leaf_abstract_id: 28748705, leaf_abstract_class_true: D003920
leaf_abstract_id: 16426695, leaf_abstract_class_true: D016640
leaf_abstract_id: 14693722, leaf_abstract_class_true: D003920
leaf_abstract_id: 12879789, leaf_abstract_class_true: D003924
leaf_abstract_id: 8546639, leaf_abstract_class_true: D016640
leaf_abstract_id: 17515702, leaf_abstract_class_true: D016883
leaf_abstract_id: 29648934, leaf_abstract_class_true: D011236
leaf_abstract_id: 18715198, leaf_abstract_class_true: D003924
leaf_abstr

leaf_abstract_id: 25302493, leaf_abstract_class_true: D016640
leaf_abstract_id: 19614942, leaf_abstract_class_true: D003924
leaf_abstract_id: 1190932, leaf_abstract_class_true: D048909
leaf_abstract_id: 12642925, leaf_abstract_class_true: D003922
leaf_abstract_id: 11416678, leaf_abstract_class_true: D048909
leaf_abstract_id: 6373280, leaf_abstract_class_true: D003925
leaf_abstract_id: 28599399, leaf_abstract_class_true: D003929
leaf_abstract_id: 19813381, leaf_abstract_class_true: D003924
leaf_abstract_id: 27430714, leaf_abstract_class_true: D003922
leaf_abstract_id: 20369624, leaf_abstract_class_true: D003920
leaf_abstract_id: 28974211, leaf_abstract_class_true: D003930
leaf_abstract_id: 17517445, leaf_abstract_class_true: D003924
leaf_abstract_id: 2000180, leaf_abstract_class_true: D003924
leaf_abstract_id: 26922436, leaf_abstract_class_true: D048909
leaf_abstract_id: 21185471, leaf_abstract_class_true: D016640
leaf_abstract_id: 26186809, leaf_abstract_class_true: D003924
leaf_abstra

leaf_abstract_id: 7297969, leaf_abstract_class_true: D048909
leaf_abstract_id: 19570342, leaf_abstract_class_true: D016883
leaf_abstract_id: 15776861, leaf_abstract_class_true: D016640
leaf_abstract_id: 19077324, leaf_abstract_class_true: D016640
leaf_abstract_id: 27917777, leaf_abstract_class_true: D016640
leaf_abstract_id: 10880893, leaf_abstract_class_true: D003924
leaf_abstract_id: 29255311, leaf_abstract_class_true: D005320
leaf_abstract_id: 22413808, leaf_abstract_class_true: D003924
leaf_abstract_id: 28526014, leaf_abstract_class_true: D003924
leaf_abstract_id: 16219059, leaf_abstract_class_true: D048909
leaf_abstract_id: 24518176, leaf_abstract_class_true: D003924
leaf_abstract_id: 18339441, leaf_abstract_class_true: D003925
leaf_abstract_id: 11205931, leaf_abstract_class_true: D011236
leaf_abstract_id: 30092235, leaf_abstract_class_true: D016640
leaf_abstract_id: 28664545, leaf_abstract_class_true: D016883
leaf_abstract_id: 18446108, leaf_abstract_class_true: D016883
leaf_abst

leaf_abstract_id: 28396903, leaf_abstract_class_true: D058065
leaf_abstract_id: 16369198, leaf_abstract_class_true: D016640
leaf_abstract_id: 20036433, leaf_abstract_class_true: D003924
leaf_abstract_id: 28356319, leaf_abstract_class_true: D003922
leaf_abstract_id: 22826102, leaf_abstract_class_true: D003921
leaf_abstract_id: 22842963, leaf_abstract_class_true: D016883
leaf_abstract_id: 12858126, leaf_abstract_class_true: D003925
leaf_abstract_id: 25754876, leaf_abstract_class_true: D003924
leaf_abstract_id: 8236579, leaf_abstract_class_true: D003922
leaf_abstract_id: 28342363, leaf_abstract_class_true: D003920
leaf_abstract_id: 26100024, leaf_abstract_class_true: D016640
leaf_abstract_id: 23039989, leaf_abstract_class_true: D003922
leaf_abstract_id: 7011086, leaf_abstract_class_true: D003928
leaf_abstract_id: 21096241, leaf_abstract_class_true: D003922
leaf_abstract_id: 1496894, leaf_abstract_class_true: D003920
leaf_abstract_id: 19263394, leaf_abstract_class_true: D003920
leaf_abstra

leaf_abstract_id: 3783181, leaf_abstract_class_true: D003929
leaf_abstract_id: 22430594, leaf_abstract_class_true: D003920
leaf_abstract_id: 3215159, leaf_abstract_class_true: D003922
leaf_abstract_id: 21415060, leaf_abstract_class_true: D003930
leaf_abstract_id: 7894398, leaf_abstract_class_true: D017719
leaf_abstract_id: 19209233, leaf_abstract_class_true: D048909
leaf_abstract_id: 19940530, leaf_abstract_class_true: D003930
leaf_abstract_id: 25603665, leaf_abstract_class_true: D003925
leaf_abstract_id: 21409311, leaf_abstract_class_true: D003924
leaf_abstract_id: 8554204, leaf_abstract_class_true: D003924
leaf_abstract_id: 23362169, leaf_abstract_class_true: D048909
leaf_abstract_id: 15134111, leaf_abstract_class_true: D003930
leaf_abstract_id: 30102422, leaf_abstract_class_true: D017719
leaf_abstract_id: 21331960, leaf_abstract_class_true: D003930
leaf_abstract_id: 27341846, leaf_abstract_class_true: D003929
leaf_abstract_id: 10553586, leaf_abstract_class_true: D003929
leaf_abstrac

leaf_abstract_id: 3784406, leaf_abstract_class_true: D003930
leaf_abstract_id: 3916369, leaf_abstract_class_true: D048909
leaf_abstract_id: 22330965, leaf_abstract_class_true: D003930
leaf_abstract_id: 8813845, leaf_abstract_class_true: D003929
leaf_abstract_id: 15384839, leaf_abstract_class_true: D003930
leaf_abstract_id: 27337438, leaf_abstract_class_true: D003929
leaf_abstract_id: 19838113, leaf_abstract_class_true: D003928
leaf_abstract_id: 12595499, leaf_abstract_class_true: D003928
leaf_abstract_id: 2670725, leaf_abstract_class_true: D003928
leaf_abstract_id: 2812329, leaf_abstract_class_true: D003929
leaf_abstract_id: 19300353, leaf_abstract_class_true: D003922
leaf_abstract_id: 26576787, leaf_abstract_class_true: D048909
leaf_abstract_id: 23252999, leaf_abstract_class_true: D016883
leaf_abstract_id: 30409338, leaf_abstract_class_true: D003930
leaf_abstract_id: 8953931, leaf_abstract_class_true: D048909
leaf_abstract_id: 8305095, leaf_abstract_class_true: D003924
leaf_abstract_i

leaf_abstract_id: 8809630, leaf_abstract_class_true: D003920
leaf_abstract_id: 3531959, leaf_abstract_class_true: D003930
leaf_abstract_id: 23032497, leaf_abstract_class_true: D003929
leaf_abstract_id: 19603150, leaf_abstract_class_true: D003925
leaf_abstract_id: 15517763, leaf_abstract_class_true: D003928
leaf_abstract_id: 21251855, leaf_abstract_class_true: D017719
leaf_abstract_id: 26438201, leaf_abstract_class_true: D048909
leaf_abstract_id: 2709042, leaf_abstract_class_true: D003929
leaf_abstract_id: 11436189, leaf_abstract_class_true: D003922
leaf_abstract_id: 21684608, leaf_abstract_class_true: D003930
leaf_abstract_id: 25042298, leaf_abstract_class_true: D003929
leaf_abstract_id: 15330298, leaf_abstract_class_true: D048909
leaf_abstract_id: 23222389, leaf_abstract_class_true: D003930
leaf_abstract_id: 24257492, leaf_abstract_class_true: D003929
leaf_abstract_id: 29216336, leaf_abstract_class_true: D003928
leaf_abstract_id: 12579102, leaf_abstract_class_true: D048909
leaf_abstra

leaf_abstract_id: 24475680, leaf_abstract_class_true: D011236
leaf_abstract_id: 22069128, leaf_abstract_class_true: D011236
leaf_abstract_id: 16186388, leaf_abstract_class_true: D003922
leaf_abstract_id: 15138754, leaf_abstract_class_true: D048909
leaf_abstract_id: 16290992, leaf_abstract_class_true: D048909
leaf_abstract_id: 28800055, leaf_abstract_class_true: D003920
leaf_abstract_id: 29306895, leaf_abstract_class_true: D003920
leaf_abstract_id: 6863116, leaf_abstract_class_true: D003922
leaf_abstract_id: 2172053, leaf_abstract_class_true: D003929
leaf_abstract_id: 27311590, leaf_abstract_class_true: D003929
leaf_abstract_id: 2223706, leaf_abstract_class_true: D003930
leaf_abstract_id: 1760971, leaf_abstract_class_true: D003930
leaf_abstract_id: 3397762, leaf_abstract_class_true: D003925
leaf_abstract_id: 15035255, leaf_abstract_class_true: D016883
leaf_abstract_id: 1446578, leaf_abstract_class_true: D003922
leaf_abstract_id: 9829487, leaf_abstract_class_true: D003928
leaf_abstract_i

leaf_abstract_id: 26836393, leaf_abstract_class_true: D003920
leaf_abstract_id: 28948573, leaf_abstract_class_true: D003924
leaf_abstract_id: 30294954, leaf_abstract_class_true: D003928
leaf_abstract_id: 15250030, leaf_abstract_class_true: D003925
leaf_abstract_id: 27517947, leaf_abstract_class_true: D011236
leaf_abstract_id: 30246649, leaf_abstract_class_true: D003924
leaf_abstract_id: 29546070, leaf_abstract_class_true: D003924
leaf_abstract_id: 23447123, leaf_abstract_class_true: D003922
leaf_abstract_id: 30411704, leaf_abstract_class_true: D003924
leaf_abstract_id: 15049931, leaf_abstract_class_true: D003925
leaf_abstract_id: 27068777, leaf_abstract_class_true: D003930
leaf_abstract_id: 20975990, leaf_abstract_class_true: D003928
leaf_abstract_id: 4083082, leaf_abstract_class_true: D003928
leaf_abstract_id: 18042080, leaf_abstract_class_true: D003928
leaf_abstract_id: 28886990, leaf_abstract_class_true: D003924
leaf_abstract_id: 8734860, leaf_abstract_class_true: D003925
leaf_abstr

leaf_abstract_id: 7820950, leaf_abstract_class_true: D003921
leaf_abstract_id: 15943236, leaf_abstract_class_true: D003921
leaf_abstract_id: 1223814, leaf_abstract_class_true: D003921
leaf_abstract_id: 3796834, leaf_abstract_class_true: D003929
leaf_abstract_id: 2141449, leaf_abstract_class_true: D003921
leaf_abstract_id: 1907900, leaf_abstract_class_true: D003921
leaf_abstract_id: 20138415, leaf_abstract_class_true: D003928
leaf_abstract_id: 12932443, leaf_abstract_class_true: D003929
leaf_abstract_id: 10404280, leaf_abstract_class_true: D003921
leaf_abstract_id: 27434350, leaf_abstract_class_true: D003921
leaf_abstract_id: 7274588, leaf_abstract_class_true: D003928
leaf_abstract_id: 23975224, leaf_abstract_class_true: D016640
leaf_abstract_id: 8462892, leaf_abstract_class_true: D003921
leaf_abstract_id: 25644797, leaf_abstract_class_true: D003921
leaf_abstract_id: 23075244, leaf_abstract_class_true: D016640
leaf_abstract_id: 2610051, leaf_abstract_class_true: D003928
leaf_abstract_id

leaf_abstract_id: 1858864, leaf_abstract_class_true: D003921
leaf_abstract_id: 8596482, leaf_abstract_class_true: D003921
leaf_abstract_id: 3227958, leaf_abstract_class_true: D003925
leaf_abstract_id: 9761505, leaf_abstract_class_true: D003922
leaf_abstract_id: 8690062, leaf_abstract_class_true: D003929
leaf_abstract_id: 3277423, leaf_abstract_class_true: D003924
leaf_abstract_id: 8644123, leaf_abstract_class_true: D003921
leaf_abstract_id: 1847599, leaf_abstract_class_true: D003921
leaf_abstract_id: 1516758, leaf_abstract_class_true: D003921
leaf_abstract_id: 29486855, leaf_abstract_class_true: D016640
leaf_abstract_id: 28868764, leaf_abstract_class_true: D011236
leaf_abstract_id: 22291696, leaf_abstract_class_true: D003929
leaf_abstract_id: 21087392, leaf_abstract_class_true: D003921
leaf_abstract_id: 12964645, leaf_abstract_class_true: D048909
leaf_abstract_id: 23444823, leaf_abstract_class_true: D003922
leaf_abstract_id: 22881223, leaf_abstract_class_true: D011236
leaf_abstract_id:

leaf_abstract_id: 15505985, leaf_abstract_class_true: D016640
leaf_abstract_id: 23921142, leaf_abstract_class_true: D016640
leaf_abstract_id: 28275101, leaf_abstract_class_true: D016640
leaf_abstract_id: 20473242, leaf_abstract_class_true: D016883
leaf_abstract_id: 28341844, leaf_abstract_class_true: D011236
leaf_abstract_id: 25014388, leaf_abstract_class_true: D011236
leaf_abstract_id: 8510518, leaf_abstract_class_true: D003920
leaf_abstract_id: 16180794, leaf_abstract_class_true: D005320
leaf_abstract_id: 11744918, leaf_abstract_class_true: D016640
leaf_abstract_id: 12683646, leaf_abstract_class_true: D016640
leaf_abstract_id: 22653590, leaf_abstract_class_true: D016640
leaf_abstract_id: 3286168, leaf_abstract_class_true: D003922
leaf_abstract_id: 25675133, leaf_abstract_class_true: D003920
leaf_abstract_id: 26018222, leaf_abstract_class_true: D016640
leaf_abstract_id: 18787378, leaf_abstract_class_true: D016640
leaf_abstract_id: 25132069, leaf_abstract_class_true: D003924
leaf_abstr

leaf_abstract_id: 9155315, leaf_abstract_class_true: D048909
leaf_abstract_id: 19243283, leaf_abstract_class_true: D003924
leaf_abstract_id: 21741059, leaf_abstract_class_true: D016640
leaf_abstract_id: 21139125, leaf_abstract_class_true: D003924
leaf_abstract_id: 24894027, leaf_abstract_class_true: D016640
leaf_abstract_id: 17510694, leaf_abstract_class_true: D016640
leaf_abstract_id: 15756801, leaf_abstract_class_true: D016640
leaf_abstract_id: 12706274, leaf_abstract_class_true: D016640
leaf_abstract_id: 20953862, leaf_abstract_class_true: D016640
leaf_abstract_id: 22445568, leaf_abstract_class_true: D016640
leaf_abstract_id: 22505499, leaf_abstract_class_true: D016640
leaf_abstract_id: 19389819, leaf_abstract_class_true: D016640
leaf_abstract_id: 24183482, leaf_abstract_class_true: D016640
leaf_abstract_id: 27993398, leaf_abstract_class_true: D016640
leaf_abstract_id: 21470081, leaf_abstract_class_true: D016640
leaf_abstract_id: 3189500, leaf_abstract_class_true: D003920
leaf_abstr

leaf_abstract_id: 29468749, leaf_abstract_class_true: D016640
leaf_abstract_id: 14578229, leaf_abstract_class_true: D016640
leaf_abstract_id: 17654189, leaf_abstract_class_true: D005320
leaf_abstract_id: 22093485, leaf_abstract_class_true: D016640
leaf_abstract_id: 333276, leaf_abstract_class_true: D003922
leaf_abstract_id: 1877587, leaf_abstract_class_true: D048909
leaf_abstract_id: 24160240, leaf_abstract_class_true: D003920
leaf_abstract_id: 26824237, leaf_abstract_class_true: D016640
leaf_abstract_id: 25303998, leaf_abstract_class_true: D016640
leaf_abstract_id: 25203150, leaf_abstract_class_true: D016640
leaf_abstract_id: 2371019, leaf_abstract_class_true: D005320
leaf_abstract_id: 18287897, leaf_abstract_class_true: D005320
leaf_abstract_id: 22332109, leaf_abstract_class_true: D016640
leaf_abstract_id: 18050020, leaf_abstract_class_true: D016640
leaf_abstract_id: 24034451, leaf_abstract_class_true: D016640
leaf_abstract_id: 12623478, leaf_abstract_class_true: D005320
leaf_abstrac

leaf_abstract_id: 9364578, leaf_abstract_class_true: D003929
leaf_abstract_id: 23843215, leaf_abstract_class_true: D003928
leaf_abstract_id: 27907230, leaf_abstract_class_true: D003920
leaf_abstract_id: 18331228, leaf_abstract_class_true: D048909
leaf_abstract_id: 24752777, leaf_abstract_class_true: D003920
leaf_abstract_id: 24126227, leaf_abstract_class_true: D003924
leaf_abstract_id: 16530919, leaf_abstract_class_true: D005320
leaf_abstract_id: 28714601, leaf_abstract_class_true: D003920
leaf_abstract_id: 28413863, leaf_abstract_class_true: D003928
leaf_abstract_id: 16515530, leaf_abstract_class_true: D003922
leaf_abstract_id: 25894269, leaf_abstract_class_true: D003920
leaf_abstract_id: 18196299, leaf_abstract_class_true: D048909
leaf_abstract_id: 19080039, leaf_abstract_class_true: D003920
leaf_abstract_id: 24097992, leaf_abstract_class_true: D003922
leaf_abstract_id: 19900593, leaf_abstract_class_true: D003920
leaf_abstract_id: 21557709, leaf_abstract_class_true: D003924
leaf_abst

leaf_abstract_id: 3898789, leaf_abstract_class_true: D003920
leaf_abstract_id: 19469035, leaf_abstract_class_true: D003922
leaf_abstract_id: 17978753, leaf_abstract_class_true: D048909
leaf_abstract_id: 25407384, leaf_abstract_class_true: D003921
leaf_abstract_id: 24955140, leaf_abstract_class_true: D048909
leaf_abstract_id: 23135222, leaf_abstract_class_true: D003928
leaf_abstract_id: 19485887, leaf_abstract_class_true: D003928
leaf_abstract_id: 16037302, leaf_abstract_class_true: D003928
leaf_abstract_id: 27068830, leaf_abstract_class_true: D003928
leaf_abstract_id: 23805237, leaf_abstract_class_true: D003924
leaf_abstract_id: 23981649, leaf_abstract_class_true: D014929
leaf_abstract_id: 9356804, leaf_abstract_class_true: D016883
leaf_abstract_id: 19195968, leaf_abstract_class_true: D003928
leaf_abstract_id: 10615959, leaf_abstract_class_true: D003922
leaf_abstract_id: 27694007, leaf_abstract_class_true: D003928
leaf_abstract_id: 18292537, leaf_abstract_class_true: D003922
leaf_abstr

leaf_abstract_id: 9255172, leaf_abstract_class_true: D017719
leaf_abstract_id: 20951838, leaf_abstract_class_true: D048909
leaf_abstract_id: 29446740, leaf_abstract_class_true: D003928
leaf_abstract_id: 10584248, leaf_abstract_class_true: D003929
leaf_abstract_id: 31006638, leaf_abstract_class_true: D017719
leaf_abstract_id: 30297790, leaf_abstract_class_true: D003928
leaf_abstract_id: 9075856, leaf_abstract_class_true: D003922
leaf_abstract_id: 29532281, leaf_abstract_class_true: D003928
leaf_abstract_id: 7607624, leaf_abstract_class_true: D003928
leaf_abstract_id: 23768975, leaf_abstract_class_true: D003929
leaf_abstract_id: 16608049, leaf_abstract_class_true: D003929
leaf_abstract_id: 27894749, leaf_abstract_class_true: D017719
leaf_abstract_id: 22432804, leaf_abstract_class_true: D003922
leaf_abstract_id: 18090675, leaf_abstract_class_true: D003928
leaf_abstract_id: 18241617, leaf_abstract_class_true: D003920
leaf_abstract_id: 24011579, leaf_abstract_class_true: D003928
leaf_abstra

leaf_abstract_id: 19797880, leaf_abstract_class_true: D003929
leaf_abstract_id: 6801211, leaf_abstract_class_true: D003929
leaf_abstract_id: 1628763, leaf_abstract_class_true: D003922
leaf_abstract_id: 23354673, leaf_abstract_class_true: D048909
leaf_abstract_id: 21511679, leaf_abstract_class_true: D003929
leaf_abstract_id: 29899983, leaf_abstract_class_true: D003930
leaf_abstract_id: 28484955, leaf_abstract_class_true: D003930
leaf_abstract_id: 24663066, leaf_abstract_class_true: D003930
leaf_abstract_id: 28745964, leaf_abstract_class_true: D003930
leaf_abstract_id: 7874608, leaf_abstract_class_true: D003929
leaf_abstract_id: 1616217, leaf_abstract_class_true: D003930
leaf_abstract_id: 2607240, leaf_abstract_class_true: D003930
leaf_abstract_id: 17609322, leaf_abstract_class_true: D017719
leaf_abstract_id: 28936764, leaf_abstract_class_true: D017719
leaf_abstract_id: 25384916, leaf_abstract_class_true: D017719
leaf_abstract_id: 12704530, leaf_abstract_class_true: D017719
leaf_abstract

leaf_abstract_id: 22727134, leaf_abstract_class_true: D017719
leaf_abstract_id: 28523719, leaf_abstract_class_true: D003924
leaf_abstract_id: 30052276, leaf_abstract_class_true: D003930
leaf_abstract_id: 18579465, leaf_abstract_class_true: D003922
leaf_abstract_id: 15831258, leaf_abstract_class_true: D017719
leaf_abstract_id: 22762366, leaf_abstract_class_true: D048909
leaf_abstract_id: 16885645, leaf_abstract_class_true: D017719
leaf_abstract_id: 25354872, leaf_abstract_class_true: D003929
leaf_abstract_id: 29802073, leaf_abstract_class_true: D017719
leaf_abstract_id: 20966453, leaf_abstract_class_true: D017719
leaf_abstract_id: 26984197, leaf_abstract_class_true: D003929
leaf_abstract_id: 16401701, leaf_abstract_class_true: D003920
leaf_abstract_id: 6740655, leaf_abstract_class_true: D003930
leaf_abstract_id: 22450499, leaf_abstract_class_true: D003920
leaf_abstract_id: 23511041, leaf_abstract_class_true: D017719
leaf_abstract_id: 16256902, leaf_abstract_class_true: D003929
leaf_abst

leaf_abstract_id: 15083337, leaf_abstract_class_true: D003922
leaf_abstract_id: 208139, leaf_abstract_class_true: D048909
leaf_abstract_id: 2449302, leaf_abstract_class_true: D003922
leaf_abstract_id: 2696847, leaf_abstract_class_true: D003921
leaf_abstract_id: 23739800, leaf_abstract_class_true: D048909
leaf_abstract_id: 27827300, leaf_abstract_class_true: D017719
leaf_abstract_id: 6712371, leaf_abstract_class_true: D003928
leaf_abstract_id: 2376239, leaf_abstract_class_true: D003920
leaf_abstract_id: 11842991, leaf_abstract_class_true: D048909
leaf_abstract_id: 16358955, leaf_abstract_class_true: D003924
leaf_abstract_id: 26293616, leaf_abstract_class_true: D003924
leaf_abstract_id: 9112162, leaf_abstract_class_true: D003930
leaf_abstract_id: 9088772, leaf_abstract_class_true: D016640
leaf_abstract_id: 21960185, leaf_abstract_class_true: D003924
leaf_abstract_id: 3447812, leaf_abstract_class_true: D003922
leaf_abstract_id: 2203546, leaf_abstract_class_true: D003922
leaf_abstract_id: 

leaf_abstract_id: 7744224, leaf_abstract_class_true: D003922
leaf_abstract_id: 17331548, leaf_abstract_class_true: D003928
leaf_abstract_id: 14988273, leaf_abstract_class_true: D003924
leaf_abstract_id: 28359252, leaf_abstract_class_true: D003928
leaf_abstract_id: 29791073, leaf_abstract_class_true: D003922
leaf_abstract_id: 20724178, leaf_abstract_class_true: D003922
leaf_abstract_id: 1869841, leaf_abstract_class_true: D048909
leaf_abstract_id: 20170975, leaf_abstract_class_true: D003922
leaf_abstract_id: 20931529, leaf_abstract_class_true: D003922
leaf_abstract_id: 21956890, leaf_abstract_class_true: D003928
leaf_abstract_id: 1542564, leaf_abstract_class_true: D014929
leaf_abstract_id: 20416221, leaf_abstract_class_true: D003928
leaf_abstract_id: 18090667, leaf_abstract_class_true: D003928
leaf_abstract_id: 18178847, leaf_abstract_class_true: D003922
leaf_abstract_id: 19117022, leaf_abstract_class_true: D003924
leaf_abstract_id: 30229838, leaf_abstract_class_true: D003928
leaf_abstra

leaf_abstract_id: 30314265, leaf_abstract_class_true: D003928
leaf_abstract_id: 27343467, leaf_abstract_class_true: D048909
leaf_abstract_id: 30101366, leaf_abstract_class_true: D003928
leaf_abstract_id: 22728670, leaf_abstract_class_true: D003921
leaf_abstract_id: 2198204, leaf_abstract_class_true: D003930
leaf_abstract_id: 24598200, leaf_abstract_class_true: D003924
leaf_abstract_id: 8609230, leaf_abstract_class_true: D003929
leaf_abstract_id: 29229684, leaf_abstract_class_true: D003928
leaf_abstract_id: 28770830, leaf_abstract_class_true: D058065
leaf_abstract_id: 28089735, leaf_abstract_class_true: D003930
leaf_abstract_id: 28462946, leaf_abstract_class_true: D003921
leaf_abstract_id: 22332899, leaf_abstract_class_true: D003921
leaf_abstract_id: 24610811, leaf_abstract_class_true: D003922
leaf_abstract_id: 1325009, leaf_abstract_class_true: D003928
leaf_abstract_id: 2707043, leaf_abstract_class_true: D003921
leaf_abstract_id: 22439790, leaf_abstract_class_true: D003929
leaf_abstrac

leaf_abstract_id: 10749857, leaf_abstract_class_true: D003921
leaf_abstract_id: 29996768, leaf_abstract_class_true: D003921
leaf_abstract_id: 30135145, leaf_abstract_class_true: D003929
leaf_abstract_id: 29549522, leaf_abstract_class_true: D003924
leaf_abstract_id: 18220624, leaf_abstract_class_true: D003928
leaf_abstract_id: 25765092, leaf_abstract_class_true: D003921
leaf_abstract_id: 12399437, leaf_abstract_class_true: D003920
leaf_abstract_id: 24023648, leaf_abstract_class_true: D003921
leaf_abstract_id: 27077448, leaf_abstract_class_true: D003929
leaf_abstract_id: 16530517, leaf_abstract_class_true: D003929
leaf_abstract_id: 7590101, leaf_abstract_class_true: D003924
leaf_abstract_id: 16260351, leaf_abstract_class_true: D003921
leaf_abstract_id: 21795715, leaf_abstract_class_true: D003928
leaf_abstract_id: 15057667, leaf_abstract_class_true: D003921
leaf_abstract_id: 22492036, leaf_abstract_class_true: D003921
leaf_abstract_id: 25573030, leaf_abstract_class_true: D003928
leaf_abst

leaf_abstract_id: 8044701, leaf_abstract_class_true: D003922
leaf_abstract_id: 30095296, leaf_abstract_class_true: D011236
leaf_abstract_id: 6381010, leaf_abstract_class_true: D003922
leaf_abstract_id: 16756764, leaf_abstract_class_true: D048909
leaf_abstract_id: 10794604, leaf_abstract_class_true: D003921
leaf_abstract_id: 18828606, leaf_abstract_class_true: D003921
leaf_abstract_id: 12171997, leaf_abstract_class_true: D003928
leaf_abstract_id: 15686778, leaf_abstract_class_true: D003925
leaf_abstract_id: 971789, leaf_abstract_class_true: D003930
leaf_abstract_id: 17706592, leaf_abstract_class_true: D003920
leaf_abstract_id: 1289132, leaf_abstract_class_true: D003930
leaf_abstract_id: 7949331, leaf_abstract_class_true: D003920
leaf_abstract_id: 9144557, leaf_abstract_class_true: D003921
leaf_abstract_id: 1955501, leaf_abstract_class_true: D003922
leaf_abstract_id: 27924974, leaf_abstract_class_true: D003921
leaf_abstract_id: 29880646, leaf_abstract_class_true: D003921
leaf_abstract_id

leaf_abstract_id: 29372795, leaf_abstract_class_true: D003924
leaf_abstract_id: 23245702, leaf_abstract_class_true: D003920
leaf_abstract_id: 15629670, leaf_abstract_class_true: D016640
leaf_abstract_id: 24379526, leaf_abstract_class_true: D003930
leaf_abstract_id: 18551113, leaf_abstract_class_true: D003924
leaf_abstract_id: 27307508, leaf_abstract_class_true: D003924
leaf_abstract_id: 7145879, leaf_abstract_class_true: D003920
leaf_abstract_id: 19705975, leaf_abstract_class_true: D003928
leaf_abstract_id: 18787525, leaf_abstract_class_true: D003920
leaf_abstract_id: 29190700, leaf_abstract_class_true: D048909
leaf_abstract_id: 26102344, leaf_abstract_class_true: D016640
leaf_abstract_id: 8796801, leaf_abstract_class_true: D005320
leaf_abstract_id: 2554128, leaf_abstract_class_true: D003922
leaf_abstract_id: 27896902, leaf_abstract_class_true: D016883
leaf_abstract_id: 21357362, leaf_abstract_class_true: D003920
leaf_abstract_id: 29422049, leaf_abstract_class_true: D003924
leaf_abstra

leaf_abstract_id: 26452305, leaf_abstract_class_true: D003922
leaf_abstract_id: 20523293, leaf_abstract_class_true: D048909
leaf_abstract_id: 12692571, leaf_abstract_class_true: D003928
leaf_abstract_id: 8908386, leaf_abstract_class_true: D003922
leaf_abstract_id: 28201997, leaf_abstract_class_true: D003928
leaf_abstract_id: 24617042, leaf_abstract_class_true: D003922
leaf_abstract_id: 23161552, leaf_abstract_class_true: D003928
leaf_abstract_id: 17142210, leaf_abstract_class_true: D048909
leaf_abstract_id: 15696444, leaf_abstract_class_true: D003928
leaf_abstract_id: 15220190, leaf_abstract_class_true: D003922
leaf_abstract_id: 21211789, leaf_abstract_class_true: D048909
leaf_abstract_id: 8585935, leaf_abstract_class_true: D003930
leaf_abstract_id: 31408136, leaf_abstract_class_true: D003922
leaf_abstract_id: 2149688, leaf_abstract_class_true: D003922
leaf_abstract_id: 25579717, leaf_abstract_class_true: D048909
leaf_abstract_id: 20504094, leaf_abstract_class_true: D016640
leaf_abstra

leaf_abstract_id: 22617359, leaf_abstract_class_true: D003922
leaf_abstract_id: 21716731, leaf_abstract_class_true: D003920
leaf_abstract_id: 9878081, leaf_abstract_class_true: D003922
leaf_abstract_id: 10395228, leaf_abstract_class_true: D003922
leaf_abstract_id: 24190679, leaf_abstract_class_true: D003922
leaf_abstract_id: 10670825, leaf_abstract_class_true: D003924
leaf_abstract_id: 7678183, leaf_abstract_class_true: D003921
leaf_abstract_id: 7558135, leaf_abstract_class_true: D003922
leaf_abstract_id: 19845582, leaf_abstract_class_true: D003921
leaf_abstract_id: 26346164, leaf_abstract_class_true: D003928
leaf_abstract_id: 23803215, leaf_abstract_class_true: D003928
leaf_abstract_id: 16357311, leaf_abstract_class_true: D003924
leaf_abstract_id: 11479957, leaf_abstract_class_true: D016640
leaf_abstract_id: 8168209, leaf_abstract_class_true: D003921
leaf_abstract_id: 27696192, leaf_abstract_class_true: D003922
leaf_abstract_id: 16949391, leaf_abstract_class_true: D003920
leaf_abstrac

leaf_abstract_id: 19696699, leaf_abstract_class_true: D003930
leaf_abstract_id: 29590254, leaf_abstract_class_true: D003924
leaf_abstract_id: 3343542, leaf_abstract_class_true: D003920
leaf_abstract_id: 26813039, leaf_abstract_class_true: D003928
leaf_abstract_id: 28796143, leaf_abstract_class_true: D003930
leaf_abstract_id: 31714510, leaf_abstract_class_true: D003930
leaf_abstract_id: 3926565, leaf_abstract_class_true: D016883
leaf_abstract_id: 24026563, leaf_abstract_class_true: D003922
leaf_abstract_id: 28661068, leaf_abstract_class_true: D003924
leaf_abstract_id: 25546434, leaf_abstract_class_true: D003920
leaf_abstract_id: 1992766, leaf_abstract_class_true: D003929
leaf_abstract_id: 26899772, leaf_abstract_class_true: D003920
leaf_abstract_id: 26065854, leaf_abstract_class_true: D003930
leaf_abstract_id: 26895275, leaf_abstract_class_true: D003924
leaf_abstract_id: 12032195, leaf_abstract_class_true: D003928
leaf_abstract_id: 24258251, leaf_abstract_class_true: D003922
leaf_abstra

leaf_abstract_id: 24338577, leaf_abstract_class_true: D003930
leaf_abstract_id: 7007407, leaf_abstract_class_true: D003923
leaf_abstract_id: 15126994, leaf_abstract_class_true: D003924
leaf_abstract_id: 1588823, leaf_abstract_class_true: D003922
leaf_abstract_id: 12153747, leaf_abstract_class_true: D016640
leaf_abstract_id: 29511113, leaf_abstract_class_true: D011236
leaf_abstract_id: 21420034, leaf_abstract_class_true: D003928
leaf_abstract_id: 23073360, leaf_abstract_class_true: D011236
leaf_abstract_id: 7947554, leaf_abstract_class_true: D003930
leaf_abstract_id: 7975867, leaf_abstract_class_true: D003925
leaf_abstract_id: 30096354, leaf_abstract_class_true: D003930
leaf_abstract_id: 8846830, leaf_abstract_class_true: D003921
leaf_abstract_id: 15888973, leaf_abstract_class_true: D003928
leaf_abstract_id: 16750336, leaf_abstract_class_true: D016640
leaf_abstract_id: 29030692, leaf_abstract_class_true: D003930
leaf_abstract_id: 25783684, leaf_abstract_class_true: D048909
leaf_abstract

leaf_abstract_id: 30613010, leaf_abstract_class_true: D003929
leaf_abstract_id: 11398150, leaf_abstract_class_true: D016640
leaf_abstract_id: 16439034, leaf_abstract_class_true: D003922
leaf_abstract_id: 15332321, leaf_abstract_class_true: D003922
leaf_abstract_id: 9745421, leaf_abstract_class_true: D003924
leaf_abstract_id: 30577193, leaf_abstract_class_true: D003922
leaf_abstract_id: 14514604, leaf_abstract_class_true: D003924
leaf_abstract_id: 12433031, leaf_abstract_class_true: D016883
leaf_abstract_id: 24057294, leaf_abstract_class_true: D011236
leaf_abstract_id: 31642261, leaf_abstract_class_true: D016640
leaf_abstract_id: 29855712, leaf_abstract_class_true: D016640
leaf_abstract_id: 29873517, leaf_abstract_class_true: D003922
leaf_abstract_id: 15979893, leaf_abstract_class_true: D003921
leaf_abstract_id: 23183776, leaf_abstract_class_true: D003922
leaf_abstract_id: 28213010, leaf_abstract_class_true: D016640
leaf_abstract_id: 8037760, leaf_abstract_class_true: D003921
leaf_abstr

leaf_abstract_id: 11978690, leaf_abstract_class_true: D003922
leaf_abstract_id: 18227486, leaf_abstract_class_true: D003928
leaf_abstract_id: 12788800, leaf_abstract_class_true: D003924
leaf_abstract_id: 26490387, leaf_abstract_class_true: D005320
leaf_abstract_id: 19995483, leaf_abstract_class_true: D003922
leaf_abstract_id: 10446946, leaf_abstract_class_true: D003922
leaf_abstract_id: 2456231, leaf_abstract_class_true: D048909
leaf_abstract_id: 11579617, leaf_abstract_class_true: D016883
leaf_abstract_id: 28335529, leaf_abstract_class_true: D003924
leaf_abstract_id: 28776083, leaf_abstract_class_true: D058065
leaf_abstract_id: 7780057, leaf_abstract_class_true: D003928
leaf_abstract_id: 3521333, leaf_abstract_class_true: D003921
leaf_abstract_id: 1744558, leaf_abstract_class_true: D003921
leaf_abstract_id: 9439555, leaf_abstract_class_true: D003921
leaf_abstract_id: 7200671, leaf_abstract_class_true: D003930
leaf_abstract_id: 7835192, leaf_abstract_class_true: D003930
leaf_abstract_i

leaf_abstract_id: 30747310, leaf_abstract_class_true: D003921
leaf_abstract_id: 19860658, leaf_abstract_class_true: D017719
leaf_abstract_id: 10943720, leaf_abstract_class_true: D003929
leaf_abstract_id: 21062351, leaf_abstract_class_true: D003929
leaf_abstract_id: 28772216, leaf_abstract_class_true: D003929
leaf_abstract_id: 26739825, leaf_abstract_class_true: D003930
leaf_abstract_id: 8500860, leaf_abstract_class_true: D003921
leaf_abstract_id: 30213518, leaf_abstract_class_true: D003921
leaf_abstract_id: 10704693, leaf_abstract_class_true: D003921
leaf_abstract_id: 3937586, leaf_abstract_class_true: D003921
leaf_abstract_id: 14693616, leaf_abstract_class_true: D003921
leaf_abstract_id: 20456626, leaf_abstract_class_true: D003921
leaf_abstract_id: 9334651, leaf_abstract_class_true: D003921
leaf_abstract_id: 3438569, leaf_abstract_class_true: D003921
leaf_abstract_id: 21181398, leaf_abstract_class_true: D016640
leaf_abstract_id: 19556298, leaf_abstract_class_true: D003928
leaf_abstrac

leaf_abstract_id: 29361669, leaf_abstract_class_true: D003928
leaf_abstract_id: 28434974, leaf_abstract_class_true: D003924
leaf_abstract_id: 18216149, leaf_abstract_class_true: D003928
leaf_abstract_id: 6802115, leaf_abstract_class_true: D003921
leaf_abstract_id: 24374093, leaf_abstract_class_true: D003921
leaf_abstract_id: 16478771, leaf_abstract_class_true: D011236
leaf_abstract_id: 24008114, leaf_abstract_class_true: D003921
leaf_abstract_id: 3910515, leaf_abstract_class_true: D003921
leaf_abstract_id: 17508918, leaf_abstract_class_true: D003925
leaf_abstract_id: 10961714, leaf_abstract_class_true: D003921
leaf_abstract_id: 590651, leaf_abstract_class_true: D003921
leaf_abstract_id: 20975626, leaf_abstract_class_true: D003921
leaf_abstract_id: 27247947, leaf_abstract_class_true: D058065
leaf_abstract_id: 12604701, leaf_abstract_class_true: D003921
leaf_abstract_id: 2507230, leaf_abstract_class_true: D003930
leaf_abstract_id: 2973668, leaf_abstract_class_true: D003921
leaf_abstract_

leaf_abstract_id: 31151389, leaf_abstract_class_true: D003930
leaf_abstract_id: 27423690, leaf_abstract_class_true: D003929
leaf_abstract_id: 9844163, leaf_abstract_class_true: D017719
leaf_abstract_id: 26222147, leaf_abstract_class_true: D003925
leaf_abstract_id: 8772396, leaf_abstract_class_true: D003921
leaf_abstract_id: 8114473, leaf_abstract_class_true: D003930
leaf_abstract_id: 27723894, leaf_abstract_class_true: D003930
leaf_abstract_id: 2968350, leaf_abstract_class_true: D003928
leaf_abstract_id: 24240028, leaf_abstract_class_true: D003925
leaf_abstract_id: 3556280, leaf_abstract_class_true: D003930
leaf_abstract_id: 3568957, leaf_abstract_class_true: D003929
leaf_abstract_id: 28720285, leaf_abstract_class_true: D003928
leaf_abstract_id: 16424533, leaf_abstract_class_true: D003930
leaf_abstract_id: 19950068, leaf_abstract_class_true: D003921
leaf_abstract_id: 20419875, leaf_abstract_class_true: D017719
leaf_abstract_id: 2186971, leaf_abstract_class_true: D003930
leaf_abstract_i

leaf_abstract_id: 6889717, leaf_abstract_class_true: D003930
leaf_abstract_id: 6912769, leaf_abstract_class_true: D003930
leaf_abstract_id: 23345007, leaf_abstract_class_true: D003929
leaf_abstract_id: 26712272, leaf_abstract_class_true: D003928
leaf_abstract_id: 29481409, leaf_abstract_class_true: D003929
leaf_abstract_id: 3329077, leaf_abstract_class_true: D003929
leaf_abstract_id: 7564132, leaf_abstract_class_true: D003930
leaf_abstract_id: 20851003, leaf_abstract_class_true: D017719
leaf_abstract_id: 11980878, leaf_abstract_class_true: D003930
leaf_abstract_id: 11168952, leaf_abstract_class_true: D003928
leaf_abstract_id: 21190419, leaf_abstract_class_true: D016640
leaf_abstract_id: 20545630, leaf_abstract_class_true: D011236
leaf_abstract_id: 8432220, leaf_abstract_class_true: D003930
leaf_abstract_id: 27832978, leaf_abstract_class_true: D003920
leaf_abstract_id: 27379710, leaf_abstract_class_true: D048909
leaf_abstract_id: 10693634, leaf_abstract_class_true: D003929
leaf_abstract

leaf_abstract_id: 26588885, leaf_abstract_class_true: D017719
leaf_abstract_id: 8679315, leaf_abstract_class_true: D003929
leaf_abstract_id: 24282041, leaf_abstract_class_true: D003929
leaf_abstract_id: 9857714, leaf_abstract_class_true: D003925
leaf_abstract_id: 22998552, leaf_abstract_class_true: D003929
leaf_abstract_id: 9810997, leaf_abstract_class_true: D003925
leaf_abstract_id: 2125798, leaf_abstract_class_true: D016883
leaf_abstract_id: 24330927, leaf_abstract_class_true: D003930
leaf_abstract_id: 26737572, leaf_abstract_class_true: D003930
leaf_abstract_id: 24216319, leaf_abstract_class_true: D003920
leaf_abstract_id: 17068451, leaf_abstract_class_true: D003924
leaf_abstract_id: 17000364, leaf_abstract_class_true: D016640
leaf_abstract_id: 24984585, leaf_abstract_class_true: D048909
leaf_abstract_id: 1803962, leaf_abstract_class_true: D003930
leaf_abstract_id: 28502205, leaf_abstract_class_true: D016640
leaf_abstract_id: 2064492, leaf_abstract_class_true: D003930
leaf_abstract_

leaf_abstract_id: 7988921, leaf_abstract_class_true: D048909
leaf_abstract_id: 21518418, leaf_abstract_class_true: D048909
leaf_abstract_id: 11097446, leaf_abstract_class_true: D003925
leaf_abstract_id: 30537261, leaf_abstract_class_true: D003924
leaf_abstract_id: 15198373, leaf_abstract_class_true: D003929
leaf_abstract_id: 18307227, leaf_abstract_class_true: D048909
leaf_abstract_id: 29040836, leaf_abstract_class_true: D003921
leaf_abstract_id: 29572238, leaf_abstract_class_true: D003922
leaf_abstract_id: 24500073, leaf_abstract_class_true: D058065
leaf_abstract_id: 23163982, leaf_abstract_class_true: D017719
leaf_abstract_id: 10023125, leaf_abstract_class_true: D003929
leaf_abstract_id: 2076173, leaf_abstract_class_true: D003928
leaf_abstract_id: 9542990, leaf_abstract_class_true: D017719
leaf_abstract_id: 11953094, leaf_abstract_class_true: D048909
leaf_abstract_id: 10951880, leaf_abstract_class_true: D048909
leaf_abstract_id: 17986564, leaf_abstract_class_true: D048909
leaf_abstra

leaf_abstract_id: 9103498, leaf_abstract_class_true: D003921
leaf_abstract_id: 11809625, leaf_abstract_class_true: D016640
leaf_abstract_id: 3104431, leaf_abstract_class_true: D016883
leaf_abstract_id: 25796531, leaf_abstract_class_true: D016640
leaf_abstract_id: 25658504, leaf_abstract_class_true: D003924
leaf_abstract_id: 12459080, leaf_abstract_class_true: D003925
leaf_abstract_id: 18823739, leaf_abstract_class_true: D003928
leaf_abstract_id: 28440775, leaf_abstract_class_true: D003920
leaf_abstract_id: 25506430, leaf_abstract_class_true: D016640
leaf_abstract_id: 7823364, leaf_abstract_class_true: D003929
leaf_abstract_id: 18667997, leaf_abstract_class_true: D003924
leaf_abstract_id: 30987324, leaf_abstract_class_true: D003921
leaf_abstract_id: 1308382, leaf_abstract_class_true: D003925
leaf_abstract_id: 9949903, leaf_abstract_class_true: D003922
leaf_abstract_id: 2248418, leaf_abstract_class_true: D003925
leaf_abstract_id: 1952266, leaf_abstract_class_true: D003929
leaf_abstract_i

leaf_abstract_id: 25692273, leaf_abstract_class_true: D017719
leaf_abstract_id: 29849493, leaf_abstract_class_true: D003921
leaf_abstract_id: 8078099, leaf_abstract_class_true: D003921
leaf_abstract_id: 20061915, leaf_abstract_class_true: D011236
leaf_abstract_id: 3815372, leaf_abstract_class_true: D003921
leaf_abstract_id: 7257333, leaf_abstract_class_true: D003925
leaf_abstract_id: 18306449, leaf_abstract_class_true: D003925
leaf_abstract_id: 2689069, leaf_abstract_class_true: D048909
leaf_abstract_id: 23455657, leaf_abstract_class_true: D048909
leaf_abstract_id: 24890165, leaf_abstract_class_true: D003921
leaf_abstract_id: 15630637, leaf_abstract_class_true: D048909
leaf_abstract_id: 24912533, leaf_abstract_class_true: D017719
leaf_abstract_id: 17631511, leaf_abstract_class_true: D003928
leaf_abstract_id: 11465653, leaf_abstract_class_true: D003921
leaf_abstract_id: 17249333, leaf_abstract_class_true: D003925
leaf_abstract_id: 27901470, leaf_abstract_class_true: D058065
leaf_abstrac

leaf_abstract_id: 19015731, leaf_abstract_class_true: D016640
leaf_abstract_id: 3072876, leaf_abstract_class_true: D048909
leaf_abstract_id: 29460646, leaf_abstract_class_true: D016640
leaf_abstract_id: 18762913, leaf_abstract_class_true: D003921
leaf_abstract_id: 25208685, leaf_abstract_class_true: D016640
leaf_abstract_id: 30225687, leaf_abstract_class_true: D016640
leaf_abstract_id: 29986150, leaf_abstract_class_true: D011236
leaf_abstract_id: 3058373, leaf_abstract_class_true: D003929
leaf_abstract_id: 24090161, leaf_abstract_class_true: D016640
leaf_abstract_id: 22440386, leaf_abstract_class_true: D016883
leaf_abstract_id: 19166130, leaf_abstract_class_true: D016883
leaf_abstract_id: 19834315, leaf_abstract_class_true: D003922
leaf_abstract_id: 21091094, leaf_abstract_class_true: D048909
leaf_abstract_id: 7237684, leaf_abstract_class_true: D003921
leaf_abstract_id: 15178665, leaf_abstract_class_true: D016640
leaf_abstract_id: 19185985, leaf_abstract_class_true: D003920
leaf_abstra

leaf_abstract_id: 11192103, leaf_abstract_class_true: D016640
leaf_abstract_id: 23207871, leaf_abstract_class_true: D003921
leaf_abstract_id: 15838400, leaf_abstract_class_true: D006944
leaf_abstract_id: 24124964, leaf_abstract_class_true: D003924
leaf_abstract_id: 24964071, leaf_abstract_class_true: D003920
leaf_abstract_id: 26769102, leaf_abstract_class_true: D016640
leaf_abstract_id: 21109476, leaf_abstract_class_true: D016640
leaf_abstract_id: 18437353, leaf_abstract_class_true: D016640
leaf_abstract_id: 3534980, leaf_abstract_class_true: D003922
leaf_abstract_id: 24746173, leaf_abstract_class_true: D003922
leaf_abstract_id: 16967811, leaf_abstract_class_true: D003929
leaf_abstract_id: 1872305, leaf_abstract_class_true: D003925
leaf_abstract_id: 9230065, leaf_abstract_class_true: D048909
leaf_abstract_id: 27390971, leaf_abstract_class_true: D003924
leaf_abstract_id: 11793023, leaf_abstract_class_true: D003924
leaf_abstract_id: 10641958, leaf_abstract_class_true: D003924
leaf_abstra

leaf_abstract_id: 24389556, leaf_abstract_class_true: D003921
leaf_abstract_id: 27465377, leaf_abstract_class_true: D016640
leaf_abstract_id: 14746166, leaf_abstract_class_true: D003925
leaf_abstract_id: 25450302, leaf_abstract_class_true: D016640
leaf_abstract_id: 12830021, leaf_abstract_class_true: D003924
leaf_abstract_id: 23470316, leaf_abstract_class_true: D016640
leaf_abstract_id: 19625702, leaf_abstract_class_true: D003924
leaf_abstract_id: 11480453, leaf_abstract_class_true: D003925
leaf_abstract_id: 3930307, leaf_abstract_class_true: D016883
leaf_abstract_id: 23520452, leaf_abstract_class_true: D003924
leaf_abstract_id: 3082174, leaf_abstract_class_true: D003924
leaf_abstract_id: 30496135, leaf_abstract_class_true: D011236
leaf_abstract_id: 29461236, leaf_abstract_class_true: D003924
leaf_abstract_id: 22326749, leaf_abstract_class_true: D003920
leaf_abstract_id: 28401628, leaf_abstract_class_true: D003922
leaf_abstract_id: 6672992, leaf_abstract_class_true: D003922
leaf_abstra

leaf_abstract_id: 15133797, leaf_abstract_class_true: D005320
leaf_abstract_id: 19636976, leaf_abstract_class_true: D016640
leaf_abstract_id: 3041155, leaf_abstract_class_true: D003930
leaf_abstract_id: 11349170, leaf_abstract_class_true: D016640
leaf_abstract_id: 8099822, leaf_abstract_class_true: D005320
leaf_abstract_id: 22730196, leaf_abstract_class_true: D017719
leaf_abstract_id: 21295850, leaf_abstract_class_true: D016640
leaf_abstract_id: 4000644, leaf_abstract_class_true: D003930
leaf_abstract_id: 12507626, leaf_abstract_class_true: D003930
leaf_abstract_id: 28110940, leaf_abstract_class_true: D048909
leaf_abstract_id: 412058, leaf_abstract_class_true: D048909
leaf_abstract_id: 2976761, leaf_abstract_class_true: D003930
leaf_abstract_id: 25537242, leaf_abstract_class_true: D005320
leaf_abstract_id: 15902122, leaf_abstract_class_true: D005320
leaf_abstract_id: 2971076, leaf_abstract_class_true: D003930
leaf_abstract_id: 26019142, leaf_abstract_class_true: D003925
leaf_abstract_i

leaf_abstract_id: 20070983, leaf_abstract_class_true: D003925
leaf_abstract_id: 10584509, leaf_abstract_class_true: D003922
leaf_abstract_id: 19027331, leaf_abstract_class_true: D003922
leaf_abstract_id: 11193214, leaf_abstract_class_true: D005320
leaf_abstract_id: 28926293, leaf_abstract_class_true: D048909
leaf_abstract_id: 27738926, leaf_abstract_class_true: D003921
leaf_abstract_id: 29088414, leaf_abstract_class_true: D003924
leaf_abstract_id: 20216492, leaf_abstract_class_true: D017719
leaf_abstract_id: 29142171, leaf_abstract_class_true: D003920
leaf_abstract_id: 23594738, leaf_abstract_class_true: D005320
leaf_abstract_id: 22380687, leaf_abstract_class_true: D017719
leaf_abstract_id: 23682543, leaf_abstract_class_true: D017719
leaf_abstract_id: 23972010, leaf_abstract_class_true: D048909
leaf_abstract_id: 11960290, leaf_abstract_class_true: D003922
leaf_abstract_id: 22306653, leaf_abstract_class_true: D005320
leaf_abstract_id: 24024338, leaf_abstract_class_true: D003928
leaf_abs

Node id: 1 (depth: 0, cluster_label: None, children: [2, 187])

In [20]:
#print(treeFBE.get_precision_macro())
#print(treeFBE.get_recall_macro())
#print(treeFBE.get_F1())
print(treeFBE.get_performances())

{'prec_micro': 0.2211752045099421, 'prec_macro': 0.3180116921911039, 'recall_micro': 0.28332528482767255, 'recall_macro': 0.16608015672409682, 'F1_micro': 0.24842206950829182, 'F1_macro': 0.21820417673855752}


In [14]:
from pprint import pprint
print("Performance FBE:")
pprint(treeFBE.get_performances())
print()
print("Performance scikit learn: ")
pprint(treeClass.get_performances())

Performance FBE:
{'F1_macro': 0.22118151699607877,
 'F1_micro': 0.2333164135121727,
 'F1_zhao': 0.20859647776081247,
 'prec_macro': 0.3338511201744625,
 'prec_micro': 0.1900367478940206,
 'recall_macro': 0.16537126576469596,
 'recall_micro': 0.30212301693763305}

Performance scikit learn: 
{'F1_macro': 0.4441254035533422,
 'F1_micro': 0.19230689421131783,
 'F1_zhao': 0.23525969824232465,
 'prec_macro': 0.5810909145747698,
 'prec_micro': 0.11243403384883459,
 'recall_macro': 0.3594108035763631,
 'recall_micro': 0.6640374881508111}


In [106]:
sentences.select('index', matchClass_udf('index').alias("uniqueCluster")).groupby("uniqueCluster").count().show()


+-----------+-----+
|uniqueClass|count|
+-----------+-----+
|       1159|   21|
|       1090|  234|
|        296|   51|
|        691|   33|
|        125|    3|
|        666|  256|
|       1280|  334|
|        124| 1199|
|        718|  312|
|        740| 1173|
|        169|   41|
|        747|   46|
|       1425|   19|
|        577|    5|
|        272|   25|
|         54|  968|
|        282|    7|
|        232|    1|
|        483|   27|
|       1158|    5|
+-----------+-----+
only showing top 20 rows



### Calculate Performance for different FBE configurations. 
Limit number of abstracts to 2000; top words: 5

In [1]:
class Tree(object):
    
    def __init__(self, tree_hierarchy, clusters_predict=[], mode="sklearn", sentences_all_classes=None, true_classes_all=None):
        """
        @param mode : Two possible values
            - "FBE" : Tree object for Feedback Explorer output
            - "sklearn" : Tree object for scikit learn output
        
        @param sentences_all_classes : List of all possible classes occuring in the sentences file (only for mode FBE)
        @param true_labels_all : All occuring true labels (mesh codes) of the documents/abstracts
        """
        self.tree = None
        if mode in ["sklearn", "FBE"]:
            self.mode = mode
        else:
            raise ValueError("Provided mode '{}' is not supported".format(mode))
        self.tree_hierarchy = tree_hierarchy # pandas dataframe with tree structure coming from hierarchical clustering
        self.n_nodes = 0 # updated by calling self.count_nodes()
        self.n_leafs = 0 # updated by calling self.count_leafs()
        self.temp_n_leafs = 1 # In mode 'FBE' helps to construct the tree with the right number of nodes
        self.clusters_predict = clusters_predict # predicted cluster for each document
        self.unique_cluster_predict = list(set(clusters_predict)) # list of all classes to calculate performance metrices
        self.leaf_nodes = [] # list of all leaf nodes
        self.sentences_all_classes = sentences_all_classes # List of all classes occuring in sentences (phrases.parquet)
        self.true_classes_documents = true_classes_all.values.tolist() # list of true labels (mesh codes) in the abstracts
        self.true_classes_documents_unique = list(set(true_classes_all)) # all possible occuring true labels (mesh codes) in the abstracts
        self.precision_all_nodes = [] # macro
        self.precision_all_nodes_weighted = []
        self.precision_all_nodes_weights = 0
        self.precision_macro = None 
        self.precision_micro = None
        self.recall_all_classes = []
        self.recall_all_classes_weighted = []
        self.recall_macro = None
        self.recall_micro = None
        self.F1_macro = None
        self.F1_micro = None
        self.maxDepth = 0
        
        
    def _build_tree(self, node, current_depth=None):

        if self.mode == "FBE": 
            # Only create node if node is in current depth level
            if node.depth == current_depth and self.temp_n_leafs < MAX_LEAFS:
                treeChildren = self.tree_hierarchy.iloc[node.node_id].children
                print()
                print("{}".format(node))
                print("tree children: {}".format(treeChildren))
    
                # FBE tree is not a perfect binary tree, some nodes don't create children any more
                if len(treeChildren) > 0:
                    cluster_child_one = self.tree_hierarchy.iloc[treeChildren[0]].filterValue[0]
                    cluster_child_two = self.tree_hierarchy.iloc[treeChildren[1]].filterValue[0]
                    print("\tcluster c1: {}, cluster c2: {}".format(cluster_child_one, cluster_child_two))
                    # Some nodes from nodes.json are empty: no sentences is going through them
                    # Only create node in tree when there is a sentence running through it
#                    if cluster_child_one in self.sentences_all_classes and cluster_child_two in self.sentences_all_classes:
#                        self.temp_n_leafs -= 1 # lose one leaf because it is split into two new leafs
#                        print("\tc1 and c2 have sentences")
#                        node.add_child(Node(Id=treeChildren[0], depth=node.depth + 1, parent=node, cluster_label=cluster_child_one))
#                        node.add_child(Node(Id=treeChildren[1], depth=node.depth + 1, parent=node, cluster_label=cluster_child_two))
#                        self.temp_n_leafs += 2                    
                    if cluster_child_one in self.sentences_all_classes:
                        self.temp_n_leafs -= 1 # lose one leaf because it is split into two new leafs
                        node.add_child(Node(Id=treeChildren[0], depth=node.depth + 1, parent=node, cluster_label=cluster_child_one))
                        self.temp_n_leafs += 1
                        if cluster_child_two in sentences_all_classes:
                            print("\tc1 and c2 have sentences")
                            node.add_child(Node(Id=treeChildren[1], depth=node.depth + 1, parent=node, cluster_label=cluster_child_two))
                            self.temp_n_leafs += 1
                        else:
                            print("\tonly c1 has sentence")
                    elif cluster_child_two in sentences_all_classes:
                        print("\tonly c2 has sentences")
                        self.temp_n_leafs -= 1 # lose one leaf because it is split into two new leafs
                        node.add_child(Node(Id=treeChildren[1], depth=node.depth + 1, parent=node, cluster_label=cluster_child_two))
                        self.temp_n_leafs += 1          
                    else:
                        print("\tno sentence for c1 and c2")
            else:
                if len(node.children) == 1 and self.temp_n_leafs < MAX_LEAFS:
                    print("one child deeper (node.id={}, node depth={})".format(node.node_id, node.depth))
                    self._build_tree(node.children[0], current_depth)
                elif len(node.children) == 2 and self.temp_n_leafs < MAX_LEAFS:
                    print("two childs deeper (node.id={}, node depth : {}):".format(node.node_id, node.depth))
                    print("\t{}".format(node.children[0]))
                    print("\t{}".format(node.children[1]))
                    self._build_tree(node.children[0], current_depth)
                    self._build_tree(node.children[1], current_depth)
                #else:
                #    print("no deeper")
            return node

    def _update_leaf_to_root(self, node, abstract_id, class_predict):
        """ Updates node and all its ancestors up to the root with the abstract's id and the predicted class"""
        node.update_node(abstract_id, class_predict)
        if node.parent != None: # Root has no parent
            self._update_leaf_to_root(node.parent, abstract_id, class_predict)
    

    def set_build_tree(self,node):
        """ Builds the tree and sets the variable tree."""  

        # tree with MAX_LEAFS leafs is constructed. 
        # For sklearn add to each leaf its cluster label based on the children in the tree object from sklearn AgglomerativeClustering
        self.leaf_nodes = []
        if self.mode == "sklearn":
            tree = self._build_tree(node) # construct whole tree
            tree = self._get_cluster_labels_for_leafs(tree) # get labels for leafs
            tree = self._cut_nodes_from_leafs(tree) # cut nodes from bottom of the tree until only leafs with a unique cluster_label exist (Number leaves = MAX_LEAFS)
        elif self.mode == "FBE":
            self.temp_n_leafs = 1
            self.maxDepth = 0
            self._get_maxDepth(0, 0)
            depth = 0
            print("maxDepth: {}".format(self.maxDepth))
            # build tree by level: create first all children for level 1, then level 2... 
            # Prevents that a tree creates children just in one branch and always goes deeper in case of a max number of leavese
            while self.temp_n_leafs < MAX_LEAFS and depth <= 15:#self.maxDepth:
                print("\n\ndepth: {}, temp_n_leafs: {}".format(depth, self.temp_n_leafs))
                tree = self._build_tree(node, depth)
                depth += 1

            self.tree=tree
            print("\n\n\n==========\n")
            print("Count nodes: {}; leafs: {}".format(self.count_nodes(), self.count_leafs()))
            self._cut_nodes_from_leafs(self.tree)
            print("Count nodes: {}; leafs: {}".format(self.count_nodes(), self.count_leafs()))

#        assert isinstance(tree, Node)
#        self.tree = tree
#        print("Count nodes: {}; leafs: {}".format(self.count_nodes(), self.count_leafs()))
        
        

    def _get_maxDepth(self, i, depth):
        """ get max depth of tree"""
        if depth > self.maxDepth:
            self.maxDepth = depth        
        node = self.tree_hierarchy.iloc[i]
        #print(node)
        if len(node.children) == 1:
            self._get_maxDepth(node.children[0], depth+1)
        elif len(node.children) == 2:
            self._get_maxDepth(node.children[0], depth+1)
            self._get_maxDepth(node.children[1], depth+1)
            
    def count_nodes(self, tree=None):
        self.n_nodes = 0

                
        def _walk_count_nodes(node):
            self.n_nodes += 1

            for child in node.children:
                _walk_count_nodes(child)   
                
        if tree == None:
            _walk_count_nodes(self.tree)
        else:
            _walk_count_nodes(tree)
        return self.n_nodes        
    
    def count_leafs(self, tree=None):

        def _walk_count_leafs(node):
            if node.children == []:
                self.n_leafs += 1
                self.leaf_nodes.append(node)
            else:
                for child in node.children:
                    _walk_count_leafs(child)
        
        self.n_leafs = 0
        self.leaf_nodes = []
        if tree == None:
            _walk_count_leafs(self.tree)
        else:
            _walk_count_leafs(tree)
        return self.n_leafs 

    def get_leaf_nodes(self):
        def _walk_leaf_nodes(node):
            if node.children == []:
                self.leaf_nodes.append(node)
            else:
                for child in node.children:
                    _walk_leaf_nodes(child)
        
        self.leaf_nodes = []
        _walk_leaf_nodes(self.tree)
        return self.leaf_nodes    
    
    def _cut_nodes_from_leafs(self, node):
        """ 
            Children of nodes, who are leafs and have the same cluster_label, are cut off
            and the parent node takes the cluster label of its children.
            This is done recursively until there are only leafs with unique cluster_labels 
            Number of leaves = MAX_LEAFS
        
        """
        
        if len(node.children) == 1: 
            print(node)
            temp = node
            while len(temp.children) == 1:
                temp = temp.children[0]
                print("\tchild: {}".format(temp))
            if len(temp.children) == 2:
                print("\t two children")
                self._cut_nodes_from_leafs(temp.children[0])    
                self._cut_nodes_from_leafs(temp.children[1])
            else:
                print("\treset node")
                node.children = []
            #return node
        elif len(node.children) == 2:
            self._cut_nodes_from_leafs(node.children[0])    
            self._cut_nodes_from_leafs(node.children[1])    
        #else:
        #    return node
        return node
#        if len(node.children) > 0: 
#            left_child = node.children[0]
#            right_child = node.children[1]
#            if left_child.cluster_label is None: # left child is not leaf 
#                self._cut_nodes_from_leafs(left_child)
#            if right_child.cluster_label is None: # right child is not leaf 
#                self._cut_nodes_from_leafs(right_child)#

            # should be updated now
#            left_child = node.children[0]
#            right_child = node.children[1]
#            if left_child.cluster_label == right_child.cluster_label and left_child.cluster_label is not None:
#                node.children = []
#                node.cluster_label = left_child.cluster_label
#                return node
#
#        return node        

In [2]:
MAX_LEAFS = 32

fbe_path = "/home/adrian/workspace/FBE output/maxTopwords_8_tags_maxClasses1024_N2000"

print("Load data file: {}".format(fbe_path))
spark = pyspark.sql.SparkSession.builder.getOrCreate()
sentences = spark.read.load(fbe_path+"/phrases/")
df_short = sentences.select("id", "tokens", "index")
print(df_short.where(col("id") == 22771646).show())
print("N sentences: {}".format(sentences.count()))

print("Load tree..")
nodes = pd.read_json(fbe_path+"/nodes.json", orient="records")
print("nodes: {}".format(nodes.shape))

print("Get list with all possible classes in the sentences file..")
sentences_all_classes, sentences_pd_with_classes = get_list_all_possible_classes(sentences)
print("Number of classes in sentences file: {}".format(len(sentences_all_classes)))
print("Merged dataset with true classes: {}".format(sentences_pd_with_classes.shape))

print("initialise tree..")
treeFBE = Tree(nodes
            , mode="FBE"
            , sentences_all_classes=sentences_all_classes
            , true_classes_all=sentences_pd_with_classes["mesh_ui_diab"])

root = Node(Id=1, depth=0, parent=None, children=[]) # Id = 1 because start at Explorer 
treeFBE.set_build_tree(root)

print("Associate cluster to each sentence..")
sentences_pd_with_classes_uniqueCluster = associate_unique_cluster_to_documents(sentences, treeFBE)
print("Unique clusters in sentences: {}".format(sentences_pd_with_classes_uniqueCluster["uniqueCluster"].nunique())) #####

print("Fit..")
treeFBE.fitTree(treeFBE.tree, sentences_pd_with_classes_uniqueCluster)

pprint(treeFBE.get_performances())

Load data file: /home/adrian/workspace/FBE output/maxTopwords_8_tags_maxClasses1024_N2000


NameError: name 'pyspark' is not defined

In [28]:
treeFBE.tree.children[0].children[0].children[0]

Node id: 4 (depth: 3, cluster_label: 8, children: [5, 6])

In [131]:
#print(609 in sentences_all_classes)
#nodes[["strLinks", "filterValue", "children"]].iloc[365].head()

treeFBE.get_leaf_nodes()

[Node id: 21 (depth: 8, cluster_label: 890, children: []),
 Node id: 48 (depth: 8, cluster_label: 891, children: []),
 Node id: 49 (depth: 5, cluster_label: 105, children: []),
 Node id: 56 (depth: 6, cluster_label: 128, children: []),
 Node id: 71 (depth: 6, cluster_label: 129, children: []),
 Node id: 89 (depth: 5, cluster_label: 16, children: []),
 Node id: 106 (depth: 5, cluster_label: 17, children: []),
 Node id: 173 (depth: 9, cluster_label: 322, children: []),
 Node id: 174 (depth: 9, cluster_label: 323, children: []),
 Node id: 176 (depth: 8, cluster_label: 298, children: []),
 Node id: 177 (depth: 8, cluster_label: 299, children: []),
 Node id: 180 (depth: 6, cluster_label: 45, children: []),
 Node id: 190 (depth: 8, cluster_label: 112, children: []),
 Node id: 191 (depth: 8, cluster_label: 113, children: []),
 Node id: 192 (depth: 7, cluster_label: 73, children: []),
 Node id: 201 (depth: 8, cluster_label: 324, children: []),
 Node id: 202 (depth: 8, cluster_label: 325, child

In [101]:
print(873 in sentences_all_classes)
nodes[["strLinks", "filterValue", "children"]].iloc[375].head()

False


strLinks       {'1': [830, 831]}
filterValue                [791]
children                      []
Name: 375, dtype: object

In [27]:
4
{'F1_macro': 0.8467197871278361,
 'F1_micro': 0.6354458480220039,
 'prec_macro': 0.7356943279744165,
 'prec_micro': 0.4671033336592042,
 'recall_macro': 0.9972114441197784,
 'recall_micro': 0.9935}

6
{'F1_macro': 0.7356349027609322,
 'F1_micro': 0.6202309798501623,
 'prec_macro': 0.5884578696287001,
 'prec_micro': 0.45188113413304254,
 'recall_macro': 0.980985603499415,
 'recall_micro': 0.9885}

8
{'F1_macro': 0.6633409158744573,
 'F1_micro': 0.5998594041757052,
 'prec_macro': 0.5265395307091034,
 'prec_micro': 0.4564950980392157,
 'recall_macro': 0.8961791873294477,
 'recall_micro': 0.8745}

Performance scikit learn: 
{'F1_macro': 0.7182067311303414,
 'F1_micro': 0.2036082121751319,
 'prec_macro': 0.5603293541866284,
 'prec_micro': 0.11334402524839769,
 'recall_macro': 0.9999511409627143,
 'recall_micro': 0.9999105721593247}

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
8
5
6
4
5
6
7
8
6
3
4
5
6
7
8
5
6
7
8
4
5
6
7
8
9
9
7
8
8
9
6
7
5
6
7
8
8
7
2
3
4
5
6
7
8
8
9
10
5
4
5
6
7
7
8
9
10
11
11
10
11
12
11
12
9
10
8
9
10
11
10
11
12
9
6
5
6
7
7
8
6
7
8
8
9
3
4
5
6
6
5
6
6
4
5
6
7
8
7
6
7
8
9
10
11
12
13
8
9
10
11
12
13
14
15
16
17
18
5
6
7
8
8
7
1


18

In [17]:
Nnodes = 0

def get_nodes(node):
    """ get max depth of tree"""
    global Nnodes
    Nnodes += 1
    if len(node.children) == 1:
        get_nodes(node.children[0])
    elif len(node.children) == 2:
        get_nodes(node.children[0])
        get_nodes(node.children[1])

    
get_nodes(treeFBE.tree)
Nnodes

140

In [46]:
nodes.iloc[1]

name                                                            Explorer
tagId                                                                  1
color                                                                NaN
annotations            [{'tokens': ['diabetic'], 'tag': 2, 'from': No...
algo                                             {'value': 'clustering'}
strLinks                                                   {'1': [2, 3]}
strClassPath                                  {'2': [0, 1], '3': [0, 1]}
names                                                                 {}
filterMode                                            {'value': 'anyIn'}
filterValue                                                          [1]
maxTopWords                                                            6
windowSize                                                           NaN
classCenters                                            {'2': 0, '3': 1}
cError                            [0.29197365929895

In [35]:
from collections import Counter
ll = [5,5,4,7,8,4,7,7,7,7,0,2,5, 5, 5]
print(Counter(ll))
print(Counter(ll).most_common()[0][0])
print(Counter(ll).most_common())


Counter({5: 5, 7: 5, 4: 2, 8: 1, 0: 1, 2: 1})
5
[(5, 5), (7, 5), (4, 2), (8, 1), (0, 1), (2, 1)]


In [42]:
class_counts = Counter(ll).most_common()
[c for c, occ in class_counts  if occ == class_counts[0][1]]

[5, 7]