# Esercizio 3 - Sequence tree

#### Supporting functions

In [3]:
from datetime import timedelta
import os
from datetime import datetime
import itertools
import random
import math

# Some helper functions!

#takes a timedelta and prints it in a human-readable format
def format_timedelta(td: timedelta) -> str:
    """Render a timedelta as a compact string such as ``1y 1mo 5d 2h 5s``.

    Years and months are approximated as 365 and 30 days respectively.
    Units whose value is zero are left out; if every unit is zero the
    result is ``"0s"``. Microseconds are ignored.
    """
    years, leftover_days = divmod(td.days, 365)
    months, leftover_days = divmod(leftover_days, 30)
    hours, leftover_seconds = divmod(td.seconds, 3600)
    minutes, seconds = divmod(leftover_seconds, 60)

    units = (
        (years, "y"),
        (months, "mo"),
        (leftover_days, "d"),
        (hours, "h"),
        (minutes, "m"),
        (seconds, "s"),
    )
    pieces = [f"{amount}{suffix}" for amount, suffix in units if amount]

    return " ".join(pieces) if pieces else "0s"
  
# Given a set with binary classes, computes entropy
def compute_entropy(dataset_Y):
    """Return the Shannon entropy (in bits) of a collection of 0/1 class labels.

    Only labels equal to 1 and labels equal to 0 are counted; the
    probabilities are taken over ``len(dataset_Y)``. When one of the two
    classes is absent (including the empty collection) the result is ``0``.
    """
    ones = sum(1 for label in dataset_Y if label == 1)
    zeros = sum(1 for label in dataset_Y if label == 0)

    # A set missing one of the two classes carries no uncertainty.
    if ones == 0 or zeros == 0:
        return 0

    total = len(dataset_Y)
    p_one = ones / total
    p_zero = zeros / total

    return p_one * math.log2(1 / p_one) + p_zero * math.log2(1 / p_zero)

# Returns a list of tuples (i, x) where i is the index of the patient in the dataset and x is a timedelta covering the time span for which we have recordings of that patient.
def find_durations(dataset):
    """Compute, for every entry, the time span covered by its items.

    :param dataset: list of entries; each entry is a list of items whose
        first element (``item[0]``) is a ``datetime``.
    :return: list of tuples ``(i, span)`` where ``i`` is the position of the
        entry in ``dataset`` and ``span`` is ``max timestamp - min timestamp``
        as a ``timedelta``. Entries with no items get ``timedelta(0)``.
    """
    durations = []
    for index, entry in enumerate(dataset):
        timestamps = [item[0] for item in entry]
        # min() and max() are evaluated independently, so the first item (or
        # the only one) counts for both bounds whatever the item order.
        span = max(timestamps) - min(timestamps) if timestamps else timedelta(0)
        durations.append((index, span))
    return durations

## Operazioni preliminari sul DS

#### Loading raw DS

In [4]:
def load_diabetes_dataset(verbose=False, folder_path=None)-> list:
    """Load every ``data*`` file of the diabetes dataset folder.

    Each file becomes one entry (a list of items). Every line is split on
    tabs; the first two fields are parsed as ``"%m-%d-%Y %H:%M"`` and the
    line is stored as ``(datetime, field_3, field_4)``, with fields 3 and 4
    kept as strings. Lines that cannot be parsed are skipped and counted.

    :param verbose: when true, print every skipped line.
    :param folder_path: directory to read; defaults to ``datasets/diabetes``
        relative to the working directory (built with ``os.path.join`` so it
        works on every platform).
    :return: list of entries, one per file, in ``os.listdir`` order. A file
        with no valid line yields an empty entry.
    """
    if folder_path is None:
        folder_path = os.path.join("datasets", "diabetes")

    dataset = []
    errcount = 0
    print(f"-- DS loader")

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        if not (os.path.isfile(file_path) and filename.startswith('data')):
            continue

        entry = []
        with open(file_path, 'r') as file:
            for line in file:
                # The last line of a file may lack the trailing newline.
                item = tuple(line.rstrip('\n').split("\t"))

                # If the item is valid, append it to the entry
                try:
                    item_f = datetime.strptime(item[0]+" "+item[1], "%m-%d-%Y %H:%M")
                    entry.append((item_f,item[2],item[3]))
                except (ValueError, IndexError):
                    # ValueError: malformed date/time; IndexError: missing fields.
                    if(verbose):
                        print(f"\t[!] Entry {item} in file {filename} is NOT vallid. Skipped!")
                    errcount+=1

        # add the entry to the dataset
        dataset.append(entry)

    print(f"\tSkipped {errcount} items for formatting issues in data file. {len(dataset)} loaded.")
    return dataset

# Load the raw dataset once (non-verbose: skipped lines are only counted).
dataset = load_diabetes_dataset(False)


-- DS loader
	Skipped 46 items for formatting issues in data file. 70 loaded.


#### Given raw DS, generate actual DS and classes.

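Definiamo la classe reale come un valore binario (0 o 1) che indica se l'evento 65 (65 = Hypoglycemic symptoms) si è verificato in una certa finestra di tempo `prediction_window`, che inizia dopo un tempo di attesa `waiting_window` a partire dalla fine della finestra di osservazione `observation_window`.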

In [5]:
from datetime import timedelta
from dateutil.relativedelta import relativedelta

def compute_datasets(dataset:list,observation_window,waiting_window,prediction_window):
    """Turn the raw entries into observations, binary classes and bookkeeping lists.

    For every entry the timeline is split, starting from the timestamp of its
    first item, into: observation window, waiting window, prediction window.
    An entry is kept only if the end of the prediction window is strictly
    before the timestamp of its last item; empty entries are excluded too.

    :param dataset: list of entries, each a list of items ``(time, label, value)``.
        The first and last item of an entry are used as its earliest and
        latest recording, so items are assumed to be in chronological order.
    :param observation_window: ``timedelta`` length of the observation window.
    :param waiting_window: ``timedelta`` gap between observation and prediction.
    :param prediction_window: ``timedelta`` length of the prediction window.
    :return: tuple ``(dataset_X, dataset_Y, dataset_ST, dataset_V)`` where
        ``dataset_X`` holds the items inside the observation window,
        ``dataset_Y`` is 1 when a label ``"65"`` item falls inside the
        prediction window (0 otherwise), ``dataset_ST`` is the starting time
        of each kept entry and ``dataset_V`` is a list of ``None``.
    """
    dataset_X = []
    dataset_Y = []
    dataset_ST = []

    count_excluded = 0

    for entry in dataset:
        # An entry with no items has no starting time: it cannot be used.
        if not entry:
            count_excluded += 1
            continue

        start_obs = entry[0][0]
        end_obs = start_obs + observation_window
        start_pred = end_obs + waiting_window
        end_pred = start_pred + prediction_window

        # The recordings must extend past the whole prediction window.
        if not end_pred < entry[-1][0]:
            count_excluded += 1
            continue

        entry_X = [item for item in entry if start_obs <= item[0] < end_obs]
        # put Y=1 if it has at least one "65" entry in the prediction window
        found = int(any(
            start_pred <= item[0] < end_pred and item[1] == "65"
            for item in entry
        ))

        dataset_X.append(entry_X)
        dataset_Y.append(found)
        # Record the starting time directly instead of reading it back from
        # entry_X, which is empty when the observation window selects nothing.
        dataset_ST.append(start_obs)

    dataset_V = [None]*len(dataset_X)
    print(f"-- DS builder")
    print(f"\t{count_excluded} entries unsuitable for selected windows.")
    print(f"\tFinal dataset size: {len(dataset_X)}. Classes: {sum(1 for c in dataset_Y if c == 1)}|{sum(1 for c in dataset_Y if c == 0)}, entropy {float(compute_entropy(dataset_Y)):4.3}")
    return dataset_X,dataset_Y,dataset_ST,dataset_V

# Window sizes used to build the dataset: 5 days of observations, then a
# 5-day gap, then a 15-day window in which the label "65" event is looked for.
observation_window = timedelta(days=+5)
waiting_window = timedelta(days=+5)
prediction_window = timedelta(days=+15)

# Convert raw data into dataset X,Y, etc (a shallow copy of the raw list is passed)
dataset_X,dataset_Y, dataset_ST,dataset_V = compute_datasets(dataset[:],observation_window,waiting_window,prediction_window)

-- DS builder
	6 entries unsuitable for selected windows.
	Final dataset size: 64. Classes: 27|37, entropy 0.982


In [53]:
# Quick reload if needed for testing/showcasing purposes
def reload_ds():
    """Reload the raw files and rebuild the working dataset with fixed windows.

    Uses a 3-day observation window, no waiting window and a 10-day
    prediction window.

    :return: the ``(dataset_X, dataset_Y, dataset_ST, dataset_V)`` tuple
        produced by ``compute_datasets``.
    """
    windows = (timedelta(days=3), timedelta(days=0), timedelta(days=10))
    raw = load_diabetes_dataset(False)

    X, Y, ST, V = compute_datasets(raw, *windows)
    return X, Y, ST, V

#### Stampiamo il DS elaborato.

In [6]:
def print_dataset_state(dataset_X,dataset_Y,dataset_ST,dataset_V,indexes=None):
    """Print one row per selected entry plus a summary line.

    Each row shows the index, the number of items of the entry, its class,
    its starting time and its current value. ``indexes`` selects which rows
    to print; when it is ``None`` or longer than the dataset, every entry is
    printed.
    """
    print("--DS state")
    size = len(dataset_X)

    # A selection longer than the dataset cannot be valid: print everything.
    if indexes and len(indexes) > size:
        indexes = None
    rows = range(0, size) if indexes is None else indexes

    print("index\tX\tY\tST\t\t\tV")
    for row in rows:
        print(f"{row}\tl.{len(dataset_X[row])}\t{dataset_Y[row]}\t{dataset_ST[row]}\t{dataset_V[row]}")

    print(f"# entries: {size}, entropy={float(compute_entropy(dataset_Y)):4.3}")
    
# Show every entry of the freshly built dataset.
print_dataset_state(dataset_X,dataset_Y, dataset_ST,dataset_V)

--DS state
index	X	Y	ST			V
0	l.35	1	1991-04-21 09:09:00	None
1	l.38	0	1989-10-10 08:00:00	None
2	l.49	1	1990-07-21 06:43:00	None
3	l.41	0	1990-08-19 17:00:00	None
4	l.46	0	1990-09-01 16:48:00	None
5	l.38	0	1989-03-27 22:00:00	None
6	l.24	1	1990-07-31 12:09:00	None
7	l.43	0	1990-04-22 18:08:00	None
8	l.34	0	1989-02-18 08:00:00	None
9	l.34	1	1990-07-13 09:44:00	None
10	l.46	1	1990-07-22 09:53:00	None
11	l.58	1	1990-09-04 05:53:00	None
12	l.38	1	1991-03-11 18:15:00	None
13	l.27	1	1991-04-13 08:47:00	None
14	l.22	1	1991-05-22 07:24:00	None
15	l.40	1	1990-07-13 09:48:00	None
16	l.44	0	1990-08-18 07:16:00	None
17	l.45	1	1990-09-09 17:23:00	None
18	l.39	0	1991-05-12 06:55:00	None
19	l.44	0	1989-09-03 08:00:00	None
20	l.36	0	1991-03-14 22:05:00	None
21	l.37	0	1991-04-27 23:02:00	None
22	l.28	0	1991-05-28 21:35:00	None
23	l.13	0	1990-07-24 16:00:00	None
24	l.25	0	1988-07-13 08:00:00	None
25	l.16	0	1989-01-29 08:00:00	None
26	l.16	0	1989-11-05 07:00:00	None
27	l.40	0	1990-04-29 07:00:00	None
28

#### Maximization functions

Let's define all functions to compute the best (d,l) couple and split a dataset according to it.

In [54]:
# Fix the RNG seed so that code drawing from `random` below is reproducible.
random.seed(1)

# Find all possible (d, l) pairs that the tree could be split on. Note: durations are sub-sampled (evenly spaced by default, randomly picked when random_sampling=True) because otherwise we'd end up with ~36000 pairs...
def create_pairs(dataset_X:list,dataset_ST:list,howmany_d =30,random_sampling=False):
    """Build the candidate (duration, label) pairs for an event test.

    :param dataset_X: list of entries, each a list of items ``(time, label, value)``.
    :param dataset_ST: current starting time of each entry.
    :param howmany_d: number of candidate durations to produce.
    :param random_sampling: when true, candidate durations are the offsets
        (item time minus starting time) of items strictly after the starting
        time, randomly down-sampled to ``howmany_d`` if there are more; when
        false, they are ``howmany_d`` evenly spaced multiples of
        ``max offset / (howmany_d + 1)``.
    :return: sorted list of ``(duration, label)`` tuples.
    """
    # 1. Labels: only items strictly after the entry's starting time count.
    labels = {
        item[1]
        for idx, entry in enumerate(dataset_X)
        for item in entry
        if item[0] > dataset_ST[idx]
    }

    # 2. Candidate durations.
    if random_sampling:
        offsets = {
            item[0] - dataset_ST[idx]
            for idx, entry in enumerate(dataset_X)
            for item in entry
            if item[0] > dataset_ST[idx]
        }
        ordered = sorted(offsets)
        if len(offsets) > howmany_d:
            durations = random.sample(ordered, howmany_d)
        else:
            durations = ordered
    else:
        offsets = {
            item[0] - dataset_ST[idx]
            for idx, entry in enumerate(dataset_X)
            for item in entry
        }
        step = max(offsets) / (howmany_d + 1)
        durations = {step * k for k in range(1, howmany_d + 1)}

    return sorted(itertools.product(durations, labels))

# Given a d,l couple, split the dataset and return indexes of true and false entries.
def test_event(dataset_X,dl_pair,dataset_ST,dataset_V=None,update=False):
    i_T = [] # indexes of entries that have label==l within d time
    i_F = [] # indexes of entries that have DON'T HAVE label==l within d time
    d,l = dl_pair


    #1. Separate entries that satisfy event test from those who don't
    for i in range(0,len(dataset_X)):
        entry = dataset_X[i]
        found=False

        for item in entry:
            #print(item)
            if(found is False and item[0]>=dataset_ST[i] and item[0]<=(dataset_ST[i]+d) and item[1]==l ):
                found=True

        if(found):
            i_T.append(i)
        else:
            i_F.append(i)
    return i_T,i_F

# Given a pair of duration d and label l, computes its information gain on the dataset if we were to split it according to the sequence tree rules.
def compute_IG(dl_pair,dataset_X,dataset_Y,dataset_ST,verbose=False):
    """Compute the information gain of splitting the dataset on an event test.

    The dataset is partitioned with ``test_event``; the gain is the entropy
    of ``dataset_Y`` minus the size-weighted entropy of the two partitions.

    :return: tuple ``(information_gain, i_T, i_F)`` where ``i_T``/``i_F`` are
        the indexes of the entries passing/failing the test.
    """
    base_entropy = compute_entropy(dataset_Y)
    i_T, i_F = test_event(dataset_X, dl_pair, dataset_ST)

    # Classes of the two partitions produced by the split.
    labels_true = [dataset_Y[i] for i in i_T]
    labels_false = [dataset_Y[i] for i in i_F]

    weight_true = len(i_T) / len(dataset_X)
    weight_false = len(i_F) / len(dataset_X)
    split_entropy = weight_true * compute_entropy(labels_true) + weight_false * compute_entropy(labels_false)
    gain = base_entropy - split_entropy

    if verbose:
        print(f"information gain is {gain} {base_entropy}-> {(labels_true, labels_false)} {split_entropy}")

    return (gain, i_T, i_F)

# Given all possible pairs of d,l finds the one with the highest information gain (aka the one I should actually split on)
def maximize_IG_event(dataset_X,dataset_Y, dataset_ST, indexes=None,verbose=False,howmany=30,random_sampling=False):
    """Find the event test (d, l) with the highest information gain.

    :param dataset_X: list of entries, each a list of items ``(time, label, value)``.
    :param dataset_Y: class of each entry.
    :param dataset_ST: current starting time of each entry.
    :param indexes: positions of the entries to consider (default: all).
        Both the candidate pairs and the information gain are computed on
        this subset only.
    :param verbose: print diagnostic information.
    :param howmany: number of candidate durations (see ``create_pairs``).
    :param random_sampling: forwarded to ``create_pairs``.
    :return: ``((d, l), information_gain)`` for the best pair, or
        ``(None, 0)`` when no usable split exists (no candidates, or the
        best candidate leaves one side empty).
    """
    if indexes is None:
        indexes = range(0,len(dataset_X))

    # Work consistently on the selected subset.
    sub_X = [dataset_X[i] for i in indexes]
    sub_Y = [dataset_Y[i] for i in indexes]
    sub_ST = [dataset_ST[i] for i in indexes]

    # Without any item there is nothing to build candidate pairs from.
    dl_pairs = create_pairs(sub_X,sub_ST,howmany,random_sampling) if any(sub_X) else []

    # best is ( (d,l) , (ig,i_T,i_F) ); ties keep the first candidate.
    best = None
    best_ig = -1
    for pair in dl_pairs:
        ig, i_T, i_F = compute_IG(pair,sub_X,sub_Y,sub_ST,verbose)
        if ig > best_ig:
            best_ig = ig
            best = (pair,(ig,i_T,i_F))

    if best is None or len(best[1][1])==0 or len(best[1][2])==0:
        if verbose:
            print("Split failed, couldn't find a d,l that separates values :(")
        return None, 0

    if verbose:
        print("Max IG is in couple d=",format_timedelta(best[0][0]),", l=",best[0][1],", IG=",best[1])

    return (best[0],best[1][0])

# Given a d,l couple, split the dataset in two and update starting times and dataset values.
def perform_event_test(max_dl,indexes,dataset_X,dataset_ST,dataset_V,verbose=False):
    """Apply an event test (d, l) and advance the state of the entries that pass.

    An entry passes when one of its items has label ``l`` and a timestamp
    between its starting time and starting time + d (bounds included). For
    such entries the starting time becomes the timestamp of the first
    matching item and the value becomes that item's value (``item[2]``).

    The input lists are NOT modified: updated copies are returned.
    ``indexes`` and ``verbose`` are accepted but not used.

    :param max_dl: tuple ``(d, l)``.
    :param dataset_X: list of entries, each a list of items ``(time, label, value)``.
    :param dataset_ST: current starting time of each entry.
    :param dataset_V: current value of each entry.
    :return: tuple ``(i_T, i_F, new_dataset_ST, new_dataset_V)``.
    """
    d, l = max_dl

    # Work on copies so the caller's lists keep their pre-test state.
    new_ST = list(dataset_ST)
    new_V = list(dataset_V)

    i_T = []
    i_F = []

    for i, start in enumerate(dataset_ST):
        deadline = start + d
        # First item (in entry order) with the right label inside the window.
        match = next(
            (item for item in dataset_X[i]
             if start <= item[0] <= deadline and item[1] == l),
            None,
        )

        if match is None:
            i_F.append(i)
        else:
            new_ST[i] = match[0]
            new_V[i] = match[2]
            i_T.append(i)

    return i_T,i_F,new_ST,new_V


Now let's define all functions to compute the best value split and split a dataset according to it.

In [55]:
# Given a dataset, test out all possible values and see which one would lead to the best split.
# If we have called this function, it means we are in a true branch and all values in dataset_V are already of a single label.
def maximize_IG_value(dataset_V,dataset_Y):
    """Find the threshold value whose ``<=`` split gives the highest information gain.

    Every distinct value in ``dataset_V`` is tried as a threshold: entries
    with ``value <= threshold`` go to the true side, the others to the false
    side. Candidates are tried in ascending order, so ties on the gain are
    resolved deterministically in favour of the smallest threshold.

    NOTE(review): values are compared with their native ordering; if they are
    strings the comparison is lexicographic, not numeric - confirm that this
    is intended.

    :param dataset_V: current value of each entry (mutually comparable).
    :param dataset_Y: class of each entry.
    :return: tuple ``(best_value, information_gain)``.
    :raises ValueError: if ``dataset_V`` is empty.
    """
    if len(dataset_V) == 0:
        raise ValueError("maximize_IG_value() needs at least one value")

    # The starting entropy does not depend on the candidate threshold.
    entropy_0 = compute_entropy(dataset_Y)
    total = len(dataset_Y)
    values_ig = []

    # sorted(): set iteration order is not stable across runs for strings.
    for v in sorted(set(dataset_V)):
        labels_t = []
        labels_f = []
        for i, current in enumerate(dataset_V):
            (labels_t if current <= v else labels_f).append(dataset_Y[i])

        entropy_f = (len(labels_t)/total)*compute_entropy(labels_t) + (len(labels_f)/total)*compute_entropy(labels_f)
        values_ig.append((v,entropy_0-entropy_f))

    return max(values_ig, key=lambda x: x[1])

# Given a threshold value, split the entry indexes into those whose current value is <= the threshold and all the others.
def perform_value_test(value,dataset_V):
    """Partition entry indexes according to a value test.

    :param value: threshold to compare against.
    :param dataset_V: current value of each entry.
    :return: tuple ``(i_T, i_F)``: indexes whose value is ``<= value`` and
        indexes of all the other entries.
    """
    i_T, i_F = [], []
    for position, current in enumerate(dataset_V):
        target = i_T if current <= value else i_F
        target.append(position)
    return i_T, i_F


### SequenceTree definition and functions.

In [60]:
from treelib import Tree,Node

class SequenceTree(Tree):
    """Binary decision tree over event sequences, built on treelib's ``Tree``.

    Inner nodes are either event tests (data key ``"dl"``: a duration and a
    label) or value tests (data key ``"value"``: a label and a threshold);
    leaves carry a class (data key ``"class"``). Every non-root node is
    attached to its parent through a ``"t"`` or ``"f"`` branch.
    """

    def __init__(self, tree=None, deep=False, node_class=None, identifier=None):
        super(SequenceTree, self).__init__(tree=tree, deep=deep, node_class=node_class, identifier=identifier)

    # Let's override original create_node method in order to add new constraints such as child node number and true/false branchs.
    def create_node(self, tag=None, identifier=None, parent=None, data=None,branch=None):
        """
        Create a child node for the given @parent node. If ``identifier`` is absent,
        a UUID will be generated automatically.

        :raises ValueError: if the parent already has two children or already
            has a child on the same ``branch``. In that case the tree is left
            unchanged.
        """
        new_node = super(SequenceTree, self).create_node(tag=tag, identifier=identifier, parent=parent, data=data)
        siblings = super(SequenceTree,self).siblings(new_node.identifier)

        problem = None
        if len(siblings)>=2:
            problem = "Parent node already has maximum number of children"
        elif branch in [x.data["branch"] for x in siblings]:
            problem = f"Parent node already has a {branch} branch"

        if problem is not None:
            # Roll back the insertion so a rejected node does not stay in the tree.
            self.remove_node(new_node.identifier)
            raise ValueError(problem)

        return new_node

    # Library has a bug that won't show trees correctly unless stdout=False is added.
    def display(self):
        """Print the textual rendering of the tree."""
        print(self.show(stdout=False))

    def create_node_event(self,data,parent=None,branch=None,entropy="",size=0,ig="",index=""):
        """Create an event-test node; ``data`` is the ``(duration, label)`` pair."""
        branch_f = "" if (branch is None) else str(branch)+" "
        tag =  f"\x1b[32m⬤ {branch_f} ({str(data[1])},{format_timedelta(data[0])})\x1b[0m - [e={entropy:4.2} ig={ig:4.2}] [n={size}] {index}"
        data = {"branch":branch, "dl":(data[0],data[1]),"entropy":entropy,"ig":ig,"index":index}

        return     self.create_node(tag,data=data,parent=parent,branch=branch)

    def create_node_value(self,label_value,parent=None,branch=None,entropy="",size=0,ig="",index=""):
        """Create a value-test node; ``label_value`` is the ``(label, threshold)`` pair."""
        branch_f = "" if (branch is None) else str(branch)+" "
        tag =  f"\x1b[31m■ {branch_f} ({label_value[0]}, {label_value[1]}) \x1b[0m- [e={float(entropy):2.2} ig={ig:4.2}] [n={size}] {index}"
        data = {"branch":branch,"value":label_value,"entropy":entropy,"ig":ig,"index":index}
        return     self.create_node(tag,data=data,parent=parent,branch=branch)

    def create_node_class(self,classification,parent=None,branch=None,entropy="",size=0,index=""):
        """Create a leaf node.

        ``classification`` is either the class itself or a string whose first
        character is the class followed by a note; the note is shown in the
        tag but only the class is stored in the node data.
        """
        branch_f = "" if (branch is None) else str(branch)+" "

        tag =  f"\x1b[33m◆ {branch_f} {classification} \x1b[0m- [e={float(entropy):2.2}] \x1b[33m[n={size}]\x1b[0m - {index}"

        # If the classification carries a note (e.g. "max depth reached"), keep only the class in data
        if isinstance(classification, str):
            classification = int(classification[0])

        data = {"branch":branch, "class":classification,"entropy":entropy,"index":index}

        return     self.create_node(tag,data=data,parent=parent,branch=branch)

    # Creates a leaf labelled with the majority class (ties go to 0) plus an explanatory note.
    def _create_majority_leaf(self,dataset_Y,note,parent,branch,indexes):
        y_1 = sum(1 for c in dataset_Y if c == 1)
        y_0 = sum(1 for c in dataset_Y if c == 0)
        estimated_class = 1 if y_1>y_0 else 0
        estimated_class = str(estimated_class)+" "+note

        self.create_node_class(estimated_class,parent,branch,compute_entropy(dataset_Y),len(dataset_Y),indexes)

    def fit(self,dataset_X,dataset_Y,dataset_V,dataset_ST,max_depth:int,parent=None,branch=None,depth=0,indexes=None,verbose=False):
        """
        Implements the fit algorithm as described in the exercise text.

        The lists passed in are not modified.

        :param list dataset_X: Dataset entries, where each entry is a list of items (time,label,value)
        :param list dataset_Y: classes of the dataset, each entry is the class of the dataset entry with the same index
        :param list dataset_V: value of each entry if there has been a previous (d,l) test, otherwise None
        :param list dataset_ST: current starting time (aka, how much of the list I've already read)
        :param int max_depth: a node whose depth exceeds this value becomes a leaf holding the majority class.
        :param parent: parent node (None for the root).
        :param branch: whether this node is on the true ("t") or false ("f") branch of its parent.
        :param depth: depth of the current node.
        :param indexes: original indexes of the entries currently being processed (default: 0..n-1).
        :param verbose: prints more info.
        """
        if(parent is None):
            print("-- Fit")

        if indexes is None:
            indexes = list(range(len(dataset_X)))

        # Nothing to learn from (and dataset_Y[0] below would fail).
        if len(dataset_Y)==0:
            if verbose:
                print(f"{depth,branch} Empty dataset, no node created.")
            return

        # BASE CASES:

        # 1. If I have 1 entry only or all the classes are the same, make a leaf
        if (len(dataset_X)==1) or (all(element == dataset_Y[0] for element in dataset_Y)):
            if verbose:
                print(f"{depth,branch} All classes are the same!")
            self.create_node_class(classification=dataset_Y[0],parent=parent,branch=branch,entropy=compute_entropy(dataset_Y),size=len(dataset_Y),index=indexes)
            return

        # 2. If I've reached the maximum number of tests allowed, create a leaf with the estimation of the class.
        elif depth > max_depth:
            if (verbose):
                print("Reached maximum depth!")
            self._create_majority_leaf(dataset_Y,"MAX DEPTH REACHED",parent,branch,indexes)
            return

        # INDUCTIVE CASE

        # Compute d,l that maximises IG (max_dl is None when no event split exists).
        max_dl, max_dl_ig = maximize_IG_event(dataset_X,dataset_Y,dataset_ST,range(len(dataset_X)),False,howmany=30,random_sampling=False)

        # If previous node is an event test and branch is True, or if previous node is a value test, compute value that maximises IG.
        max_value_ig=0
        if parent is not None and (("dl" in parent.data.keys() and branch=="t") or "value" in parent.data.keys() ):
            max_value, max_value_ig=maximize_IG_value(dataset_V,dataset_Y)

        # If value beats event, create a new value node.
        if max_value_ig > max_dl_ig:
            label = parent.data["dl"][1] if "dl" in parent.data.keys() else parent.data["value"][0]

            node = self.create_node_value((label,max_value),parent,branch,compute_entropy(dataset_Y),len(dataset_Y),max_value_ig)
            i_T, i_F=perform_value_test(max_value,dataset_V)

        # Neither test can separate the entries: stop here with the majority class.
        elif max_dl is None:
            if verbose:
                print(f"{depth,branch} No usable split found!")
            self._create_majority_leaf(dataset_Y,"NO SPLIT FOUND",parent,branch,indexes)
            return

        # Otherwise, create a new event node.
        else:
            node = self.create_node_event(max_dl,parent,branch,compute_entropy(dataset_Y),len(dataset_Y),max_dl_ig)
            # Copies are passed so that the caller's lists are never altered.
            i_T, i_F, dataset_ST, dataset_V=perform_event_test(max_dl,indexes,dataset_X,list(dataset_ST),list(dataset_V))

        for child_branch, child_idx in (("t",i_T),("f",i_F)):
            self.fit(
                [dataset_X[i] for i in child_idx],
                [dataset_Y[i] for i in child_idx],
                [dataset_V[i] for i in child_idx],
                [dataset_ST[i] for i in child_idx],
                max_depth,node,child_branch,depth+1,
                [indexes[i] for i in child_idx],
                verbose,
            )

    def predict_r(self,entry_X,entry_ST,entry_V,node:Node,verbose=False):
        """Recursively walk the tree for a single entry and return its predicted class.

        ``entry_X``, ``entry_ST`` and ``entry_V`` are one-element lists (the
        entry, its starting time and its value) so that the dataset-level
        test functions can be reused. The lists passed in are not modified.
        Returns ``None`` if an inner node is neither an event nor a value test.
        """
        if verbose:
            print(node)

        # BASE CASE

        if node.is_leaf():
            if verbose:
                print("end")
            return node.data["class"]

        # INDUCTIVE CASE

        # Children are looked up in this tree, not in a module-level variable.
        children = [self.get_node(x) for x in node.successors(self.identifier)]

        if "dl" in node.data.keys():
            if verbose:
                print("dl test")
            i_T,i_F,entry_ST,entry_V = perform_event_test(node.data["dl"],None,entry_X,list(entry_ST),list(entry_V))

        elif "value" in node.data.keys():
            if verbose:
                print("value test")
            i_T,i_F = perform_value_test(node.data["value"][1],entry_V)

        else:
            return None

        # With a single entry, a non-empty i_T means the test succeeded.
        branch = "t" if i_T else "f"
        next_node = list(filter(lambda x : x.data["branch"]==branch,children))[0]

        return self.predict_r(entry_X,entry_ST,entry_V,next_node,verbose)

    # Given a list of new entries, computes the prediction and returns it. It also prints the confusion matrix!
    def predict(self,entry_X,entry_ST,entry_V,entry_Y,verbose=False):
        """Predict the class of one or more entries and print a confusion matrix.

        :param entry_X: list of entries (or a single entry).
        :param entry_ST: list of starting times (or a single starting time).
        :param entry_V: list of values (or a single value).
        :param entry_Y: list of true classes (or a single class), used only for the statistics.
        :param verbose: prints the nodes visited.
        :return: list of predicted classes, one per entry.

        A single entry is recognised by ``entry_ST`` not being a list/tuple.
        """
        print("-- Predict")
        root = self.get_node(self.root)

        # Normalise the single-entry form to the batch form.
        if not isinstance(entry_ST,(list,tuple)):
            entry_X = [entry_X]
            entry_Y = [entry_Y]
            entry_ST = [entry_ST]
            entry_V = [entry_V]

        results = [
            self.predict_r([entry_X[i]],[entry_ST[i]],[entry_V[i]],root,verbose)
            for i in range(0,len(entry_ST))
        ]

        tp = sum(1 for x, y in zip(results, entry_Y) if (x == 1 and y==1))
        tn = sum(1 for x, y in zip(results, entry_Y) if (x == 0 and y==0))
        fp = sum(1 for x, y in zip(results, entry_Y) if (x == 1 and y==0))
        fn = sum(1 for x, y in zip(results, entry_Y) if (x == 0 and y==1))
        total = tp+tn+fp+fn

        print(f"\tP\tN\nP\t{tp}\t{fn}\nN\t{fp}\t{tn}")
        print(f"Items: {total}")
        # Accuracy is undefined when nothing was classified.
        accuracy = (tp+tn)/total if total else float("nan")
        print(f"Accuracy: {float(accuracy):4.3}")

        return results






# Reload the dataset with the windows defined in reload_ds (3d observation, no wait, 10d prediction)
dataset_X,dataset_Y, dataset_ST,dataset_V = reload_ds()

#--Fit
# Build the tree on the whole dataset; max_depth=100 is the depth limit passed to fit.
tree = SequenceTree()
tree.fit(dataset_X,dataset_Y,dataset_V,dataset_ST,max_depth=100,indexes=range(0,len(dataset_X)))
# Print the tree only if fit produced at least one node
if len(tree.all_nodes()) != 0:
    print(tree)

#--Predict
# Choose which entries to test (here: all of them, i.e. the training set itself)
rangee=len(dataset_V)
entry_X,entry_Y, entry_ST,entry_V = dataset_X[0:rangee],dataset_Y[0:rangee], dataset_ST[0:rangee],dataset_V[0:rangee]
# Test'em!
predictions = tree.predict(entry_X,entry_ST,entry_V,entry_Y)

-- DS loader
	Skipped 46 items for formatting issues in data file. 70 loaded.
-- DS builder
	2 entries unsuitable for selected windows.
	Final dataset size: 68. Classes: 31|37, entropy 0.994
-- Fit
[32m⬤  (48,2d 17h 1m 1s)[0m - [e=0.99 ig=0.21] [n=68] 
├── [31m■ t  (48, 109) [0m- [e=0.44 ig=0.053] [n=22] 
│   ├── [31m■ f  (48, 123) [0m- [e=0.57 ig=0.22] [n=15] 
│   │   ├── [31m■ f  (48, 210) [0m- [e=0.37 ig=0.093] [n=14] 
│   │   │   ├── [32m⬤ f  (34,9h 8m 23s)[0m - [e=0.65 ig=0.65] [n=6] 
│   │   │   │   ├── [33m◆ f  0 [0m- [e=0.0] [33m[n=5][0m - [9, 53, 54, 63, 67]
│   │   │   │   └── [33m◆ t  1 [0m- [e=0.0] [33m[n=1][0m - [65]
│   │   │   └── [33m◆ t  0 [0m- [e=0.0] [33m[n=8][0m - [5, 6, 19, 20, 25, 29, 39, 51]
│   │   └── [33m◆ t  1 [0m- [e=0.0] [33m[n=1][0m - [0]
│   └── [33m◆ t  0 [0m- [e=0.0] [33m[n=7][0m - [1, 26, 27, 28, 30, 40, 52]
└── [32m⬤ f  (62,6h 57m 58s)[0m - [e=0.95 ig= 0.1] [n=46] 
    ├── [32m⬤ f  (34,2d 17h 1m 1s)[0m - [e=0.81 ig=0.