# Banknote dataset with LionForests V2

In [1]:
import sys
# IPython shell capture: `cpath` is the list of output lines printed by `pwd`,
# so cpath[0] is the current working directory as a string.
cpath = !pwd
# Strip the last 18 characters of the working directory and put the remainder
# on the import path. NOTE(review): presumably this points at the project root
# so that `lionforests` and `datasets` can be imported; the hard-coded 18
# depends on where this notebook lives and breaks if it is moved or renamed.
sys.path.append(cpath[0][:-18])

In [2]:
from lionforests import LionForests
from datasets.dataset import Dataset
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
np.seterr(invalid='ignore')
import warnings
warnings.filterwarnings("ignore")
import time

First, we load the dataset together with its feature and class names

In [3]:
# Project-local dataset helper; load_banknote() returns the feature matrix,
# the labels, and the feature/class names in that order.
banknotes = Dataset()
X, y, feature_names, class_names = banknotes.load_banknote()

In [4]:
# Display the feature names returned by the loader.
feature_names

['variance', 'skew', 'curtosis', 'entropy']

In [5]:
# Shape of the feature matrix and its first two rows.
X.shape, X[:2]

((1372, 4),
 array([[ 3.6216 ,  8.6661 , -2.8073 , -0.44699],
        [ 4.5459 ,  8.1674 , -2.4586 , -1.4621 ]]))

In [6]:
# Max, min, standard deviation and mean of the column at index 3
# (the last of the four features).
X[:,3].max(), X[:,3].min(), X[:,3].std(), X[:,3].mean()

(2.4495, -8.5482, 2.100247322449037, -1.1916565200437317)

The dataset has 1372 instances

In [7]:
# Number of instances (rows) in X.
len(X)

1372

Then we initialize LionForests and fit it on the data

In [8]:
# Scaler handed to LionForests as its third constructor argument.
scaler = MinMaxScaler(feature_range=(-1, 1))

# Hyperparameter grid with a single candidate value per entry.
parameters = [
    {
        'max_depth': [10],
        'max_features': [0.75],
        'bootstrap': [True],
        'min_samples_leaf': [1],
        'n_estimators': [500],
    }
]

lf = LionForests(None, False, scaler, feature_names, class_names)
lf.fit(X, y, params=parameters)

The model achieves very high performance, based on F1-Weighted score

In [6]:
# Score stored on the fitted LionForests object.
# NOTE(review): the surrounding text calls this an F1-weighted score although
# the attribute is named `accuracy` — confirm against the library.
lf.accuracy

0.9949038953887852

## Qualitative

Now, we will examine the rule interpretation of an example

In [7]:
# Explain instance 49 and keep only the first element of the result,
# which is the rule as a string.
lf.explain(X[49], True, 'apriori', 'kmedoids')[0]

'if 1.616<=variance<=1.642 & 2.277<=skew<=3.695 & 0.193<=curtosis<=17.927 then fake banknote'

Moreover, we can present a UI tool to explore the interpretation visually

In [8]:
# Same explanation as above, requested with to_vis=True and passed to the
# interactive visualisation widget.
lf.visualize(lf.explain(X[49], True, 'apriori', 'kmedoids', to_vis=True))

Prediction rule: if 1.616<=variance<=1.642 & 2.277<=skew<=3.695 & 0.193<=curtosis<=17.927 then fake banknote


HBox(children=(IntSlider(value=1, continuous_update=False, description='Feature: ', max=4, min=1), ToggleButto…

Output()

In [9]:
# Pair each feature name with the corresponding value of instance 49.
list(zip(feature_names, X[49]))

[('variance', 1.6349),
 ('skew', 3.286),
 ('curtosis', 2.8753),
 ('entropy', 0.08705399999999999)]

We can change an instance's values ourselves and see whether, and how, the outcome changes. Here we set the skew value to 2.5

In [10]:
# Work on a copy so the original row of X is left untouched.
temp = X[49].copy()
# Index 1 is the 'skew' feature (second entry of feature_names).
temp[1] = 2.5
# Re-explain the modified instance and show only the rule string.
lf.explain(temp, True, 'apriori', 'kmedoids')[0]

'if 1.616<=variance<=1.642 & 2.277<=skew<=3.695 & 0.193<=curtosis<=17.927 then fake banknote'

The outcome remained the same!

# Sensitivity analysis!
We will now test how the random forest's parameters affect the reduction of paths and features

In [None]:
from tqdm import tqdm
import time


def _mean_explain_stats(explainer, instances, *explain_args, **explain_kwargs):
    """Average the cost and reduction of ``explainer.explain`` over instances.

    Parameters
    ----------
    explainer : object
        Anything exposing ``explain(instance, ...)`` that returns a 6-tuple.
    instances : sequence
        Instances to explain, one call each.
    *explain_args, **explain_kwargs
        Forwarded unchanged to ``explain`` after the instance.

    Returns
    -------
    list
        ``[mean seconds, mean (3rd - 5th element), mean (2nd - 4th element)]``
        of the tuples returned by ``explain``. Presumably these are the mean
        feature reduction and mean path reduction, as the notebook text
        suggests — TODO confirm against the library.
    """
    total_time = 0
    feature_reduction = 0
    path_reduction = 0
    for instance in instances:
        started = time.time()
        _, b, c, d, e, _ = explainer.explain(
            instance, *explain_args, **explain_kwargs)
        feature_reduction = feature_reduction + c - e
        path_reduction = path_reduction + b - d
        total_time = total_time + time.time() - started
    count = len(instances)
    return [total_time / count, feature_reduction / count, path_reduction / count]


results = []
iterr = 100  # number of leading instances of X explained per configuration
instances = [X[index] for index in range(iterr)]

# Random-forest hyperparameter grid that is swept.
max_depths = [1, 5, 7, 10]
n_estimators_grid = [10, 100, 500, 1000]
max_features_grid = ['sqrt', 'log2', 0.75, None]

# Reduction methods, and the algorithm choices each of them is combined with:
# a method containing '1' is run with every association-rule algorithm and a
# method containing '2' with every clustering algorithm; otherwise None is
# passed and the result row records '_' in that column.
methods = ['1', '2', '3', '12', '13', '23', '123']
ar_algorithms = ['apriori', 'fpgrowth']
cl_algorithms = ['kmedoids', 'OPTICS', 'SC']


def _algorithm_options(method):
    """Return the (association-rule, clustering) algorithm lists for a method."""
    ar_options = ar_algorithms if '1' in method else [None]
    cl_options = cl_algorithms if '2' in method else [None]
    return ar_options, cl_options


# One baseline run without reduction plus one run per
# (method, ar algorithm, cl algorithm) combination.
runs_per_forest = 1 + sum(
    len(ar_options) * len(cl_options)
    for ar_options, cl_options in map(_algorithm_options, methods)
)
total_runs = (len(max_depths) * len(n_estimators_grid)
              * len(max_features_grid) * runs_per_forest)

with tqdm(total=total_runs) as pbar:
    for i in max_depths:
        for j in n_estimators_grid:
            for o in max_features_grid:
                parameters = [{
                    'max_depth': [i],
                    'max_features': [o],
                    'bootstrap': [True],
                    'min_samples_leaf': [1],
                    'n_estimators': [j]
                }]
                # Same constructor/fit call shape as the other cells of this
                # notebook: the scaler and names go to the constructor and
                # the grid is passed to fit() via `params`.
                scaler = MinMaxScaler(feature_range=(-1, 1))
                lf = LionForests(None, False, scaler, feature_names, class_names)
                lf.fit(X, y, params=parameters)
                forest_id = [str(i), str(j), str(o)]

                # Baseline: explanation without reduction.
                results.append(
                    forest_id + ['NoRed', '_', '_']
                    + _mean_explain_stats(lf, instances, False, None, None, False))
                pbar.update(1)

                for k in methods:
                    ar_options, cl_options = _algorithm_options(k)
                    for ara in ar_options:
                        for cla in cl_options:
                            stats = _mean_explain_stats(
                                lf, instances, True, ara, cla, method=k)
                            results.append(
                                forest_id + [k, ara or '_', cla or '_'] + stats)
                            pbar.update(1)

In [None]:
# One result row per output line.
for row in results:
    print(row)

Another test compares the speed of LionForests V2 with that of the original LionForests!

In [None]:
from tqdm import tqdm
import time

results = []
iterr = 100  # number of leading instances of X explained per configuration

# (result label, positional arguments passed to explain() after the instance)
explain_runs = [
    ('NoRed_Old', (False, None, None, True)),
    ('123_Old', (True, 'apriori', 'kmedoids', True)),
]

with tqdm(total=4*4*4*len(explain_runs)) as pbar:
    for i in [1, 5, 7, 10]:
        for j in [10, 100, 500, 1000]:
            for o in ['sqrt', 'log2', 0.75, None]:
                parameters = [{
                    'max_depth': [i],
                    'max_features': [o],
                    'bootstrap': [True],
                    'min_samples_leaf': [1],
                    'n_estimators': [j]
                }]
                lf = LionForests(None, False, None, feature_names, class_names)
                lf.fit(X, y, params=parameters)
                for label, explain_args in explain_runs:
                    # Reset the accumulators for every run; otherwise the
                    # second row would also contain the totals of the first.
                    tlf = 0
                    fr = 0
                    pr = 0
                    for inde in range(iterr):
                        t_a = time.time()
                        a, b, c, d, e, f = lf.explain(X[inde], *explain_args)
                        fr = fr + c - e
                        pr = pr + b - d
                        tlf = tlf + time.time() - t_a
                    # Row: forest settings, run label, two unused algorithm
                    # columns, then the per-instance means.
                    results.append([str(i), str(j), str(o), label,
                                    '_', '_', tlf/iterr, fr/iterr, pr/iterr])
                    pbar.update(1)

In [None]:
# One result row per output line.
for row in results:
    print(row)