# Decisionbranches pipeline example

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/decisionbranches/sigmod_decisionbranches/blob/master/examples/pipeline.ipynb)


# Download and install Decisionbranches

In [None]:
# Use the %pip magic (not a `!pip` shell escape) so the package is installed
# into the environment of the running kernel.
%pip install git+https://github.com/decisionbranches/sigmod_decisionbranches.git



In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from decisionbranches.utils.helpers import generate_fidxs
from decisionbranches.models.boxSearch.boxClassifier import BoxClassifier
from py_kdtree.treeset import KDTreeSet

In [None]:
# Seed the global NumPy RNG; the same seed is also passed explicitly to
# generate_fidxs and train_test_split further down.
seed = 42
np.random.seed(seed)


# --- Parameters ---
# nfeat / nind are forwarded as n_feat / n_ind to generate_fidxs and BoxClassifier.
nfeat = 10
nind = 100
# Options handed to BoxClassifier through its `cfg` argument.
dbranch_cfg = dict(top_down=False, max_evals="all", stop_infinite=True)

# Class value that is mapped to 1 when the targets are binarised.
label = "4."

In [None]:
# Fetch the 'satimage' dataset (version 1) from OpenML as plain arrays
# rather than a DataFrame.
X, y = fetch_openml("satimage", version=1, return_X_y=True, as_frame=False)

# Binary target: 1 where the original class equals `label`, 0 everywhere else.
y_bin = (y == label).astype(int)

# Only 5% of the samples are used for training; the remaining split (X_test)
# is the set of points that gets indexed and queried afterwards.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_bin,
    train_size=0.05,
    random_state=seed,
)
print("Number of rare training objects: ", y_train.sum())
print("Number of points to query: ", X_test.shape[0])

# Generate feature subsets

In [None]:
# Feature subsets are drawn from all column indices of X.
# NOTE(review): presumably this yields n_ind subsets of n_feat feature indices
# each — confirm against generate_fidxs.
subsets = generate_fidxs(
    n_feat=nfeat,
    n_ind=nind,
    feats=np.arange(X.shape[1]),
    seed=seed,
)


# Create indexes

In [None]:
# The tree set is constructed from the feature subsets and fitted on X_test,
# i.e. on the points that are queried later.
# NOTE(review): presumably one k-d tree per subset is built and stored under
# ./indexes/ — confirm against KDTreeSet.
treeset = KDTreeSet(
    subsets,
    path="./indexes/",
    leaf_size=60,
    verbose=False,
)
treeset.fit(X_test)

# DecisionBranch

In [None]:
# The classifier receives the same nfeat / nind values that were used to
# generate the feature subsets, plus the options in dbranch_cfg.
dbranch = BoxClassifier(
    tot_feat=X.shape[1],
    n_feat=nfeat,
    n_ind=nind,
    cfg=dbranch_cfg,
    postTree=False,
)
dbranch.fit(X_train, y_train)

# Box bounds and feature indices of the fitted model; these are what the
# index query consumes.
mins, maxs, fidxs = dbranch.get_boxes()

# Score the model's own predictions on the held-out split.
preds = dbranch.predict(X_test)
print("Test F1-score: ", f1_score(y_test, preds))

# Query boxes

In [None]:
# Query the index built over X_test with the boxes taken from the classifier.
# `counts` is returned by the query but not used in this cell.
# NOTE(review): the name `time` would shadow the stdlib `time` module if that
# module were imported in this notebook.
inds, counts, time, loaded_leaves = treeset.multi_query_ranked_cy(
    mins,
    maxs,
    fidxs,
)

print("Number of found points: ", len(inds))
print("Loading time: ", time)
print("Number of loaded leaves: ", loaded_leaves)