# PSyKE's demo

Some imports.

In [1]:
from psyke.utils.dataframe import get_discrete_features_supervised
from psyke import Extractor
from psyke.utils.dataframe import get_discrete_dataset
from psyke.utils.logic import pretty_theory
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

Import iris dataset separating features and class.

In [2]:
x, y = load_iris(return_X_y=True, as_frame=True)

Rename of the features.

In [3]:
x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

The original features' dataset is discretized using the equal frequency method. Each feature is mapped in a 3 (can be an arbitrary integer) new one-hot encoded sub-features representing 3 real intervals. So from the original 4 features we have a new 12 features dataset. S, M and L stand for small, medium and large.

In [4]:
iris_features = get_discrete_features_supervised(x.join(y))

for descrete_feature in iris_features:
    print(str(descrete_feature), end='\n\n')

PetalLength = {'PetalLength_0' if PetalLength ∈ ]-∞, 2.27[, 'PetalLength_1' if PetalLength ∈ [2.27, 4.87], 'PetalLength_2' if PetalLength ∈ ]4.87, ∞[}

SepalWidth = {'SepalWidth_0' if SepalWidth ∈ ]-∞, 2.87[, 'SepalWidth_1' if SepalWidth ∈ [2.87, 3.20], 'SepalWidth_2' if SepalWidth ∈ ]3.20, ∞[}

SepalLength = {'SepalLength_0' if SepalLength ∈ ]-∞, 5.39[, 'SepalLength_1' if SepalLength ∈ [5.39, 6.26], 'SepalLength_2' if SepalLength ∈ ]6.26, ∞[}

PetalWidth = {'PetalWidth_0' if PetalWidth ∈ ]-∞, 0.65[, 'PetalWidth_1' if PetalWidth ∈ [0.65, 1.64], 'PetalWidth_2' if PetalWidth ∈ ]1.64, ∞[}



Reassign features' data to the discretized one.

In [5]:
x = get_discrete_dataset(x, iris_features)
x

Unnamed: 0,PetalLength_0,PetalLength_1,PetalLength_2,PetalWidth_0,PetalWidth_1,PetalWidth_2,SepalLength_0,SepalLength_1,SepalLength_2,SepalWidth_0,SepalWidth_1,SepalWidth_2
0,1,0,0,1,0,0,1,0,0,0,0,1
1,1,0,0,1,0,0,1,0,0,0,1,0
2,1,0,0,1,0,0,1,0,0,0,1,0
3,1,0,0,1,0,0,1,0,0,0,1,0
4,1,0,0,1,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
145,0,0,1,0,0,1,0,0,1,0,1,0
146,0,0,1,0,0,1,0,0,1,1,0,0
147,0,0,1,0,0,1,0,0,1,0,1,0
148,0,0,1,0,0,1,0,1,0,0,0,1


Replace integer indices with the corresponding string class.

In [6]:
y = pd.DataFrame(y).replace({"target": {0: 'setosa', 1: 'virginica', 2: 'versicolor'}})
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,versicolor
146,versicolor
147,versicolor
148,versicolor


The final dataset:

In [7]:
dataset = x.join(y)
dataset.columns = [*dataset.columns[:-1], 'iris']
dataset

Unnamed: 0,PetalLength_0,PetalLength_1,PetalLength_2,PetalWidth_0,PetalWidth_1,PetalWidth_2,SepalLength_0,SepalLength_1,SepalLength_2,SepalWidth_0,SepalWidth_1,SepalWidth_2,iris
0,1,0,0,1,0,0,1,0,0,0,0,1,setosa
1,1,0,0,1,0,0,1,0,0,0,1,0,setosa
2,1,0,0,1,0,0,1,0,0,0,1,0,setosa
3,1,0,0,1,0,0,1,0,0,0,1,0,setosa
4,1,0,0,1,0,0,1,0,0,0,0,1,setosa
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,0,0,1,0,0,1,0,0,1,0,1,0,versicolor
146,0,0,1,0,0,1,0,0,1,1,0,0,versicolor
147,0,0,1,0,0,1,0,0,1,0,1,0,versicolor
148,0,0,1,0,0,1,0,1,0,0,0,1,versicolor


Split between train and test set in a reproducible way.

In [8]:
train, test = train_test_split(dataset, test_size=0.5, random_state=0)

We use as predictor a KNN with K = 7 and we train it.

In [9]:
predictor = KNeighborsClassifier(n_neighbors=7)
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])

KNeighborsClassifier(n_neighbors=7)

We create an extractor that uses the REAL algorithm and we extract prolog rules from our trained KNN.

In [10]:
real = Extractor.real(predictor, iris_features)
theory_from_real = real.extract(train)
print('REAL extracted rules:\n\n' + pretty_theory(theory_from_real))

REAL extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalWidth =< 0.6474.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength > 4.865, SepalLength > 6.262.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalWidth > 1.6376.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalWidth =< 2.872.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [5.3892, 6.262].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalWidth in [0.6474, 1.6376].


We create a different extractor that use Trepan algorithm and we extract prolog rules from the same KNN.

In [11]:
trepan = Extractor.trepan(predictor, iris_features)
theory_from_trepan = trepan.extract(train)
print('\nTrepan extracted rules:\n\n' + pretty_theory(theory_from_trepan))


Trepan extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength > 2.2685, PetalLength in [2.2685, 4.865].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength > 2.2685.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa).
