In [2]:
import sys
sys.path.append('../../utils')

In [4]:
import pandas as pd
import numpy as np
import plotly.express as px
import random

from rulekit import RuleKit
from rulekit.classification import RuleClassifier
from rulekit.params import Measures

from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

RuleKit.init()

In [5]:
clf_simple = RuleClassifier(induction_measure=Measures.C2,
                        pruning_measure=Measures.C2,
                        voting_measure=Measures.C2,
                        discrete_set_conditions_enabled = False,
                        negated_conditions_enabled = False,
                        intervals_conditions_enabled = False,
                        numerical_attributes_conditions_enabled = False,
                        nominal_attributes_conditions_enabled = False,
                        inner_alternatives_enabled = False,
                        )
clf_complex = RuleClassifier(induction_measure=Measures.C2,
                        pruning_measure=Measures.C2,
                        voting_measure=Measures.C2,
                        discrete_set_conditions_enabled = True,
                        negated_conditions_enabled = True,
                        intervals_conditions_enabled = True,
                        numerical_attributes_conditions_enabled = True,
                        nominal_attributes_conditions_enabled = True,
                        inner_alternatives_enabled = False,
)

clf_complex_with_inners = RuleClassifier(induction_measure=Measures.C2,
                        pruning_measure=Measures.C2,
                        voting_measure=Measures.C2,
                        discrete_set_conditions_enabled = True,
                        negated_conditions_enabled = True,
                        intervals_conditions_enabled = True,
                        numerical_attributes_conditions_enabled = True,
                        nominal_attributes_conditions_enabled = True,
                        inner_alternatives_enabled = True,
)


# Numeryczne

## a E {v1, v2}

In [3]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [round(random.uniform(0, 9),2) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]

df["a"] = a
df["b"] = b

df.loc[(df["a"] >= 3) & (df["a"] <= 7),"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,3.92,4.86,1
1,5.02,8.98,1
2,8.20,8.28,0
3,6.10,7.29,1
4,0.76,7.94,0
...,...,...,...
95,7.64,0.40,0
96,4.72,7.77,1
97,6.76,5.82,1
98,7.77,4.50,0


In [4]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [5]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)


print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = <3.02, 6.98) THEN class = {1} (p = 46.0, n = 0.0, P = 46.0, N = 54.0, weight = 1.0, pvalue = 1.361081276455314e-29)
IF a = (-inf, 3.02) THEN class = {0} (p = 33.0, n = 0.0, P = 54.0, N = 46.0, weight = 0.8055555555555556, pvalue = 1.7657102882534904e-12)
IF a = <6.98, inf) THEN class = {0} (p = 21.0, n = 0.0, P = 54.0, N = 46.0, weight = 0.6944444444444444, pvalue = 2.5483930703189164e-07)


Complex rules
IF a = <3.02, 7.05) THEN class = {1} (p = 46.0, n = 0.0, P = 46.0, N = 54.0, weight = 1.0, pvalue = 1.361081276455314e-29)
IF a != <3.02, 7.05) THEN class = {0} (p = 54.0, n = 0.0, P = 54.0, N = 46.0, weight = 1.0, pvalue = 1.361081276455314e-29)


Complex rules + inners alternatives
IF #(a = <3.02, 6.76)) THEN class = {1} (p = 46.0, n = 0.0, P = 46.0, N = 54.0, weight = 1.0, pvalue = 1.361081276455314e-29)
IF #(a != <0.77, 7.05) OR a != <0.80, 7.05) OR a != <0.87, 7.05) OR a != <1.09, 7.05) OR a != <1.31, 7.05) OR a != <3.02, 7.05)) THEN class = {0} (p = 54.0, n

## a !E {v1, v2}

In [6]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [round(random.uniform(0, 9),2) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]

df["a"] = a
df["b"] = b

df.loc[(df["a"] >= 3) & (df["a"] <= 7),"class"] = "0"
df = df.fillna("1")
df

Unnamed: 0,a,b,class
0,7.47,0.85,1
1,1.47,3.05,1
2,1.47,7.33,1
3,7.31,0.22,1
4,5.38,0.80,0
...,...,...,...
95,3.04,4.56,0
96,4.23,7.59,0
97,1.31,4.09,1
98,7.83,0.66,1


In [7]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [8]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = (-inf, 2.95) THEN class = {1} (p = 35.0, n = 0.0, P = 62.0, N = 38.0, weight = 0.782258064516129, pvalue = 2.554113438907091e-10)
IF a = <7.03, inf) THEN class = {1} (p = 27.0, n = 0.0, P = 62.0, N = 38.0, weight = 0.717741935483871, pvalue = 1.4587430898619218e-07)
IF a = <2.95, 7.03) THEN class = {0} (p = 38.0, n = 0.0, P = 38.0, N = 62.0, weight = 1.0, pvalue = 1.7636531930450345e-28)


Complex rules
IF a != <2.95, 7.08) THEN class = {1} (p = 62.0, n = 0.0, P = 62.0, N = 38.0, weight = 1.0, pvalue = 1.7636531930450345e-28)
IF a = <2.95, 7.08) THEN class = {0} (p = 38.0, n = 0.0, P = 38.0, N = 62.0, weight = 1.0, pvalue = 1.7636531930450345e-28)


Complex rules + inners alternatives
IF #(a != <1.25, 7.08) OR a != <1.33, 7.08) OR a != <1.35, 7.08) OR a != <1.41, 7.08) OR a != <1.52, 7.08) OR a != <2.95, 7.08)) THEN class = {1} (p = 62.0, n = 0.0, P = 62.0, N = 38.0, weight = 1.0, pvalue = 1.7636531930450345e-28)
IF #(a = <2.95, 6.86)) THEN class = {0} (p = 38.0, 

## a<b

In [9]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [round(random.uniform(0, 9),2) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]

df["a"] = a
df["b"] = b

df.loc[(df["a"] < df["b"]),"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,3.25,3.33,1
1,6.26,3.44,0
2,0.54,6.82,1
3,1.33,6.55,1
4,7.91,3.87,0
...,...,...,...
95,6.66,8.21,1
96,7.55,3.63,0
97,2.39,3.68,1
98,2.58,3.27,1


In [10]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [11]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF b = <7.87, inf) THEN class = {1} (p = 10.0, n = 0.0, P = 47.0, N = 53.0, weight = 0.6063829787234043, pvalue = 0.00029913195740550953)
IF a = (-inf, 7.70) AND b = <6.81, inf) THEN class = {1} (p = 15.0, n = 0.0, P = 47.0, N = 53.0, weight = 0.6595744680851063, pvalue = 2.9668462928026348e-06)
IF a = (-inf, 3.16) AND b = <1.91, inf) THEN class = {1} (p = 29.0, n = 0.0, P = 47.0, N = 53.0, weight = 0.8085106382978724, pvalue = 3.681173272700711e-13)
IF a = (-inf, 6.87) AND b = <5.40, inf) THEN class = {1} (p = 26.0, n = 0.0, P = 47.0, N = 53.0, weight = 0.7765957446808511, pvalue = 1.7941983175154166e-11)
IF b = (-inf, 1.37) THEN class = {0} (p = 12.0, n = 0.0, P = 53.0, N = 47.0, weight = 0.6132075471698113, pvalue = 0.0002539773316889682)
IF a = <5.89, inf) AND b = (-inf, 6.39) THEN class = {0} (p = 29.0, n = 0.0, P = 53.0, N = 47.0, weight = 0.7735849056603774, pvalue = 6.278824168962018e-11)
IF a = <5.52, inf) AND b = (-inf, 8.03) THEN class = {0} (p = 35.0, n = 1.0

## a>b

In [52]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [round(random.uniform(0, 9),2) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]

df["a"] = a
df["b"] = b

df.loc[(df["a"] > df["b"]),"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,1.26,8.21,0
1,0.93,5.20,0
2,3.88,2.45,1
3,6.84,6.08,1
4,1.88,2.23,0
...,...,...,...
95,2.48,5.41,0
96,0.16,5.41,0
97,8.14,0.77,1
98,2.29,8.91,0


In [53]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [54]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = (-inf, 2.69) AND b = <2.16, inf) THEN class = {0} (p = 36.0, n = 0.0, P = 54.0, N = 46.0, weight = 0.8333333333333333, pvalue = 4.9021911770736793e-14)
IF a = (-inf, 3.58) AND b = <0.88, inf) THEN class = {0} (p = 41.0, n = 2.0, P = 54.0, N = 46.0, weight = 0.7906883121746622, pvalue = 3.0507399732960576e-14)
IF a = (-inf, 5.44) AND b = <4.34, inf) THEN class = {0} (p = 39.0, n = 0.0, P = 54.0, N = 46.0, weight = 0.8611111111111112, pvalue = 9.601065669383949e-16)
IF a = (-inf, 7.31) AND b = <6.38, inf) THEN class = {0} (p = 24.0, n = 0.0, P = 54.0, N = 46.0, weight = 0.7222222222222222, pvalue = 1.7582458796469314e-08)
IF b = (-inf, 1.42) THEN class = {1} (p = 15.0, n = 0.0, P = 46.0, N = 54.0, weight = 0.6630434782608696, pvalue = 2.0199804546741444e-06)
IF a = <5.97, inf) AND b = (-inf, 7.69) THEN class = {1} (p = 27.0, n = 0.0, P = 46.0, N = 54.0, weight = 0.7934782608695652, pvalue = 2.1666569676724535e-12)
IF a = <2.69, inf) AND b = (-inf, 2.90) THEN class =

## a = b

In [14]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [round(random.uniform(0, 9),2) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]
b[70:] = a[70:]
df["a"] = a
df["b"] = b

df.loc[(df["a"] == df["b"]),"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,4.66,4.46,0
1,7.11,8.91,0
2,5.17,8.06,0
3,3.67,8.45,0
4,8.24,6.17,0
...,...,...,...
95,6.39,6.39,1
96,2.79,2.79,1
97,0.13,0.13,1
98,1.11,1.11,1


In [15]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [16]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = <8.34, inf) THEN class = {0} (p = 9.0, n = 0.0, P = 70.0, N = 30.0, weight = 0.5642857142857143, pvalue = 0.03418801445376973)
IF b = <8.32, inf) THEN class = {0} (p = 7.0, n = 0.0, P = 70.0, N = 30.0, weight = 0.55, pvalue = 0.07488803166063863)
IF a = (-inf, 7.66) AND b = <7.41, inf) THEN class = {0} (p = 16.0, n = 0.0, P = 70.0, N = 30.0, weight = 0.6142857142857143, pvalue = 0.0018427538682743766)
IF a = <6.98, inf) AND b = (-inf, 7.03) THEN class = {0} (p = 17.0, n = 0.0, P = 70.0, N = 30.0, weight = 0.6214285714285714, pvalue = 0.0011846274867478204)
IF a = <6.12, inf) AND b = (-inf, 6.77) THEN class = {0} (p = 22.0, n = 2.0, P = 70.0, N = 30.0, weight = 0.47460317460317475, pvalue = 0.0053988010574116925)
IF a = (-inf, 4.32) AND b = <3.93, inf) THEN class = {0} (p = 20.0, n = 0.0, P = 70.0, N = 30.0, weight = 0.6428571428571428, pvalue = 0.00030203288497681165)
IF a = <2.94, inf) AND b = (-inf, 3.26) THEN class = {0} (p = 18.0, n = 0.0, P = 70.0, N = 30.0, 

# Nominalne

## a E {zbiór wartości}

In [17]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [random.randint(0, 3) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]


df["a"] = a
df["b"] = b

df = df.replace({'a': {0: "low", 1: "medium", 2: "high", 3: "vhigh"}})
df.loc[(df["a"] == "low") | (df["a"] == "medium"),"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,low,5.29,1
1,medium,3.80,1
2,medium,5.93,1
3,high,3.37,0
4,medium,8.88,1
...,...,...,...
95,high,4.88,0
96,medium,8.38,1
97,medium,2.20,1
98,vhigh,7.68,0


In [18]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [19]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)


print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = {low} THEN class = {1} (p = 28.0, n = 0.0, P = 51.0, N = 49.0, weight = 0.7745098039215687, pvalue = 3.93679541508213e-11)
IF a = {medium} THEN class = {1} (p = 23.0, n = 0.0, P = 51.0, N = 49.0, weight = 0.7254901960784313, pvalue = 7.914374796910971e-09)
IF a = {vhigh} THEN class = {0} (p = 26.0, n = 0.0, P = 49.0, N = 51.0, weight = 0.7653061224489797, pvalue = 8.339830914617157e-11)
IF a = {high} THEN class = {0} (p = 23.0, n = 0.0, P = 49.0, N = 51.0, weight = 0.7346938775510204, pvalue = 2.346379351554777e-09)


Complex rules
IF a = {low, medium} THEN class = {1} (p = 51.0, n = 0.0, P = 51.0, N = 49.0, weight = 1.0, pvalue = 1.0109886081846746e-29)
IF a = {high, vhigh} THEN class = {0} (p = 49.0, n = 0.0, P = 49.0, N = 51.0, weight = 1.0, pvalue = 1.0109886081846746e-29)


Complex rules + inners alternatives
IF a = {low, medium} THEN class = {1} (p = 51.0, n = 0.0, P = 51.0, N = 49.0, weight = 1.0, pvalue = 1.0109886081846746e-29)
IF a = {high, vhigh} THEN c

## a != wartość

In [20]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [random.randint(0, 2) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]


df["a"] = a
df["b"] = b

df = df.replace({'a': {0: "low", 1: "medium", 2: "high"}})
df.loc[(df["a"] == "low") | (df["a"] == "medium"),"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,high,5.28,0
1,medium,6.82,1
2,medium,7.68,1
3,medium,4.20,1
4,medium,6.68,1
...,...,...,...
95,medium,1.94,1
96,medium,0.23,1
97,high,0.15,0
98,medium,2.23,1


In [21]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [22]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = {high} THEN class = {0} (p = 32.0, n = 0.0, P = 32.0, N = 68.0, weight = 1.0, pvalue = 6.992395703634604e-27)
IF a = {medium} THEN class = {1} (p = 41.0, n = 0.0, P = 68.0, N = 32.0, weight = 0.8014705882352942, pvalue = 3.384504206881746e-10)
IF a = {low} THEN class = {1} (p = 27.0, n = 0.0, P = 68.0, N = 32.0, weight = 0.6985294117647058, pvalue = 3.550945986473596e-06)


Complex rules
IF a = {high} THEN class = {0} (p = 32.0, n = 0.0, P = 32.0, N = 68.0, weight = 1.0, pvalue = 6.992395703634604e-27)
IF a != {high} THEN class = {1} (p = 68.0, n = 0.0, P = 68.0, N = 32.0, weight = 1.0, pvalue = 6.992395703634604e-27)


Complex rules + inners alternatives
IF a = {high} THEN class = {0} (p = 32.0, n = 0.0, P = 32.0, N = 68.0, weight = 1.0, pvalue = 6.992395703634604e-27)
IF a != {high} THEN class = {1} (p = 68.0, n = 0.0, P = 68.0, N = 32.0, weight = 1.0, pvalue = 6.992395703634604e-27)


## a = b 

In [23]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [random.randint(0, 2) for i in range(100)]
b = [random.randint(0, 2) for i in range(100)]


df["a"] = a
df["b"] = b

df = df.replace({'a': {0: "low", 1: "medium", 2: "high"}, 'b': {0: "low", 1: "medium", 2: "high"}})
df.loc[(df["a"] == df["b"]),"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,medium,medium,1
1,medium,high,0
2,medium,high,0
3,low,high,0
4,high,medium,0
...,...,...,...
95,high,medium,0
96,low,high,0
97,medium,low,0
98,medium,high,0


In [24]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [25]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = {high} AND b = {high} THEN class = {1} (p = 14.0, n = 0.0, P = 34.0, N = 66.0, weight = 0.7058823529411764, pvalue = 3.1501967677758236e-08)
IF a = {low} AND b = {low} THEN class = {1} (p = 13.0, n = 0.0, P = 34.0, N = 66.0, weight = 0.6911764705882353, pvalue = 1.3050815180785538e-07)
IF a = {high} AND b = {medium} THEN class = {0} (p = 18.0, n = 0.0, P = 66.0, N = 34.0, weight = 0.6363636363636364, pvalue = 0.00022335122587369843)
IF a = {low} AND b = {medium} THEN class = {0} (p = 10.0, n = 0.0, P = 66.0, N = 34.0, weight = 0.5757575757575758, pvalue = 0.012188144281239734)
IF a = {medium} AND b = {high} THEN class = {0} (p = 10.0, n = 0.0, P = 66.0, N = 34.0, weight = 0.5757575757575758, pvalue = 0.012188144281239734)
IF a = {medium} AND b = {low} THEN class = {0} (p = 8.0, n = 0.0, P = 66.0, N = 34.0, weight = 0.5606060606060606, pvalue = 0.03086483482230472)
IF a = {high} AND b = {low} THEN class = {0} (p = 11.0, n = 0.0, P = 66.0, N = 34.0, weight = 0.58333

# XOR

In [26]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,4)])


a = ["0","0","1","1"]
b = ["0","1","0","1"]

df["a"] = a
df["b"] = b

df.loc[((df["a"] == "0") & (df["b"] == "1")) | ((df["a"] == "1") & (df["b"] == "0")) ,"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


In [27]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [28]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)


Rules original


Complex rules
IF a = b THEN class = {0} (p = 2.0, n = 0.0, P = 2.0, N = 2.0, weight = 1.0, pvalue = 0.16666666666666669)
IF a != b THEN class = {1} (p = 2.0, n = 0.0, P = 2.0, N = 2.0, weight = 1.0, pvalue = 0.16666666666666669)


Complex rules + inners alternatives
IF a = b THEN class = {0} (p = 2.0, n = 0.0, P = 2.0, N = 2.0, weight = 1.0, pvalue = 0.16666666666666669)
IF a != b THEN class = {1} (p = 2.0, n = 0.0, P = 2.0, N = 2.0, weight = 1.0, pvalue = 0.16666666666666669)


# Parity problem 
AN EMPIRICAL INVESTIGATION INTO DEEP AND SHALLOW RULE LEARNING

Consider, e.g., the following example: the parity concept, which is known to be hard to learn for heuristic, greedy learning algorithms,
checks whether an odd or an even number of R relevant attributes (out of a possibly higher total number of attributes) are
set to true. Figure 1a shows a flat rule-based representation of the target concept for R = 5, which requires 2^R−1 = 16
rules. On the other hand, a structured representation, which introduces three auxiliary predicates (parity2345,
parity345 and parity45 as shown in Figure 1b), is much more concise using only 2 · (R − 1) = 8 rules.

In [43]:
df = pd.DataFrame(columns=["x1", "x2", "x3","x4", "x5", "class"], index = [i for i in range(0,100)])

df["x1"] = [random.randint(0, 1) for i in range(100)]
df["x2"] = [random.randint(0, 1) for i in range(100)]
df["x3"] = [random.randint(0, 1) for i in range(100)]
df["x4"] = [random.randint(0, 1) for i in range(100)]
df["x5"] = [random.randint(0, 1) for i in range(100)]

df.loc[(df[["x1", "x2", "x3","x4", "x5"]].sum(axis=1)%2) == 0, "class"] = "1"
df = df.fillna("0")
df = df.astype("str")
df

Unnamed: 0,x1,x2,x3,x4,x5,class
0,0,0,1,0,0,0
1,0,0,1,1,0,1
2,0,1,0,1,1,0
3,1,0,1,0,0,1
4,0,1,1,0,1,0
...,...,...,...,...,...,...
95,0,0,1,0,0,0
96,0,1,1,1,0,0
97,1,0,1,1,0,0
98,1,0,1,1,1,1


In [35]:
df.dtypes

x1       object
x2       object
x3       object
x4       object
x5       object
class    object
dtype: object

In [44]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF x1 = {0} AND x2 = {1} AND x3 = {0} AND x4 = {0} AND x5 = {0} THEN class = {0} (p = 8.0, n = 0.0, P = 50.0, N = 50.0, weight = 0.58, pvalue = 0.0028850810098075127)
IF x1 = {0} AND x2 = {0} AND x3 = {0} AND x4 = {1} AND x5 = {0} THEN class = {0} (p = 6.0, n = 0.0, P = 50.0, N = 50.0, weight = 0.56, pvalue = 0.013330538154195196)
IF x3 = {0} AND x5 = {0} THEN class = {0} (p = 19.0, n = 9.0, P = 50.0, N = 50.0, weight = 0.24642857142857144, pvalue = 0.021980723884887348)
IF x3 = {0} THEN class = {0} (p = 26.0, n = 18.0, P = 50.0, N = 50.0, weight = 0.13818181818181824, pvalue = 0.07907229426358056)
IF x1 = {0} AND x5 = {0} THEN class = {0} (p = 20.0, n = 13.0, P = 50.0, N = 50.0, weight = 0.14848484848484853, pvalue = 0.10081451600333538)
IF x5 = {0} THEN class = {0} (p = 31.0, n = 25.0, P = 50.0, N = 50.0, weight = 0.08678571428571424, pvalue = 0.1569129396478786)
IF x2 = {1} THEN class = {0} (p = 25.0, n = 22.0, P = 50.0, N = 50.0, weight = 0.04787234042553187, pvalue 

In [45]:
df = pd.DataFrame(columns=["x1", "x2", "x3","x4", "x5", "class"], index = [i for i in range(0,100)])

df["x1"] = [random.randint(0, 1) for i in range(100)]
df["x2"] = [random.randint(0, 1) for i in range(100)]
df["x3"] = [random.randint(0, 1) for i in range(100)]
df["x4"] = [random.randint(0, 1) for i in range(100)]
df["x5"] = [random.randint(0, 1) for i in range(100)]

df.loc[(df[["x1", "x2", "x3","x4", "x5"]].sum(axis=1)%2) == 0, "class"] = "1"
df = df.fillna("0")
df = df.astype("str")
df

Unnamed: 0,x1,x2,x3,x4,x5,class
0,1,1,0,1,1,1
1,0,1,1,1,0,0
2,0,1,0,0,1,1
3,1,0,1,0,1,0
4,0,1,1,0,0,1
...,...,...,...,...,...,...
95,1,1,0,1,0,0
96,1,0,1,1,1,1
97,0,1,1,0,1,0
98,1,1,1,1,0,1


In [46]:
X = df.drop(['class'], axis=1)
y = df['class']

print(f"Rules original score: {clf_simple.score(X, y)}")

print(f"Complex rules score: {clf_complex.score(X, y)}")

Rules original score: 0.49
Complex rules score: 0.92


# Complex cases 1

In [47]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [round(random.uniform(0, 9),2) for i in range(100)]
b = [round(random.uniform(0, 9),2)for i in range(100)]

df["a"] = a
df["b"] = b

df.loc[((df["a"] >= 7) & (df["b"] <= 3)) | ((df["a"] <=3) & (df["b"] >= 7)) ,"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,6.22,3.28,0
1,1.54,5.68,0
2,5.88,3.15,0
3,5.59,6.52,0
4,3.13,4.91,0
...,...,...,...
95,4.75,2.25,0
96,7.09,2.91,1
97,4.25,8.74,0
98,2.80,8.75,1


In [48]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [49]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a = (-inf, 7.02) AND b = (-inf, 7.22) THEN class = {0} (p = 57.0, n = 0.0, P = 88.0, N = 12.0, weight = 0.8238636363636364, pvalue = 1.4602409431760255e-05)
IF b = <2.92, 7.22) THEN class = {0} (p = 51.0, n = 0.0, P = 88.0, N = 12.0, weight = 0.7897727272727273, pvalue = 8.783500172506351e-05)
IF a = <2.80, inf) AND b = <2.92, inf) THEN class = {0} (p = 54.0, n = 0.0, P = 88.0, N = 12.0, weight = 0.8068181818181819, pvalue = 3.70428768673331e-05)
IF a = (-inf, 5.83) AND b = <8.75, inf) THEN class = {1} (p = 3.0, n = 0.0, P = 12.0, N = 88.0, weight = 0.625, pvalue = 0.0013605442176870739)
IF a = (-inf, 3.14) AND b = <8.14, inf) THEN class = {1} (p = 7.0, n = 0.0, P = 12.0, N = 88.0, weight = 0.7916666666666667, pvalue = 4.947661982330259e-08)
IF a = <4.31, inf) AND b = <2.50, 2.92) THEN class = {1} (p = 3.0, n = 0.0, P = 12.0, N = 88.0, weight = 0.625, pvalue = 0.0013605442176870739)


Complex rules
IF b = <2.92, 7.35) THEN class = {0} (p = 51.0, n = 0.0, P = 88.0, N =

# Complex case 2

In [50]:
df = pd.DataFrame(columns=["a", "b", "class"], index = [i for i in range(0,100)])


a = [round(random.uniform(0, 20),2) for i in range(100)]
b = [round(random.uniform(0, 9),2) for i in range(100)]

df["a"] = a
df["b"] = b

df.loc[(df["a"] <= 3) | ((df["a"] >=5) & (df["a"] <= 7) | ((df["a"] >=14))) ,"class"] = "1"
df = df.fillna("0")
df

Unnamed: 0,a,b,class
0,10.14,1.80,0
1,2.72,7.99,1
2,7.25,4.59,0
3,17.77,7.04,1
4,0.14,3.65,1
...,...,...,...
95,5.94,5.47,1
96,12.44,4.67,0
97,18.29,6.02,1
98,16.47,1.80,1


In [51]:
fig = px.scatter(df, x='a',
            y='b',
            color='class',
            height=560,
            width=560,
            )
fig.show()

In [41]:
X = df.drop(['class'], axis=1)
y = df['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF b = <8.51, inf) THEN class = {0} (p = 6.0, n = 0.0, P = 51.0, N = 49.0, weight = 0.5588235294117647, pvalue = 0.015107943241421364)
IF a = <7.02, 14.04) THEN class = {0} (p = 41.0, n = 0.0, P = 51.0, N = 49.0, weight = 0.9019607843137255, pvalue = 6.351875249532286e-19)
IF a = <2.38, 14.04) AND b = <2.05, inf) THEN class = {0} (p = 45.0, n = 8.0, P = 51.0, N = 49.0, weight = 0.6512491789168497, pvalue = 9.864764354530558e-14)
IF a = <14.04, inf) THEN class = {1} (p = 32.0, n = 0.0, P = 49.0, N = 51.0, weight = 0.8265306122448979, pvalue = 4.5445470408846627e-14)
IF a = <4.59, 8.10) AND b = (-inf, 2.35) THEN class = {1} (p = 5.0, n = 0.0, P = 49.0, N = 51.0, weight = 0.5510204081632653, pvalue = 0.025328022492970975)
IF a = <4.81, 7.02) THEN class = {1} (p = 13.0, n = 0.0, P = 49.0, N = 51.0, weight = 0.6326530612244898, pvalue = 3.693062572531082e-05)


Complex rules
IF a = <7.02, 14.39) THEN class = {0} (p = 41.0, n = 0.0, P = 51.0, N = 49.0, weight = 0.9019607843137

# Monk-1

In [43]:
train = pd.read_parquet("./data/monk_1/train.parquet")
test = pd.read_parquet("./data/monk_1/train.parquet")

In [44]:
train

Unnamed: 0,a1,a2,a3,a4,a5,a6,class
0,1,1,1,1,3,1,True
1,1,1,1,1,3,2,True
2,1,1,1,3,2,1,True
3,1,1,1,3,3,2,True
4,1,1,2,1,2,1,True
...,...,...,...,...,...,...,...
119,3,3,2,1,4,2,True
120,3,3,2,3,1,2,True
121,3,3,2,3,2,2,True
122,3,3,2,3,3,2,True


In [45]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a5 = {1} THEN class = {True} (p = 29.0, n = 0.0, P = 62.0, N = 62.0, weight = 0.7338709677419355, pvalue = 2.4856745128420078e-11)
IF a1 = {3} AND a2 = {3} THEN class = {True} (p = 17.0, n = 0.0, P = 62.0, N = 62.0, weight = 0.6370967741935484, pvalue = 2.142364554238474e-06)
IF a4 = {1} AND a6 = {1} THEN class = {True} (p = 15.0, n = 6.0, P = 62.0, N = 62.0, weight = 0.2661290322580645, pvalue = 0.02680801791894211)
IF a4 = {1} THEN class = {True} (p = 26.0, n = 16.0, P = 62.0, N = 62.0, weight = 0.16897081413210444, pvalue = 0.043548750924814826)
IF a3 = {1} AND a6 = {2} THEN class = {True} (p = 20.0, n = 14.0, P = 62.0, N = 62.0, weight = 0.11669829222011381, pvalue = 0.15710116246905934)
IF a6 = {1} THEN class = {True} (p = 29.0, n = 27.0, P = 62.0, N = 62.0, weight = 0.026209677419354767, pvalue = 0.42844949591782744)
IF a1 = {1} AND a2 = {3} AND a5 = {4} THEN class = {False} (p = 8.0, n = 0.0, P = 62.0, N = 62.0, weight = 0.564516129032258, pvalue = 0.0030712154

The Monk's Problems: Problem 1
Once upon a time, in July 1991, the monks of Corsendonk Priory were faced with a school held in their priory, namely the 2nd European Summer School on Machine Learning. After listening more than one week to a wide variety of learning algorithms, they felt rather confused: Which algorithm would be optimal? And which one to avoid? As a consequence of this dilemma, they created a simple task on which all learning algorithms ought to be compared: the three MONK's problems.

The target concept associated with the 1st Monk's problem is the binary outcome of the logical formula:
MONK-1: (a1 == a2) or (a5 == 1)

In this dataset, the original train and test sets were merged to allow other sampling procedures. However, the original train-test splits can be found as one of the OpenML tasks.

# Monk-2

In [46]:
train = pd.read_parquet("./data/monk_2/train.parquet")
test = pd.read_parquet("./data/monk_2/train.parquet")

In [47]:
train

Unnamed: 0,a1,a2,a3,a4,a5,a6,class
0,1,1,1,1,2,2,False
1,1,1,1,1,4,1,False
2,1,1,1,2,1,1,False
3,1,1,1,2,1,2,False
4,1,1,1,2,2,1,False
...,...,...,...,...,...,...,...
164,3,3,2,2,3,1,False
165,3,3,2,2,3,2,False
166,3,3,2,3,1,1,True
167,3,3,2,3,2,1,False


In [48]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a4 = {1} AND a5 = {1} AND a6 = {1} THEN class = {False} (p = 9.0, n = 0.0, P = 105.0, N = 64.0, weight = 0.5428571428571428, pvalue = 0.012043080252488826)
IF a4 = {1} AND a5 = {1} THEN class = {False} (p = 14.0, n = 1.0, P = 105.0, N = 64.0, weight = 0.4669097222222221, pvalue = 0.0060842019563005865)
IF a1 = {1} AND a3 = {1} AND a4 = {1} THEN class = {False} (p = 7.0, n = 0.0, P = 105.0, N = 64.0, weight = 0.5333333333333333, pvalue = 0.03304309058967097)
IF a3 = {1} AND a4 = {1} AND a6 = {1} THEN class = {False} (p = 11.0, n = 0.0, P = 105.0, N = 64.0, weight = 0.5523809523809524, pvalue = 0.004317330656552595)
IF a4 = {1} AND a6 = {1} THEN class = {False} (p = 24.0, n = 4.0, P = 105.0, N = 64.0, weight = 0.38255739795918375, pvalue = 0.0032296489767082346)
IF a2 = {2} AND a3 = {2} AND a4 = {1} AND a6 = {2} THEN class = {False} (p = 5.0, n = 0.0, P = 105.0, N = 64.0, weight = 0.5238095238095238, pvalue = 0.08922301996394814)
IF a2 = {1} AND a5 = {4} AND a6 = {1} TH

# Monk-3

In [49]:
train = pd.read_parquet("./data/monk_3/train.parquet")
test = pd.read_parquet("./data/monk_3/train.parquet")

In [50]:
train

Unnamed: 0,a1,a2,a3,a4,a5,a6,class
0,1,1,1,1,1,2,True
1,1,1,1,1,2,1,True
2,1,1,1,1,2,2,True
3,1,1,1,1,3,1,False
4,1,1,1,1,4,1,False
...,...,...,...,...,...,...,...
117,3,3,2,2,2,2,False
118,3,3,2,2,3,2,False
119,3,3,2,3,1,1,False
120,3,3,2,3,3,2,False


In [51]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF a2 = {2} AND a5 = {1} THEN class = {True} (p = 10.0, n = 0.0, P = 60.0, N = 62.0, weight = 0.5833333333333334, pvalue = 0.0005470406191995192)
IF a1 = {1} AND a2 = {2} AND a5 = {2} THEN class = {True} (p = 8.0, n = 0.0, P = 60.0, N = 62.0, weight = 0.5666666666666667, pvalue = 0.0026572312430347715)
IF a2 = {2} AND a3 = {2} AND a5 = {3} THEN class = {True} (p = 5.0, n = 0.0, P = 60.0, N = 62.0, weight = 0.5416666666666666, pvalue = 0.0263474580998908)
IF a2 = {2} AND a6 = {2} THEN class = {True} (p = 18.0, n = 4.0, P = 60.0, N = 62.0, weight = 0.4174486803519061, pvalue = 0.0006392617157850593)
IF a2 = {1} AND a5 = {1} THEN class = {True} (p = 12.0, n = 0.0, P = 60.0, N = 62.0, weight = 0.6, pvalue = 0.00010780642833323966)
IF a2 = {1} AND a5 = {2} THEN class = {True} (p = 9.0, n = 0.0, P = 60.0, N = 62.0, weight = 0.575, pvalue = 0.001212070391559723)
IF a2 = {1} AND a5 = {3} THEN class = {True} (p = 5.0, n = 1.0, P = 60.0, N = 62.0, weight = 0.36402329749103945, pva

# Trains

In [52]:
train = pd.read_csv("./data/trains/trains-transformed.csv")

In [53]:
train

Unnamed: 0,Number_of_cars,Number_of_different_loads,num_wheels_car_2,length_car_2,shape_car_2,num_loads_car_2,load_shape_car_2,num_wheels_car_3,length_car_3,shape_car_3,...,Rectangle_next_to_triangle,Rectangle_next_to_hexagon,Rectangle_next_to_circle,Triangle_next_to_triangle,Triangle_next_to_hexagon,Triangle_next_to_circle,Hexagon_next_to_hexagon,Hexagon_next_to_circle,Circle_next_to_circle,class
0,5,4,2,long,openrect,3,rectanglod,2,short,slopetop,...,1,0,0,0,1,0,0,1,0,east
1,4,3,2,short,ushaped,1,trianglod,2,short,opentrap,...,1,0,1,0,0,0,0,0,0,east
2,4,2,2,short,openrect,1,circlelod,2,short,hexagon,...,0,0,0,1,0,1,0,0,0,east
3,5,2,2,short,opentrap,1,trianglod,2,short,dblopnrect,...,1,0,0,1,0,0,0,0,0,east
4,4,3,2,short,dblopnrect,1,trianglod,3,long,closedrect,...,1,0,1,0,0,0,0,0,0,east
5,3,2,2,long,closedrect,3,circlelod,2,short,openrect,...,0,0,0,0,0,1,0,0,0,west
6,4,2,2,short,dblopnrect,1,circlelod,2,short,ushaped,...,0,0,0,0,0,1,0,0,0,west
7,3,2,3,long,closedrect,1,rectanglod,2,short,ushaped,...,0,0,1,0,0,0,0,0,0,west
8,5,2,2,short,opentrap,1,circlelod,2,long,jaggedtop,...,0,0,1,0,0,0,0,0,0,west
9,3,1,2,short,ushaped,1,rectanglod,2,long,openrect,...,0,0,0,0,0,0,0,0,0,west


In [54]:
train.dtypes

Number_of_cars                  int64
Number_of_different_loads       int64
num_wheels_car_2                int64
length_car_2                   object
shape_car_2                    object
num_loads_car_2                 int64
load_shape_car_2               object
num_wheels_car_3                int64
length_car_3                   object
shape_car_3                    object
num_loads_car_3                 int64
load_shape_car_3               object
num_wheels_car_4               object
length_car_4                   object
shape_car_4                    object
num_loads_car_4                object
load_shape_car_4               object
num_wheels_car_5               object
length_car_5                   object
shape_car_5                    object
num_loads_car_5                object
load_shape_car_5               object
Rectangle_next_to_rectangle     int64
Rectangle_next_to_triangle      int64
Rectangle_next_to_hexagon       int64
Rectangle_next_to_circle        int64
Triangle_nex

In [55]:
train= train.astype('str')

In [56]:
train.dtypes

Number_of_cars                 object
Number_of_different_loads      object
num_wheels_car_2               object
length_car_2                   object
shape_car_2                    object
num_loads_car_2                object
load_shape_car_2               object
num_wheels_car_3               object
length_car_3                   object
shape_car_3                    object
num_loads_car_3                object
load_shape_car_3               object
num_wheels_car_4               object
length_car_4                   object
shape_car_4                    object
num_loads_car_4                object
load_shape_car_4               object
num_wheels_car_5               object
length_car_5                   object
shape_car_5                    object
num_loads_car_5                object
load_shape_car_5               object
Rectangle_next_to_rectangle    object
Rectangle_next_to_triangle     object
Rectangle_next_to_hexagon      object
Rectangle_next_to_circle       object
Triangle_nex

In [57]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF Rectangle_next_to_triangle = {1} THEN class = {east} (p = 4.0, n = 0.0, P = 5.0, N = 5.0, weight = 0.9, pvalue = 0.023809523809523808)
IF shape_car_4 = {closedrect} THEN class = {east} (p = 3.0, n = 0.0, P = 5.0, N = 5.0, weight = 0.8, pvalue = 0.08333333333333333)
IF load_shape_car_4 = {-} THEN class = {west} (p = 4.0, n = 0.0, P = 5.0, N = 5.0, weight = 0.9, pvalue = 0.023809523809523808)
IF Triangle_next_to_triangle = {0} AND Rectangle_next_to_triangle = {0} THEN class = {west} (p = 5.0, n = 0.0, P = 5.0, N = 5.0, weight = 1.0, pvalue = 0.003968253968253968)


Complex rules
IF Triangle_next_to_triangle != Rectangle_next_to_triangle THEN class = {east} (p = 4.0, n = 0.0, P = 5.0, N = 5.0, weight = 0.9, pvalue = 0.023809523809523808)
IF Rectangle_next_to_triangle != {0} THEN class = {east} (p = 4.0, n = 0.0, P = 5.0, N = 5.0, weight = 0.9, pvalue = 0.023809523809523808)
IF Triangle_next_to_triangle = Hexagon_next_to_circle = Rectangle_next_to_triangle THEN class = {w

# balance-scale

In [58]:
train = pd.read_parquet("./data/balance-scale.parquet")

In [59]:
train

Unnamed: 0,left-weight,left-distance,right-weight,right-distance,class
0,1.0,1.0,1.0,1.0,B
1,1.0,1.0,1.0,2.0,R
2,1.0,1.0,1.0,3.0,R
3,1.0,1.0,1.0,4.0,R
4,1.0,1.0,1.0,5.0,R
...,...,...,...,...,...
620,5.0,5.0,5.0,1.0,L
621,5.0,5.0,5.0,2.0,L
622,5.0,5.0,5.0,3.0,L
623,5.0,5.0,5.0,4.0,L


In [60]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF right-distance = (-inf, 2.50) AND left-weight = (-inf, 2.50) AND left-distance = (-inf, 2.50) AND right-weight = (-inf, 2.50) THEN class = {B} (p = 6.0, n = 10.0, P = 49.0, N = 576.0, weight = 0.18061977395124715, pvalue = 0.0007499248952481466)
IF right-distance = (-inf, 2.50) AND left-weight = <1.50, 4.50) AND left-distance = (-inf, 2.50) AND right-weight = <1.50, 4.50) THEN class = {B} (p = 8.0, n = 28.0, P = 49.0, N = 576.0, weight = 0.0907679043839758, pvalue = 0.004434784254422619)
IF left-weight = <1.50, 4.50) AND right-distance = <1.50, 4.50) AND left-distance = (-inf, 2.50) AND right-weight = (-inf, 2.50) THEN class = {B} (p = 8.0, n = 28.0, P = 49.0, N = 576.0, weight = 0.0907679043839758, pvalue = 0.004434784254422619)
IF left-weight = (-inf, 2.50) AND right-distance = (-inf, 2.50) AND left-distance = <1.50, 4.50) AND right-weight = <1.50, 4.50) THEN class = {B} (p = 8.0, n = 28.0, P = 49.0, N = 576.0, weight = 0.0907679043839758, pvalue = 0.004434784254422

# flag

In [61]:
train = pd.read_parquet("./data/flag.parquet")

In [62]:
train

Unnamed: 0,landmass,botright,area,population,language,religion,bars,stripes,colours,red,...,saltires,quarters,sunstars,crescent,triangle,icon,animate,text,topleft,class
0,5,green,648.0,16.0,10,2,0.0,3.0,5.0,1,...,0.0,0.0,1.0,0,0,1,0,0,black,1
1,3,red,29.0,3.0,6,6,0.0,0.0,3.0,1,...,0.0,0.0,1.0,0,0,0,1,0,red,1
2,4,white,2388.0,20.0,8,2,2.0,0.0,3.0,1,...,0.0,0.0,1.0,1,0,0,0,0,green,1
3,6,red,0.0,0.0,1,1,0.0,0.0,5.0,1,...,0.0,0.0,0.0,0,1,1,1,0,blue,3
4,3,red,0.0,0.0,6,0,3.0,0.0,3.0,1,...,0.0,0.0,0.0,0,0,0,0,0,blue,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,6,red,3.0,0.0,1,1,0.0,0.0,3.0,1,...,0.0,1.0,5.0,0,0,0,0,0,blue,3
190,3,red,256.0,22.0,6,6,0.0,3.0,4.0,1,...,0.0,0.0,1.0,0,0,0,0,0,blue,1
191,4,green,905.0,28.0,10,5,0.0,0.0,4.0,1,...,0.0,0.0,0.0,0,0,1,1,0,green,2
192,4,brown,753.0,6.0,10,5,3.0,0.0,4.0,1,...,0.0,0.0,0.0,0,0,0,1,0,green,2


In [63]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF landmass = {5} THEN class = {1} (p = 39.0, n = 0.0, P = 91.0, N = 103.0, weight = 0.7142857142857143, pvalue = 6.039529644312067e-16)
IF language = {4} THEN class = {1} (p = 6.0, n = 0.0, P = 91.0, N = 103.0, weight = 0.532967032967033, pvalue = 0.009733415084710487)
IF stripes = (-inf, 4) AND religion = {6} THEN class = {1} (p = 13.0, n = 0.0, P = 91.0, N = 103.0, weight = 0.5714285714285714, pvalue = 3.2661270084870434e-05)
IF language = {8} THEN class = {1} (p = 17.0, n = 2.0, P = 91.0, N = 103.0, weight = 0.4757562315048262, pvalue = 6.731200021668368e-05)
IF circles = (-inf, 0.50) AND landmass = {3} AND population = <3.50, 37) THEN class = {1} (p = 16.0, n = 0.0, P = 91.0, N = 103.0, weight = 0.5879120879120879, pvalue = 2.556394521411761e-06)
IF orange = {0} AND circles = (-inf, 0.50) AND landmass = {3} AND population = (-inf, 37) AND religion = {0} THEN class = {1} (p = 9.0, n = 0.0, P = 91.0, N = 103.0, weight = 0.5494505494505495, pvalue = 0.00088212300558693

# mushroom

In [64]:
train = pd.read_parquet("./data/mushroom.parquet")

In [65]:
train

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises?,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,class
0,x,s,n,t,p,f,c,n,k,e,...,w,w,p,w,o,p,k,s,u,p
1,x,s,y,t,a,f,c,b,k,e,...,w,w,p,w,o,p,n,n,g,e
2,b,s,w,t,l,f,c,b,n,e,...,w,w,p,w,o,p,n,n,m,e
3,x,y,w,t,p,f,c,n,n,e,...,w,w,p,w,o,p,k,s,u,p
4,x,s,g,f,n,f,w,b,k,t,...,w,w,p,w,o,e,n,a,g,e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,k,s,n,f,n,a,c,b,y,e,...,o,o,p,o,o,p,b,c,l,e
8120,x,s,n,f,n,a,c,b,y,e,...,o,o,p,n,o,p,b,v,l,e
8121,f,s,n,f,n,a,c,b,n,e,...,o,o,p,o,o,p,b,c,l,e
8122,k,y,n,f,y,f,c,n,b,t,...,w,w,p,w,o,e,w,v,l,p


In [66]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF odor = {f} THEN class = {p} (p = 2160.0, n = 0.0, P = 3916.0, N = 4208.0, weight = 0.7757916241062308, pvalue = 0.0)
IF gill-color = {b} THEN class = {p} (p = 1728.0, n = 0.0, P = 3916.0, N = 4208.0, weight = 0.7206332992849847, pvalue = 0.0)
IF stalk-surface-above-ring = {k} AND gill-spacing = {c} THEN class = {p} (p = 2228.0, n = 0.0, P = 3916.0, N = 4208.0, weight = 0.7844739530132789, pvalue = 0.0)
IF spore-print-color = {w} AND ring-type = {e} AND bruises? = {f} THEN class = {p} (p = 1768.0, n = 48.0, P = 3916.0, N = 4208.0, weight = 0.688706522163333, pvalue = 0.0)
IF spore-print-color = {w} AND ring-number = {o} THEN class = {p} (p = 1776.0, n = 48.0, P = 3916.0, N = 4208.0, weight = 0.6898385072923222, pvalue = 0.0)
IF bruises? = {f} AND gill-spacing = {c} AND ring-number = {o} AND veil-color = {w} THEN class = {p} (p = 3152.0, n = 144.0, P = 3916.0, N = 4208.0, weight = 0.8263324354430979, pvalue = 0.0)
IF gill-size = {n} AND gill-spacing = {c} AND population

# tic-tac-toe

In [67]:
train = pd.read_parquet("./data/tic-tac-toe.parquet")

In [68]:
train

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,class
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive
...,...,...,...,...,...,...,...,...,...,...
953,o,x,x,x,o,o,o,x,x,negative
954,o,x,o,x,x,o,x,o,x,negative
955,o,x,o,x,o,x,x,o,x,negative
956,o,x,o,o,x,x,x,o,x,negative


In [69]:
X = train.drop(['class'], axis=1)
y = train['class']

clf_simple.fit(X, y)

print("Rules original")
for rule in clf_simple.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules")
clf_complex.fit(X, y)
for rule in clf_complex.model.rules:
    print(rule, rule.stats)

print("\n\nComplex rules + inners alternatives")
clf_complex_with_inners.fit(X, y)
for rule in clf_complex_with_inners.model.rules:
    print(rule, rule.stats)

Rules original
IF middle-middle-square = {x} AND bottom-left-square = {b} AND top-right-square = {b} THEN class = {positive} (p = 30.0, n = 0.0, P = 626.0, N = 332.0, weight = 0.5239616613418531, pvalue = 2.232953092174273e-06)
IF middle-middle-square = {x} AND bottom-left-square = {b} AND top-right-square = {x} THEN class = {positive} (p = 20.0, n = 0.0, P = 626.0, N = 332.0, weight = 0.5159744408945687, pvalue = 0.00018100703894850716)
IF middle-middle-square = {x} AND bottom-middle-square = {o} AND bottom-right-square = {x} AND bottom-left-square = {b} THEN class = {positive} (p = 22.0, n = 0.0, P = 626.0, N = 332.0, weight = 0.5175718849840255, pvalue = 7.550592519529088e-05)
IF middle-middle-square = {x} AND bottom-middle-square = {o} AND bottom-left-square = {b} AND middle-right-square = {x} THEN class = {positive} (p = 22.0, n = 0.0, P = 626.0, N = 332.0, weight = 0.5175718849840255, pvalue = 7.550592519529088e-05)
IF middle-middle-square = {x} AND top-left-square = {x} AND midd