In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the truss data set; the file separates its fields with ';'.
df_all = pd.read_csv("../data/TwoBarTruss.csv", sep=";")

In [2]:
# Display the loaded frame; pandas elides the middle rows of a long frame.
df_all

Unnamed: 0,Volume,MaxStress,x1,x2,y,g1,Feasible
0,0.021916,18377.203009,0.002354,0.004815,2.115106,-81622.796991,1
1,0.031101,12991.022813,0.003331,0.006803,2.129376,-87008.977187,1
2,0.008704,46119.773223,0.000961,0.001948,2.022885,-53880.226777,1
3,0.038300,10514.420370,0.004061,0.008395,2.145074,-89485.579630,1
4,0.037292,10759.938300,0.004219,0.008331,1.977927,-89240.061700,1
...,...,...,...,...,...,...,...
19995,0.029792,90743.229458,0.006511,0.001055,1.523667,-9256.770542,1
19996,0.016515,29517.749139,0.001716,0.004547,1.719528,-70482.250861,1
19997,0.010111,73875.304147,0.001916,0.001476,1.079235,-26124.695853,1
19998,0.025331,62938.337736,0.004819,0.001398,2.187591,-37061.662264,1


In [3]:
# Columns holding the design variables.
variable_names = ["x1", "x2", "y"]

# Boolean row mask: True where the "Feasible" flag equals 1.
feasible = df_all["Feasible"].to_numpy() == 1

# Objective values (fs) and design variables (xs) of the feasible rows only.
# The index is rebuilt so that both frames are numbered 0..n-1 without gaps.
fs = df_all.loc[feasible, ["Volume", "MaxStress"]].reset_index(drop=True)
xs = df_all.loc[feasible, variable_names].reset_index(drop=True)

# Quick look at the first rows of both frames.
print(fs.head(), xs.head(), sep="\n")

     Volume     MaxStress
0  0.021916  18377.203009
1  0.031101  12991.022813
2  0.008704  46119.773223
3  0.038300  10514.420370
4  0.037292  10759.938300
         x1        x2         y
0  0.002354  0.004815  2.115106
1  0.003331  0.006803  2.129376
2  0.000961  0.001948  2.022885
3  0.004061  0.008395  2.145074
4  0.004219  0.008331  1.977927


In [4]:
""" Category 1 -- Knee: MaxStress <= 50000 & Volume <= 0.02
    Category 2 -- LeftExtreme: MaxStress >= 70000 & Volume <= 0.015
    Category 3 -- Volume >= 0.055
    Category 4 -- MaxStress >= 50000 & Volume >= 0.04
"""

# One boolean mask per category, built from the objective values in `fs`.
c1_mask = (fs["MaxStress"] <= 50000) & (fs["Volume"] <= 0.02)    # category 1
c2_mask = (fs["MaxStress"] >= 70000) & (fs["Volume"] <= 0.015)   # category 2
c3_mask = fs["Volume"] >= 0.055                                  # category 3
c4_mask = (fs["MaxStress"] >= 50000) & (fs["Volume"] >= 0.04)    # category 4

# Write the labels in ascending category order. Categories 3 and 4 can both
# match a row (Volume >= 0.055 with MaxStress >= 50000); the later label wins.
for _label, _mask in enumerate((c1_mask, c2_mask, c3_mask, c4_mask), start=1):
    xs.loc[_mask, "category"] = _label

# Keep only the rows that received a label.
xs_clean = xs[xs["category"].notna()]
xs_clean.head()

Unnamed: 0,x1,x2,y,category
2,0.000961,0.001948,2.022885,1.0
8,0.001646,0.003333,1.957039,1.0
15,0.001044,0.002255,2.322141,1.0
20,0.001953,0.004115,2.179401,1.0
21,0.001953,0.004115,2.179401,1.0


## Class 1 vs others

In [10]:

# Binary target: category 1 keeps label 1, while the rows matched by the
# category 2, 3 and 4 masks are merged into label 0.
xs.loc[c1_mask, "category"] = 1
xs.loc[c2_mask | c3_mask | c4_mask, "category"] = 0

# Drop every row whose "category" is still missing.
xs_clean = xs.dropna(subset=["category"])

### Decision tree

In [None]:
from sklearn import tree
Classifier = tree.DecisionTreeClassifier
import graphviz

# A shallow tree (at most 8 leaves) keeps the exported diagram readable.
# scikit-learn permutes the features at every split, so random_state is fixed
# to make a re-run of this cell produce the same tree.
classifier = Classifier(max_leaf_nodes=8, random_state=0)
classifier.fit(xs_clean[variable_names], xs_clean["category"])

# class_names is documented as an array-like of str listed in ascending order
# of the class labels (0 -> "Other", 1 -> "Knee"); a dict is not a supported type.
# feature_names reuses the column list the model was fitted on.
dot_data = tree.export_graphviz(
    classifier,
    out_file=None,
    feature_names=variable_names,
    class_names=["Other", "Knee"],
    filled=True,
    rounded=False,
    precision=5,
    special_characters=False,
)
graph = graphviz.Source(dot_data)
graph

### RuleFit

In [15]:
from imodels import RuleFitClassifier as Classifier

# Fit a RuleFit model with its default settings on the labelled rows.
classifier = Classifier()
classifier.fit(X=xs_clean[variable_names], y=xs_clean["category"])

# Keep only the rules with a non-zero coefficient, largest support first.
rules = classifier._get_rules()
rules = rules.loc[rules["coef"].ne(0)].sort_values(by="support", ascending=False)

# Columns shown below:
#   'rule'    is how the feature is constructed
#   'coef'    is its weight in the final linear model
#   'support' is the fraction of points it applies to
display(rules[["rule", "coef", "support"]].style.background_gradient(cmap="viridis"))

Unnamed: 0,rule,coef,support
2,y,-0.085122,1.0
10,x1 > 0.00475,-0.217411,0.510456
3,x1 > 0.00461,-0.001478,0.510456
15,x1 > 0.00446,-0.000304,0.510456
14,x1 > 0.0049,-1.34906,0.510456
13,x1 > 0.00451,-0.001021,0.510456
12,x1 > 0.0048,-0.000522,0.510456
11,x1 > 0.00492,-6e-06,0.510456
16,x1 > 0.00489,-0.000983,0.510456
9,x1 > 0.00458,-0.000511,0.510456


### Skope rules

In [20]:
from imodels import SkopeRulesClassifier as Classifier

# Fit a SkopeRules model with its default settings on the labelled rows.
classifier = Classifier()
classifier.fit(X=xs_clean[variable_names], y=xs_clean["category"])

# Rules stored on the fitted model.
rules = classifier.rules_

def show_rules(rules) -> None:
    """Print a header line followed by one ``<rule> --> <score>`` line per rule.

    Args:
        rules: Iterable of rule objects. Each one is interpolated into the
            output line as-is and must expose an indexable ``args`` attribute
            whose first element is printed as the score.
    """
    # NOTE(review): the header calls the score "Accuracy"; for SkopeRules the
    # first element of ``args`` is presumably the rule's precision -- confirm.
    print("Rule -> Accuracy:")
    for entry in rules:
        score = entry.args[0]
        print(f"{entry} --> {score}")
        
# Print every rule held in `rules` together with its first score.
show_rules(rules)



Rule -> Accuracy:
x1 <= 0.00448 and x1 > 0.00075 and x2 > 0.00169 --> 0.9748097950024214
x1 <= 0.00451 and x1 > 0.00075 and x2 > 0.00172 --> 0.9714680466237942
x1 <= 0.00439 and x1 > 0.00075 and y > 1.09807 --> 0.8906752411575563
x2 <= 0.00499 and x2 > 0.00177 --> 0.793939393939394
x2 <= 0.00578 and x2 > 0.00173 and y <= 2.48097 --> 0.8556701030927835
x2 <= 0.00572 and x2 > 0.00176 and y <= 2.48097 --> 0.8127090301003345
x2 <= 0.00572 and x2 > 0.00176 and y > 2.48097 --> 0.6506024096385542
x2 <= 0.00578 and x2 > 0.00173 and y > 2.48097 --> 0.5802469135802469
x2 <= 0.00584 and x2 > 0.00499 --> 0.5263157894736842
x1 <= 0.00439 and x1 > 0.00075 and y <= 1.09807 --> 0.84
x2 <= 0.00804 and x2 > 0.00572 and y <= 1.71149 --> 0.65625
x2 <= 0.00884 and x2 > 0.00578 and y <= 1.49533 --> 0.8823529411764706
x2 <= 0.00857 and x2 > 0.00584 and y <= 1.49533 --> 0.6428571428571429


In [32]:
from imodels import BayesianRuleSetClassifier as Classifier

classifier = Classifier()

# NOTE(review): the recorded run of this fit stopped with a MemoryError (a
# 7.67 GiB float64 array of shape (259254, 3969) could not be allocated).
# Presumably the classifier's settings or the number of rows has to be reduced
# before this cell can complete -- TODO confirm which setting bounds that array.
classifier.fit(X=xs_clean[variable_names], y=xs_clean["category"], feature_names=variable_names)

MemoryError: Unable to allocate 7.67 GiB for an array with shape (259254, 3969) and data type float64

In [31]:
# Show the parameters reported by whichever estimator `classifier` is bound to.
# NOTE(review): the recorded execution count of this cell (31) is lower than
# that of the cell above it (32), so the stored output may describe an earlier
# `classifier` -- re-run the notebook top to bottom to confirm.
classifier.get_params()

{'n_estimators': 10}