In [15]:
!pip install git+https://github.com/scikit-learn-contrib/skope-rules.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/scikit-learn-contrib/skope-rules.git
  Cloning https://github.com/scikit-learn-contrib/skope-rules.git to /tmp/pip-req-build-kskwnt1u
  Running command git clone --filter=blob:none --quiet https://github.com/scikit-learn-contrib/skope-rules.git /tmp/pip-req-build-kskwnt1u
  Resolved https://github.com/scikit-learn-contrib/skope-rules.git to commit 35553e98558c728e063c4d4dab3941ce8f6e3609
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [25]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from skrules import SkopeRules
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()



# let's remove the column "id" from the dataset, as it will not be used in classification
data = data.drop(["id"],axis=1)
data

# save the output class, "diagnosis", in y_data
y_data=data["diagnosis"]
# save the features in a new dataframe (datafeatures)
datafeatures = data.drop(["diagnosis"],axis=1,inplace=False)
datafeatures

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

# standardize the dataset using z-score transformation
scaler = StandardScaler(copy=False)
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)




# Train a Decision Tree model
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# Train a Bagging of Decision Trees model
bagging = BaggingClassifier(DecisionTreeClassifier(random_state=42), n_estimators=100, random_state=42)
bagging.fit(X_train, y_train)

# Train a Random Forests model
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Train a Skope-Rules model
rules = SkopeRules(max_depth_duplication=2)
rules.fit(X_train, y_train)

# Evaluate the performance of the models on the testing set
y_pred_dt = dt.predict(X_test)
y_pred_bagging = bagging.predict(X_test)
y_pred_rf = rf.predict(X_test)
y_pred_rules = rules.predict(X_test)

acc_dt = accuracy_score(y_test, y_pred_dt)
acc_bagging = accuracy_score(y_test, y_pred_bagging)
acc_rf = accuracy_score(y_test, y_pred_rf)
acc_rules = accuracy_score(y_test, y_pred_rules)

print(f"Decision Tree accuracy: {acc_dt:.3f}")
print(f"Bagging of Decision Trees accuracy: {acc_bagging:.3f}")
print(f"Random Forests accuracy: {acc_rf:.3f}")
print(f"Skope-Rules accuracy: {acc_rules:.3f}")

NameError: ignored

In [20]:
# Visualize the decision boundaries for the Random Forests model
import matplotlib.pyplot as plt
import numpy as np

xx, yy = np.meshgrid(np.arange(4, 8, 0.01), np.arange(1.5, 5, 0.01))
Z = rf.predict(np.c_[xx.ravel(), yy.ravel()].reshape(xx.shape[0]*xx.shape[1], -1))
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X_test[:, 2], X_test[:, 3], c=y_test, alpha=0.8)
plt.xlabel(iris.feature_names[2])
plt.ylabel(iris.feature_names[3])
plt.show()

# Plot the feature importance scores for the Random Forests model
from sklearn.inspection import permutation_importance

result = permutation_importance(rf, X_test, y_test, n_repeats=10, random_state=42)
sorted_idx = result.importances_mean.argsort()

plt.barh(range(4), result.importances_mean[sorted_idx])
plt.yticks(range(4), np.array(iris.feature_names)[sorted_idx])
plt.xlabel("Importance score")
plt.show()


SyntaxError: ignored