# FuzzyCocoPython quickstart

This notebook gives a compact tour of the scikit-learn-style API exposed by `fuzzycocopython`.
We train a classifier and a regressor, inspect the generated fuzzy rules, and show how to persist a model.

We will:
- fit a `FuzzyCocoClassifier` on the Iris dataset and review its fuzzy knowledge base
- fit a `FuzzyCocoRegressor` on the Diabetes dataset and inspect predictions
- compute rule activation statistics
- save and reload a trained estimator

In [1]:
from pathlib import Path

import pandas as pd
from sklearn.datasets import load_diabetes, load_iris
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split

from fuzzycocopython import FuzzyCocoClassifier, FuzzyCocoRegressor

# Single seed shared by the train/test splits and both estimators below.
RANDOM_STATE = 0

## Classification example

In [2]:
# Load Iris as pandas objects (as_frame=True) and separate features from labels.
iris = load_iris(as_frame=True)
X_iris, y_iris = iris.data, iris.target

# Hold out 30% of the samples, stratified on the class label, with a fixed seed.
iris_split_options = {
    "test_size": 0.3,
    "stratify": y_iris,
    "random_state": RANDOM_STATE,
}
X_train, X_test, y_train, y_test = train_test_split(X_iris, y_iris, **iris_split_options)

X_train.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
144,6.7,3.3,5.7,2.5
117,7.7,3.8,6.7,2.2
115,6.4,3.2,5.3,2.3
22,4.6,3.6,1.0,0.2
28,5.2,3.4,1.4,0.2


In [3]:
# Hyperparameters are passed as keyword overrides. The `input_vars_params__`
# prefix ends up under the `input_vars_params` group of `params_overrides`
# in the estimator repr; the unprefixed ones land under `global_params`.
clf = FuzzyCocoClassifier(
    nb_rules=10,
    input_vars_params__nb_sets=9,
    nb_max_var_per_rule=12,
    random_state=RANDOM_STATE,
)
clf.fit(X_train, y_train)

0,1,2
,params,
,random_state,0
,params_overrides,"{'global_params': {'nb_max_var_per_rule': 12, 'nb_rules': 10}, 'input_vars_params': {'nb_sets': 9}}"


In [4]:
# Line the test-set predictions up against the held-out labels; the label index
# is reset so both columns share a plain 0..n-1 index.
iris_y_pred = clf.predict(X_test)
iris_predictions = pd.DataFrame(
    {
        "y_true": y_test.reset_index(drop=True),
        "y_pred": iris_y_pred,
    }
)

iris_accuracy = clf.score(X_test, y_test)
print(f"Accuracy: {iris_accuracy:.3f}")
iris_predictions.head()

Accuracy: 0.867


Unnamed: 0,y_true,y_pred
0,2,2
1,2,1
2,0,0
3,0,0
4,1,1


### Inspect the fuzzy system

In [5]:
# Long-format view of the rule base: one row per rule component (an antecedent,
# a consequent, or the default output), so a single rule can span several rows.
clf.rules_df_.head()

Unnamed: 0,rule,role,io,var,label,position,orig_set
0,default,default,output,target,Low,0.305882,target.1
1,rule1,antecedent,input,petal length (cm),Set 2,2.966667,petal length (cm).2
2,rule1,antecedent,input,sepal length (cm),Set 3,5.330588,sepal length (cm).3
3,rule1,consequent,output,target,High,1.709804,target.3
4,rule2,antecedent,input,petal width (cm),Set 9,2.095294,petal width (cm).9


In [6]:
# Per-rule activation statistics computed on the held-out Iris samples;
# the resulting table is indexed by the rule text.
rule_stats = clf.rules_stat_activations(X_test)
rule_stats.head()


Unnamed: 0,mean,std,min,max,usage_rate,usage_rate_pct,importance_pct,impact_pct
"IF (petal width (cm) is Set 9), THEN (target is High)",0.429136,0.371705,0.0,1.0,0.666667,66.666667,62.423027,41.615351
"IF (sepal length (cm) is Set 7), THEN (target is Medium)",0.047246,0.186171,0.0,0.86859,0.066667,6.666667,2.955459,0.197031
"IF (sepal length (cm) is Set 3 AND_min petal length (cm) is Set 2), THEN (target is High)",0.004614,0.030605,0.0,0.207627,0.022222,2.222222,1.288181,0.028626
"IF (petal width (cm) is Set 3), THEN (target is High)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"IF (sepal length (cm) is Set 8 AND_min sepal width (cm) is Set 2), THEN (target is Low)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Show the learned IF/THEN rules together with the default (ELSE) rule as a tuple.
clf.rules_, clf.default_rules_

([IF (sepal length (cm) is Set 3 AND_min petal length (cm) is Set 2), THEN (target is High),
  IF (petal width (cm) is Set 9), THEN (target is High),
  IF (sepal length (cm) is Set 7), THEN (target is Medium),
  IF (petal width (cm) is Set 3), THEN (target is High),
  IF (sepal length (cm) is Set 8 AND_min sepal width (cm) is Set 2), THEN (target is Low)],
 [ELSE (target is Low)])

## Regression example

In [8]:
# Load Diabetes as pandas objects (as_frame=True) and separate features from the target.
diabetes = load_diabetes(as_frame=True)
X_reg, y_reg = diabetes.data, diabetes.target

# Hold out 25% of the samples with a fixed seed (no stratification for a continuous target).
reg_split_options = {
    "test_size": 0.25,
    "random_state": RANDOM_STATE,
}
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, **reg_split_options)

X_train_reg.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
20,-0.049105,-0.044642,-0.056863,-0.043542,-0.045599,-0.043276,0.000779,-0.039493,-0.011897,0.015491
353,-0.052738,-0.044642,-0.055785,-0.036656,0.089244,-0.003193,0.008142,0.034309,0.132376,0.003064
281,-0.092695,0.05068,-0.090275,-0.057313,-0.02496,-0.030437,-0.006584,-0.002592,0.024055,0.003064
14,0.045341,-0.044642,-0.025607,-0.012556,0.017694,-6.1e-05,0.081775,-0.039493,-0.031988,-0.075636
300,0.016281,-0.044642,0.073552,-0.041235,-0.004321,-0.013527,-0.013948,-0.001116,0.042897,0.044485


In [9]:
# Fit the regressor with the library defaults; only the seed is set explicitly,
# so no hyperparameter overrides are passed here.
reg = FuzzyCocoRegressor(random_state=RANDOM_STATE)
reg.fit(X_train_reg, y_train_reg)

0,1,2
,params,
,random_state,0.0
,params_overrides,


In [10]:
reg_predictions = reg.predict(X_test_reg)

# score() is reported here as R^2. A negative R^2 means the model does worse
# than always predicting the mean of y_true, so read it alongside the RMSE.
reg_r2 = reg.score(X_test_reg, y_test_reg)
print(f"R^2 score: {reg_r2:.3f}")

reg_rmse = root_mean_squared_error(y_test_reg, reg_predictions)
print(f"RMSE: {reg_rmse:.2f}")

# Side-by-side view of true targets and predictions on a shared 0..n-1 index.
reg_comparison = pd.DataFrame(
    {
        "y_true": y_test_reg.reset_index(drop=True),
        "y_pred": reg_predictions,
    }
)
reg_comparison.head()

R^2 score: -2.268
RMSE: 127.37


Unnamed: 0,y_true,y_pred
0,321.0,156.085139
1,215.0,30.035294
2,127.0,174.684092
3,64.0,104.008071
4,175.0,107.727861


In [11]:
# Rule table of the fitted regressor, in the same one-row-per-rule-component
# layout as the classifier's `rules_df_`.
reg.rules_df_.head()

Unnamed: 0,rule,role,io,var,label,position,orig_set
0,default,default,output,target,High,239.0,target.3
1,rule1,antecedent,input,s1,High,0.010658,s1.3
2,rule1,antecedent,input,s5,High,-0.052772,s5.3
3,rule1,consequent,output,target,Low,30.035294,target.1


## Persisting a trained model

In [12]:
# Round-trip the fitted classifier through disk: save, reload, then predict
# with the reloaded copy to confirm it is usable.
# NOTE(review): the ".pkl" suffix suggests a pickle-based format — only load
# artifacts that come from a trusted source.
artifact = Path("fuzzycoco_classifier.pkl")
try:
    clf.save(artifact)
    loaded_clf = FuzzyCocoClassifier.load(artifact)

    print("Predictions from the reloaded model:")
    print(loaded_clf.predict(X_test.iloc[:3]))
finally:
    # Always remove the demo artifact, even if saving, loading, or predicting
    # fails, so a broken run does not leave a stray file in the working directory.
    artifact.unlink(missing_ok=True)

Predictions from the reloaded model:
[2 1 0]


In [13]:
# Same three predictions as printed in the previous cell, shown here as the
# cell's output value instead of via print().
loaded_clf.predict(X_test.iloc[:3])

array([2, 1, 0])

In [14]:
# Rule table of the reloaded model; its first rows match those shown for `clf` earlier.
loaded_clf.rules_df_.head()

Unnamed: 0,rule,role,io,var,label,position,orig_set
0,default,default,output,target,Low,0.305882,target.1
1,rule1,antecedent,input,petal length (cm),Set 2,2.966667,petal length (cm).2
2,rule1,antecedent,input,sepal length (cm),Set 3,5.330588,sepal length (cm).3
3,rule1,consequent,output,target,High,1.709804,target.3
4,rule2,antecedent,input,petal width (cm),Set 9,2.095294,petal width (cm).9


In [16]:
# Parameter summary of the reloaded estimator.
loaded_clf

0,1,2
,params,
,random_state,0
,params_overrides,"{'global_params': {'nb_max_var_per_rule': 12, 'nb_rules': 10}, 'input_vars_params': {'nb_sets': 9}}"


In [15]:
# Parameter summary of the original estimator, for comparison with the reloaded one.
clf

0,1,2
,params,
,random_state,0
,params_overrides,"{'global_params': {'nb_max_var_per_rule': 12, 'nb_rules': 10}, 'input_vars_params': {'nb_sets': 9}}"
