In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from h1st.model.predictive_model import PredictiveModel
import pandas as pd

class RuleModel(PredictiveModel):
    """Hand-written boolean rule that flags an iris sample as setosa.

    A sample is labelled 1 when both its sepal length and its sepal width lie
    inside the closed intervals given by the class attributes below, and 0
    otherwise.
    """

    # Inclusive bounds of the accepted (sepal_length, sepal_width) box.
    sepal_length_max: float = 5.3
    sepal_length_min: float = 3.7
    sepal_width_max: float = 4.4
    sepal_width_min: float = 3

    def predict(self, data):
        """Apply the rule to every row of ``data['X']``.

        Args:
            data: mapping whose ``'X'`` entry is a DataFrame with
                ``sepal_length`` and ``sepal_width`` columns.

        Returns:
            ``{'predictions': DataFrame}`` holding a single ``setosa`` column
            of 0/1 flags, one row per input row, in input order. The frame is
            built from a plain list, so it carries a default positional index
            rather than the index of ``data['X']``.
        """
        df = data['X']
        flags = [
            self.predict_setosa(length, width)
            for length, width in zip(df['sepal_length'], df['sepal_width'])
        ]
        return {'predictions': pd.DataFrame(flags, columns=['setosa'])}

    def predict_setosa(self, sepal_length, sepal_width):
        """Return 1 if a single (sepal_length, sepal_width) pair is inside the box, else 0."""
        length_ok = self.sepal_length_min <= sepal_length <= self.sepal_length_max
        width_ok = self.sepal_width_min <= sepal_width <= self.sepal_width_max
        # Logical `and` (not bitwise `&`): both operands are plain truth values.
        return 1 if length_ok and width_ok else 0


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [4]:
import numpy as np
from h1st.model.fuzzy import (
    FuzzyVariables,
    FuzzyMembership as fm,
    FuzzyRules,
    FuzzyModeler
)

def get_metadata(data):
    """Collect the per-column maximum and minimum of ``data``.

    Args:
        data: a pandas DataFrame.

    Returns:
        A dict keyed by column name; each value is ``{'max': ..., 'min': ...}``
        as reported by ``data.max()`` and ``data.min()``.
    """
    col_max = data.max().to_dict()
    col_min = data.min().to_dict()
    return {
        column: {'max': col_max[column], 'min': col_min[column]}
        for column in col_max
    }

def create_fuzzy_model(data):
    """Assemble the fuzzy-rule teacher model for the setosa task.

    Registers two antecedents (``sepal_length`` and ``sepal_width``) whose
    ranges come from the min/max observed in ``data``, one consequent
    (``setosa``) ranging from 0 to 1 in steps of 0.1, and two rules linking
    them. Returns whatever ``FuzzyModeler().build_model`` produces.

    Args:
        data: DataFrame containing at least the ``sepal_length`` and
            ``sepal_width`` columns.
    """
    observed = get_metadata(data)

    # Membership functions per antecedent, listed in registration order.
    antecedent_memberships = {
        'sepal_length': [('small', fm.SIGMOID, [5.3, -2.5]),
                         ('large', fm.SIGMOID, [5.3, 2.5])],
        'sepal_width': [('small', fm.SIGMOID, [3.1, -2.0]),
                        ('large', fm.SIGMOID, [3.1, 2.0])],
    }

    variables = FuzzyVariables()
    for feature, memberships in antecedent_memberships.items():
        limits = observed[feature]
        # NOTE(review): np.arange is half-open, so the observed maximum itself
        # may not be part of this range - confirm that is intended.
        variables.add(
            var_name=feature,
            var_type='antecedent',
            var_range=np.arange(limits['min'], limits['max'], 0.1),
            membership_funcs=memberships,
        )
    variables.add(
        var_name='setosa',
        var_type='consequent',
        # The small epsilon keeps the upper end (1.0) inside the range.
        var_range=np.arange(0, 1+1e-5, 0.1),
        membership_funcs=[('false', fm.GAUSSIAN, [0, 0.5]),
                          ('true', fm.GAUSSIAN, [1, 0.5])],
    )

    def term(variable, label):
        # Look up one labelled membership term of a registered variable.
        return variables.get(variable)[label]

    rules = FuzzyRules()
    # small sepal_length AND large sepal_width -> setosa is 'true'
    rules.add(
        'rule1',
        if_term=term('sepal_length', 'small') & term('sepal_width', 'large'),
        then_term=term('setosa', 'true'),
    )
    # large sepal_length AND small sepal_width -> setosa is 'false'
    rules.add(
        'rule2',
        if_term=term('sepal_length', 'large') & term('sepal_width', 'small'),
        then_term=term('setosa', 'false'),
    )

    return FuzzyModeler().build_model(variables, rules)

In [5]:
from sklearn import datasets

In [6]:
# Load the iris frame and rename its five columns positionally
# (four measurements followed by the target) to short snake_case names.
df_raw = datasets.load_iris(as_frame=True).frame
df_raw.columns = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "setosa",
]
# Keep the two sepal features and turn the target into a binary flag:
# 1 where the original target equals 0 (treated as the setosa class here),
# 0 for every other target value.
# `.assign` returns a new frame, so nothing is written into a column-subset
# slice - the pattern that can raise pandas' SettingWithCopyWarning.
df_raw = df_raw[["sepal_length", "sepal_width", "setosa"]].assign(
    setosa=lambda frame: frame["setosa"].eq(0).astype("int64")
)

In [7]:

# Class balance check: number of rows per value of the binary `setosa` flag.
df_raw["setosa"].value_counts()

0    100
1     50
Name: setosa, dtype: int64

In [8]:
from collections import defaultdict
from numpy import array
from sklearn.model_selection import KFold
from h1st.model.oracle import OracleModeler
from h1st.model.oracle.ensembler_modelers import MLPEnsembleModeler
from loguru import logger

# prepare cross validation
kfold = KFold(n_splits=3, shuffle=True, random_state=1)

# Both collectors are keyed as [metric name][oracle name] -> list with one
# entry per fold.
all_metrics = defaultdict(lambda: defaultdict(list))
all_index = defaultdict(lambda: defaultdict(list))

# 1. build rule-based model
bool_teacher = RuleModel()
fuzzy_teacher = create_fuzzy_model(df_raw)

feature_columns = ["sepal_length", "sepal_width"]
label_columns = ["setosa"]
metric_names = ['f1_score', 'precision', 'recall']
fuzzy_thresholds = {'setosa': 0.55}

# Oracle variants to compare: name -> extra keyword arguments passed to
# OracleModeler.build_model. Insertion order is the build order.
oracle_variants = {
    # bool teacher only
    "oracle_with_bool": {
        "teacher_model": bool_teacher,
    },
    # fuzzy teacher only
    "oracle_with_fuzzy": {
        "teacher_model": fuzzy_teacher,
        "fuzzy_thresholds": fuzzy_thresholds,
    },
    # bool + ml_ensemble
    "oracle_with_bool_ml": {
        "teacher_model": bool_teacher,
        "ensembler_modeler": MLPEnsembleModeler,
    },
    # fuzzy + ml_ensemble
    "oracle_with_fuzzy_ml": {
        "teacher_model": fuzzy_teacher,
        "fuzzy_thresholds": fuzzy_thresholds,
        "ensembler_modeler": MLPEnsembleModeler,
    },
    # bool + ml_ensemble + x
    "oracle_with_bool_ml_x": {
        "teacher_model": bool_teacher,
        "ensembler_modeler": MLPEnsembleModeler,
        "inject_x_in_ensembler": True,
    },
    # fuzzy + ml_ensemble + x
    "oracle_with_fuzzy_ml_x": {
        "teacher_model": fuzzy_teacher,
        "fuzzy_thresholds": fuzzy_thresholds,
        "ensembler_modeler": MLPEnsembleModeler,
        "inject_x_in_ensembler": True,
    },
}

# enumerate splits
for no, (train, test) in enumerate(kfold.split(df_raw)):
    # 2. prepare data
    df_train, df_test = df_raw.iloc[train], df_raw.iloc[test]
    input_data = {
        "unlabeled_data": df_train[feature_columns],
        "labeled_data": {
            "X_train": df_train[feature_columns],
            "y_train": df_train[label_columns],
            "X_test": df_test[feature_columns],
            "y_test": df_test[label_columns],
        },
    }

    # 3. build one oracle per variant on this fold; the built oracles of the
    # current fold are available through `model_map`.
    modeler = OracleModeler()
    model_map = {
        variant: modeler.build_model(data=input_data, **build_kwargs)
        for variant, build_kwargs in oracle_variants.items()
    }

    # 4. collect evaluation results
    for name, oracle in model_map.items():
        logger.info(oracle.metrics)
        for metric_name in metric_names:
            # Work on a shallow copy so the oracle's own metrics dict is not
            # modified when the 'students' pair is split into two entries.
            scores = dict(oracle.metrics[metric_name]['setosa'])
            student1, student2 = scores.pop('students')
            scores.update({'student1': student1, 'student2': student2})
            all_metrics[metric_name][name].append(scores)
            all_index[metric_name][name].append(f'{name}_{no}')

2022-10-08 16:56:33.953 | INFO     | h1st.model.oracle.oracle_modeler:build_model:142 - Evaluated all sub models successfully.
2022-10-08 16:56:34.220 | INFO     | h1st.model.oracle.oracle_modeler:build_model:142 - Evaluated all sub models successfully.
X has feature names, but StandardScaler was fitted without feature names
X has feature names, but StandardScaler was fitted without feature names
X has feature names, but StandardScaler was fitted without feature names
2022-10-08 16:56:34.559 | INFO     | h1st.model.oracle.oracle_modeler:build_model:142 - Evaluated all sub models successfully.
X has feature names, but StandardScaler was fitted without feature names
X has feature names, but StandardScaler was fitted without feature names
X has feature names, but StandardScaler was fitted without feature names
2022-10-08 16:56:34.895 | INFO     | h1st.model.oracle.oracle_modeler:build_model:142 - Evaluated all sub models successfully.
2022-10-08 16:56:35.208 | INFO     | h1st.model.oracle

In [9]:
# Average every oracle's per-fold scores, one row per (metric type, oracle).
# Each entry pairs the oracle name with [metric type, mean score, ...], where
# the means follow the column order of the per-fold score dicts.
averaged_rows = [
    (oracle_name, [metrics_type, *pd.DataFrame(fold_scores).mean().values])
    for metrics_type in ('f1_score', 'precision', 'recall')
    for oracle_name, fold_scores in all_metrics[metrics_type].items()
]
oracle_names = [oracle_name for oracle_name, _ in averaged_rows]
all_avg_metrics = [row for _, row in averaged_rows]

# Column labels are applied positionally to the averaged values.
final_metrics = pd.DataFrame(
    all_avg_metrics,
    index=oracle_names,
    columns=['metrics_type', 'teacher', 'ensemblers', 'student1', 'student2'],
)

final_metrics

Unnamed: 0,metrics_type,teacher,ensemblers,student1,student2
oracle_with_bool,f1_score,0.863713,0.87436,0.87436,0.935007
oracle_with_fuzzy,f1_score,0.866383,0.887653,0.887653,0.946387
oracle_with_bool_ml,f1_score,0.863713,1.0,0.87436,0.935007
oracle_with_fuzzy_ml,f1_score,0.866383,0.983333,0.887653,0.946387
oracle_with_bool_ml_x,f1_score,0.863713,0.99099,0.87436,0.935007
oracle_with_fuzzy_ml_x,f1_score,0.866383,0.99099,0.887653,0.946387
oracle_with_bool,precision,1.0,1.0,1.0,1.0
oracle_with_fuzzy,precision,1.0,1.0,1.0,1.0
oracle_with_bool_ml,precision,1.0,1.0,1.0,1.0
oracle_with_fuzzy_ml,precision,1.0,0.968253,1.0,1.0


In [10]:
# Write the averaged metrics table to a CSV file; the relative path resolves
# against the current working directory.
final_metrics.to_csv('iris_eval_results.csv')

In [11]:
import plotly.graph_objects as go
# Scatter plot of sepal width (x) against sepal length (y), one trace per class.
fig1 = go.Figure()

hover_text = "<b>SepalWidthCm:</b> %{x} <br><b>SepalLengthCm:</b> %{y}"
# (legend name, row mask, marker colour, marker symbol) for each trace, in draw order
trace_specs = [
    ('Setosa', df_raw.setosa == 1, 'rgb(52, 152, 219)', 'star-triangle-up'),
    ('Non-Setosa', df_raw.setosa != 1, 'rgb(170, 128, 255)', 'hexagram'),
]
for legend_name, row_mask, colour, symbol in trace_specs:
    subset = df_raw[row_mask]
    fig1.add_trace(
        go.Scatter(
            x=subset.sepal_width,
            y=subset.sepal_length,
            name=legend_name,
            mode='markers',
            marker_color=colour,
            marker_symbol=symbol,
            marker_opacity=0.8,
            hovertemplate=hover_text,
        )
    )

# Marker styling shared by both traces.
fig1.update_traces(mode='markers', marker_line_width=1.5, marker_size=12)

# Titles and template.
fig1.update_layout(
    template='plotly_white',
    xaxis=dict(title_text='SepalWidthCm', title_standoff=10),
    yaxis=dict(title_text='SepalLengthCm', title_standoff=10),
    title_text='Sepal Length/Width',
    title_x=0.5,
)

# Same axis-line and hover-spike styling on both axes.
axis_style = dict(
    showline=True, linewidth=3, linecolor='black',
    showspikes=True, spikecolor='red', spikethickness=2,
)
fig1.update_xaxes(**axis_style)
fig1.update_yaxes(**axis_style)

fig1.show()