In [5]:
import functools
import itertools
import math
import os
import statistics
import typing as t
from pathlib import Path

import kaggle_toolbox.features.generation as features
import kaggle_toolbox.nlp.features as text_features
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from kaggle_toolbox.environment import Environment
from kaggle_toolbox.features.transform import contiguous_to_categorical
from kaggle_toolbox.path import format_path
from kaggle_toolbox.prediction import PredDict
from kaggle_toolbox.progress import NotebookProgressBar
from kaggle_toolbox.trainer import train_kfold_model
from kaggle_toolbox.typing import ensure_list
from kaggle_toolbox.validation import analyze_val_strategy, build_fold_result_df
from sklearn.metrics import f1_score
from textstat import textstat
from tqdm.notebook import tqdm

tqdm.pandas()

In [6]:
TARGET_LIST = [
    'cohesion',
    'syntax',
    'vocabulary',
    'phraseology',
    'grammar',
    'conventions',
]
TARGET = TARGET_LIST[0]

SEED = 42
NUM_FOLDS = 5
FOLD_LIST = [0, 1, 2, 3, 4]

ENVIRONMENT = os.getenv('__KGLTBX_ENVIRONMENT', 'laptop')
_env = Environment(ENVIRONMENT)

ROOT_DIR = _env.param(
    kaggle=Path('/kaggle'),
    colab=Path('/content/drive/MyDrive'),
    laptop=Path('/kaggle'))
DATA_DIR = _env.param(
    kaggle=ROOT_DIR / 'input',
    colab=ROOT_DIR / 'data',
    laptop=ROOT_DIR / 'data')
FP_ELL_DATASET_DIR = _env.param(
    kaggle=DATA_DIR / 'feedback-prize-english-language-learning',
    colab=DATA_DIR / 'fp-ell',
    laptop=DATA_DIR / 'fp-ell')
MODEL_DIR = _env.param(
    kaggle=ROOT_DIR / 'working',
    colab=ROOT_DIR / 'models/fp-ell',
    laptop=ROOT_DIR / 'models')
OOF_DIR = _env.param(
    kaggle=ROOT_DIR / 'working',
    colab=ROOT_DIR / 'oof/fp-ell',
    laptop=ROOT_DIR / 'oof')

TARGET_TO_LVL1_OOF_PATH_DICT = {
    'cohesion': OOF_DIR / 'cohesion-v1-layer_norm-ep_4-valfreq_0p25-pooler_att-full.csv',
    'syntax': OOF_DIR / 'syntax-v1-layer_norm-ep_3-valfreq_0p25-full.csv',
    'vocabulary': OOF_DIR / 'vocabulary-v1-layer_norm-ep_3-valfreq_0p25-std_init.csv',
    'phraseology': OOF_DIR / 'phraseology-v1-layer_norm-ep_3-valfreq_0p25-std_init-full.csv',
    'grammar': OOF_DIR / 'grammar-v1-lnorm-ep_4-valfreq_0p25-sqzr_cat_9_to_12-full.csv',
    'conventions': OOF_DIR / 'conventions-v1-layer_norm-ep_3-valfreq_0p25-full.csv',
}

#### Feature generation

In [7]:
_LVL1_SCORE_FEATURE_LIST = [f'{target}_lvl1_score' for target in TARGET_LIST]
_FEATURE_GENERATOR_LIST = [
    # Score-based
    *features.L1Distance.pairwise_from_feature_list(_LVL1_SCORE_FEATURE_LIST),
    features.Mean(name='lvl1_mean', feature_list=_LVL1_SCORE_FEATURE_LIST),
    features.Stdev(name='lvl1_std', feature_list=_LVL1_SCORE_FEATURE_LIST),
    # Custom simple
    text_features.SubstrCount(name='num_commas', substr=','),
    text_features.SubstrCount(name='num_dots', substr='.'),
    text_features.SubstrCount(name='num_colons', substr=':'),
    text_features.SubstrCount(name='num_semicolons', substr=';'),
    text_features.SubstrCount(name='num_ellipsis', substr='...'),
    text_features.SubstrCount(name='num_newlines', substr='\n'),
    text_features.SubstrCount(name='num_spaces', substr=' '),
    # TextStat simple
    text_features.Func(name='syllable_count', func=textstat.syllable_count),
    text_features.Func(name='lexicon_count', func=functools.partial(textstat.lexicon_count, removepunct=True)),
    text_features.Func(name='char_count', func=functools.partial(textstat.char_count, ignore_spaces=True)),
    text_features.Func(name='letter_count', func=functools.partial(textstat.letter_count, ignore_spaces=True)),
    text_features.Func(name='polysyllabcount', func=functools.partial(textstat.polysyllabcount)),
    text_features.Func(name='monosyllabcount', func=functools.partial(textstat.monosyllabcount)),
    # Custom complex
    features.Div(name='ratio_commas', lhs_feature='num_commas', rhs_feature='char_count'),
    features.Div(name='ratio_dots', lhs_feature='num_dots', rhs_feature='char_count'),
    features.Div(name='ratio_colons', lhs_feature='num_colons', rhs_feature='char_count'),
    features.Div(name='ratio_semicolons', lhs_feature='num_semicolons', rhs_feature='char_count'),
    features.Div(name='ratio_ellipsis', lhs_feature='num_ellipsis', rhs_feature='char_count'),
    features.Div(name='ratio_newlines', lhs_feature='num_newlines', rhs_feature='char_count'),
    features.Div(name='ratio_spaces', lhs_feature='num_spaces', rhs_feature='char_count'),
    # TextStat complex
    text_features.Func(name='flesch_reading_ease', func=textstat.flesch_reading_ease),
    text_features.Func(name='flesch_kincaid_grade', func=textstat.flesch_kincaid_grade),
    text_features.Func(name='gunning_fog', func=textstat.gunning_fog),
    text_features.Func(name='smog_index', func=textstat.smog_index),
    text_features.Func(name='automated_readability_index', func=textstat.automated_readability_index),
    text_features.Func(name='coleman_liau_index', func=textstat.coleman_liau_index),
    text_features.Func(name='linsear_write_formula', func=textstat.linsear_write_formula),
    text_features.Func(name='dale_chall_readability_score', func=textstat.dale_chall_readability_score),
    text_features.Func(name='text_standard', func=functools.partial(textstat.text_standard, float_output=True)),  # type: ignore
    text_features.Func(name='spache_readability', func=textstat.spache_readability),
    text_features.Func(name='mcalpine_eflaw', func=textstat.mcalpine_eflaw),
    text_features.Func(name='reading_time', func=functools.partial(textstat.reading_time, ms_per_char=14.69)),
]


def build_features(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    text_srs = df['full_text']

    feature_arr_dict = text_features.generate_text_features(
        generator_list=_FEATURE_GENERATOR_LIST,
        text_seq=text_srs.tolist(),
        progress_bar=NotebookProgressBar(),
        init_feature_array_dict={
            f'{target}_lvl1_score': df[f'{target}_lvl1_score'].values
            for target in TARGET_LIST
        })  # type: ignore
    for feature_name, feature_arr in feature_arr_dict.items():
        df[feature_name] = feature_arr

    return df

In [8]:
def _read_data(
        dataset_dir_path: Path,
        target_list: t.List[str],
        target_to_lvl1_oof_path_dict: t.Dict[str, Path],
        num_folds: int,
        seed: int) -> pd.DataFrame:
    all_df = pd.read_csv(dataset_dir_path / 'train.csv')
    target_arr = contiguous_to_categorical(all_df[target_list].values)

    mskf = MultilabelStratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)
    for fold_, (_, v_) in enumerate(mskf.split(X=all_df, y=target_arr)):
        all_df.loc[v_, 'fold'] = fold_

    for target in target_list:
        all_df = all_df.merge(
            pd.read_csv(target_to_lvl1_oof_path_dict[target])
                .rename({
                    'id': 'text_id',
                    f'{target}_score': f'{target}_lvl1_score',
                }, axis=1),
            left_on='text_id',
            right_on='text_id')
        all_df[f'{target}_is_roundable'] = (all_df[f'{target}_lvl1_score'] - all_df[target]).abs() <= 0.25

    all_df = build_features(all_df)

    return all_df

all_df = _read_data(
    dataset_dir_path=FP_ELL_DATASET_DIR,
    target_list=TARGET_LIST,
    target_to_lvl1_oof_path_dict=TARGET_TO_LVL1_OOF_PATH_DICT,
    num_folds=NUM_FOLDS,
    seed=SEED)

analyze_val_strategy(all_df, target_list=TARGET_LIST, num_folds=NUM_FOLDS)

cohesion_lvl1_score_cohesion_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

cohesion_lvl1_score_syntax_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

cohesion_lvl1_score_vocabulary_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

cohesion_lvl1_score_phraseology_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

cohesion_lvl1_score_grammar_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

cohesion_lvl1_score_conventions_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

syntax_lvl1_score_syntax_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

syntax_lvl1_score_vocabulary_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

syntax_lvl1_score_phraseology_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

syntax_lvl1_score_grammar_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

syntax_lvl1_score_conventions_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

vocabulary_lvl1_score_vocabulary_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

vocabulary_lvl1_score_phraseology_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

vocabulary_lvl1_score_grammar_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

vocabulary_lvl1_score_conventions_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

phraseology_lvl1_score_phraseology_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

phraseology_lvl1_score_grammar_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

phraseology_lvl1_score_conventions_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

grammar_lvl1_score_grammar_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

grammar_lvl1_score_conventions_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

conventions_lvl1_score_conventions_lvl1_score_l1:   0%|          | 0/3911 [00:00<?, ?it/s]

lvl1_mean:   0%|          | 0/3911 [00:00<?, ?it/s]

lvl1_std:   0%|          | 0/3911 [00:00<?, ?it/s]

num_commas:   0%|          | 0/3911 [00:00<?, ?it/s]

num_dots:   0%|          | 0/3911 [00:00<?, ?it/s]

num_colons:   0%|          | 0/3911 [00:00<?, ?it/s]

num_semicolons:   0%|          | 0/3911 [00:00<?, ?it/s]

num_ellipsis:   0%|          | 0/3911 [00:00<?, ?it/s]

num_newlines:   0%|          | 0/3911 [00:00<?, ?it/s]

num_spaces:   0%|          | 0/3911 [00:00<?, ?it/s]

syllable_count:   0%|          | 0/3911 [00:00<?, ?it/s]

lexicon_count:   0%|          | 0/3911 [00:00<?, ?it/s]

char_count:   0%|          | 0/3911 [00:00<?, ?it/s]

letter_count:   0%|          | 0/3911 [00:00<?, ?it/s]

polysyllabcount:   0%|          | 0/3911 [00:00<?, ?it/s]

monosyllabcount:   0%|          | 0/3911 [00:00<?, ?it/s]

ratio_commas:   0%|          | 0/3911 [00:00<?, ?it/s]

ratio_dots:   0%|          | 0/3911 [00:00<?, ?it/s]

ratio_colons:   0%|          | 0/3911 [00:00<?, ?it/s]

ratio_semicolons:   0%|          | 0/3911 [00:00<?, ?it/s]

ratio_ellipsis:   0%|          | 0/3911 [00:00<?, ?it/s]

ratio_newlines:   0%|          | 0/3911 [00:00<?, ?it/s]

ratio_spaces:   0%|          | 0/3911 [00:00<?, ?it/s]

flesch_reading_ease:   0%|          | 0/3911 [00:00<?, ?it/s]

flesch_kincaid_grade:   0%|          | 0/3911 [00:00<?, ?it/s]

gunning_fog:   0%|          | 0/3911 [00:00<?, ?it/s]

smog_index:   0%|          | 0/3911 [00:00<?, ?it/s]

automated_readability_index:   0%|          | 0/3911 [00:00<?, ?it/s]

coleman_liau_index:   0%|          | 0/3911 [00:00<?, ?it/s]

linsear_write_formula:   0%|          | 0/3911 [00:00<?, ?it/s]

dale_chall_readability_score:   0%|          | 0/3911 [00:00<?, ?it/s]

text_standard:   0%|          | 0/3911 [00:00<?, ?it/s]

spache_readability:   0%|          | 0/3911 [00:00<?, ?it/s]

mcalpine_eflaw:   0%|          | 0/3911 [00:00<?, ?it/s]

reading_time:   0%|          | 0/3911 [00:00<?, ?it/s]

Unnamed: 0,fold,num_samples,cohesion_mean,syntax_mean,vocabulary_mean,phraseology_mean,grammar_mean,conventions_mean
0,0,782,3.077366,2.971867,3.205243,3.065857,2.959719,3.035166
1,1,783,3.12516,3.007024,3.226054,3.111111,3.015964,3.079183
2,2,782,3.140665,3.068414,3.258312,3.138747,3.069693,3.116368
3,3,782,3.131074,3.048593,3.245524,3.125959,3.042839,3.074169
4,4,782,3.161125,3.045396,3.243606,3.142583,3.076087,3.100384


In [9]:
all_df.head(3)

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,fold,cohesion_lvl1_score,cohesion_is_roundable,syntax_lvl1_score,syntax_is_roundable,vocabulary_lvl1_score,vocabulary_is_roundable,phraseology_lvl1_score,phraseology_is_roundable,grammar_lvl1_score,grammar_is_roundable,conventions_lvl1_score,conventions_is_roundable,cohesion_lvl1_score_cohesion_lvl1_score_l1,cohesion_lvl1_score_syntax_lvl1_score_l1,cohesion_lvl1_score_vocabulary_lvl1_score_l1,cohesion_lvl1_score_phraseology_lvl1_score_l1,cohesion_lvl1_score_grammar_lvl1_score_l1,cohesion_lvl1_score_conventions_lvl1_score_l1,syntax_lvl1_score_syntax_lvl1_score_l1,syntax_lvl1_score_vocabulary_lvl1_score_l1,syntax_lvl1_score_phraseology_lvl1_score_l1,syntax_lvl1_score_grammar_lvl1_score_l1,syntax_lvl1_score_conventions_lvl1_score_l1,vocabulary_lvl1_score_vocabulary_lvl1_score_l1,vocabulary_lvl1_score_phraseology_lvl1_score_l1,vocabulary_lvl1_score_grammar_lvl1_score_l1,vocabulary_lvl1_score_conventions_lvl1_score_l1,phraseology_lvl1_score_phraseology_lvl1_score_l1,phraseology_lvl1_score_grammar_lvl1_score_l1,phraseology_lvl1_score_conventions_lvl1_score_l1,grammar_lvl1_score_grammar_lvl1_score_l1,grammar_lvl1_score_conventions_lvl1_score_l1,conventions_lvl1_score_conventions_lvl1_score_l1,lvl1_mean,lvl1_std,num_commas,num_dots,num_colons,num_semicolons,num_ellipsis,num_newlines,num_spaces,syllable_count,lexicon_count,char_count,letter_count,polysyllabcount,monosyllabcount,ratio_commas,ratio_dots,ratio_colons,ratio_semicolons,ratio_ellipsis,ratio_newlines,ratio_spaces,flesch_reading_ease,flesch_kincaid_grade,gunning_fog,smog_index,automated_readability_index,coleman_liau_index,linsear_write_formula,dale_chall_readability_score,text_standard,spache_readability,mcalpine_eflaw,reading_time
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0,1.0,2.85574,False,2.950954,False,3.219144,True,3.222088,True,3.093439,False,2.753647,True,0.0,0.095214,0.363405,0.366349,0.237699,0.102093,0.0,0.26819,0.271134,0.142485,0.197307,0.0,0.002944,0.125705,0.465497,0.0,0.128649,0.468441,0.0,0.339792,0.0,3.015835,0.177238,1.0,18.0,0.0,0.0,0.0,6.0,271.0,321,261,1110,1089,11,212,0.000901,0.016216,0.0,0.0,0.0,0.005405,0.244144,90.6,4.2,6.57,7.6,5.8,6.31,8.0,5.99,6.0,3.34,20.2,16.31
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5,0.0,2.784366,False,2.648692,True,2.870578,True,2.740792,False,2.370473,False,2.674716,True,0.0,0.135674,0.086212,0.043575,0.413893,0.10965,0.0,0.221886,0.0921,0.278219,0.026025,0.0,0.129786,0.500105,0.195862,0.0,0.370318,0.066075,0.0,0.304243,0.0,2.681603,0.156901,4.0,14.0,0.0,0.0,0.0,10.0,527.0,666,533,2098,2077,33,433,0.001907,0.006673,0.0,0.0,0.0,0.004766,0.251192,66.64,13.4,15.47,11.9,16.2,5.93,11.2,2.45,12.0,6.4,58.5,30.82
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5,4.0,2.891909,True,3.007077,False,3.099227,True,2.975205,True,2.962219,True,3.016932,False,0.0,0.115168,0.207318,0.083296,0.07031,0.125023,0.0,0.09215,0.031872,0.044858,0.009855,0.0,0.124022,0.137008,0.082295,0.0,0.012986,0.041727,0.0,0.054713,0.0,2.992095,0.062628,7.0,19.0,0.0,0.0,0.0,2.0,318.0,411,320,1343,1307,23,258,0.005212,0.014147,0.0,0.0,0.0,0.001489,0.236783,79.8,6.3,7.22,9.4,6.8,6.09,6.625,5.95,7.0,3.64,23.6,19.73


#### Grid Search

In [10]:
def grid_search(
        train_model_fn: t.Callable[[t.Dict[str, t.Any], int], t.Tuple[float, PredDict]],
        param_plan: t.Dict[str, t.List[t.Any]],
        fold_list: t.List[int],
        print_best_params: bool = False) -> t.Tuple[t.Dict[str, t.Any], t.List[float], PredDict]:
    param_name_list = list(param_plan.keys())
    param_comb_list = list(itertools.product(*[param_plan[param_name] for param_name in param_name_list]))
    best_param_dict, best_score_list, best_pred_dict = None, None, None
    it = tqdm(param_comb_list)
    for param_value_tuple in it:
        param_value_list = list(param_value_tuple)
        param_dict = dict(zip(param_name_list, param_value_list))
        param_str = ', '.join([f'{k} = {v}' for k, v in param_dict.items()])
        if best_score_list is not None:
            it.set_description(f'Best score: {statistics.mean(best_score_list):.4f}. Params: {param_str}')
        else:
            it.set_description(f'Params: {param_str}')
        iter_score_list, iter_pred_dict = train_kfold_model(
            train_model_fn=functools.partial(train_model_fn, param_dict),
            fold_list=fold_list)
        if best_score_list is None or statistics.mean(best_score_list) > statistics.mean(iter_score_list):
            best_param_dict = param_dict
            best_score_list = iter_score_list
            best_pred_dict = iter_pred_dict
            it.set_description(f'Best score: {statistics.mean(best_score_list):.4f}. Params: {param_str}')
            if print_best_params:
                print(f'Best params: {best_param_dict}')
    assert best_param_dict is not None
    assert best_score_list is not None
    assert best_pred_dict is not None
    return best_param_dict, best_score_list, best_pred_dict


#### Catboost

In [14]:
class _XY(t.NamedTuple):
    x: np.ndarray
    y: np.ndarray
    feature_name_list: t.List[str]


class _CatboostTrainer:
    target: str

    def __init__(
            self,
            model_params: t.Optional[t.Dict[str, t.Any]] = None,
            plot: bool = True,
            model_path_template: t.Optional[Path] = None,
            print_feature_importance: bool = False):
        self._model_params = model_params if model_params is not None else {}
        self._plot = plot
        self._model_path_template = model_path_template
        self._print_feature_importance = print_feature_importance

    def _get_model(self) -> CatBoostClassifier:
        return CatBoostClassifier(
            task_type='GPU',
            random_seed=SEED,
            eval_metric='F1',
            **self._model_params)

    def _is_feature_included(self, feature: str) -> bool:
        return feature not in {
            'text_id',
            'full_text',
            'fold',
            *TARGET_LIST,
            *[f'{target}_is_roundable' for target in TARGET_LIST],
        }

    def _convert_df_to_xy(self, df: pd.DataFrame, target: str) -> _XY:
        feature_name_list = [
            col for col in t.cast(t.List[str], df.columns)
            if self._is_feature_included(col)
        ]
        x = df[feature_name_list].values
        y = t.cast(np.ndarray, df[f'{target}_is_roundable'].values)
        return _XY(x=x, y=y, feature_name_list=feature_name_list)

    def __call__(self, fold: int) -> t.Tuple[float, PredDict]:
        train_df, valid_df = all_df[all_df['fold'] != fold], all_df[all_df['fold'] == fold]

        booster = self._get_model()
        train_xy = self._convert_df_to_xy(train_df, self.target)
        valid_xy = self._convert_df_to_xy(valid_df, self.target)
        booster.fit(
            train_xy.x,
            train_xy.y,
            eval_set=(valid_xy.x, valid_xy.y),
            plot=self._plot,
            silent=True)
        valid_y_pred = booster.predict(valid_xy.x)

        if self._model_path_template is not None:
            booster.save_model(str(format_path(self._model_path_template, target=self.target, fold=fold)))  # type: ignore
        score = f1_score(y_true=valid_xy.y, y_pred=valid_y_pred)

        if self._print_feature_importance:
            for feature, imp_score in sorted(
                    zip(train_xy.feature_name_list, booster.feature_importances_),
                    key=lambda x: x[1],
                    reverse=True):
                print(f'{feature.ljust(50)} = {imp_score:.4f}')

        return float(score), PredDict(zip(valid_df['text_id'].tolist(), [ensure_list(x) for x in valid_y_pred.tolist()]))


class CohesionCatboostTrainer(_CatboostTrainer):
    target = 'cohesion'

    def _is_feature_included(self, feature: str) -> bool:
        return super()._is_feature_included(feature) and not feature.endswith('_l1')


trainer = CohesionCatboostTrainer()
score_list, oof_pred_dict= train_kfold_model(
        train_model_fn=trainer,
        fold_list=FOLD_LIST)
oof_pred_dict.save_to_csv(
    OOF_DIR / f'rounding-catboost-{trainer.target}-cv2.csv',
    score_col_name_list=[f'{trainer.target}_score'])
print(f'Mean score: {statistics.mean(score_list):.4f}')
build_fold_result_df(fold_list=FOLD_LIST, score_list=score_list)
# best_param_dict, score_list, _ = grid_search(
#     train_model_fn=lambda params, fold: _VocabularyCatboostTrainer(params, plot=False)(fold),
#     param_plan={
#         'learning_rate': [None, 0.06, 0.065, 0.07, 0.075],
#         'iterations': [None, 500, 1500],
#     },
#     fold_list=FOLD_LIST,
#     print_best_params=True)
# build_fold_result_df(fold_list=FOLD_LIST, score_list=score_list)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

TypeError: Labels in y_true and y_pred should be of the same type. Got y_true=[False  True] and y_pred=['False' 'True']. Make sure that the predictions provided by the classifier coincides with the true labels.