# modAL + pyhard - Comparando estratégias

- modAL

    - Amostra por incerteza
    - Amostragem aleatória
    - Consulta por comitê
    - Aprendizado passivo
    - Redução do erro esperado

- Pyhard
    - H
    - U
    - H+U
    - LSC
    - N2
    - F3

In [None]:
%run -i set_environment

## Bibliotecas

In [None]:
%run -i importing_libraries

## Classificadores

### Algoritmos

In [None]:
%run -i classifiers

### Conjunto de dados

In [None]:
%run -i importing_datasets

## Estratégias

### Amostra por incerteza

In [None]:
def uncertain_sampling(X_raw, y_raw, idx_data, idx_bag, classifier, init_size, cost):
    """Evaluate the uncertainty-gated sampling strategy on one cross-validation bag.

    A learner is fitted on a small stratified seed taken from the bag's
    training partition and scored on the remainder of that partition. For each
    of the following ``cost - 1`` rounds, ``init_size`` random training
    instances are drawn and taught to the learner only when the classifier's
    uncertainty on the first drawn instance exceeds 0.2. Accuracy, F1 and AUC
    are recorded after the initial fit and after every round.

    Args:
        X_raw: Feature matrix of the whole dataset.
        y_raw: Label vector of the whole dataset.
        idx_data: Per-bag index pairs; ``idx_data[idx_bag][TRAIN]`` selects the
            rows this function works on.
        idx_bag: Index of the bag to use.
        classifier: Classifier name understood by ``which_classifier``.
        init_size: Number of instances added to the seed (on top of one per
            class) and drawn in every round.
        cost: Total number of evaluations to record; values below 1 record
            only the initial evaluation.

    Returns:
        dict with the metric histories, the elapsed time, the classifier name,
        the fraction of ``len(X_raw)`` consumed, and the strategy/package names.
    """
    from modAL.uncertainty import classifier_uncertainty

    sample_size = 0  # number of samples consumed by the strategy
    accuracy_history = []
    f1_history = []
    auc_history = []
    start = timer()

    train_idx = idx_data[idx_bag][TRAIN]
    X_train, X_test, y_train, y_test = train_test_split(
        X_raw[train_idx], y_raw[train_idx],
        train_size=len(np.unique(y_raw)) + init_size,
        stratify=y_raw[train_idx])

    sample_size = sample_size + len(X_train)

    # ActiveLearner fits the estimator on the seed set at construction time.
    learner = ActiveLearner(
        estimator=which_classifier(classifier),
        query_strategy=uncertainty_sampling,
        X_training=X_train, y_training=y_train
    )

    accuracy_history.append(learner.score(X_test, y_test))
    f1_history.append(compute_f1(learner, X_test, y_test, "weighted"))
    auc_history.append(compute_auc(learner, X_test, y_test, "weighted", "ovo"))

    # Bounded loop: the previous `while counter != cost` never terminated when
    # cost was smaller than 1.
    for _ in range(1, cost):
        # Candidates come from the whole training partition, so they may
        # overlap the evaluation split.
        idx = np.random.choice(range(len(train_idx)), size=init_size, replace=False)
        X_train, y_train = X_raw[train_idx[idx]], y_raw[train_idx[idx]]

        # Only the first candidate decides whether the whole batch is taught.
        if classifier_uncertainty(learner, X_train[0].reshape(1, -1)) > 0.2:
            sample_size = sample_size + len(X_train)
            learner.teach(X_train, y_train)

        accuracy_history.append(learner.score(X_test, y_test))
        f1_history.append(compute_f1(learner, X_test, y_test, "weighted"))
        auc_history.append(compute_auc(learner, X_test, y_test, "weighted", "ovo"))

    end = timer()
    time_elapsed = end - start

    return { "accuracy_history": accuracy_history,
             "f1_history": f1_history,
             "auc_history": auc_history,
             "package": "modAL",
             "time_elapsed": time_elapsed,
             "classifier": classifier,
             # TODO(review): decide whether to report the sample count of every
             # evaluation or only the final one.
             "sample_size": sample_size / len(X_raw),
             "Strategy": "Uncertain Sampling"}

### Amostragem aleatória

In [None]:
def random_sampling(X_raw, y_raw, idx_data, idx_bag, classifier, init_size, cost):
    """Evaluate the random-sampling baseline on one cross-validation bag.

    For each of ``cost`` rounds a fresh stratified split of the bag's training
    partition is drawn, a new classifier is fitted on the small training part
    and scored on the rest.

    Returns:
        dict with the metric histories, the elapsed time, the classifier name,
        the accumulated training-set sizes divided by ``len(X_raw)``, and the
        strategy/package names.
    """
    used_samples = 0  # number of samples consumed by the strategy
    acc_scores, f1_scores, auc_scores = [], [], []
    began = timer()

    for _ in range(cost):
        bag_rows = idx_data[idx_bag][TRAIN]
        X_fit, X_eval, y_fit, y_eval = train_test_split(
            X_raw[bag_rows], y_raw[bag_rows],
            train_size=len(np.unique(y_raw)) + init_size,
            stratify=y_raw[bag_rows])
        used_samples += len(X_fit)

        model = which_classifier(classifier)
        model.fit(X_fit, y_fit)

        acc_scores.append(model.score(X_eval, y_eval))
        f1_scores.append(compute_f1(model, X_eval, y_eval, "weighted"))
        auc_scores.append(compute_auc(model, X_eval, y_eval, "weighted", "ovo"))

    time_elapsed = timer() - began

    return {
        "accuracy_history": acc_scores,
        "f1_history": f1_scores,
        "auc_history": auc_scores,
        "package": "modAL",
        "time_elapsed": time_elapsed,
        "classifier": classifier,
        "sample_size": used_samples / len(X_raw),
        "Strategy": "Random Sampling",
    }

### Consulta por comitê

In [None]:
def query_by_committee(X_raw, y_raw, idx_data, idx_bag, classifier, init_size, cost):
    """Evaluate query-by-committee (vote entropy) on one cross-validation bag.

    ``cost`` learners are created, each fitted on its own stratified seed of
    the bag's training partition. The pool left over by the last split is then
    queried ``cost`` times with ``init_size + 1`` instances per query; queried
    instances are taught to the committee and removed from the pool, and the
    committee is scored on what remains.

    Returns:
        dict with the metric histories, the elapsed time, the classifier name,
        the fraction of ``len(X_raw)`` consumed, and the strategy/package names.
    """
    from modAL.models import ActiveLearner, Committee
    from modAL.disagreement import vote_entropy_sampling

    used_samples = 0  # number of samples consumed by the strategy
    acc_scores, f1_scores, auc_scores = [], [], []
    began = timer()

    # Build the committee: one learner per round of the budget.
    members = []
    for _ in range(cost):
        bag_rows = idx_data[idx_bag][TRAIN]
        X_seed, X_pool, y_seed, y_pool = train_test_split(
            X_raw[bag_rows], y_raw[bag_rows],
            train_size=len(np.unique(y_raw)) + init_size,
            stratify=y_raw[bag_rows])
        used_samples += len(X_seed)

        members.append(ActiveLearner(
            estimator=which_classifier(classifier),
            X_training=X_seed, y_training=y_seed
        ))

    committee = Committee(learner_list=members, query_strategy=vote_entropy_sampling)

    # TODO(review): decide whether the committee's score before any query
    # should be recorded as the first history entry.

    for _ in range(cost):
        query_idx, _queried = committee.query(X_pool, n_instances=init_size + 1)
        used_samples += len(query_idx)

        committee.teach(X=X_pool[query_idx], y=y_pool[query_idx])

        # Taught instances leave the pool before the committee is evaluated.
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)

        acc_scores.append(committee.score(X_pool, y_pool))
        f1_scores.append(compute_f1(committee, X_pool, y_pool, "weighted"))
        auc_scores.append(compute_auc(committee, X_pool, y_pool, "weighted", "ovo"))

    time_elapsed = timer() - began

    return {
        "accuracy_history": acc_scores,
        "f1_history": f1_scores,
        "auc_history": auc_scores,
        "package": "modAL",
        "time_elapsed": time_elapsed,
        "classifier": classifier,
        "sample_size": used_samples / len(X_raw),
        "Strategy": "Query by Committee",
    }

### Expected Error Reduction

In [None]:
def exp_error_reduction(X_raw, y_raw, idx_data, idx_bag, classifier, init_size, cost):
    """Evaluate expected-error-reduction sampling on one cross-validation bag.

    The learner is seeded with a stratified sample of the bag's training
    partition; the bag's TEST partition is used both as the query pool and as
    the evaluation set. In each of the following ``cost - 1`` rounds,
    ``init_size`` pool instances chosen by modAL's ``expected_error_reduction``
    (``'binary'`` loss) are taught to the learner and the metrics are recorded.

    Args:
        X_raw: Feature matrix of the whole dataset.
        y_raw: Label vector of the whole dataset.
        idx_data: Per-bag index pairs, indexed with ``TRAIN`` and ``TEST``.
        idx_bag: Index of the bag to use.
        classifier: Classifier name understood by ``which_classifier``.
        init_size: Seed size on top of one instance per class, and number of
            instances queried per round.
        cost: Total number of evaluations to record; values below 1 record
            only the initial evaluation.

    Returns:
        dict with the metric histories, the elapsed time, the classifier name,
        the fraction of ``len(X_raw)`` consumed, and the strategy/package names.
    """
    from modAL.expected_error import expected_error_reduction

    sample_size = 0  # number of samples consumed by the strategy
    accuracy_history = []
    f1_history = []
    auc_history = []
    start = timer()

    train_idx = idx_data[idx_bag][TRAIN]
    # Only the stratified seed is kept from this split; the remainder is
    # discarded because the pool is taken from the TEST partition below.
    X_train, _, y_train, _ = train_test_split(
        X_raw[train_idx], y_raw[train_idx],
        train_size=len(np.unique(y_raw)) + init_size,
        stratify=y_raw[train_idx])
    sample_size = sample_size + len(X_train)

    X_pool, y_pool = X_raw[idx_data[idx_bag][TEST]], y_raw[idx_data[idx_bag][TEST]]

    learner = ActiveLearner(
        estimator=which_classifier(classifier),
        X_training=X_train, y_training=y_train
    )
    accuracy_history.append(learner.score(X_pool, y_pool))
    f1_history.append(compute_f1(learner, X_pool, y_pool, "weighted"))
    auc_history.append(compute_auc(learner, X_pool, y_pool, "weighted", "ovo"))

    # Bounded loop: the previous `while counter != cost` never terminated when
    # cost was smaller than 1.
    for _ in range(1, cost):
        exp_error_idx = expected_error_reduction(learner, X_pool, 'binary', n_instances=init_size)

        learner.teach(X_pool[exp_error_idx], y_pool[exp_error_idx])
        sample_size = sample_size + init_size

        # NOTE(review): queried instances stay in the pool, so the learner is
        # later scored on instances it has been taught - confirm this is intended.
        accuracy_history.append(learner.score(X_pool, y_pool))
        f1_history.append(compute_f1(learner, X_pool, y_pool, "weighted"))
        auc_history.append(compute_auc(learner, X_pool, y_pool, "weighted", "ovo"))

    end = timer()
    time_elapsed = end - start

    return { "accuracy_history": accuracy_history,
         "f1_history": f1_history,
         "auc_history": auc_history,
         "package": "modAL",
         "time_elapsed": time_elapsed,
         "classifier": classifier,
         "sample_size": sample_size / len(X_raw),
         "Strategy": "Expected Error Reduction"}

### Expected Model Change

In [None]:
def exp_model_change(X_raw, y_raw, idx_data, idx_bag, classifier, init_size, cost):
    """Evaluate the greedy "accept if the score improves" strategy on one bag.

    The learner is seeded with a stratified sample of the bag's training
    partition and the rest of that partition becomes the pool. In each of the
    following ``cost - 1`` rounds, ``init_size`` random pool instances are
    taught to a copy of the learner; the copy replaces the learner only when
    its accuracy on the pool is strictly higher. The drawn instances are
    removed from the pool either way and the metrics are recorded.

    Args:
        X_raw: Feature matrix of the whole dataset.
        y_raw: Label vector of the whole dataset.
        idx_data: Per-bag index pairs, indexed with ``TRAIN``.
        idx_bag: Index of the bag to use.
        classifier: Classifier name understood by ``which_classifier``.
        init_size: Seed size on top of one instance per class, and number of
            candidates drawn per round.
        cost: Total number of evaluations to record; values below 1 record
            only the initial evaluation.

    Returns:
        dict with the metric histories, the elapsed time, the classifier name,
        the fraction of ``len(X_raw)`` consumed, and the strategy/package names.
    """
    sample_size = 0  # number of samples consumed by the strategy
    accuracy_history = []
    f1_history = []
    auc_history = []
    start = timer()

    train_idx = idx_data[idx_bag][TRAIN]
    X_train, X_pool, y_train, y_pool = train_test_split(
        X_raw[train_idx], y_raw[train_idx],
        train_size=len(np.unique(y_raw)) + init_size,
        stratify=y_raw[train_idx])
    sample_size = sample_size + len(X_train)

    learner = ActiveLearner(
        estimator=which_classifier(classifier),
        X_training=X_train, y_training=y_train
    )

    accuracy_history.append(learner.score(X_pool, y_pool))
    f1_history.append(compute_f1(learner, X_pool, y_pool, "weighted"))
    auc_history.append(compute_auc(learner, X_pool, y_pool, "weighted", "ovo"))

    # Bounded loop: the previous `while counter != cost` never terminated when
    # cost was smaller than 1.
    for _ in range(1, cost):
        candidate_idx = np.random.choice(range(len(X_pool)), size=init_size, replace=False)

        # Teach a copy so the current learner is untouched if the candidates
        # do not help.
        trial = deepcopy(learner)
        trial.teach(X_pool[candidate_idx], y_pool[candidate_idx])

        # Both scores are taken on the pool that still contains the candidates.
        if trial.score(X_pool, y_pool) > learner.score(X_pool, y_pool):
            # The trial copy is not reused, so it can be adopted directly.
            learner = trial
            sample_size = sample_size + init_size

        X_pool = np.delete(X_pool, candidate_idx, axis=0)
        y_pool = np.delete(y_pool, candidate_idx, axis=0)

        accuracy_history.append(learner.score(X_pool, y_pool))
        f1_history.append(compute_f1(learner, X_pool, y_pool, "weighted"))
        auc_history.append(compute_auc(learner, X_pool, y_pool, "weighted", "ovo"))

    end = timer()
    time_elapsed = end - start

    return { "accuracy_history": accuracy_history,
         "f1_history": f1_history,
         "auc_history": auc_history,
         "package": "modAL",
         "time_elapsed": time_elapsed,
         "classifier": classifier,
         "sample_size": sample_size / len(X_raw),
         "Strategy": "Expected Model Change"}

## Pyhard Strategies

In [None]:
def config(section, filename='strategies.config'):
    """Load one strategy definition from an INI-style configuration file.

    The file is read from the parent of the current working directory
    (``"../" + filename``). Two options get special treatment:

    * ``ascending`` - comma-separated flags; an item is ``False`` when it
      reads "false" (case-insensitive, surrounding blanks ignored) and
      ``True`` otherwise.
    * ``sortby`` - comma-separated column names, blanks around each name
      removed.

    Args:
        section: Name of the section to load.
        filename: Name of the configuration file.

    Returns:
        dict with every option of the section; ``ascending`` is a list of
        bools and ``sortby`` a list of strings.

    Raises:
        Exception: If the section is not present in the file.
        KeyError: If the section has no ``ascending`` or ``sortby`` option.
    """
    from configparser import ConfigParser

    parser = ConfigParser()
    parser.read("../" + filename)

    if not parser.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    strategy = dict(parser.items(section))

    # Strip each item so that "True, False" is not read as [True, True].
    strategy['ascending'] = [flag.strip().lower() != "false"
                             for flag in strategy['ascending'].split(',')]
    strategy['sortby'] = [column.strip() for column in strategy['sortby'].split(',')]

    print(strategy)

    return strategy

In [None]:
def pyhard_strategies(X_raw, y_raw, idx_data, idx_bag, classifier, init_size, cost, strategy):
    """Evaluate one PyHard-based selection strategy on one cross-validation bag.

    A learner is seeded with a stratified sample of the bag's training
    partition and scored on the rest. The training partition is then written
    to ``data.csv``, PyHard is run through the shell, and the first ``cost``
    instances of ``metadata.csv`` (ordered as configured for the strategy) are
    taught to the learner, which is scored again.

    Args:
        X_raw: Feature matrix of the whole dataset.
        y_raw: Label vector of the whole dataset.
        idx_data: Per-bag index pairs, indexed with ``TRAIN``.
        idx_bag: Index of the bag to use.
        classifier: Classifier name understood by ``which_classifier``.
        init_size: Seed size on top of one instance per class.
        cost: Number of PyHard-ranked instances taught to the learner.
        strategy: Section name passed to ``config``; rebound to the loaded
            dictionary inside the function.

    Returns:
        dict with the two-entry metric histories, the elapsed time, the
        classifier name, ``cost / len(X_raw)`` as sample size, and the
        strategy/package names.
    """

    # NOTE(review): classifier_uncertainty is imported but not used below.
    from modAL.uncertainty import classifier_uncertainty

    sample_size = 0 # counter of samples used by the strategy
    accuracy_history = []
    f1_history = []
    auc_history = []
    start = timer()

    # From here on `strategy` is the configuration dictionary, not the section name.
    strategy = config(strategy)

    # initial random part of the strategy

    X_train, X_test, y_train, y_test = train_test_split(X_raw[idx_data[idx_bag][TRAIN]], y_raw[idx_data[idx_bag][TRAIN]], train_size= len(np.unique(y_raw)) + init_size, stratify = y_raw[idx_data[idx_bag][TRAIN]])

    sample_size = sample_size + len(X_train)

    learner = ActiveLearner (
        estimator= which_classifier(classifier), #cls,
        query_strategy=uncertainty_sampling,
        X_training = X_train, y_training = y_train # the ActiveLearner fits the classifier
    )

    accuracy_history.append(learner.score(X_test, y_test))
    f1_history.append(compute_f1(learner, X_test, y_test, "weighted"))
    auc_history.append(compute_auc(learner, X_test, y_test, "weighted", "ovo"))

    # NOTE(review): total_of_samples is never read in this function.
    total_of_samples = 1

    #X_train, X_test, y_train, y_test = train_test_split(X_raw, y_raw, train_size=0.03)

    # NOTE(review): this random draw is overwritten further down, once the
    # PyHard ranking has been read, and is never taught to the learner.
    idx = np.random.choice(range(len(idx_data[idx_bag][TRAIN])), size=init_size, replace=False)
    X_train, y_train = X_raw[idx_data[idx_bag][TRAIN][idx]], y_raw[idx_data[idx_bag][TRAIN][idx]]

    # Write the bag's training partition (features followed by the label
    # column) for PyHard; fmt='%i' stores every value as an integer.
    X_rawAndY_raw = np.column_stack([X_raw[idx_data[idx_bag][TRAIN]],y_raw[idx_data[idx_bag][TRAIN]]])
    np.savetxt("data.csv", X_rawAndY_raw, fmt='%i', delimiter=",")

    # Write config.yaml with the measure(s) requested by the strategy.
    which_pyhard_measure(strategy['measure'])

    # IPython shell escape: runs the PyHard command-line tool.
    !pyhard --no-isa

    # presumably metadata.csv is produced by the pyhard run above - confirm
    df = pd.read_csv('metadata.csv')

    # presumably 'instances' holds row positions within data.csv - TODO confirm
    idx = list(df.sort_values(by=strategy['sortby'], ascending=strategy['ascending'])['instances'][:cost])

    X_train = X_raw[idx_data[idx_bag][TRAIN][idx]]
    y_train = y_raw[idx_data[idx_bag][TRAIN][idx]]

    # NOTE(review): this replaces the seed-size count accumulated above
    # instead of adding to it - confirm this is intended.
    sample_size = cost
    learner.teach(X_train, y_train)

    accuracy_history.append(learner.score(X_test, y_test))
    f1_history.append(compute_f1(learner, X_test, y_test, "weighted"))
    auc_history.append(compute_auc(learner, X_test, y_test, "weighted", "ovo"))

    end = timer()
    time_elapsed = end - start

    return { "accuracy_history": accuracy_history,
         "f1_history": f1_history,
         "auc_history": auc_history,
         "package": "Pyhard",
         "time_elapsed": time_elapsed,
         "classifier": classifier,
         "sample_size": sample_size / len(X_raw),
         "Strategy": strategy['name']}

## Setup

In [None]:
def compute_f1(learner, X, y_true, average = None):
    """Return the F1 score of ``learner``'s predictions on ``X`` against ``y_true``.

    ``average`` is forwarded unchanged to ``metrics.f1_score``.
    """
    return metrics.f1_score(y_true, learner.predict(X), average = average)

In [None]:
def compute_auc(learner, X, y_true, average = None, multi_class = "ovo"):
    """Return the ROC AUC of ``learner``'s class probabilities on ``X``.

    Args:
        learner: Object exposing ``predict_proba``.
        X: Samples to score.
        y_true: True labels of ``X``.
        average: Forwarded to ``metrics.roc_auc_score``.
        multi_class: Forwarded to ``metrics.roc_auc_score``.

    Returns:
        The value returned by ``metrics.roc_auc_score``.
    """
    y_score = learner.predict_proba(X)

    # With two classes roc_auc_score expects a 1-D score for the class with the
    # greater label, not the full (n_samples, 2) probability matrix.
    if getattr(y_score, "ndim", None) == 2 and y_score.shape[1] == 2:
        y_score = y_score[:, 1]

    return metrics.roc_auc_score(y_true, y_score, average = average, multi_class = multi_class)

In [None]:
def which_pyhard_measure(measure='LSC'):
    """Write ``config.yaml`` from ``config-template.yaml`` for the given measure.

    The template's ``measures_list`` entry is replaced by the list matching
    ``measure``; when ``measure`` is not one of the known names the template
    content is written out unchanged.
    """
    import yaml

    measures_by_name = {
        'LSC': ['LSC'],
        'Harmfulness': ['Harmfulness'],
        'Usefulness': ['Usefulness'],
        'U+H': ['Harmfulness', 'Usefulness'],
        'N2': ['N2'],
        'F3': ['F3'],
    }

    with open(r'config-template.yaml') as template:
        settings = yaml.load(template, Loader=yaml.FullLoader)

    if measure in measures_by_name:
        settings['measures_list'] = measures_by_name[measure]

    with open(r'config.yaml', 'w') as target:
        yaml.dump(settings, target)

In [None]:
def which_dataset(dataset = "iris", n_splits = 5):
    """Load a bundled scikit-learn dataset and build its cross-validation bags.

    Args:
        dataset: One of ``"iris"``, ``"wine"`` or ``"digits"``.
        n_splits: Number of stratified shuffle splits (bags) to generate.

    Returns:
        Tuple ``(X_raw, y_raw, idx_data)`` where ``idx_data`` is a list of
        ``[train_index, test_index]`` pairs, one per bag.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    # TODO: extend this step to accept any dataset (or a list of datasets).
    if dataset == "iris":
        data = load_iris()
    elif dataset == "wine":
        data = load_wine()
    elif dataset == "digits":
        data = load_digits()
    else:
        raise ValueError("Unknown dataset: {0!r}".format(dataset))

    X_raw = data['data']
    y_raw = data['target']

    # cross-validation bags
    data_cv = StratifiedShuffleSplit(n_splits= n_splits, train_size=0.7, random_state=0)

    # Stratification needs the labels, so y_raw must be passed to split().
    idx_data = [[train_index, test_index]
                for train_index, test_index in data_cv.split(X_raw, y_raw)]

    return X_raw, y_raw, idx_data

In [None]:
def which_oml_dataset(dataset_id, n_splits = 5):
    """Download an OpenML dataset and build its cross-validation bags.

    Labels are integer-encoded with a ``LabelEncoder`` and NaNs in the feature
    matrix are replaced through ``np.nan_to_num``.

    Args:
        dataset_id: Identifier passed to ``openml.datasets.get_dataset``.
        n_splits: Number of stratified shuffle splits (bags) to generate.

    Returns:
        Tuple ``(X_raw, y_raw, idx_data, name)`` where ``idx_data`` is a list
        of ``[train_index, test_index]`` pairs and ``name`` is the dataset's
        OpenML name.
    """
    data = openml.datasets.get_dataset(dataset_id)

    X_raw, y_raw, categorical_indicator, attribute_names = data.get_data(
        dataset_format="array", target=data.default_target_attribute)

    y_raw = preprocessing.LabelEncoder().fit_transform(y_raw)
    X_raw = np.nan_to_num(X_raw)

    data_cv = StratifiedShuffleSplit(n_splits= n_splits, train_size=0.7, random_state=0)

    # Stratification needs the labels, so y_raw must be passed to split().
    idx_data = [[train_index, test_index]
                for train_index, test_index in data_cv.split(X_raw, y_raw)]

    return X_raw, y_raw, idx_data, data.name

In [None]:
def which_arff_dataset(dataset, n_splits = 5):
    """Load an ARFF file from ``datasets/luis/`` and build its cross-validation bags.

    Every column but the last is treated as a feature and ordinal-encoded; the
    last column is the target and is label-encoded.

    Returns:
        Tuple ``(X_raw, y_raw, idx_data, dataset)`` where ``idx_data`` is a
        list of ``[train_index, test_index]`` pairs, one per bag.
    """
    from sklearn.preprocessing import OrdinalEncoder

    records, _ = arff.loadarff('datasets/luis/' + dataset)
    frame = pd.DataFrame(records)

    feature_values = frame[frame.columns[:-1]].to_numpy()
    target_values = frame[frame.columns[-1]].to_numpy()

    X_raw = OrdinalEncoder().fit_transform(feature_values)
    y_raw = preprocessing.LabelEncoder().fit_transform(target_values)

    # cross-validation bags
    splitter = StratifiedShuffleSplit(n_splits= n_splits, train_size=0.7, random_state=0)
    idx_data = [[train_index, test_index]
                for train_index, test_index in splitter.split(X_raw, y_raw)]

    return X_raw, y_raw, idx_data, dataset

In [None]:
def which_classifier(classifier = '5NN'):
    """Return a new, unfitted estimator for the given classifier name.

    Args:
        classifier: One of ``'5NN'``, ``'C4.5'``, ``'NB'``, ``'SVM'`` or ``'RF'``.

    Returns:
        A freshly constructed estimator instance.

    Raises:
        ValueError: If ``classifier`` is not a supported name (previously the
            function silently returned ``None``).
    """
    if classifier == '5NN':
        return KNeighborsClassifier(5)
    if classifier == 'C4.5':
        return tree.DecisionTreeClassifier()
    if classifier == 'NB':
        return GaussianNB()
    if classifier == 'SVM':
        # probability=True is required so predict_proba is available.
        return SVC(probability=True, gamma='auto')
    if classifier == 'RF':
        return RandomForestClassifier()

    raise ValueError("Unknown classifier: {0!r}".format(classifier))

In [None]:
def fetch_datasets(dataset):
    """Summarise one ARFF file from ``./datasets/luis/``.

    Returns:
        dict with the dataset name (file name minus its last five characters),
        the number of instances, classes and attributes (last column excluded),
        the number of times "nominal" appears in the ARFF metadata text, and
        the percentage of instances in the largest and smallest class.
    """
    records, meta = arff.loadarff('./datasets/luis/' + dataset)
    frame = pd.DataFrame(records)

    n_rows = len(frame)
    class_counts = frame.iloc[:, -1].value_counts()
    class_percentages = class_counts.map(lambda count: round(count / n_rows * 100, 2))

    summary = {
        "name": dataset[:-5],
        "instances": n_rows,
        "classes": len(class_counts),
        "attributes": len(frame.columns) - 1,
        "nominal attributes": str(meta).count("nominal"),
        "majority": max(class_percentages),
        "minority": min(class_percentages),
    }
    return summary

In [None]:
# Only ARFF files are experiment inputs; other directory entries would break
# the ARFF loader and the `ds[:-5]` name handling used by later cells.
# Sorting makes the run order reproducible.
datasets = sorted(
    name for name in os.listdir('./datasets/luis')
    if name.lower().endswith('.arff')
)
# Classifier names understood by which_classifier.
classifiers = ['5NN', 'C4.5', 'NB','RF']
# Result dictionaries of every run are appended here by the experiment loops.
total_performance_history = []

In [None]:
# Display the dataset file names that will be processed.
datasets

In [None]:
# Summarise every dataset with fetch_datasets and show the result as a table.
metadata = pd.DataFrame.from_dict([fetch_datasets(ds) for ds in datasets])
metadata

In [None]:
# Section names of the PyHard strategies, as looked up by config().
pyhard_strategies_names = ['H','U','H+U','LSC','N2','F3']

for ds in datasets:
    dataset_label = ds[:-5]  # file name without its ".arff" suffix

    # Loading does not depend on the classifier, so it is done once per
    # dataset; n_splits is forwarded so the number of bags matches the loop.
    X_raw, y_raw, idx_data, dataset_name = which_arff_dataset(ds, n_splits)

    for classifier in classifiers:
        # one run per cross-validation bag
        for idx_bag in range(n_splits):
            for ph_strategy in pyhard_strategies_names:
                # 1-based bag counter, matching the modAL experiment loop.
                run_label = "{0} {1} {2}/{3} {4}".format(
                    dataset_label, classifier, idx_bag + 1, n_splits, ph_strategy)

                tqdm.write("Testando: " + run_label)
                result = pyhard_strategies(deepcopy(X_raw), deepcopy(y_raw), idx_data, idx_bag, classifier, k, cost, ph_strategy)
                result['dataset'] = dataset_label
                total_performance_history.append(result)
                tqdm.write("Passou: " + run_label)

In [None]:
# The strategies are called directly instead of building a source string and
# passing it to eval().
strategy_functions = [uncertain_sampling, random_sampling, query_by_committee, exp_error_reduction, exp_model_change]
functions = [strategy.__name__ for strategy in strategy_functions]

for ds in tqdm(datasets,  desc ="Dataset"):
    dataset_label = ds[:-5]  # file name without its ".arff" suffix

    # Loading does not depend on the classifier, so it is done once per
    # dataset; n_splits is forwarded so the number of bags matches the loop.
    X_raw, y_raw, idx_data, dataset_name = which_arff_dataset(ds, n_splits)

    for classifier in classifiers:
        # one run per cross-validation bag
        for idx_bag in range(n_splits):
            for strategy_function in strategy_functions:
                func = strategy_function.__name__
                run_label = "{0} {1} {2}/{3} {4}".format(
                    dataset_label, classifier, idx_bag + 1, n_splits, func)

                tqdm.write("Testando: " + run_label)
                result = strategy_function(deepcopy(X_raw), deepcopy(y_raw), idx_data, idx_bag, classifier, k, cost)
                result['dataset'] = dataset_label
                total_performance_history.append(result)
                tqdm.write("Passou: " + run_label)

In [None]:
# Display the raw result dictionaries collected by the experiment loops.
total_performance_history

## Visualization

### Preprocessing

In [None]:
# One row per experiment run, one column per key of the result dictionaries.
df = pd.DataFrame.from_dict(total_performance_history)

### Checking Strategies on Data Frame

In [None]:
# List the distinct strategy names present in the results.
pd.unique(df['Strategy'])

In [None]:
# One row per recorded accuracy value. explode() leaves the column with object
# dtype, so it is cast back to float for numeric aggregations and plotting.
df = df.explode('accuracy_history').astype({'accuracy_history': float})

In [None]:
# The result dictionaries have no 'performance_history' key; the exploded
# accuracy values live in 'accuracy_history'. The cast keeps the aggregation
# numeric even when the exploded column has object dtype.
df2 = df.astype({'accuracy_history': float})
df2.groupby(['Strategy', 'classifier']).agg({'accuracy_history':['mean','std'],'time_elapsed':['mean','std'], 'sample_size':['mean','std']})

In [None]:
# Aggregate only the numeric metrics: the frame also holds list and string
# columns (f1_history, auc_history, package, dataset) that cannot be averaged.
metric_columns = ['accuracy_history', 'time_elapsed', 'sample_size']
grouped_metrics = (
    df2.astype({'accuracy_history': float})
       .groupby(['Strategy', 'classifier'])[metric_columns]
)
performance_mean = grouped_metrics.mean()
performance_std = grouped_metrics.std()

In [None]:
# Display the mean metrics per (Strategy, classifier) group.
performance_mean

### Last loop results (last cost)

In [None]:
# Start again from the collected results: `df` has been exploded, so its
# accuracy_history holds scalars (and duplicated runs) and cannot be indexed
# with [-1] to pick the final value of each run.
df_last_result = pd.DataFrame.from_dict(total_performance_history)

In [None]:
# Keep only the final value of each metric history.
for history_column in ('f1_history', 'auc_history', 'accuracy_history'):
    df_last_result[history_column] = df_last_result[history_column].apply(lambda history: history[-1])

In [None]:
# Display the frame holding the last-result metrics.
df_last_result

In [None]:
# numeric_only skips the string columns (package, dataset), which cannot be averaged.
df_last_result_mean = df_last_result.groupby(['Strategy', 'classifier']).mean(numeric_only=True)

### Removing most time expensive strategies to improve visualization 

In [None]:
# Everything except Query by Committee, best accuracy first. The frame has no
# 'performance_history' column; accuracies are stored in 'accuracy_history'.
df[df.Strategy != "Query by Committee"].sort_values('accuracy_history', ascending = False)

In [None]:
# Expected Error Reduction runs, slowest first.
df[df.Strategy == "Expected Error Reduction"].sort_values('time_elapsed', ascending = False)

In [None]:
# Print the column dtypes and non-null counts of the results frame.
df.info()

### Plots

In [None]:
# Accuracy against elapsed time, coloured by strategy and sized by the share
# of samples used. The palette is sized from the data, as in the AUC plot,
# instead of assuming exactly 10 strategies.
g = sns.relplot(
    data= df,
    x="accuracy_history", y="time_elapsed",
    hue="Strategy", size="sample_size",
    palette=sns.color_palette(n_colors=len(pd.unique(df['Strategy']))), sizes=(100, 300), alpha=0.3
)
g.ax.xaxis.grid(True, "minor", linewidth=.25)
g.ax.yaxis.grid(True, "minor", linewidth=.25)
_ = g.despine(left=True, bottom=True)

In [None]:
# Final AUC against elapsed time: colour = strategy, marker = classifier,
# size = share of samples used.
strategy_count = len(pd.unique(df_last_result['Strategy']))
g = sns.relplot(
    data=df_last_result,
    x="auc_history",
    y="time_elapsed",
    hue="Strategy",
    size="sample_size",
    style="classifier",
    palette=sns.color_palette(n_colors=strategy_count),
    sizes=(100, 300),
    alpha=0.3,
)
for axis in (g.ax.xaxis, g.ax.yaxis):
    axis.grid(True, "minor", linewidth=.25)
_ = g.despine(left=True, bottom=True)