# Import

## Modules

In [18]:
import sklearn.metrics as sm
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from collections.abc import Iterable
import pandas as pd
import numpy as np

In [58]:
def cost_function(y_true: Iterable[float], y_pred: Iterable[float], threshold: float, score_opt_val_map: Iterable[tuple[str, str, float]]) -> float:
    """_summary_

    Args:
        y_true (Iterable[float]): _description_
        y_pred (Iterable[float]): _description_
        threshold (float): _description_
        score_opt_val_map (Iterable[tuple[str, str, float]]): _description_

    Returns:
        float: _description_
        
    Example:
        >>> y_true = [0, 0, 0, 1, 1]
        >>> y_pred = [0.46, 0.6, 0.29, 0.25, 0.012]
        >>> threshold = 0.5
        >>> score_opt_val_map = [("f1_score", "class", 1),
        ... ("log_loss", "class", 0),
        ... ("roc_auc_score", "proba", 1)]
        >>> round(cost_function(y_true, y_pred, threshold, score_opt_val_map))
        21
    """
    
    y_pred_array = np.fromiter(y_pred, float)
    y_true_array = np.fromiter(y_true, float)
    
    cost = 0
    for (score_name, prediction_type, opt_val) in score_opt_val_map:
        scorer = getattr(sm, score_name)
        
        if prediction_type == "class":
            y_hat = np.where(y_pred_array > threshold, 1, 0)
            
        else:
            y_hat = y_pred_array
        
        cost += (scorer(y_true_array, y_hat) - opt_val) ** 2
        
    return np.sqrt(cost)

In [63]:
score_opt_val_map = [
        ("accuracy_score", "class", 1),
        ("f1_score", "class", 1),
        ("log_loss", "class", 0),
        ("precision_score", "class", 1),
        ("recall_score", "class", 1),
        ("roc_auc_score", "proba", 1),
]
scorer = sm.make_scorer(cost_function, greater_is_better=False, threshold = 0.5, score_opt_val_map = score_opt_val_map)


In [64]:
X, y = make_classification()

In [65]:
param_grid = {"C": [0.5, 1, 10]}
cv = GridSearchCV(LogisticRegression(), param_grid, scoring=scorer)

In [66]:
cv.fit(X, y)

GridSearchCV(estimator=LogisticRegression(), param_grid={'C': [0.5, 1, 10]},
             scoring=make_scorer(cost_function, greater_is_better=False, threshold=0.5, score_opt_val_map=[('accuracy_score', 'class', 1)]))

In [67]:
pd.DataFrame.from_dict(cv.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.011781,0.009709,0.000875,0.000592,0.5,{'C': 0.5},-0.2,-0.15,-0.15,-0.15,-0.1,-0.15,0.031623,2
1,0.007976,0.001714,0.00047,0.000105,1.0,{'C': 1},-0.2,-0.15,-0.15,-0.1,-0.1,-0.14,0.037417,1
2,0.005901,0.000245,0.000348,1.4e-05,10.0,{'C': 10},-0.2,-0.25,-0.1,-0.0,-0.3,-0.17,0.107703,3


In [61]:
y

array([0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1])