In [16]:
from cuml.svm import SVC

import transformers
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from joblib import Parallel, delayed
import itertools
import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from sklearn.base import clone

In [17]:
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features = 1000, n_informative=500, random_state=42, n_classes= 3)
X = X - X.min()  # make minimum value 0
X = X * 10        # scale to get values in count-like range
X = np.random.poisson(lam=X).astype(int)

y = y.astype("int")
study_labels = np.random.choice(['A', 'B', 'C'], size=X.shape[0], p=[0.2, 0.5, 0.3])
y

array([1, 2, 1, 2, 2, 1, 1, 1, 0, 1, 1, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2,
       0, 2, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 2, 2, 1, 0, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 0, 1, 1, 2, 2, 1, 0,
       2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 1, 2, 2, 2, 2, 0, 2, 2, 1, 1, 1, 2,
       0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 1, 0, 1, 2, 0, 2, 1, 1, 2, 1, 1, 2,
       2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 1, 0, 1, 1, 1, 2,
       0, 1, 1, 1, 1, 2, 1, 0, 2, 1, 2, 0, 0, 0, 2, 2, 0, 0, 2, 1, 0, 0,
       2, 0, 1, 2, 0, 0, 1, 2, 2, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 2, 1, 0, 0, 1, 1, 2, 2, 1, 0, 2, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 0, 1, 2, 2, 0, 0, 0, 2, 1, 0,
       2, 0, 0, 2, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 1,
       2, 2, 0, 0, 2, 1, 2, 0, 2, 2, 2, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2,
       2, 0, 0, 2, 2, 2, 0, 0, 0, 2, 0, 2, 1, 2, 0, 0, 2, 1, 1, 2, 2, 0,
       1, 1, 2, 0, 0, 0, 2, 0, 0, 1, 0, 2, 1, 2, 2,

In [18]:
from train_test import load_data
study_labels, X, y = load_data("/home/jeppe/Documents/Leukem.ai/data")

  Studies: 2974
  X shape: (2974, 60660)
  y: 2974


In [19]:
def filter_data(X, y, study_labels):
    """
    Removes samples based on class counts and selected studies.

    Args:
        X (numpy.ndarray): Feature matrix.
        y (numpy.ndarray): Target labels.
        study_labels (numpy.ndarray): Study labels.

    Returns:
        tuple: Filtered X, y, and study_labels.
    """

    unique_classes, class_counts = np.unique(y, return_counts=True)
    valid_classes = unique_classes[class_counts >= 20]

    valid_classes = [c for c in valid_classes if c != "AML NOS" and c != "Missing data"]

    valid_indices_classes = np.isin(y, valid_classes)

    selected_studies = [
        "BEATAML1.0-COHORT",
        "AAML0531",
        "AAML1031",
        "TCGA-LAML",
        "LEUCEGENE"
    ]

    valid_indices_studies = np.isin(study_labels, selected_studies)

    # Combine the indices to keep samples that satisfy both conditions
    valid_indices = valid_indices_classes & valid_indices_studies

    filtered_X = X[valid_indices]
    filtered_y = y[valid_indices]
    filtered_study_labels = study_labels[valid_indices]

    print(f"  Studies: {len(filtered_study_labels)}")
    print(f"  X shape: {filtered_X.shape}")
    print(f"  y: {len(filtered_y)}")

    return filtered_X, filtered_y, filtered_study_labels

X, y, study_labels = filter_data(X, y, study_labels)

  Studies: 1914
  X shape: (1914, 60660)
  y: 1914


In [23]:
def encode_labels(y):
    """Encodes string labels to integers and returns the mapping."""
    unique_labels = np.unique(y)
    label_to_int = {label: i for i, label in enumerate(unique_labels)}
    int_y = np.array([label_to_int[label] for label in y])
    return int_y, label_to_int

# Example usage (assuming y is your string label array):
y, label_mapping = encode_labels(y)

In [24]:
# CV setup
outer_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

# Hyperparameter grid
param_grid = {
    'n_genes': [2000],
    'C': [100, 1e3],
    'gamma': ['auto'],
    'kernel': ['rbf'],
    'class_weight': ["balanced"],
    'max_iter': [1e6]
}

param_combos = list(ParameterGrid(param_grid))

model = SVC
pipe = Pipeline([
    ('DEseq2', transformers.DESeq2RatioNormalizer()),
    ('feature_selection', transformers.FeatureSelection()),
    ('scaler', StandardScaler())
])

In [25]:
# Function to evaluate one inner fold + hyperparam combo
def evaluate_inner_fold(outer_fold, inner_fold, 
                        train_inner_idx, val_inner_idx, 
                        X_train_outer, y_train_outer, study_labels,
                        model, 
                        pipe,
                        params,
                        type = "standard"):
    
      
    def standard_eval():
        clf.fit(X_train_inner, y_train_inner)
        preds = clf.predict(X_val_inner)
        return {
            'outer_fold': outer_fold,
            'inner_fold': inner_fold,
            'params': params,
            'accuracy': accuracy_score(y_val_inner, preds),
            'f1_macro': f1_score(y_val_inner, preds, average='macro'),
            'f1_per_class': f1_score(y_val_inner, preds, average=None)
        }

    def ovr_eval():
        results = []
        classes = np.unique(y_train_inner)
        for cl in classes:
            y_train_bin = [1 if yy == cl else 0 for yy in y_train_inner]
            y_val_bin = [1 if yy == cl else 0 for yy in y_val_inner]

            y_train_bin = np.array(y_train_bin, dtype=np.int32)
            y_val_bin = np.array(y_val_bin, dtype=np.int32)

            clf.fit(X_train_inner, y_train_bin)
            preds = clf.predict(X_val_inner)
            results.append({
                'outer_fold': outer_fold,
                'inner_fold': inner_fold,
                'class': cl,
                'params': params,
                'accuracy': accuracy_score(y_val_bin, preds),
                'f1_binary': f1_score(y_val_bin, preds, average='binary', pos_label=1)
            })
        return results

    def ovo_eval():
        results = []
        classes = np.unique(y_train_inner)
        for i, j in itertools.combinations(classes, 2):
            train_mask = [(yy == i or yy == j) for yy in y_train_inner]
            val_mask = [(yy == i or yy == j) for yy in y_val_inner]

            X_train_ij = X_train_inner[train_mask]
            y_train_ij = np.array([yy for yy in y_train_inner if yy == i or yy == j], dtype=np.int32) 

            X_val_ij = X_val_inner[val_mask]
            y_val_ij = np.array([yy for yy in y_val_inner if yy == i or yy == j], dtype=np.int32)
            
            clf.fit(X_train_ij, y_train_ij)
            preds = clf.predict(X_val_ij)
            results.append({
                'outer_fold': outer_fold,
                'inner_fold': inner_fold,
                'class_0': i,
                'class_1': j,
                'params': params,
                'accuracy': accuracy_score(y_val_ij, preds),
                'f1_binary': f1_score(y_val_ij, preds, average='binary', pos_label=i)
            })
        return results

    # Dispatch table for clean logic
    eval_dispatch = {
        'standard': standard_eval,
        'OvR': ovr_eval,
        'OvO': ovo_eval
    }

    if type not in eval_dispatch:
        raise ValueError(f"Unsupported evaluation type: {type}")
    
    ### Setup pipeline and classifier
    pipe_inner = clone(pipe)
    # Set n_genes and drop it from params
    #pipe_inner.set_params()
    
    n_genes = params.pop('n_genes')
    ### Subset training data and validation data
    ## And perform normalization and feature selection
    X_train_inner = X_train_outer[train_inner_idx]
    study_labels_inner = study_labels[train_inner_idx]
    X_train_inner = pipe_inner.fit_transform(X_train_inner, 
                                             feature_selection__study_per_patient=study_labels_inner, 
                                             feature_selection__n_genes=n_genes)
    y_train_inner = y_train_outer[train_inner_idx]
    
    X_val_inner = X_train_outer[val_inner_idx]
    X_val_inner = pipe_inner.transform(X_val_inner)
    y_val_inner = y_train_outer[val_inner_idx]
    

    X_train_inner = np.array(X_train_inner, dtype=np.float32)
    y_train_inner = np.array(y_train_inner, dtype=np.int32)
    X_val_inner = np.array(X_val_inner, dtype=np.float32)
    y_val_inner = np.array(y_val_inner, dtype=np.int32)
    
    ### Set classifier
    clf = clone(model(**params))
    params['n_genes'] = n_genes
    return eval_dispatch[type]()

In [27]:
all_results = []

for outer_fold, (train_idx, test_idx) in enumerate(outer_cv.split(X, y)):
    X_train_outer, X_test_outer = X[train_idx], X[test_idx]
    y_train_outer, y_test_outer = y[train_idx], y[test_idx]
    study_labels_outer = study_labels[train_idx]
    
    inner_tasks = []
    for inner_fold, (train_inner_idx, val_inner_idx) in enumerate(inner_cv.split(X_train_outer, y_train_outer)):
        for params in param_combos:
            inner_tasks.append(delayed(evaluate_inner_fold)(
                outer_fold, inner_fold,
                train_inner_idx, val_inner_idx,
                X_train_outer, y_train_outer, study_labels,
                model,
                pipe,
                params,
                type = "OvR" # standard, OvR, OvO
            ))

    # Run inner CV tasks in parallel (adjust n_jobs to number of CPU cores)
    inner_results = Parallel(n_jobs=2, verbose=1)(inner_tasks)
    if isinstance(inner_results[0], dict):
        # Flat list of dictionaries
        all_results.extend(inner_results)
    elif isinstance(inner_results[0], list):
        # List of lists of dictionaries
        for res in inner_results:
            all_results.extend(res)
    else:
        raise ValueError("Unexpected structure in inner_results")


# Convert to DataFrame
df_parallel_results = pd.DataFrame(all_results)
df_parallel_results


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0


[Parallel(n_jobs=2)]: Done   4 out of   4 | elapsed:   28.8s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0
MASK SUM == 0


[Parallel(n_jobs=2)]: Done   4 out of   4 | elapsed:   28.2s finished


Unnamed: 0,outer_fold,inner_fold,class_0,class_1,params,accuracy,f1_binary
0,0,0,0,1,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",0.880342,0.900000
1,0,0,0,2,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",0.975000,0.985294
2,0,0,0,3,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",0.970370,0.971429
3,0,0,0,4,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",0.945946,0.971831
4,0,0,0,5,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",0.972973,0.971429
...,...,...,...,...,...,...,...
1083,1,1,13,15,"{'C': 1000.0, 'class_weight': 'balanced', 'gam...",1.000000,1.000000
1084,1,1,13,16,"{'C': 1000.0, 'class_weight': 'balanced', 'gam...",1.000000,1.000000
1085,1,1,14,15,"{'C': 1000.0, 'class_weight': 'balanced', 'gam...",1.000000,1.000000
1086,1,1,14,16,"{'C': 1000.0, 'class_weight': 'balanced', 'gam...",1.000000,1.000000


In [43]:
def process_cv_results(df, param_grid, label_mapping, score_col='f1_binary'):
    #  Extract param names and expand 'params'
    param_names = list(param_grid.keys())
    params_df = df['params'].apply(pd.Series)

    # Normalize None values for groupby
    for col in param_names:
        if col in params_df.columns:
            params_df[col] = params_df[col].apply(lambda x: 'none' if x is None else x)

    #Combine expanded params with original DataFrame
    df_with_params = pd.concat([df.drop(columns=['params']), params_df], axis=1)

    # Determine group-by strategy based on evaluation type
    if 'class' in df_with_params.columns:
        # OvR
        group_cols = param_names + ['class']
        summary = df_with_params.groupby(group_cols)[score_col].mean().reset_index()
        best = summary.loc[summary.groupby('class')[score_col].idxmax()].reset_index(drop=True)

    elif 'class_0' in df_with_params.columns and 'class_1' in df_with_params.columns:
        # OvO
        group_cols = param_names + ['class_0', 'class_1']
        summary = df_with_params.groupby(group_cols)[score_col].mean().reset_index()
        best = summary.loc[summary.groupby(['class_0', 'class_1'])[score_col].idxmax()].reset_index(drop=True)

    else:
        # Standard multiclass
        group_cols = param_names
        score_col = 'f1_macro'  # override if not passed
        summary = df_with_params.groupby(group_cols)[score_col].mean().reset_index()
        best = summary.loc[[summary[score_col].idxmax()]].reset_index(drop=True)

    int_to_label = {v: k for k, v in label_mapping.items()}
    if 'class' in best.columns:
        # OvR case
        best['class'] = best['class'].map(int_to_label)
        return best

    elif 'class_0' in best.columns and 'class_1' in best.columns:
        # OvO case
        best['class_0'] = best['class_0'].map(int_to_label)
        best['class_1'] = best['class_1'].map(int_to_label)
        return best

    else:
        return best

best_per_class_df = process_cv_results(
    df_parallel_results,
    param_grid=param_grid,
    label_mapping = label_mapping
)
best_per_class_df

Unnamed: 0,n_genes,C,gamma,kernel,class_weight,max_iter,class_0,class_1,f1_binary
0,2000,100.0,auto,rbf,balanced,1000000.0,AML with MDS-related cytogenetic abnormalities,AML with MDS-related gene mutations,0.879148
1,2000,100.0,auto,rbf,balanced,1000000.0,AML with MDS-related cytogenetic abnormalities,AML with in-frame bZIP CEBPA,0.970784
2,2000,100.0,auto,rbf,balanced,1000000.0,AML with MDS-related cytogenetic abnormalities,AML with inv(16)/t(16;16)/CBFB::MYH11,0.982142
3,2000,100.0,auto,rbf,balanced,1000000.0,AML with MDS-related cytogenetic abnormalities,AML with inv(3)/t(3;3)/GATA2;MECOM,0.971730
4,2000,100.0,auto,rbf,balanced,1000000.0,AML with MDS-related cytogenetic abnormalities,AML with mutated NPM1,0.963418
...,...,...,...,...,...,...,...,...,...
131,2000,100.0,auto,rbf,balanced,1000000.0,KMT2A::MLLT10,NUP98::KDM5A,0.961329
132,2000,100.0,auto,rbf,balanced,1000000.0,KMT2A::MLLT10,NUP98::NSD1,0.991935
133,2000,100.0,auto,rbf,balanced,1000000.0,KMT2A::MLLT4 (::AFDN),NUP98::KDM5A,0.924632
134,2000,100.0,auto,rbf,balanced,1000000.0,KMT2A::MLLT4 (::AFDN),NUP98::NSD1,0.964103


In [None]:
int_to_label = {v: k for k, v in label_mapping.items()}
if 'class' in best_per_class_df.columns:
    # OvR case
    best_per_class_df['class'] = best_per_class_df['class'].map(int_to_label)
    print("\nDataFrame with 'class_label' added (OvR):")

elif 'class_0' in best_per_class_df.columns and 'class_1' in best_per_class_df.columns:
    # OvO case
    best_per_class_df['class_0'] = best_per_class_df['class_0'].map(int_to_label)
    best_per_class_df['class_1'] = best_per_class_df['class_1'].map(int_to_label)
    print("\nDataFrame with 'class_0_label' and 'class_1_label' added (OvO):")

else:
     # Standard multiclass case (no specific class columns to map in 'best')
     print("\nStandard multiclass results, no class labels to map in 'best' summary.")

best_per_class_df


DataFrame with 'class_0_label' and 'class_1_label' added (OvO):


Unnamed: 0,n_genes,C,gamma,kernel,class_weight,max_iter,class_0,class_1,f1_binary,class_0_label,class_1_label
0,2000,100.0,auto,rbf,balanced,1000000.0,0,1,0.879148,AML with MDS-related cytogenetic abnormalities,AML with MDS-related gene mutations
1,2000,100.0,auto,rbf,balanced,1000000.0,0,2,0.970784,AML with MDS-related cytogenetic abnormalities,AML with in-frame bZIP CEBPA
2,2000,100.0,auto,rbf,balanced,1000000.0,0,3,0.982142,AML with MDS-related cytogenetic abnormalities,AML with inv(16)/t(16;16)/CBFB::MYH11
3,2000,100.0,auto,rbf,balanced,1000000.0,0,4,0.971730,AML with MDS-related cytogenetic abnormalities,AML with inv(3)/t(3;3)/GATA2;MECOM
4,2000,100.0,auto,rbf,balanced,1000000.0,0,5,0.963418,AML with MDS-related cytogenetic abnormalities,AML with mutated NPM1
...,...,...,...,...,...,...,...,...,...,...,...
131,2000,100.0,auto,rbf,balanced,1000000.0,13,15,0.961329,KMT2A::MLLT10,NUP98::KDM5A
132,2000,100.0,auto,rbf,balanced,1000000.0,13,16,0.991935,KMT2A::MLLT10,NUP98::NSD1
133,2000,100.0,auto,rbf,balanced,1000000.0,14,15,0.924632,KMT2A::MLLT4 (::AFDN),NUP98::KDM5A
134,2000,100.0,auto,rbf,balanced,1000000.0,14,16,0.964103,KMT2A::MLLT4 (::AFDN),NUP98::NSD1


In [None]:
import pandas as pd

def process_cv_results(df, param_grid, score_col='f1_binary', label_mapping=None):
    """
    Processes cross-validation results, converting class integers to strings if label_mapping is provided.

    Args:
        df (pd.DataFrame): DataFrame containing cross-validation results.
        param_grid (dict): Parameter grid used in cross-validation.
        score_col (str): Column name for the score to be used for best model selection.
        label_mapping (dict, optional): Mapping from integer labels to string labels.

    Returns:
        tuple: Summary DataFrame and DataFrame with best results per class.
    """
    param_names = list(param_grid.keys())
    params_df = df['params'].apply(pd.Series)

    for col in param_names:
        if col in params_df.columns:
            params_df[col] = params_df[col].apply(lambda x: 'none' if x is None else x)

    df_with_params = pd.concat([df.drop(columns=['params']), params_df], axis=1)

    if 'class' in df_with_params.columns:
        group_cols = param_names + ['class']
        if label_mapping:
            df_with_params['class'] = df_with_params['class'].map(label_mapping)
        summary = df_with_params.groupby(group_cols)[score_col].mean().reset_index()
        best = summary.loc[summary.groupby('class')[score_col].idxmax()].reset_index(drop=True)

    elif 'class_0' in df_with_params.columns and 'class_1' in df_with_params.columns:
        group_cols = param_names + ['class_0', 'class_1']
        if label_mapping:
            df_with_params['class_0'] = df_with_params['class_0'].map(label_mapping)
            df_with_params['class_1'] = df_with_params['class_1'].map(label_mapping)
        summary = df_with_params.groupby(group_cols)[score_col].mean().reset_index()

        best = summary.loc[summary.groupby(['class_0', 'class_1'])[score_col].idxmax()].reset_index(drop=True)

    else:
        group_cols = param_names
        score_col = 'f1_macro'
        summary = df_with_params.groupby(group_cols)[score_col].mean().reset_index()
        best = summary.loc[[summary[score_col].idxmax()]].reset_index(drop=True)

    return summary, best

# Example usage (assuming df_parallel_results, param_grid, and label_mapping are defined):
# label_mapping = {0: 'ClassA', 1: 'ClassB', 2: 'ClassC'} #Example mapping.
summary_df, best_per_class_df = process_cv_results(
    df_parallel_results,
    param_grid=param_grid,
    label_mapping=label_mapping #Pass the mapping here.
)

best_per_class_df



In [62]:
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from joblib import Parallel, delayed
import itertools
import numpy as np
import pandas as pd

# Load data
X, y = load_iris(return_X_y=True)
y = y.astype("str")
print(np.unique(y))
# CV setup
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
inner_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Hyperparameter grid
param_grid = {
    'n_estimators': [10, 20],
    'max_depth': [3, 5]
}
param_combos = list(itertools.product(param_grid['n_estimators'], param_grid['max_depth']))

# Function to evaluate one inner fold + hyperparam combo
def evaluate_inner_fold(outer_fold, inner_fold, train_inner_idx, val_inner_idx, X_train_outer, y_train_outer, n_estimators, max_depth):
    results = []
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=42
        ))
    ])
    X_train_inner = X_train_outer[train_inner_idx]
    y_train_inner = y_train_outer[train_inner_idx]
    X_val_inner = X_train_outer[val_inner_idx]
    y_val_inner = y_train_outer[val_inner_idx]

    classes = np.unique(y_train_inner)
    for cl in classes:
        y_train_inner_ovr = [cl if yy == cl else "other" for yy in y_train_inner]
        y_val_inner_ovr = [cl if yy == cl else "other" for yy in y_val_inner]
        
        pipe.fit(X_train_inner, y_train_inner_ovr)
        preds = pipe.predict(X_val_inner)
        acc = accuracy_score(y_val_inner_ovr, preds)
        f1_bi = f1_score(y_val_inner_ovr, preds, average='binary', pos_label=cl)
        results.append({
            'outer_fold': outer_fold,
            'class': cl,
            'inner_fold': inner_fold,
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'accuracy': acc,
            'f1_binary': f1_bi
        })
    return(results)

# Outer CV loop with parallel inner loop
all_results = []

for outer_fold, (train_idx, test_idx) in enumerate(outer_cv.split(X, y)):
    X_train_outer, X_test_outer = X[train_idx], X[test_idx]
    y_train_outer, y_test_outer = y[train_idx], y[test_idx]

    inner_tasks = []
    for inner_fold, (train_inner_idx, val_inner_idx) in enumerate(inner_cv.split(X_train_outer, y_train_outer)):
        for n_estimators, max_depth in param_combos:
            inner_tasks.append(delayed(evaluate_inner_fold)(
                outer_fold, inner_fold,
                train_inner_idx, val_inner_idx,
                X_train_outer, y_train_outer,
                n_estimators, max_depth
            ))

    # Run inner CV tasks in parallel (adjust n_jobs to number of CPU cores)
    inner_results = Parallel(n_jobs=-1, verbose=1)(inner_tasks)
    for res in inner_results:
        all_results.extend(res)

# Convert to DataFrame
df_parallel_results = pd.DataFrame(all_results)
df_parallel_results

['0' '1' '2']


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished


Unnamed: 0,outer_fold,class,inner_fold,n_estimators,max_depth,accuracy,f1_binary
0,0,0,0,10,3,1.000000,1.000000
1,0,1,0,10,3,0.875000,0.823529
2,0,2,0,10,3,0.875000,0.800000
3,0,0,0,10,5,1.000000,1.000000
4,0,1,0,10,5,0.875000,0.823529
...,...,...,...,...,...,...,...
295,4,1,4,20,3,0.958333,0.941176
296,4,2,4,20,3,0.958333,0.933333
297,4,0,4,20,5,1.000000,1.000000
298,4,1,4,20,5,0.958333,0.941176


In [63]:
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from joblib import Parallel, delayed
import itertools
import numpy as np
import pandas as pd

# Load data
X, y = load_iris(return_X_y=True)
y = y.astype("str")
# CV setup
outer_cv = KFold(n_splits=3, shuffle=True, random_state=42)
inner_cv = KFold(n_splits=3, shuffle=True, random_state=42)

# Hyperparameter grid
param_grid = {
    'n_estimators': [10, 20],
    'max_depth': [3, 5]
}
param_combos = list(itertools.product(param_grid['n_estimators'], param_grid['max_depth']))

# Function to evaluate one inner fold + hyperparam combo
def evaluate_inner_fold(outer_fold, inner_fold, train_inner_idx, val_inner_idx, X_train_outer, y_train_outer, n_estimators, max_depth):
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=42
        ))
    ])
    X_train_inner = X_train_outer[train_inner_idx]
    y_train_inner = y_train_outer[train_inner_idx]
    X_val_inner = X_train_outer[val_inner_idx]
    y_val_inner = y_train_outer[val_inner_idx]

    classes = np.unique(y_train_inner)
    results = []
    for i, j in itertools.combinations(classes, r = 2):
        train_mask = [(yy == i or yy == j) for yy in y_train_inner]
        val_mask = [(yy == i or yy == j) for yy in y_val_inner]

        X_train_ij = X_train_inner[train_mask]
        y_train_ij = [yy for yy in y_train_inner if yy == i or yy == j]

        X_val_ij = X_val_inner[val_mask]
        y_val_ij = [yy for yy in y_val_inner if yy == i or yy == j]
        
        pipe.fit(X_train_ij, y_train_ij)
        preds = pipe.predict(X_val_ij)
        acc = accuracy_score(y_val_ij, preds)
        f1_bi = f1_score(y_val_ij, preds, average='binary', pos_label=i)
        results.append({
            'outer_fold': outer_fold,
            'class_0': i,
            'class_1': j,
            'inner_fold': inner_fold,
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'accuracy': acc,
            'f1_binary': f1_bi
        })
    return(results)

# Outer CV loop with parallel inner loop
all_results = []

for outer_fold, (train_idx, test_idx) in enumerate(outer_cv.split(X)):
    X_train_outer, X_test_outer = X[train_idx], X[test_idx]
    y_train_outer, y_test_outer = y[train_idx], y[test_idx]

    inner_tasks = []
    for inner_fold, (train_inner_idx, val_inner_idx) in enumerate(inner_cv.split(X_train_outer)):
        for n_estimators, max_depth in param_combos:
            inner_tasks.append(delayed(evaluate_inner_fold)(
                outer_fold, inner_fold,
                train_inner_idx, val_inner_idx,
                X_train_outer, y_train_outer,
                n_estimators, max_depth
            ))

    # Run inner CV tasks in parallel (adjust n_jobs to number of CPU cores)
    inner_results = Parallel(n_jobs=-1, verbose=1)(inner_tasks)
    for res in inner_results:
        all_results.extend(res)

# Convert to DataFrame
df_parallel_results = pd.DataFrame(all_results)
df_parallel_results

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  12 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  12 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  12 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:    0.1s finished


Unnamed: 0,outer_fold,class_0,class_1,inner_fold,n_estimators,max_depth,accuracy,f1_binary
0,0,0,1,0,10,3,1.000000,1.000000
1,0,0,2,0,10,3,1.000000,1.000000
2,0,1,2,0,10,3,0.818182,0.818182
3,0,0,1,0,10,5,1.000000,1.000000
4,0,0,2,0,10,5,1.000000,1.000000
...,...,...,...,...,...,...,...,...
103,2,0,2,2,20,3,1.000000,1.000000
104,2,1,2,2,20,3,0.920000,0.909091
105,2,0,1,2,20,5,1.000000,1.000000
106,2,0,2,2,20,5,1.000000,1.000000
