# k-cross: repeated k-fold cross-validation

In [1]:
import numpy as np
from sklearn.base import clone
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score

In [2]:
def k_cross(input_model, X, y, k=10, n=1, random_state=42):
    """Score a classifier with repeated k-fold cross-validation.

    For every split produced by ``RepeatedKFold`` a fresh clone of
    ``input_model`` is fitted on the training rows and used to predict the
    validation rows, so ``input_model`` itself is never fitted here.

    Parameters
    ----------
    input_model : estimator
        Object accepted by ``sklearn.base.clone`` that provides ``fit`` and
        ``predict``.
    X : array-like, pandas DataFrame, list or tuple
        Samples, one per row.
    y : array-like, pandas Series, list or tuple
        Target labels, one per row of ``X``.
    k : int, default 10
        Forwarded to ``RepeatedKFold`` as ``n_splits``.
    n : int, default 1
        Forwarded to ``RepeatedKFold`` as ``n_repeats``.
    random_state : int, default 42
        Forwarded to ``RepeatedKFold`` as ``random_state``.

    Returns
    -------
    f1_scores : list
        ``f1_score(y_val, y_pred)`` for each split, in split order.
    recall_scores : list
        ``recall_score(y_val, y_pred)`` for each split, in split order.

    Notes
    -----
    Both metrics are called with their default arguments.
    """
    def _take_rows(data, row_idx):
        # pandas objects must be sliced positionally: plain [] with an integer
        # array selects columns on a DataFrame and index labels on a Series.
        if hasattr(data, "iloc"):
            return data.iloc[row_idx]
        return data[row_idx]

    # Plain Python sequences cannot be indexed with an index array; convert
    # them once up front. Everything else is left untouched.
    if isinstance(X, (list, tuple)):
        X = np.asarray(X)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)

    f1_scores = []
    recall_scores = []
    rkf = RepeatedKFold(n_splits=k, n_repeats=n, random_state=random_state)
    for train_index, val_index in rkf.split(X):
        X_train, X_val = _take_rows(X, train_index), _take_rows(X, val_index)
        y_train, y_val = _take_rows(y, train_index), _take_rows(y, val_index)

        # Train an unfitted copy so no state carries over between splits.
        model = clone(input_model)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_val)
        f1_scores.append(f1_score(y_val, y_pred))
        recall_scores.append(recall_score(y_val, y_pred))

    return f1_scores, recall_scores

# Example with Logistic Regression

In [3]:
from sklearn import linear_model

# Toy dataset: four samples with two features each; the first two samples
# are labelled 0 and the last two are labelled 1.
X_train = np.array([
    [1, 3],
    [2, 2],
    [4, 5],
    [6, 7],
])
y_train = np.array([0, 0, 1, 1])

In [4]:
# Evaluate an unfitted logistic regression with 2-fold cross-validation;
# the repeat count and seed are left at k_cross's defaults.
model = linear_model.LogisticRegression()
f1, recall = k_cross(input_model=model, X=X_train, y=y_train, k=2)

In [5]:
# Show the per-split scores returned by k_cross, one metric per line.
for label, fold_scores in (("F1-scores: ", f1), ("Recall scores: ", recall)):
    print(label, fold_scores)

F1-scores:  [1.0, 1.0]
Recall scores:  [1.0, 1.0]
