FIX prettier version, RM more_itertools

dieterich-lab · Sep 12, 2023 · 6cc478a
1 parent d18ec22
commit 6cc478a
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 54 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -21,15 +21,15 @@ repos:
       - id: nbstripout
 
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.0-alpha.6
+    rev: v3.0.3
     hooks:
       - id: prettier
         additional_dependencies:
-          - prettier@2.3.2
+          - prettier@3.0.3
           - "prettier-plugin-toml"
 
   - repo: https://github.com/python-jsonschema/check-jsonschema
-    rev: 0.22.0
+    rev: 0.23.2
     hooks:
       - id: check-github-workflows
       - id: check-readthedocs

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## [Unreleased] - started 2023-02
 
+### Removed
+
+- `math_utils.get_kth_fold` (more_itertools)
+
 ## [4.0.1] - 2023-02
 
 ### Fixed

diff --git a/pyproject.toml b/pyproject.toml
@@ -35,7 +35,6 @@ dependencies = [
   "joblib",
   "matplotlib",
   "matplotlib_venn",
-  "more_itertools",
   "mygene",
   "numpy",
   "openpyxl",

diff --git a/src/pbiotools/misc/math_utils.py b/src/pbiotools/misc/math_utils.py
@@ -17,7 +17,6 @@
 import itertools
 from enum import Enum
 
-import more_itertools
 import numpy as np
 import pandas as pd
 import scipy.stats
@@ -1268,52 +1267,3 @@ def calc_provost_and_domingos_auc(y_true, y_score):
 
 fold_tuple_fields = ["X_train", "y_train", "X_test", "y_test"]
 fold_tuple = collections.namedtuple("fold", " ".join(fold_tuple_fields))
-
-
-def get_kth_fold(X, y, fold, num_folds=10, random_seed=8675309):
-    """Select the kth cross-validation fold using stratified CV
-
-    In partcular, this function uses `sklearn.model_selection.StratifiedKFold`
-    to split the data. It then selects the training and testing splits
-    from the k^th fold.
-
-    N.B. If `y` is None, the simple `KFold` is used instead.
-
-    Parameters
-    ----------
-    X, y: sklearn-formated data matrices
-
-    fold: int
-        The cv fold
-
-    num_folds: int
-        The total number of folds
-
-    random_seed: int or random state
-        The value used a the random seed for the k-fold split
-    """
-
-    check_range(fold, 0, num_folds, max_inclusive=False, variable_name="fold")
-
-    if y is None:
-        cv = sklearn.model_selection.KFold(n_splits=num_folds, random_state=random_seed)
-    else:
-        cv = sklearn.model_selection.StratifiedKFold(
-            n_splits=num_folds, random_state=random_seed
-        )
-
-    splits = cv.split(X, y)
-    train, test = more_itertools.nth(splits, fold)
-
-    X_train = X[train]
-    X_test = X[test]
-
-    if y is None:
-        y_train = None
-        y_test = None
-    else:
-        y_train = y[train]
-        y_test = y[test]
-
-    ret = fold_tuple(X_train, y_train, X_test, y_test)
-    return ret