In [1]:
import os
from glob import glob
from pathlib import Path

# Capture the current working directory and derive the path of the
# "PreppedData" folder directly beneath it.
cwd = Path.cwd()
prepped_data_dir = cwd.joinpath("PreppedData")
print(cwd)

/home/fs1620/MLBD_2024_25/Research_Project/LiaDataAnalysis


In [7]:
# If the current working directory is not "PreppedData", move into it.
# Path.cwd() is queried fresh (instead of reusing the `cwd` value captured in
# an earlier cell) so that re-running this cell after the chdir reports the
# real state rather than changing directory a second time.
if Path.cwd().name != "PreppedData":
    if prepped_data_dir.exists():
        os.chdir(prepped_data_dir)
        print(f"Changed working directory to: {prepped_data_dir}")
    else:
        # The message names the directory that is actually being looked for.
        raise FileNotFoundError(f"'PreppedData' directory not found at: {prepped_data_dir}")
else:
    print("Already in PreppedData directory.")

Changed working directory to: /home/fs1620/MLBD_2024_25/Research_Project/LiaDataAnalysis/PreppedData


In [9]:
# numpy is imported here because the notebook's only other `import numpy`
# lives in a later cell; without it this cell raises NameError on a fresh
# kernel (Restart & Run All).
import numpy as np

# Paths are relative, so they resolve against the current working directory.
X = np.load('X.npy')
y = np.load('y.npy')

In [13]:
import numpy as np
import json
from sklearn.model_selection import StratifiedKFold
import os

def generate_strat_folds(
    x_path: str = '',
    y_path: str = '',
    save_path: str = '',
    n_splits: int = 5,
    random_state=42,
    shuffle: bool = True,
    return_folds: bool = True
):
    """
    Generate stratified cross-validation folds and save them to a JSON file.

    The features and labels are loaded from ``.npy`` files, split with
    ``StratifiedKFold`` and the resulting index lists are written to
    ``save_path`` as a JSON list with one entry per fold.

    Args:
        x_path (str): Path to X.npy (features).
        y_path (str): Path to y.npy (labels).
        save_path (str): Path of the JSON file to write. Its parent directory
            is created if it does not exist.
        n_splits (int): Number of cross-validation folds.
        random_state (int | None): Random seed for reproducibility. Only
            forwarded to ``StratifiedKFold`` when ``shuffle`` is True; it is
            ignored otherwise.
        shuffle (bool): Whether to shuffle before splitting.
        return_folds (bool): If True, returns folds as a list of dicts.

    Returns:
        list[dict] | None: One dict per fold with the keys 'fold',
        'train_idx' and 'val_idx' if ``return_folds`` is True, otherwise None.
    """
    X = np.load(x_path)
    y = np.load(y_path)

    # StratifiedKFold raises ValueError when a random_state is supplied while
    # shuffle is False (the seed would have no effect). Drop the seed in that
    # case so that shuffle=False works with the default random_state=42.
    effective_random_state = random_state if shuffle else None
    skf = StratifiedKFold(
        n_splits=n_splits,
        shuffle=shuffle,
        random_state=effective_random_state,
    )

    # Index arrays are converted to plain lists so they are JSON-serialisable.
    folds = [
        {
            'fold': fold_idx,
            'train_idx': train_idx.tolist(),
            'val_idx': val_idx.tolist()
        }
        for fold_idx, (train_idx, val_idx) in enumerate(skf.split(X, y))
    ]

    # dirname is '' for a bare file name; only create a directory when the
    # path actually contains one.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    with open(save_path, 'w') as f:
        json.dump(folds, f, indent=2)

    print(f"Saved {n_splits} stratified folds to: {save_path}")

    if return_folds:
        return folds
    return None

In [14]:
# Build 5 stratified folds from the X.npy / y.npy files and write the index
# lists to stratified_folds.json. All paths are relative, so they resolve
# against the current working directory.
folds = generate_strat_folds(
    n_splits=5,
    save_path='stratified_folds.json',
    x_path='X.npy',
    y_path='y.npy',
)

Saved 5 stratified folds to: stratified_folds.json
