# In [None]:
import numpy as np
import random
from sklearn.model_selection import KFold


class DataSplitter:
    """Helpers for drawing a per-class random subset and k-fold splits of it."""

    @staticmethod
    def reduceSet(label, ratio):
        """
        Randomly get the reduced dataset from the full dataset - Reduced SVM.

        label = class label of every sample (array-like; lists are accepted)
        ratio = fraction of each class to sample from the full dataset

        Output:
        subset = reduced set's indices (a list of indices into ``label``),
                 grouped by class in the order returned by ``np.unique``

        Raises:
        ValueError (from ``random.sample``) when the rounded per-class sample
        size is negative or larger than the class, e.g. for ratio > 1.
        """
        # Convert once so plain lists/tuples compare element-wise; a bare
        # ``list == scalar`` collapses to a single False and would silently
        # yield an empty subset.
        labels = np.asarray(label)

        subset = []
        for uni_lab in np.unique(labels):
            # Indices of every sample carrying this label (computed once).
            members = np.where(labels == uni_lab)[0]
            # round() uses round-half-to-even, same as the 2-argument form.
            num = int(round(members.shape[0] * ratio))
            subset.extend(random.sample(list(members), num))

        return subset

    @staticmethod
    def splitData(label, ratio=1, num_fold=1):
        """
        Get the slices of data.

        label = class of data (array-like; lists are accepted)
        ratio = fraction of each class to sample from the full dataset
                (default: 1)
        num_fold = number of folds in cross validation (default: 1).
                   1 means "no cross validation": a single fold is returned
                   whose train part covers the whole reduced set and whose
                   test part is empty.

        Output:
        dict with
            "subset" = reduced set's indices (into ``label``)
            "fold"   = list of {"train": ..., "test": ...} index arrays.
                       These are positions *within* ``subset`` (0 .. len-1),
                       not indices into the original data.

        Raises:
        ValueError if num_fold < 1, or (from KFold) if the reduced set has
        fewer samples than num_fold.
        """
        if num_fold < 1:
            raise ValueError(
                "num_fold must be at least 1, got {!r}".format(num_fold)
            )

        # List of the reduced set's indices.
        subset = DataSplitter.reduceSet(label, ratio)

        if num_fold == 1:
            # KFold rejects n_splits < 2, so the default has to be handled
            # here instead of crashing on every default call.
            single_fold = {
                "train": np.arange(len(subset)),
                "test": np.array([], dtype=int),
            }
            return {"subset": subset, "fold": [single_fold]}

        # Split the reduced set into k folds; the fixed random_state keeps
        # the fold assignment reproducible for a given subset size.
        kf = KFold(n_splits=num_fold, shuffle=True, random_state=42)
        reduced_labels = np.asarray(label)[subset]
        train_test_splits = [
            {"train": train_index, "test": test_index}
            for train_index, test_index in kf.split(reduced_labels)
        ]

        return {"subset": subset, "fold": train_test_splits}