# Playground for Utility

In [1]:
import numpy as np

## train_test_split

In [4]:
def train_test_split(*arrays, test_ratio=0.1, random_state=42):
    """Shuffle the inputs with one shared permutation and split each of
    them into a train part and a test part.

    Parameters:
    ----------

    *arrays : array-like
        One or more indexables with the same length / shape[0]. Each one is
        converted with ``np.asarray``, so plain lists are accepted too.

    test_ratio : float, between 0 and 1
        Fraction of the samples put into the test set. The test size is
        ``int(n_samples * test_ratio)``, i.e. it is rounded down.

    random_state : integer
        Seed of the ``np.random.RandomState`` that draws the permutation

    Returns:
    -------

    splitting : tuple, length=2 * len(arrays)
        ``(train_0, test_0, train_1, test_1, ...)`` in the order of the
        inputs. The same rows are selected for every input.

    Raises:
    ------

    ValueError
        If no array is given, if the arrays differ in length, or if
        ``test_ratio`` is outside [0, 1].

    """

    if not arrays:
        raise ValueError("At least one array is required")
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1")

    # Index arrays only work on ndarrays, so convert list-like inputs first.
    converted = [np.asarray(element) for element in arrays]
    n_samples = len(converted[0])
    if any(len(element) != n_samples for element in converted):
        raise ValueError("All arrays must have the same length")

    seed = np.random.RandomState(random_state)
    index = seed.permutation(np.arange(n_samples))
    test_size = int(n_samples * test_ratio)

    # One shared pair of index sets keeps the rows of every input aligned.
    test_index = index[:test_size]
    train_index = index[test_size:]

    splitting = []
    for element in converted:
        splitting.append(element[train_index])
        splitting.append(element[test_index])

    return tuple(splitting)

In [8]:
# Random integer features (100 samples x 10 columns) and one label per sample,
# all drawn from [0, 10).
X1 = np.random.randint(low=0, high=10, size=(100, 10))
y1 = np.random.randint(low=0, high=10, size=100)

# Show the shapes to confirm features and labels have matching lengths.
print("x1 shape", np.shape(X1))
print("y1 shape", np.shape(y1))

x1 shape (100, 10)
y1 shape (100,)


## make_batch_index

In [None]:
def make_batch_index(sample_size, num_batch, size, shuffle=False, random_state=42):
    """Build the per-batch index arrays used to slice a dataset into batches

    Parameters:
    ----------

    sample_size : integer
        Number of samples in the dataset; indices lie in [0, sample_size)

    num_batch : integer
        Number of batches the indices are divided into

    size : integer
        Total number of indices to produce. When it equals ``sample_size``
        every index appears exactly once; otherwise ``size`` indices are
        drawn at random with ``randint`` (repeats possible).

    shuffle : boolean
        Only used when ``size == sample_size``.
        If True, the indices are a random permutation.
        If False, the indices are in ascending order (arange).

    random_state : integer
        Random state seed

    Returns:
        List of ``num_batch`` index arrays, as produced by ``np.array_split``

    Raises:
        ValueError when ``size != sample_size`` and ``num_batch > size``

    """

    rng = np.random.RandomState(random_state)

    if sample_size != size:
        # Random draw of `size` indices; needs at least one index per batch.
        if num_batch <= size:
            drawn = rng.randint(0, sample_size, size=size)
            return np.array_split(drawn, num_batch)
        raise ValueError("Size must be higher than number of batch")

    # Full coverage of the dataset: every index exactly once.
    if shuffle:
        ordering = rng.permutation(sample_size)
    else:
        ordering = np.arange(sample_size)

    return np.array_split(ordering, num_batch)

## MinMaxScaler

In [None]:
class MinMaxScaler(object):
    """ Scale each feature (column) to between 0 and 1

    The minimum and maximum are learned per feature along axis 0 in ``fit``
    and stored as ``min_`` and ``max_``. A feature whose minimum equals its
    maximum has no range to scale by; it is divided by 1 instead of 0, so
    the fitted data maps to 0 rather than NaN.
    """

    def __init__(self):
        pass

    def fit(self, X):
        """Learn the per-feature minimum and maximum of X.

        Parameters:
        ----------

        X : array-like
            Data to learn from; converted with ``np.asarray``

        Returns:
        -------

        self : MinMaxScaler
            The fitted scaler, to allow call chaining
        """
        X = np.asarray(X)
        self.min_ = X.min(axis=0)
        self.max_ = X.max(axis=0)

        return self

    def transform(self, X):
        """Scale X with the minimum and maximum learned by ``fit``.

        Parameters:
        ----------

        X : array-like
            Data to scale; converted with ``np.asarray``

        Returns:
        -------

        scaled : ndarray
            ``(X - min_) / (max_ - min_)``, with a zero range replaced by 1
        """
        diff_X = np.asarray(X) - self.min_
        diff_minmax = self.max_ - self.min_
        # Constant features would divide by zero; use 1 so they map to 0.
        safe_range = np.where(diff_minmax == 0, 1, diff_minmax)

        return diff_X / safe_range

    def fit_transform(self, X):
        """Fit on X, then return the scaled X."""
        self.fit(X)
        return self.transform(X)


## StandardScaler

In [None]:
class StandardScaler(object):
    """ Standardize the data: subtract the mean and divide by the standard
    deviation, per feature (column)

    The mean and standard deviation are learned along axis 0 in ``fit`` and
    stored as ``mean_`` and ``stddev_``. A feature whose standard deviation
    is exactly 0 is divided by 1 instead, so it maps to 0 rather than NaN.
    """

    def __init__(self):
        pass

    def fit(self, X):
        """Learn the per-feature mean and standard deviation of X.

        Parameters:
        ----------

        X : array-like
            Data to learn from; converted with ``np.asarray``

        Returns:
        -------

        self : StandardScaler
            The fitted scaler, to allow call chaining
        """
        X = np.asarray(X)
        self.mean_ = X.mean(axis=0)
        self.stddev_ = X.std(axis=0)

        return self

    def transform(self, X):
        """Standardize X with the statistics learned by ``fit``.

        Parameters:
        ----------

        X : array-like
            Data to standardize; converted with ``np.asarray``

        Returns:
        -------

        standardized : ndarray
            ``(X - mean_) / stddev_``, with a zero deviation replaced by 1
        """
        diff_mean = np.asarray(X) - self.mean_
        # Zero-variance features would divide by zero; use 1 so they map to 0.
        safe_stddev = np.where(self.stddev_ == 0, 1, self.stddev_)

        return diff_mean / safe_stddev

    def fit_transform(self, X):
        """Fit on X, then return the standardized X."""
        self.fit(X)
        return self.transform(X)

---

## to_categorical

In [None]:
def to_categorical(labels):
    """One-hot encode non-negative integer class labels

    Parameters:
    ----------

    labels : array-like of integers
        Class label of every sample. Usually 1-D; a 2-D input sets one
        column per label found in each row.

    Returns:
    -------

    result : ndarray of float, shape (len(labels), max(labels) + 1)
        Zeros everywhere except a 1 at ``result[i, labels[i]]``.
        Empty input gives an array of shape (0, 0).

    Raises:
    ------

    TypeError
        If the labels are not integers

    ValueError
        If any label is negative

    """

    labels = np.asarray(labels)
    sample = len(labels)
    if labels.size == 0:
        # np.max has no answer for empty input, so no columns can be derived.
        return np.zeros(shape=(sample, 0))

    if not np.issubdtype(labels.dtype, np.integer):
        raise TypeError("labels must be integers")
    if labels.min() < 0:
        # A negative index would silently wrap around to the last columns.
        raise ValueError("labels must be non-negative")

    cols = int(labels.max()) + 1
    result = np.zeros(shape=(sample, cols))
    # Row i is paired with every label in row i; one vectorized assignment.
    rows = np.arange(sample)[:, None]
    result[rows, labels.reshape(sample, -1)] = 1

    return result

## Testing Site

In [None]:
# Three random integer datasets of different sizes (100, 250 and 300 samples,
# 10 columns each), drawn from [0, 10). Features are generated before labels.
X1 = np.random.randint(low=0, high=10, size=(100, 10))
X2 = np.random.randint(low=0, high=10, size=(250, 10))
X3 = np.random.randint(low=0, high=10, size=(300, 10))
y1 = np.random.randint(low=0, high=10, size=100)
y2 = np.random.randint(low=0, high=10, size=250)
y3 = np.random.randint(low=0, high=10, size=300)

# Split the first dataset with the default test ratio and seed.
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1)

# One shape per line: train features, test features, train labels, test labels.
print(
    X1_train.shape,
    X1_test.shape,
    y1_train.shape,
    y1_test.shape,
    sep="\n",
)