In [1]:
import numpy as np

In [2]:
# 10x10 grid of random integers drawn from [0, 100).
# No seed is set in this cell, so the values depend on the current state of
# NumPy's global generator.
fake_data = np.random.randint(0, 100, size=(10, 10))
fake_data

array([[44, 42, 47, 60, 78, 60, 26, 79, 43, 63],
       [38, 60, 16, 75, 44, 63, 80,  8, 21, 80],
       [14, 76, 22, 67,  5, 16, 11,  5, 26, 57],
       [32,  1, 22, 96,  0, 40, 23, 54, 24, 76],
       [83, 96, 34, 47, 15, 39, 85, 58, 29, 80],
       [43, 55, 36, 72, 77, 61,  4, 77, 38, 94],
       [49, 57, 58, 44, 91, 22, 95, 46, 11, 11],
       [ 0, 77, 60, 70, 98, 10, 50, 38, 13, 90],
       [72, 56, 71, 25, 25, 12, 79, 26, 81, 68],
       [63,  0, 47, 80, 78, 13, 32, 90, 50,  9]])

In [3]:
def train_test_split(data, shuffle=True, test_proportion=0.2, random_state=None):
    '''Split ``data`` along its first axis into a train set and a test set.

    Parameters
    ----------
    data : numpy.ndarray
        Array to split. Rows (axis 0) are the units assigned to either set;
        it must expose ``.shape`` and support indexing by an integer array.
    shuffle : bool, default True
        If True, row indices are shuffled before splitting. If False, the
        leading rows form the test set and the remaining rows the train set.
    test_proportion : float, default 0.2
        Fraction of rows (0 to 1 inclusive) placed in the test set. The
        resulting row count is rounded down.
    random_state : int or None, default None
        Seed for reproducible shuffling. When given (including 0) it is
        passed to ``np.random.seed``, which reseeds NumPy's *global*
        generator. None leaves the generator state untouched.

    Returns
    -------
    (train, test) : tuple
        ``data`` indexed by the train indices and by the test indices.

    Raises
    ------
    AssertionError
        If ``shuffle`` is not a bool, ``random_state`` is neither None nor
        an integer, or ``test_proportion`` lies outside [0, 1].
    '''

    # error checking
    assert isinstance(shuffle, bool), 'shuffle must be True or False!'
    if random_state is not None:
        # bool is a subclass of int, so it has to be excluded explicitly;
        # NumPy integer scalars are accepted alongside plain ints.
        is_integer = (isinstance(random_state, (int, np.integer))
                      and not isinstance(random_state, bool))
        assert is_integer, 'random_state must be an integer!'
    # A negative proportion would turn `split` into a negative slice bound and
    # silently swap the roles of the two sets; a value above 1 empties `train`.
    assert 0 <= test_proportion <= 1, 'test_proportion must be between 0 and 1!'

    # reproducibility
    # Compare against None rather than relying on truthiness: 0 is a valid seed.
    if random_state is not None:
        np.random.seed(random_state)

    # setup indices
    indices = np.arange(data.shape[0])
    if shuffle:
        np.random.shuffle(indices)

    # main logic: the first `split` (shuffled) indices become the test set
    num_indices = len(indices)
    split = int(np.floor(test_proportion * num_indices))
    train = data[indices[split:]]
    test = data[indices[:split]]

    return train, test

In [4]:
# Shuffle the rows with a fixed seed and hold out 40% of them as the test set.
train, test = train_test_split(
    fake_data,
    shuffle=True,
    test_proportion=0.4,
    random_state=43,
)

In [5]:
# Show the train set: the rows of fake_data that were not placed in the test set.
train

array([[14, 76, 22, 67,  5, 16, 11,  5, 26, 57],
       [43, 55, 36, 72, 77, 61,  4, 77, 38, 94],
       [38, 60, 16, 75, 44, 63, 80,  8, 21, 80],
       [ 0, 77, 60, 70, 98, 10, 50, 38, 13, 90],
       [44, 42, 47, 60, 78, 60, 26, 79, 43, 63],
       [83, 96, 34, 47, 15, 39, 85, 58, 29, 80]])

In [6]:
# Show the test set: the rows selected by the first `split` shuffled indices.
test

array([[32,  1, 22, 96,  0, 40, 23, 54, 24, 76],
       [63,  0, 47, 80, 78, 13, 32, 90, 50,  9],
       [49, 57, 58, 44, 91, 22, 95, 46, 11, 11],
       [72, 56, 71, 25, 25, 12, 79, 26, 81, 68]])