In [1]:
import numpy as np

In [2]:
# Toy dataset: 100 random integers drawn from [0, 100), arranged as 10 rows x 10 columns.
# No seed is set in this cell, so the values change on every run.
fake_data = np.random.randint(low=0, high=100, size=100).reshape((10, 10))
fake_data

array([[75, 50, 96, 10, 21, 46, 93,  1, 91,  7],
       [64, 92,  2, 21, 25, 53, 62, 82,  5, 58],
       [58, 26, 55, 33, 91, 47,  3, 14, 40, 23],
       [93, 67,  6,  0, 49, 13,  5, 61, 11, 16],
       [36, 41, 40, 58, 47, 55, 80, 98, 66, 94],
       [32, 69, 52, 87, 79, 50, 30, 68, 93, 65],
       [11, 73, 39, 59, 81, 55, 71,  2, 68, 61],
       [88, 25, 62, 63, 26, 57, 98, 74, 11, 25],
       [ 2, 34, 30, 10,  5, 18, 76, 43, 11, 88],
       [ 4, 17, 87, 87, 50, 78, 58, 31, 29, 89]])

In [3]:
def train_validation_test_split(data, shuffle=True, validation_proportion=0.3,
                                test_proportion=0.2, random_state=None):
    '''Split the rows of ``data`` into train, validation, and test sets.

    The rows (first axis) are optionally shuffled, then carved up in this
    order: the first ``floor(test_proportion * n)`` rows go to the test set,
    the next ``floor(validation_proportion * n)`` rows go to the validation
    set, and everything that remains becomes the training set.

    Parameters
    ----------
    data : array-like exposing ``.shape[0]`` and integer-array indexing
        (e.g. a NumPy array). Rows are selected with ``data[indices]``.
    shuffle : bool, default True
        Whether to shuffle the row order before splitting.
    validation_proportion : float, default 0.3
        Fraction of rows assigned to the validation set.
    test_proportion : float, default 0.2
        Fraction of rows assigned to the test set.
    random_state : int or None, default None
        Seed for reproducibility. Any integer, including 0, is honoured.
        NOTE: when given, this reseeds NumPy's *global* random generator
        via ``np.random.seed``.

    Returns
    -------
    (train, validation, test) : tuple
        The three row subsets of ``data``, in that order.

    Raises
    ------
    AssertionError
        If ``shuffle`` is not a boolean, ``random_state`` is not an integer,
        a proportion is negative, or the two proportions leave no room for
        training data.
    '''

    # error checking
    assert isinstance(shuffle, (bool, np.bool_)), 'shuffle must be True or False!'
    if random_state is not None:
        # bool is a subclass of int, so it has to be excluded explicitly
        is_integer = (isinstance(random_state, (int, np.integer))
                      and not isinstance(random_state, bool))
        assert is_integer, 'random_state must be an integer!'
    # negative proportions would turn into negative slice bounds and yield
    # overlapping / nonsensical subsets instead of an error
    assert validation_proportion >= 0 and test_proportion >= 0, \
        'proportions must be non-negative!'
    assert validation_proportion + test_proportion < 1, 'need some training data!'

    # reproducibility: compare against None so that a seed of 0 is not ignored
    if random_state is not None:
        np.random.seed(random_state)

    # setup indices
    indices = np.arange(data.shape[0])
    if shuffle:
        np.random.shuffle(indices)

    # calc num of indices to keep in each held-out set
    num_indices = len(indices)
    validation_split = int(np.floor(validation_proportion * num_indices))
    test_split = int(np.floor(test_proportion * num_indices))

    # parse indices: [ test | validation | train ]
    holdout_end = test_split + validation_split
    test_indices = indices[:test_split]
    validation_indices = indices[test_split:holdout_end]
    train_indices = indices[holdout_end:]

    # create data sets
    train = data[train_indices]
    validation = data[validation_indices]
    test = data[test_indices]

    return train, validation, test

In [4]:
# Shuffle the rows with a fixed seed, then hold out 20% for test and 30% for validation;
# the remainder becomes the training set.
train, validation, test = train_validation_test_split(
    fake_data,
    shuffle=True,
    validation_proportion=0.3,
    test_proportion=0.2,
    random_state=43,
)

In [5]:
# display the training rows (what remains after the test and validation rows are removed)
train

array([[32, 69, 52, 87, 79, 50, 30, 68, 93, 65],
       [64, 92,  2, 21, 25, 53, 62, 82,  5, 58],
       [88, 25, 62, 63, 26, 57, 98, 74, 11, 25],
       [75, 50, 96, 10, 21, 46, 93,  1, 91,  7],
       [36, 41, 40, 58, 47, 55, 80, 98, 66, 94]])

In [6]:
# display the validation rows
validation

array([[11, 73, 39, 59, 81, 55, 71,  2, 68, 61],
       [ 2, 34, 30, 10,  5, 18, 76, 43, 11, 88],
       [58, 26, 55, 33, 91, 47,  3, 14, 40, 23]])

In [7]:
# display the test rows
test

array([[93, 67,  6,  0, 49, 13,  5, 61, 11, 16],
       [ 4, 17, 87, 87, 50, 78, 58, 31, 29, 89]])