In [1]:
import numpy as np
import utils
import config

In [2]:
# Values read once from the project's config module so later cells can use
# short names.
BASE_IMAGE = config.BASE_IMAGE
bucket_name = config.BUCKET_NAME

# S3 endpoint and access credentials, as exposed by config.
S3_END_POINT, S3_ACCESS_ID, S3_ACCESS_KEY = (
    config.S3_END_POINT,
    config.S3_ACCESS_ID,
    config.S3_ACCESS_KEY,
)

In [3]:
# Short aliases for the storage helpers defined in the project's utils module.
get_client, create_bucket = utils.get_client, utils.create_bucket
read_from_store, write_to_store = utils.read_from_store, utils.write_to_store

In [5]:
def download_data() -> int:
    '''Generate the synthetic train/test datasets and write them to the store.

    Despite the name, nothing is downloaded: the data is produced by the nested
    ``generate_binary_data`` helper with fixed seeds (100 for train, 105 for
    test). Four arrays are then passed to ``write_to_store`` for ``bucket_name``
    under the keys 'features_train', 'target_train', 'features_test' and
    'target_test'.

    Returns:
        0 after all four writes have been issued. Exceptions raised by the
        storage helpers are not caught here and propagate to the caller.
    '''

    client = get_client()

    def generate_binary_data(N_examples=1000, seed=None):
        '''Generate two concentric classes of 2-D points.

        Args:
            N_examples: number of points generated PER CLASS, so the returned
                arrays have 2 * N_examples rows.
            seed: fix this to regenerate the same points. When None, the
                process-wide NumPy generator is used as-is.
        Returns:
            features: A 2-dimensional numpy array of shape (2 * N_examples, 2),
                one row per example and one column per feature (x, y).
            target: A 1-dimensional numpy array with one entry per example
                denoting the class - 0 or 1. Rows alternate 0, 1, 0, 1, ...
        '''
        # A private RandomState produces the same stream as
        # np.random.seed(seed) followed by np.random.* calls, but does not
        # reseed the process-wide generator as a side effect.
        rng = np.random if seed is None else np.random.RandomState(seed)

        features = []
        target = []

        # The draw order (r, theta for class 0, then r, theta for class 1) is
        # kept fixed so a given seed keeps reproducing the same dataset.
        for _ in range(N_examples):
            #class = 0
            r = rng.uniform() #class 0 has radius between 0 and 1
            theta = rng.uniform(0, 2*np.pi) #class 0 has any angle between 0 and 360 degrees

            features.append([r*np.cos(theta), r*np.sin(theta)])
            target.append(0)

            #class = 1
            r = 3 + rng.uniform() #class 1 has radius between 3+0=3 and 3+1=4
            theta = rng.uniform(0, 2*np.pi) #class 1 has any angle between 0 and 360 degrees

            features.append([r*np.cos(theta), r*np.sin(theta)])
            target.append(1)

        # reshape/dtype keep the documented shapes and dtypes even when
        # N_examples == 0 (a bare np.array([]) would be 1-D float).
        features = np.array(features, dtype=float).reshape(-1, 2)
        target = np.array(target, dtype=int)

        return features, target

    features_train, target_train = generate_binary_data(N_examples=1000, seed=100)
    features_test, target_test = generate_binary_data(N_examples=500, seed=105)

    # create_bucket / write_to_store come from utils; their exact semantics
    # (e.g. behaviour when the bucket already exists) are not visible here.
    create_bucket(bucket_name, client)
    write_to_store(bucket_name, features_train, 'features_train', client)
    write_to_store(bucket_name, target_train, 'target_train', client)
    write_to_store(bucket_name, features_test, 'features_test', client)
    write_to_store(bucket_name, target_test, 'target_test', client)

    return 0

In [6]:
# Run the step; a positive return code is treated as failure.
retcode = download_data()
if retcode > 0:
    raise ValueError("Step 1a failed")