# Overview

---

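**This notebook simulates, cell by cell, one training epoch of a serverless single-layer neural network (logistic regression). The launch event stages the data set from S3 in ElastiCache (Redis), a simulated neuron computes the forward-propagation activations, the trainer computes the cost, and the neuron computes the back-propagation gradients.**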

---

# Libraries and Functions

In [None]:
# Import Libraries needed by the Lambda Function
import numpy as np
import h5py
import scipy
import os
from os import environ
import json
from json import dumps, loads
from boto3 import client, resource, Session
import botocore
import uuid
import io
from redis import StrictRedis as redis

# Import libraries needed for the Codebook
from PIL import Image
from scipy import ndimage
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Global Variables
# All AWS clients below are created at import time and shared by every cell.
s3_client = client('s3', region_name='us-west-2') # S3 access
s3_resource = resource('s3')
# NOTE: despite its name this is the ElastiCache management API client, not a
# Redis connection; it is only used to look up the cluster endpoint below.
redis_client = client('elasticache', region_name='us-west-2')
lambda_client = client('lambda', region_name='us-west-2') # Lambda invocations
# Retrieve the Elasticache Cluster endpoint
cc = redis_client.describe_cache_clusters(ShowCacheNodeInfo=True)
# Address of the first node of the first cluster returned by the API
endpoint = cc['CacheClusters'][0]['CacheNodes'][0]['Endpoint']['Address']
# Redis connection to that endpoint; returns raw bytes (no response decoding)
cache = redis(host=endpoint, port=6379, db=0)

In [None]:
def sigmoid(z):
    """
    Computes the logistic sigmoid of the input

    Argument:
    z -- Scalar or Numpy array

    Return:
    1 / (1 + e^-z), evaluated element-wise for arrays
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
def vectorize(x_orig):
    """
    Flattens every example of the input into a single column

    Argument:
    x_orig -- Numpy array of image data whose first axis indexes the examples

    Return:
    Numpy array with one column per example and one row per flattened value
    """
    example_count = x_orig.shape[0]
    # One row per example first, then transpose so examples become columns
    rows = x_orig.reshape(example_count, -1)
    return rows.T

In [None]:

def standardize(x_orig):
    """
    Flattens the image data with `vectorize()` and scales it by 1/255

    Argument:
    x_orig -- Numpy array of image data

    Return:
    Vectorized Numpy array of image data, divided by 255
    """
    flattened = vectorize(x_orig)
    return flattened / 255

In [None]:
def initialize_data(endpoint, w, b):
    """
    Loads the training and testing data from the local HDF5 file
    (`/tmp/datasets.h5`), flattens and standardizes the images, then
    dumps the data to ElastiCache for neurons to process as layer a^0

    Arguments:
    endpoint -- The ElastiCache endpoint
    w -- Weight initialization setting. `0` stores a column vector of zeros
         with one row per input feature; any other value is a placeholder
         for random initialization and currently stores no weights
    b -- Bias initialization setting. `0` stores the bias as given; any other
         value is a placeholder for random initialization and currently
         stores no bias

    Return:
    data_keys -- Dictionary of ElastiCache keys, indexed by object name
    names -- List of the data set names, in the order they were stored
    dims -- Dictionary of data set dimensions, indexed by data set name
    """

    # Load main dataset. The `with` block releases the HDF5 file handle as
    # soon as the datasets have been copied into in-memory numpy arrays
    with h5py.File('/tmp/datasets.h5', "r") as dataset:
        train_set_x_orig = np.array(dataset["train_set_x"][:]) # train set features
        train_set_y_orig = np.array(dataset["train_set_y"][:]) # train set labels
        test_set_x_orig = np.array(dataset["test_set_x"][:]) # test set features
        test_set_y_orig = np.array(dataset["test_set_y"][:]) # test set labels

    # Reshape labels into row vectors
    train_set_y = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    # Preprocess inputs
    train_set_x = standardize(train_set_x_orig)
    test_set_x = standardize(test_set_x_orig)

    # Data sets to publish. The names are spelled out explicitly instead of
    # being recovered by an identity lookup in `locals()`, which silently
    # depends on no other local variable aliasing the same array
    named_arrays = [
        ('train_set_x', train_set_x),
        ('train_set_y', train_set_y),
        ('test_set_x', test_set_x),
        ('test_set_y', test_set_y),
    ]
    data_keys = {} # Dictionary for the hash keys of the data set
    dims = {} # Dictionary of data set dimensions
    for name, array in named_arrays:
        # Dump the numpy array to ElastiCache and record its dimensions
        data_keys[name] = to_cache(endpoint, obj=array, name=name)
        dims[name] = array.shape

    # Initialize weights
    if w == 0: # Initialize weights to dimensions of the input data
        dim = dims.get('train_set_x')[0]
        weights = np.zeros((dim, 1))
        # Store the initial weights as a column vector in ElastiCache
        data_keys['weights'] = to_cache(endpoint, obj=weights, name='weights')
    else:
        # placeholder for random weight initialization; no `weights` key is
        # stored in this case
        pass

    # Initialize Bias
    if b != 0:
        # placeholder for random bias initialization; no `bias` key is
        # stored in this case
        pass
    else:
        data_keys['bias'] = to_cache(endpoint, obj=b, name='bias')

    # Initialize training example size
    m = train_set_x.shape[1]
    data_keys['m'] = to_cache(endpoint, obj=m, name='m')

    return data_keys, [name for name, _ in named_arrays], dims

**Original version**
```python
def to_cache(endpoint, obj, name):
    """
    Serializes multiple data type to ElastiCache and returns
    the Key.
    
    Arguments:
    endpoint -- The ElastiCache endpoint
    obj -- the object to srialize. Can be of type:
            - Numpy Array
            - Python Dictionary
            - String
            - Integer
    name -- Name of the Key
    
    Returns:
    key -- For each type the key is made up of {name}|{type} and for
           the case of Numpy Arrays, the Length and Widtch of the 
           array are added to the Key.
    """
    if 'numpy.ndarray' in str(type(obj)):
        array_dtype = str(obj.dtype)
        length, width = obj.shape
        # Convert the array to string
        val = obj.ravel().tostring()
        # Create a key from the name and necessary parameters from the array
        # i.e. {name}|{type}#{length}#{width}
        key = '{0}|{1}#{2}#{3}'.format(name, array_dtype, length, width)
        # Store the binary string to Redis
        cache = redis(host=endpoint, port=6379, db=0)
        cache.set(key, val)
        return key
    elif type(obj) is str:
        key = '{0}|{1}'.format(name, 'string')
        val = obj
        cache = redis(host=endpoint, port=6379, db=0)
        cache.set(key, val)
        return key
    elif type(obj) is int:
        key = '{0}|{1}'.format(name, 'int')
        val = str(obj)
        cache = redis(host=endpoint, port=6379, db=0)
        cache.set(key, val)
        return key
    elif type(obj) is dict:
        #x = json.dumps(obj)
        #val = json.loads(x)
        val = json.dumps(obj)
        key = '{0}|{1}'.format(name, 'json')
        cache = redis(host=endpoint, port=6379, db=0)
        cache.set(key, val)
        return key
    else:
        print(str(type(obj)) + "is not a supported serialization type")

def from_cache(endpoint, key):
    """
    De-serializes binary object from ElastiCache by reading
    the type of object from the name and converting it to
    the appropriate data type.
    
    Arguments:
    endpoint -- ElastiCache endpoint.
    key -- Name of the Key to retrieve the object.
    
    Returns:
    obj -- The object converted to specifed data type.
    """
    
    # Check if the Key is for a Numpy array containing
    # `float64` data types
    if 'float64' in key:
        cache = redis(host=endpoint, port=6379, db=0)
        val = cache.get(key)
        # De-serialize the value
        array_dtype, length, width = key.split('|')[1].split('#')
        obj = np.fromstring(val, dtype=array_dtype).reshape(int(length), int(width))
        return obj
    # Check if the Key is for a Numpy array containing
    # `int64` data types
    elif 'int64' in key:
        cache = redis(host=endpoint, port=6379, db=0)
        data = cache.get(key)
        # De-serialize the value
        array_dtype, length, width = key.split('|')[1].split('#')
        obj = np.fromstring(data, dtype=array_dtype).reshape(int(length), int(width))
        return obj
    # Check if the Key is for a json type
    elif 'json' in key:
        cache = redis(host=endpoint, port=6379, db=0)
        obj = cache.get(key)
        return json.loads(obj)
    # Chec if the Key is an integer
    elif 'int' in key:
        cache = redis(host=endpoint, port=6379, db=0)
        obj = cache.get(key)
        return int(obj)
    # Check if the Key is a string
    elif 'string' in key:
        cache = redis(host=endpoint, port=6379, db=0)
        obj = cache.get(key)
        return obj
    else:
        print(str(type(obj)) + "is not a supported de-serialization type")
```

In [None]:
def to_cache(endpoint, obj, name):
    """
    Serializes multiple data types to ElastiCache and returns
    the Key.

    Arguments:
    endpoint -- The ElastiCache endpoint
    obj -- the object to serialize. Can be of type:
            - Numpy Array (2-D) or Numpy scalar (e.g. `float64`)
            - Python Dictionary
            - String
            - Integer
    name -- Name of the Key

    Returns:
    key -- For each type the key is made up of {name}|{type} and for
           the case of Numpy Arrays, the Length and Width of the
           array are added to the Key, i.e. {name}|{dtype}#{length}#{width}.
           Numpy scalars are stored with a Length and Width of 0.
           `None` is returned (and a message printed) for unsupported types.

    Raises:
    ValueError -- If a Numpy array is neither 0-D nor 2-D, since the key
                  format can only describe two dimensions.
    """

    # Test if the object to serialize is a Numpy array or Numpy scalar
    if isinstance(obj, (np.ndarray, np.generic)):
        if obj.ndim == 0:
            # Scalars carry no dimensions; `#0#0` marks them in the key
            length, width = 0, 0
        elif obj.ndim == 2:
            length, width = obj.shape
        else:
            raise ValueError(
                "Only 0-D and 2-D Numpy arrays can be cached, got shape "
                + str(obj.shape)
            )
        # Create a key from the name and necessary parameters from the array
        # i.e. {name}|{type}#{length}#{width}
        key = '{0}|{1}#{2}#{3}'.format(name, str(obj.dtype), length, width)
        # Raw bytes of the flattened array (`tostring()` is the deprecated
        # alias of `tobytes()` and no longer exists in NumPy 2)
        val = obj.ravel().tobytes()
    # Test if the object to serialize is a string
    elif isinstance(obj, str):
        key = '{0}|{1}'.format(name, 'string')
        val = obj
    # Test if the object to serialize is an integer; `bool` is excluded
    # because it would not survive the `int()` conversion when read back
    elif isinstance(obj, int) and not isinstance(obj, bool):
        key = '{0}|{1}'.format(name, 'int')
        # Convert to a string
        val = str(obj)
    # Test if the object to serialize is a dictionary
    elif isinstance(obj, dict):
        key = '{0}|{1}'.format(name, 'json')
        # Convert the dictionary to a String
        val = json.dumps(obj)
    else:
        print(str(type(obj)) + " is not a supported serialization type")
        return None

    # Store the serialized value to Redis
    cache = redis(host=endpoint, port=6379, db=0)
    cache.set(key, val)
    return key

def from_cache(endpoint, key):
    """
    De-serializes an object from ElastiCache by reading the type of
    object from the part of the Key after the last `|` and converting
    the stored value to the appropriate data type

    Arguments:
    endpoint -- ElastiCache endpoint
    key -- Name of the Key to retrieve the object, as returned by
           `to_cache()` (a `bytes` Key is decoded as UTF-8 first)

    Returns:
    obj -- The object converted to the specified data type:
            - {dtype}#{length}#{width} -> Numpy array of that shape, or a
              Numpy scalar when the Key was stored with `#0#0`
            - json -> result of `json.loads`
            - int -> Python integer
            - string -> the value exactly as returned by Redis

    Raises:
    ValueError -- If the type part of the Key is not recognized
    """

    if isinstance(key, bytes):
        key = key.decode('utf-8')

    # Only the part after the last `|` describes the type. Matching on the
    # whole Key would mis-read names that happen to contain `int`, `json`...
    type_tag = key.rpartition('|')[2]
    array_fields = type_tag.split('#')
    is_array = len(array_fields) == 3

    if not is_array and type_tag not in ('json', 'int', 'string'):
        raise ValueError(
            "'" + type_tag + "' is not a supported de-serialization type"
        )

    cache = redis(host=endpoint, port=6379, db=0)
    val = cache.get(key)

    # Numpy array or Numpy scalar of any dtype
    if is_array:
        array_dtype, length, width = array_fields
        length, width = int(length), int(width)
        flat = np.frombuffer(val, dtype=np.dtype(array_dtype))
        if length == 0 and width == 0 and flat.size == 1:
            # `#0#0` with a single element is a serialized Numpy scalar
            return flat[0]
        # `frombuffer` returns a read-only view of the bytes; copy so the
        # caller gets a writable array
        return flat.reshape(length, width).copy()
    # json type
    if type_tag == 'json':
        return json.loads(val)
    # Integer
    if type_tag == 'int':
        return int(val)
    # String
    return val

In [None]:
def name2str(obj, namespace):
    """
    Finds every name that is bound to the given object in a namespace

    Arguments:
    obj -- Object to look up (compared by identity, not equality)
    namespace -- dictionary mapping names to objects, e.g. `locals()`

    Return:
    List of the names in `namespace` that refer to `obj`
    """
    matches = []
    for name, value in namespace.items():
        if value is obj:
            matches.append(name)
    return matches

---
# Launch Event

In [None]:
# NOTE: `w` and `b` are placeholders here; they are rebound to the cached
# weight and bias values in the forward propagation section further down
w = 0
b = 0
# Current epoch, used to build the `epoch<N>` entry of the results object
epoch = 1
# Current layer, used to build the `layer<N>` entry of the grads object
layer = 1
# Simulate S3 event trigger data
event = {
    "Records": [
        {
            "eventVersion": "2.0",
            "eventTime": "1970-01-01T00:00:00.000Z",
            "requestParameters": {
                "sourceIPAddress": "127.0.0.1"
             },
            "s3": {
                "configurationId": "testConfigRule",
                "object": {
                    "eTag": "0123456789abcdef0123456789abcdef",
                    "sequencer": "0A1B2C3D4E5F678901",
                    "key": "training_input/datasets.h5",
                    "size": 1024
                },
                "bucket": {
                    "arn": "arn:aws:s3:::lnn",
                    "name": "lnn",
                    "ownerIdentity": {
                        "principalId": "EXAMPLE"
                    }
                },
                "s3SchemaVersion": "1.0"
            },
            "responseElements": {
                "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH",
                "x-amz-request-id": "EXAMPLE123456789"
            },
            "awsRegion": "us-west-2",
            "eventName": "ObjectCreated:Put",
            "userIdentity": {
                "principalId": "EXAMPLE"
            },
            "eventSource": "aws:s3"
        }
    ]
}

# Simulate TrainerLambda ARN
#environ[str('TrainerLambda')] = str(None)

In [None]:
# Retrieve datasets and setting from S3
input_bucket = s3_resource.Bucket(str(event['Records'][0]['s3']['bucket']['name']))
dataset_key = str(event['Records'][0]['s3']['object']['key'])
# The settings file is looked up under the data set's immediate parent prefix
settings_key = dataset_key.split('/')[-2] + '/parameters.json'
try:
    input_bucket.download_file(dataset_key, '/tmp/datasets.h5')
    input_bucket.download_file(settings_key, '/tmp/parameters.json')
except botocore.exceptions.ClientError as e:
    if e.response['Error']['Code'] == '404':
        print("Error downloading input data from S3, S3 object does not exist")
    # Always stop here: carrying on would either fail later with an
    # unrelated error or silently reuse stale files left in /tmp by a
    # previous run
    raise

# Extract the neural network parameters
with open('/tmp/parameters.json') as parameters_file:
    parameters = json.load(parameters_file)

# Build in additional parameters from neural network parameters
parameters['epoch'] = 1
# Next Layer to process
parameters['layer'] = 1
# Input data sets and data set parameters
parameters['data_keys'], parameters['input_data'], parameters['data_dimensions'] = initialize_data(
    endpoint=endpoint,
    w=parameters.get('weight'),
    b=parameters.get('bias')
)

# Initialize payload to `TrainerLambda`
payload = {}
# Initialize the overall state
payload['state'] = 'start'
# Dump the parameters to ElastiCache
payload['parameter_key'] = to_cache(endpoint, obj=parameters, name='parameters')
#payload['endpoint'] = endpoint
# Prepare the payload for `TrainerLambda`
payloadbytes = dumps(payload)

print("Complete Neural Network Settings: \n")
print(dumps(parameters, indent=4, sort_keys=True))
print("Payload to be sent to TrainerLambda: \n" + dumps(payload, indent=4, sort_keys=True))

---
# Trainer -> Neuron Event for Forward Propagation
**Simulating Forward Propagation of the Neuron**

In [None]:
# Fake results object created by `TrainerLambda`
results = {}
results['epoch' + str(epoch)] = {}
results_key = to_cache(endpoint=endpoint, obj=results, name='results')

In [None]:
event = payload

In [None]:
# Key of the parameters object published by the launch step, and the
# parameters themselves. Both are plain module-level names in the notebook.
parameter_key = event.get('parameter_key')
parameters = from_cache(endpoint, parameter_key)

In [None]:
w = from_cache(endpoint=endpoint, key=parameters['data_keys']['weights'])
b = from_cache(endpoint=endpoint, key=parameters['data_keys']['bias'])
X = from_cache(endpoint=endpoint, key=parameters['data_keys']['train_set_x'])
Y = from_cache(endpoint=endpoint, key=parameters['data_keys']['train_set_y'])
m = from_cache(endpoint=endpoint, key=parameters['data_keys']['m'])

In [None]:
a_1 = sigmoid(np.dot(w.T, X) + b)

In [None]:
a_1.shape

In [None]:
a_1

In [None]:
# First clean up any existing activations in Redis for testing
for key in cache.scan_iter(match='a_*'):
    cache.delete(key)

In [None]:
ID = 1
to_cache(endpoint=endpoint, obj=a_1, name='a_'+str(ID))

---
# Neuron -> Trainer Event
**Simulating processing the Cost from the Activation output/s**

In [None]:
# Use this redis connection to ensure that a pure string (not bytes) is
# returned for the key. `encoding` replaces the deprecated `charset` alias,
# which newer redis-py releases no longer accept.
r = redis(host=endpoint, port=6379, db=0, encoding="utf-8", decode_responses=True)

In [None]:
for key in cache.scan_iter(match='a_*'):
    print(key)

In [None]:
for key in r.scan_iter(match='a_*'):
    print(key)

In [None]:
key_list = []
for key in r.scan_iter(match='a_*'):
    key_list.append(key)
print(key_list)

In [None]:
A_dict = {}
for i in key_list:
    A_dict[i] = from_cache(endpoint=endpoint, key=i)

In [None]:
A_dict

In [None]:
A_dict.values()

In [None]:
A = np.array([arr.tolist() for arr in A_dict.values()])

In [None]:
A.shape

In [None]:
from_cache(endpoint=endpoint, key='a_1|float64#1#209')

In [None]:
A = A.reshape(1, 209)

In [None]:
#A = np.squeeze(A)

In [None]:
A

**Merging two activation arrays**

In [None]:
a_2 = sigmoid(np.dot(w.T, X) + b)

In [None]:
a_2

In [None]:
a_2.shape

In [None]:
ID = 2
to_cache(endpoint=endpoint, obj=a_2, name='a_'+str(ID))

In [None]:
# Decoding connection so keys come back as `str`. `encoding` replaces the
# deprecated `charset` alias, which newer redis-py releases no longer accept.
r = redis(host=endpoint, port=6379, db=0, encoding="utf-8", decode_responses=True)
for key in r.scan_iter(match='a_*'):
    print(key)

In [None]:
key_list = []
for key in r.scan_iter(match='a_*'):
    key_list.append(key)
print(key_list)

In [None]:
A_dict = {}
for i in key_list:
    A_dict[i] = from_cache(endpoint=endpoint, key=i)

In [None]:
A_dict

In [None]:
A = np.array([arr.tolist() for arr in A_dict.values()])

In [None]:
#A = A.reshape(2, 209)

In [None]:
A.shape

In [None]:
A = np.squeeze(A)

In [None]:
A.shape

**Simulate 3 Hidden Unit activations**

In [None]:
a_3 = sigmoid(np.dot(w.T, X) + b)
ID = 3
to_cache(endpoint=endpoint, obj=a_3, name='a_'+str(ID))
key_list = []
for key in r.scan_iter(match='a_*'):
    key_list.append(key)
print(key_list)

In [None]:
a_3

In [None]:
A_dict = {}
for i in key_list:
    A_dict[i] = from_cache(endpoint=endpoint, key=i)
A_dict

In [None]:
A = np.array([arr.tolist() for arr in A_dict.values()])
A = np.squeeze(A)
A.shape

### Final Test
**Single Activation**

In [None]:
# First clean up any existing activations in Redis for testing
for key in cache.scan_iter(match='a_*'):
    cache.delete(key)

# Calculate activation
a_1 = sigmoid(np.dot(w.T, X) + b)
a_2 = sigmoid(np.dot(w.T, X) + b)
a_3 = sigmoid(np.dot(w.T, X) + b)

# Push one activation to cache
ID = 1
to_cache(endpoint=endpoint, obj=a_1, name='a_'+str(ID))

# Run the algorithm
# SCAN returns keys in no guaranteed order, so sort on the numeric neuron ID
# (`a_<ID>|...`) to keep row i of `A` tied to the same neuron on every run
key_list = sorted(
    r.scan_iter(match='a_*'),
    key=lambda k: int(k.split('|')[0].split('_')[1])
)
assert key_list, "no activations found in the cache"
# Create a dictionary of numpy arrays
A_dict = {k: from_cache(endpoint=endpoint, key=k) for k in key_list}
# Create the numpy array of activations. Each activation is a (1, m) row, so
# stacking them gives (number of hidden units, m) for one or many units
# without special-casing, and without `squeeze` collapsing other unit axes
no_activations = len(key_list)
A = np.vstack([A_dict[k] for k in key_list])
print(A.shape)

**Three Activations**

```python
# Push other two activations to cache
ID = 2
to_cache(endpoint=endpoint, obj=a_2, name='a_'+str(ID))
ID = 3
to_cache(endpoint=endpoint, obj=a_3, name='a_'+str(ID))

# Run the algorithm
key_list = []
for key in r.scan_iter(match='a_*'):
    key_list.append(key)
# Create a dictionat of numpy arrays
A_dict = {}
for i in key_list:
    A_dict[i] = from_cache(endpoint=endpoint, key=i)
# Create the numpy array of activations, depending on the 
# number of hidden units
no_activations = len(key_list)
A = np.array([arr.tolist() for arr in A_dict.values()])
if no_activations == 1:
    dims = (key_list[0].split('|')[1].split('#')[1:])
    A = A.reshape(int(dims[0]), int(dims[1]))
    print(A.shape)
else:
    A = np.squeeze(A)
    print(A.shape)
```

### Calculate the Cost

In [None]:
# Cross-entropy cost on the activations, averaged over the m examples
positive_term = Y * (np.log(A))
negative_term = (1 - Y) * np.log(1 - A)
cost = (-1 / m) * np.sum(positive_term + negative_term)
cost

### Upload Data to Cache

In [None]:
# Log A as input data to Backprop
# Get the latest parameters 
parameters = from_cache(endpoint, key=parameter_key)

# Update `A` to the local dictionary
parameters['data_keys']['A'] = to_cache(endpoint=endpoint, obj=A, name='A')

# Update paramaters in ElastiCache
parameter_key = to_cache(endpoint, obj=parameters, name='parameters')
#print(parameter_key) # should be `parameters|json'

# Update `results` with the Cost
results = from_cache(endpoint=endpoint, key='results|json')
results['epoch' + str(epoch)]['cost'] = cost
results_key = to_cache(endpoint=endpoint, obj=results, name='results')

---
# Trainer -> Neuron Event for Backprop
**Simulate the process of Calculating the Gradients**

In [None]:
# Simulate fake configurations on the `TrainerLambda`
grads = {}
# Keys to the derivative of w and b for layer=1
grads['layer' + str(layer)] = {}
grads = to_cache(endpoint=endpoint, obj=grads, name='grads')
parameters['data_keys']['grads'] = grads
# Simulate fake payload additions
payload['state'] = 'backward'
payload['parameter_key'] = parameter_key
payload['results_key'] = results_key
payload['id'] = 1
payload['layer'] = 1
print("Complete Neural Network Settings: \n")
print(dumps(parameters, indent=4, sort_keys=True))
print("Payload to be sent to NeuLambda: \n" + dumps(payload, indent=4, sort_keys=True))

## Calculate the Gradients

In [None]:
# Backward propogation to determine gradients
dw = (1 / m) * np.dot(X, (A - Y).T)
print("Partial Derivatives - Weights for Neuron" + str(ID) + ":\n")
print(dw)
db = (1 / m) * np.sum(A - Y)
print("Partial Derivatives - Bias for Neuron" + str(ID) + ":\n")
print(db)

**There are two different numpy types in play: *ndarray* (e.g. `dw`) and the scalar type *float64* (e.g. `db`). The original `to_cache()` function could serialize a numpy array, but not a *float64*. The `cost` is also a *float64*, but to get around the limitations of ElastiCache it was added to `results` instead. Since the `Bias` and `Weights` are already stored as `data_keys`, the `to_cache()` and `from_cache()` functions need to differentiate between the different numpy types.**

In [None]:
# Inspect the bias gradient: a numpy scalar rather than an array
print("db shape: " + str(db.shape))
print("db type: " + str(type(db)))
print("db dtype: " + str(db.dtype))

In [None]:
print("dw shape: " + str(dw.shape))
print("dw type: " + str(type(dw)))
print("dw dtype: " + str(dw.dtype))

**Sanity Check**

In [None]:
test_db_key = to_cache(endpoint, obj=db, name='layer'+str(layer)+'_db')
print(test_db_key)
test_db = from_cache(endpoint, key=test_db_key)
print("test_db type: " + str(type(test_db)))
print("test_db shape: " + str(test_db.shape))
test_dw_key = to_cache(endpoint, obj=dw, name='layer'+str(layer)+'_dw')
print(test_dw_key)
test_dw = from_cache(endpoint, key=test_dw_key)
print("test_dw type: " + str(type(test_dw)))
print("test_dw shape: " + str(test_dw.shape))
assert(test_dw.shape==dw.shape)
assert(test_db.shape == db.shape)

## Upload Data to Cache
The `TrainerLambda` must access the derivatives to determine the final `Weights` and `Bias` for the next epoch through *Gradient Descent*.

In [None]:
# Pull the gradients object created by the `TrainerLambda`
grads_key = parameters['data_keys']['grads']
# Load the grads object
grads = from_cache(endpoint, key=grads_key)
grads

In [None]:
# Update the grads object with the calculated derivatives
grads['layer' + str(layer)]['dw_' + str(ID)] = to_cache(endpoint, obj=dw, name='dw_'+str(ID))
grads['layer' + str(layer)]['db_' + str(ID)] = to_cache(endpoint, obj=db, name='db_'+str(ID))
# Write the updated grads object back to ElastiCache and keep its *key* in
# the parameters. Storing the dictionary itself here would leave the cached
# grads object stale and break the `from_cache(key=grads_key)` lookup above
# the next time it runs.
parameters['data_keys']['grads'] = to_cache(endpoint, obj=grads, name='grads')
# Upload the parameters to ElastiCache
parameter_key = to_cache(endpoint, obj=parameters, name='parameters')
# Alias kept for any cell that still refers to the old variable name
parameters_key = parameter_key

In [None]:
parameters

# Neuron -> Trainer - Optimization