## MultiUnitMultiplication

A unit is a Strassen-Multiplication StepFunction.
The matrix multiplication to be performed is `m.dot(m.transpose())`, where `m` has dimensions `4000*4000`.
`m` will be divided into `4*4=16` matrix partitions of dimension `1000*1000`. Partitions are named `m_row_col`, from `m_0_0`, `m_0_1`, ... to `m_3_3`.


m_0_0  |  m_0_1  |  m_0_2  |  m_0_3

m_1_0  |  m_1_1  |  m_1_2  |  m_1_3

m_2_0  |  m_2_1  |  m_2_2  |  m_2_3

m_3_0  |  m_3_1  |  m_3_2  |  m_3_3

### Generate Matrix Partitions

In [3]:
import numpy as np
import os

In [3]:
def create_square_staircase_matrix(n=10):
    """Return an n-by-n integer matrix whose entry at (i, j) is i + j.

    The matrix is built with NumPy broadcasting instead of a nested
    Python loop, so large sizes do not pay a per-element interpreter
    cost.

    Args:
        n: Number of rows and columns of the matrix.

    Returns:
        A 2-D ``numpy.ndarray`` of shape ``(n, n)``. For ``n == 0`` the
        result is an empty array of shape ``(0, 0)``.
    """
    indices = np.arange(n)
    # A column vector plus a row vector broadcasts to the full (n, n) grid
    # of pairwise sums i + j.
    return indices[:, np.newaxis] + indices[np.newaxis, :]

In [257]:
def write_partition_to_file(partition, directory, row, column):
    """Save one matrix partition as ``<directory>/m_<row>_<column>.npy``.

    Args:
        partition: Array to store.
        directory: Target directory; it is created, including any missing
            parent directories, when it does not exist yet.
        row: Row index used in the file name.
        column: Column index used in the file name.
    """
    # os.makedirs also creates missing parents, whereas os.mkdir raises
    # when the parent directory does not exist.
    if not os.path.isdir(directory):
        os.makedirs(directory)
    path = os.path.join(directory, "m_" + str(row) + "_" + str(column))
    # np.save appends the ".npy" extension because the path has none.
    np.save(path, partition)

In [259]:
def split_matrix_into(matrix, x, y, partition_size, directory="/tmp/sc4000t"):
    """Split ``matrix`` into ``x * y`` square partitions and write them to disk.

    Partition ``(i, j)`` covers rows ``i*partition_size`` up to
    ``(i+1)*partition_size`` and columns ``j*partition_size`` up to
    ``(j+1)*partition_size`` and is written via
    ``write_partition_to_file`` under the indices ``i`` and ``j``.

    Args:
        matrix: 2-D array to split.
        x: Number of partitions along the first axis.
        y: Number of partitions along the second axis.
        partition_size: Edge length of each square partition.
        directory: Directory the partition files are written to. Defaults
            to the location that used to be hard-coded here.
    """
    p = partition_size
    for i in range(x):
        for j in range(y):
            block = matrix[i * p:(i + 1) * p, j * p:(j + 1) * p]
            write_partition_to_file(block, directory, i, j)

In [261]:
# Build the 4000x4000 staircase matrix, transpose it, and write it out as a
# 4x4 grid of 1000x1000 partitions.
sq_m = create_square_staircase_matrix(4000).T
split_matrix_into(sq_m, x=4, y=4, partition_size=1000)

### Upload Matrix Partitions

In [40]:
import boto3

In [41]:
# Shared S3 client; the upload and download cells in this notebook call it.
s3_client = boto3.client('s3')

In [68]:
def upload_partitions(x, y, directory, matrix_name, bucket):
    """Upload the ``x * y`` partition files of a matrix to S3.

    For every index pair ``(row, col)`` the local file
    ``<directory>/m_<row>_<col>.npy`` is uploaded to ``bucket`` under the
    object key ``<matrix_name>/m_<row>_<col>.npy``.

    Args:
        x: Number of partitions along the first index.
        y: Number of partitions along the second index.
        directory: Local directory containing the ``.npy`` files.
        matrix_name: Key prefix (folder) used inside the bucket.
        bucket: Name of the destination S3 bucket.
    """
    for row in range(x):
        for col in range(y):
            filename = "m_{}_{}.npy".format(row, col)
            local_path = os.path.join(directory, filename)
            object_key = matrix_name + "/" + filename
            s3_client.upload_file(local_path, bucket, object_key)

In [71]:
# Upload the 4x4 partition grid under the key prefix "sc4000".
# NOTE(review): split_matrix_into in this notebook writes its files to
# /tmp/sc4000t, while this call reads m_<i>_<j>.npy directly from /tmp/ --
# confirm the files are where this call expects them.
upload_partitions(4, 4, '/tmp/', "sc4000", "jmue-matrix-tests")

### Call Matrix Multiplication Lambda

### Check Results

### Input Format

In [None]:
{
  "matA": {
    "bucket": "jmue-matrix-tests",
    "key": "sc4000",
    "split": { ... }
  },
  "matB": {
      "bucket": "jmue-matrix-tests",
      "key": "sc4000t",
      "split": { ... }
  },
  "result": {
      "bucket": "jmue-matrix-tests",
      "key": "sc4000-result"
  }
}

{
  "matA": {
    "bucket": "jmue-matrix-tests",
    "key": "sc4000",
    "split": {
      "x1": 0,
      "y1": 0,
      "x2":1000,
      "y2":1000
    }
  },
  "matB": {
      "bucket": "jmue-matrix-tests",
      "key": "sc4000t",
      "split": {
        "x1": 0,
        "y1": 0,
        "x2":1000,
        "y2":1000
      }
  },
  "result": {
      "bucket": "jmue-matrix-tests",
      "key": "sc4000-result"
  }
}

# A split
{
  "split": {
    "x1": 0,
    "y1": 0,
    "x2":1000,
    "y2":1000
  }
}

In [175]:
# Derive the S3 object key of the partition file addressed by a split
# descriptor: "<key>/m_<x1>_<y1>.npy".
matrix = { "bucket": "jmue-matrix-tests", "key": "sc4000", "split": { "x1": 0, "y1": 0, "x2":1000, "y2":1000 } }
x1 = matrix['split']['x1']
y1 = matrix['split']['y1']
key = matrix['key'] +  "/m_" + str(x1) + "_" + str(y1) + ".npy"
# Single-argument print() produces the same output on Python 2 and Python 3.
print(key)

sc4000/m_0_0.npy


In [184]:
# Scratch check of `not` semantics; single-argument print() produces the same
# output on Python 2 and Python 3.
if not False:
    print("False")

False


In [185]:
# Make sure the local folder for this matrix exists below /tmp/.
if not os.path.exists('/tmp/' + matrix['key']):
    os.mkdir('/tmp/' + matrix['key'])
# Download the partition object to the same relative path below /tmp/ ...
s3_client.download_file('jmue-matrix-tests', key, '/tmp/' + key)
# ... and load it back as a NumPy array.
split = np.load('/tmp/' + key)

In [174]:
# Display the partition loaded from the downloaded .npy file.
split

array([[   0,    1,    2, ...,  997,  998,  999],
       [   1,    2,    3, ...,  998,  999, 1000],
       [   2,    3,    4, ...,  999, 1000, 1001],
       ..., 
       [ 997,  998,  999, ..., 1994, 1995, 1996],
       [ 998,  999, 1000, ..., 1995, 1996, 1997],
       [ 999, 1000, 1001, ..., 1996, 1997, 1998]])

In [5]:
import json
import math

In [83]:
# Serialize a value looked up from a dict into a small JSON input document.
b = 5
a = dict(key=b, key2="Hello")
sfn_input = json.dumps(dict(value=a["key"]))

In [84]:
# Display the serialized JSON string.
sfn_input

'{"value": 5}'

In [98]:
# Round the quotient 4030 / 2000 up to the next whole number.
int(math.ceil(4030.0/2000.0))

3

In [150]:
# Edge length of one square block, in matrix elements
# (presumably "split side length" -- TODO confirm the abbreviation).
ssl = 1000


def call_multi(i, j, k):
    """Print the split coordinates of both operands of one block product.

    The left operand is addressed by the index pair ``(i, k)`` and the
    right operand by ``(k, j)``; each index is scaled by the module-level
    ``ssl``. The first printed line holds the ``x1``/``y1`` start
    coordinates, the second the ``x2``/``y2`` end coordinates, with the two
    operands separated by ``|``.

    Each line is formatted into one string and passed to a single-argument
    ``print()``, so the output is identical on Python 2 and Python 3.

    Args:
        i: First block index of the left operand.
        j: Second block index of the right operand.
        k: Shared block index (second of the left operand, first of the
            right operand).
    """
    left_x1, left_y1 = i * ssl, k * ssl
    right_x1, right_y1 = k * ssl, j * ssl
    print("x1 {} y1 {} | x1 {} y1 {}".format(
        left_x1, left_y1, right_x1, right_y1))
    print("x2 {} y2 {} | x2 {} y2 {}".format(
        left_x1 + ssl, left_y1 + ssl, right_x1 + ssl, right_y1 + ssl))

In [151]:
# Enumerate every block product A[i][k] * B[k][j] for an m x p block matrix A
# and a p x n block matrix B, printing the 1-based block labels followed by
# the split coordinates reported by call_multi.
m = 3
n = 2
p = 2

for i in range(m):
    for j in range(n):
        for k in range(p):
            # Single-argument print() behaves the same on Python 2 and 3.
            print("A" + str(i+1) + str(k+1) + "*" + "B" + str(k+1) + str(j+1))
            call_multi(i,j,k)
        # Blank line between the groups of products for one (i, j) pair.
        print("")

A11*B11
x1 0 y1 0 | x1 0 y1 0
x2 1000 y2 1000 | x2 1000 y2 1000
A12*B21
x1 0 y1 1000 | x1 1000 y1 0
x2 1000 y2 2000 | x2 2000 y2 1000

A11*B12
x1 0 y1 0 | x1 0 y1 1000
x2 1000 y2 1000 | x2 1000 y2 2000
A12*B22
x1 0 y1 1000 | x1 1000 y1 1000
x2 1000 y2 2000 | x2 2000 y2 2000

A21*B11
x1 1000 y1 0 | x1 0 y1 0
x2 2000 y2 1000 | x2 1000 y2 1000
A22*B21
x1 1000 y1 1000 | x1 1000 y1 0
x2 2000 y2 2000 | x2 2000 y2 1000

A21*B12
x1 1000 y1 0 | x1 0 y1 1000
x2 2000 y2 1000 | x2 1000 y2 2000
A22*B22
x1 1000 y1 1000 | x1 1000 y1 1000
x2 2000 y2 2000 | x2 2000 y2 2000

A31*B11
x1 2000 y1 0 | x1 0 y1 0
x2 3000 y2 1000 | x2 1000 y2 1000
A32*B21
x1 2000 y1 1000 | x1 1000 y1 0
x2 3000 y2 2000 | x2 2000 y2 1000

A31*B12
x1 2000 y1 0 | x1 0 y1 1000
x2 3000 y2 1000 | x2 1000 y2 2000
A32*B22
x1 2000 y1 1000 | x1 1000 y1 1000
x2 3000 y2 2000 | x2 2000 y2 2000



In [242]:
def partition(matrix, x, y):
    """Download one partition of a split from S3 and return it as an array.

    The file name is derived from the split descriptor: ``x`` and ``y`` are
    first shifted by the split origin (``x1``/``y1`` divided by 1000) and
    then multiplied by the partition factor (half the split length divided
    by 1000).

    Floor division (``//``) keeps these values integral on Python 3 as
    well; true division would turn them into floats and produce file names
    such as ``m_3.0_2.0.npy``.

    Args:
        matrix: Dict with the keys ``bucket``, ``folder`` and ``split``;
            ``split`` holds the integer coordinates ``x1``, ``y1``, ``x2``
            (``y2`` is not read here).
        x: First partition index relative to the split origin.
        y: Second partition index relative to the split origin.

    Returns:
        The array loaded from the downloaded ``.npy`` file.
    """
    split = matrix['split']
    x += split['x1'] // 1000
    y += split['y1'] // 1000
    # The length of a split is twice the size of a partition.
    partition_factor = ((split['x2'] - split['x1']) // 2) // 1000

    filename = "m_" + str(partition_factor*x) + "_" + str(partition_factor*y) + ".npy"
    key = matrix['folder'] + "/" + filename  # e.g. sc4000/m_0_0.npy

    # The object is downloaded to the same relative path below /tmp/.
    if not os.path.exists('/tmp/' + matrix['folder']):
        os.mkdir('/tmp/' + matrix['folder'])

    print(key)
    s3_client.download_file(matrix['bucket'], key, '/tmp/' + key)
    return np.load('/tmp/' + key)

In [241]:
# Fetch one partition object directly (bucket, object key, local destination).
s3_client.download_file("jmue-matrix-tests", "sc4000/m_3_2.npy", '/tmp/sc4000/m_3_2.npy')

In [244]:
# Split descriptor covering coordinates 2000..4000 on both axes of "sc4000".
matrix = { "bucket": "jmue-matrix-tests", "folder": "sc4000", "split": { "x1": 2000, "y1": 2000, "x2":4000, "y2":4000 } }
# Per the recorded output, this resolves to the object sc4000/m_3_2.npy.
partition(matrix=matrix, x=1, y=0)

sc4000/m_3_2.npy


array([[5000, 5001, 5002, ..., 5997, 5998, 5999],
       [5001, 5002, 5003, ..., 5998, 5999, 6000],
       [5002, 5003, 5004, ..., 5999, 6000, 6001],
       ..., 
       [5997, 5998, 5999, ..., 6994, 6995, 6996],
       [5998, 5999, 6000, ..., 6995, 6996, 6997],
       [5999, 6000, 6001, ..., 6996, 6997, 6998]])

## Building a State Machine in Code

In [78]:
def task_state(resource, next_state=None):
    """Build the dict describing a ``Task`` state.

    Args:
        resource: Value stored under ``"Resource"``.
        next_state: Name of the following state. When omitted the state is
            marked with ``"End": True``; otherwise ``"End"`` is ``False``
            and ``"Next"`` holds the given name.

    Returns:
        The state definition as a plain dict.
    """
    is_last = next_state is None
    state = {"Type": "Task", "Resource": resource, "End": is_last}
    if not is_last:
        state["Next"] = next_state
    return state

In [79]:
def pass_state(next_state, result=None, resultPath=None, outputPath=None):
    """Build the dict describing a ``Pass`` state.

    Optional fields are only emitted when a value is supplied, matching
    ``parallel_state``. Emitting them as ``None`` would serialize to JSON
    ``null``, which Amazon States Language treats as an explicit "discard"
    for ``ResultPath`` and ``OutputPath`` rather than as "use the default".

    Args:
        next_state: Name of the following state (stored under ``"Next"``).
        result: Value stored under ``"Result"``; falsy values such as ``0``
            are kept, only ``None`` is omitted.
        resultPath: Value stored under ``"ResultPath"``, omitted if ``None``.
        outputPath: Value stored under ``"OutputPath"``, omitted if ``None``.

    Returns:
        The state definition as a plain dict.
    """
    state = {"Type": "Pass"}
    optional_fields = (
        ("Result", result),
        ("ResultPath", resultPath),
        ("OutputPath", outputPath),
    )
    for field, value in optional_fields:
        if value is not None:
            state[field] = value
    state["Next"] = next_state
    return state

In [114]:
def parallel_state(branches, next_state, resultPath=None, outputPath=None):
    """Build the dict describing a ``Parallel`` state.

    Args:
        branches: Value stored under ``"Branches"``.
        next_state: Name of the following state (stored under ``"Next"``).
        resultPath: Stored under ``"ResultPath"`` unless it is ``None``.
        outputPath: Stored under ``"OutputPath"`` unless it is ``None``.

    Returns:
        The state definition as a plain dict.
    """
    state = {"Type": "Parallel", "Next": next_state, "Branches": branches}
    for field, value in (("ResultPath", resultPath), ("OutputPath", outputPath)):
        if value is not None:
            state[field] = value
    return state

In [81]:
def branch(startAt, states):
    """Bundle a start-state name and a state mapping into one branch dict.

    Args:
        startAt: Stored under ``"StartAt"``.
        states: Stored under ``"States"``.

    Returns:
        A dict with exactly the keys ``"StartAt"`` and ``"States"``.
    """
    return dict(StartAt=startAt, States=states)

In [118]:
def create_strassen_sfn(
        unit="",
        intermediate_arn="arn:aws:lambda:eu-central-1:146904559692:function:mmultiply-prod-strassen-split-intermediate",
        collector_arn="arn:aws:lambda:eu-central-1:146904559692:function:mmultiply-prod-strassen-split-collector"):
    """Build the branch definition for one Strassen multiplication unit.

    The branch consists of three states:

    * ``unit<unit>``: a Pass state that stores ``unit`` under ``$.unit``.
    * ``U<unit>_Intermediate``: a Parallel state with seven branches. Each
      branch stores its index (0..6) under ``$.intermediate`` and then runs
      the intermediate Lambda.
    * ``U<unit>_Collect``: a terminal Task state running the collector
      Lambda.

    Args:
        unit: String identifying the unit; it is embedded in every state
            name and concatenated to ``"unit"``, so it must be a ``str``.
        intermediate_arn: Resource of the Task state in each of the seven
            branches. Defaults to the previously hard-coded ARN.
        collector_arn: Resource of the collecting Task state. Defaults to
            the previously hard-coded ARN.

    Returns:
        A branch dict (``StartAt``/``States``) as produced by ``branch``.
    """
    branches = []
    for index in range(0, 7):
        unit_m = "U{}_m{}".format(unit, index)  # e.g. "U0_m0"
        unit_m_lambda = "U{}_m{}_lambda".format(unit, index)  # e.g. "U0_m0_lambda"
        states = {
            unit_m: pass_state(next_state=unit_m_lambda, result=index, resultPath="$.intermediate", outputPath="$"),
            unit_m_lambda: task_state(resource=intermediate_arn)
        }
        branches.append(branch(states=states, startAt=unit_m))

    unit_name = "unit" + unit
    i_name = "U{}_Intermediate".format(unit)
    c_name = "U{}_Collect".format(unit)

    unit_setup = pass_state(next_state=i_name, result=unit, resultPath="$.unit", outputPath="$")
    intermediates = parallel_state(branches=branches, next_state=c_name, resultPath="$.responses", outputPath="$")
    collect = task_state(resource=collector_arn)

    states = {unit_name: unit_setup, i_name: intermediates, c_name: collect}
    return branch(startAt=unit_name, states=states)

In [119]:
# Assemble the top-level state machine: two Strassen units run in parallel
# ("Units") and their responses are handed to the accumulation Lambda
# ("Accumulate").
accumulation_arn = "arn:aws:lambda:eu-central-1:146904559692:function:mmultiply-prod-unit-accumulate"

unit_branches = []
for i in range(2):
    unit_branches.append(create_strassen_sfn(unit=str(i)))

split_states = {}
split_states["Accumulate"] = task_state(resource=accumulation_arn)
split_states["Units"] = parallel_state(
    branches=unit_branches,
    next_state="Accumulate",
    resultPath="$.responses",
    outputPath="$",
)
partial = branch(startAt="Units", states=split_states)

In [121]:
# Write the generated state machine definition to disk as JSON. The handle is
# not called `file`, which would shadow the Python 2 builtin of that name.
with open('/Users/Johannes/Uni/Master/Master Arbeit/repos/matrix-operations/state-machines/created.asl', 'w') as asl_file:
    json.dump(partial, asl_file)