Skip to content

Commit

Permalink
[SYSTEMDS-3195] Federated python tutorial fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Baunsgaard committed Nov 16, 2021
1 parent 4f3c1e8 commit 1f51b40
Show file tree
Hide file tree
Showing 6 changed files with 230 additions and 61 deletions.
29 changes: 29 additions & 0 deletions src/main/python/docs/source/code/federatedTutorial_part1.py
@@ -0,0 +1,29 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
# Python
import numpy as np
import os
# Create the directory that holds the data for the federated worker.
# exist_ok=True keeps the script re-runnable and avoids the
# check-then-create race of a separate isdir() test.
os.makedirs("temp", exist_ok=True)

# A small 3x3 matrix with the values 1..9, written as comma separated text.
a = np.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.savetxt("temp/test.csv", a, delimiter=",")

# Metadata file located next to the data file (same name plus ".mtd"),
# describing the format and the dimensions of test.csv.
with open("temp/test.csv.mtd", "w", encoding="utf-8") as mtd:
    mtd.write('{ "format":"csv", "header":false, "rows":3, "cols":3 }')
39 changes: 39 additions & 0 deletions src/main/python/docs/source/code/federatedTutorial_part2.py
@@ -0,0 +1,39 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
# Python
import numpy as np
from systemds.context import SystemDSContext

# Create a federated matrix
# Indicate the dimensions of the data held by the worker:
# Here the first list in the tuple is the top left coordinate,
# and the second the bottom right coordinate (exclusive) of the
# worker's fragment inside the federated matrix.
# Each coordinate is ordered as [row, col].
dims = ([0, 0], [3, 3])

# Specify the address + file path from worker:
address = "localhost:8001/temp/test.csv"

with SystemDSContext() as sds:
    # One address and one range: the whole matrix lives on a single worker.
    fed_a = sds.federated([address], [dims])
    # Sum the federated matrix and call compute to execute
    print(fed_a.sum().compute())
    # Result should be 45.
47 changes: 47 additions & 0 deletions src/main/python/docs/source/code/federatedTutorial_part3.py
@@ -0,0 +1,47 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
# Python
import numpy as np
from systemds.context import SystemDSContext

# One data file per federated worker; each worker listens on its own port.
addr1 = "localhost:8001/temp/test.csv"
addr2 = "localhost:8002/temp/test.csv"
addr3 = "localhost:8003/temp/test.csv"

# Create a single federated matrix from three federated environments.
# The ranges are given as ([row, col], [row, col]) corners, so the three
# 3x3 fragments are stacked on top of each other into a 9x3 matrix.

with SystemDSContext() as sds:
    # federated data on three locations
    fed = sds.federated([addr1, addr2, addr3], [
        ([0, 0], [3, 3]),
        ([3, 0], [6, 3]),
        ([6, 0], [9, 3])])
    # local 3x9 matrix to multiply with
    loc = sds.from_numpy(np.array([
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9]
    ]))
    # Matrix multiply local (3x9) and federated (9x3)
    ret = loc @ fed
    # execute the lazy script and print
    print(ret.compute())
44 changes: 44 additions & 0 deletions src/main/python/docs/source/code/federatedTutorial_part3_old.py
@@ -0,0 +1,44 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
# Python
import numpy as np
from systemds.context import SystemDSContext

# Data file locations, one per federated worker port.
addr1 = "localhost:8001/temp/test.csv"
addr2 = "localhost:8002/temp/test.csv"
addr3 = "localhost:8003/temp/test.csv"

# Both federated matrices share the same layout: two 3x3 fragments,
# each one served by a different worker.
# NOTE(review): with [row, col] ordered corners the second fragment starts
# at column 3, i.e. the fragments sit side by side (3x6) - confirm.
ranges = [([0, 0], [3, 3]), ([0, 3], [3, 6])]

with SystemDSContext() as sds:
    # Workers 1 and 2 back the first matrix, workers 1 and 3 the second.
    fed_a = sds.federated([addr1, addr2], ranges)
    fed_b = sds.federated([addr1, addr3], ranges)

    # Element-wise product; nothing runs until compute() is called.
    product = fed_a * fed_b
    res = product.compute()

print(res)
55 changes: 55 additions & 0 deletions src/main/python/docs/source/code/federatedTutorial_part3_old2.py
@@ -0,0 +1,55 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
# Python
import numpy as np
from systemds.context import SystemDSContext

# Data file locations, one per federated worker port.
addr1 = "localhost:8001/temp/test.csv"
addr2 = "localhost:8002/temp/test.csv"
# addr3 is not used by this example.
addr3 = "localhost:8003/temp/test.csv"

# Compare matrix multiplication on federated, local and mixed operands.
# Each federated matrix is a single 3x3 fragment held by one worker.

with SystemDSContext() as sds:

    fed_a = sds.federated([addr1], [([0, 0], [3, 3])])
    fed_b = sds.federated([addr2], [([0, 0], [3, 3])])

    # Local counterparts with the same values the workers hold.
    np_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    loc_a = sds.from_numpy(np_array)
    loc_b = sds.from_numpy(np_array)

    # Purely federated and purely local products.
    fed_res = fed_a @ fed_b
    loc_res = loc_a @ loc_b

    # Mixed products: federated @ local and local @ federated.
    hybrid_res_1 = fed_a @ loc_b
    hybrid_res_2 = loc_a @ fed_b

    # compute and print
    print(fed_a.compute())
    print(fed_b.compute())
    print(fed_res.compute(verbose=True))
    print(loc_res.compute(verbose=True))
    print(hybrid_res_1.compute())
    # Print the second hybrid result; it was previously built but never
    # shown because hybrid_res_1 was printed twice.
    print(hybrid_res_2.compute())
77 changes: 16 additions & 61 deletions src/main/python/docs/source/guide/federated.rst
Expand Up @@ -32,12 +32,12 @@ Start Federated worker
To start a federated worker, you first have to set up your environment variables.
A simple guide to do this is in the SystemDS Repository_.

.. _Repository: https://github.com/apache/systemds/tree/master/bin/
.. _Repository: https://github.com/apache/systemds/tree/main/bin/

If that is set up correctly, simply start a worker using the following command.
Here ``8001`` refers to the port used by the worker.

.. code-block:: python
.. code-block::
systemds WORKER 8001
Expand All @@ -47,45 +47,22 @@ Simple Aggregation Example
In this example we use a single federated worker, and aggregate the sum of its data.

First we need to create some data for our federated worker to use.
In this example we simply use Numpy to create a ``test.csv`` file

.. code-block:: python
# Import numpy
import numpy as np
a = np.asarray([[1,2,3], [4,5,6], [7,8,9]])
np.savetxt("temp/test.csv", a, delimiter=",")
In this example we simply use Numpy to create a ``test.csv`` file.

Currently we also require a metadata file for the federated worker.
This should be located next to the ``test.csv`` file called ``test.csv.mtd``.
To make this simply execute the following::
To make both the data and metadata, simply execute the following:

echo '{ "format":"csv", "header":false, "rows":3, "cols":3 }' > temp/test.csv.mtd
.. include:: ../code/federatedTutorial_part1.py
:start-line: 20
:code: python

After creating our data we the federated worker becomes able to execute federated instructions.
After creating our data the federated worker becomes able to execute federated instructions.
The aggregated sum using federated instructions in Python SystemDS is done as follows:

.. code-block:: python
# Import numpy and SystemDS
import numpy as np
from systemds.context import SystemDSContext
# Create a federated matrix
## Indicate the dimensions of the data:
### Here the first list in the tuple is the top left Coordinate,
### and the second the bottom left coordinate.
### It is ordered as [col,row].
dims = ([0,0], [3,3])
## Specify the address + file path from worker:
address = "localhost:8001/temp/test.csv"
with SystemDSContext() as sds:
fed_a = sds.federated([address], [dims])
# Sum the federated matrix and call compute to execute
print(fed_a.sum().compute())
# Result should be 45.
.. include:: ../code/federatedTutorial_part2.py
:start-line: 20
:code: python

Multiple Federated Environments
-------------------------------
Expand All @@ -96,43 +73,21 @@ Using the data created from the last example we can simulate
multiple federated workers by starting multiple ones on different ports.
Start with 3 different terminals, and run one federated environment in each.

.. code-block:: python
.. code-block::
systemds WORKER 8001
systemds WORKER 8002
systemds WORKER 8003
Once all three workers are up and running we can leverage all three in the following example

.. code-block:: python
import numpy as np
from systemds.context import SystemDSContext
addr1 = "localhost:8001/temp/test.csv"
addr2 = "localhost:8002/temp/test.csv"
addr3 = "localhost:8003/temp/test.csv"
# Create a federated matrix using two federated environments
# Note that the two federated matrices are stacked on top of each other
with SystemDSContext() as sds:
fed_a = sds.federated(
[addr1, addr2],
[([0,0], [3,3]), ([0,3], [3,6])])
fed_b = sds.federated(
[addr1, addr3],
[([0,0], [3,3]), ([0,3], [3,6])])
# Multiply, compute and print.
res = (fed_a * fed_b).compute()
print(res)
.. include:: ../code/federatedTutorial_part3.py
:start-line: 20
:code: python

The print should look like

.. code-block:: python
.. code-block::
[[ 1. 4. 9. 1. 4. 9.]
[16. 25. 36. 16. 25. 36.]
Expand Down

0 comments on commit 1f51b40

Please sign in to comment.