# Getting Started with the NDN Distributed Processing Engine

Note: you may want to run this in a venv or Conda environment.

### Get Dependencies

In [1]:
# Fetch all git submodules, initializing any that are missing and recursing into nested ones
!git submodule update --init --recursive

Note: Installing local packages should automatically install pip dependencies such as `python-ndn`. 

In [38]:
# Install packages: from the parent directory, run `pip install` on every
# directory found under ./pkg, passing --find-links=./pkg to pip.
# NOTE(review): this uses whichever `pip` bash finds on PATH -- confirm it belongs
# to the same environment as the notebook kernel.
!bash -c 'cd ..; for dir in ./pkg/*/; do [ -d "$dir" ] && pip install --find-links=./pkg "$dir"; done'

Looking in links: ./pkg
Processing ./pkg/ndn_compute
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: ndn-compute
  Building wheel for ndn-compute (pyproject.toml) ... done
  Created wheel for ndn-compute: filename=ndn_compute-0.1-py3-none-any.whl size=24533 sha256=604acb0486aa315133db45b4fd1c7793ae00716812be17fdebde2c525ce7ec21
  Stored in directory: /private/var/folders/x8/vn51j39n4ggf0j7pk0zk6vjh0000gn/T/pip-ephem-wheel-cache-g_rgqnc2/wheels/a5/b5/9c/2f295dc3901a3c993c666f8c266e58a95f2800ba25612473db
Successfully built ndn-compute
Installing collected packages: ndn-compute
  Attempting uninstall: ndn-compute
    Found existing installation: ndn-compute 0.1
    Uninstalling ndn-compute-0.1:
      Successfully uninstalled ndn-compute-0.1
Successfully installed ndn-compute-0.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m

### Security Setup

In [7]:
import os
import shutil
from ndn_compute_key_creator import create_keys

# (Re)create the key material unless all three expected sub-directories already exist.
if not all(os.path.isdir(f'../sec_data/{name}') for name in ('certs', 'driver', 'worker')):
    # A partially populated ../sec_data tree is removed before new keys are created.
    if os.path.isdir('../sec_data'):
        shutil.rmtree('../sec_data')

    create_keys('../sec_data/')

### Generate Data

In [8]:
from ndn_compute_jsonl_generator import generate_large_jsonl
from ndn_compute_fs_creator import create_fs_from_directory

In [9]:
# Generate flat files
!mkdir -p ../generated_data/flat/appA
!mkdir -p ../generated_data/flat/appB

generate_large_jsonl(filename='../generated_data/flat/appA/events.log.jsonl', target_size_mb=200)
generate_large_jsonl(filename='../generated_data/flat/appB/events.log.jsonl', target_size_mb=500)

Generating JSONL file of approximately 200MB...
Progress: 22.76% complete
Records written: 100,000
Current file size: 45.51MB
Progress: 45.51% complete
Records written: 200,000
Current file size: 91.03MB
Progress: 68.27% complete
Records written: 300,000
Current file size: 136.55MB
Progress: 91.03% complete
Records written: 400,000
Current file size: 182.06MB

File generation complete!
Final file size: 200.00MB
Total records written: 439,417
Generating JSONL file of approximately 500MB...
Progress: 9.10% complete
Records written: 100,000
Current file size: 45.51MB
Progress: 18.21% complete
Records written: 200,000
Current file size: 91.03MB
Progress: 27.31% complete
Records written: 300,000
Current file size: 136.55MB
Progress: 36.41% complete
Records written: 400,000
Current file size: 182.06MB
Progress: 45.52% complete
Records written: 500,000
Current file size: 227.58MB
Progress: 54.62% complete
Records written: 600,000
Current file size: 273.10MB
Progress: 63.72% complete
Records w

In [10]:
# Distribute files into a toy distributed filesystem

!mkdir -p ../generated_data/distributed
create_fs_from_directory(in_dir="../generated_data/flat",
                         out_dir="../generated_data/distributed",
                         num_partitions=2,
                         num_copies=1,
                         chunk_size=64
                         )

../generated_data/flat/appB/events.log.jsonl
../generated_data/flat/appA/events.log.jsonl


### Starting the cluster

Please run `docker-compose up` in another terminal (from the `ndn-compute` repository root) so that you can see the stdout output in the foreground.

In [1]:
# Start the cluster from a separate terminal rather than from this cell:
# !docker-compose up

# List the compose services to make sure your cluster is running
!docker-compose ps

NAME      IMAGE                 COMMAND                  SERVICE   CREATED          STATUS          PORTS
driver1   ndn-compute-driver    "python -m ndn_compu…"   driver    43 seconds ago   Up 42 seconds   0.0.0.0:5214->5214/tcp, :::5214->5214/tcp
nfd1      ndn-compute-nfd       "/usr/bin/nfd --conf…"   nfd       24 hours ago     Up 42 seconds   6363/tcp, 9696/tcp, 6363/udp
worker1   ndn-compute-worker1   "python -m ndn_compu…"   worker1   43 seconds ago   Up 42 seconds   
worker2   ndn-compute-worker2   "python -m ndn_compu…"   worker2   43 seconds ago   Up 42 seconds   


IMPORTANT: You should see a driver, an NFD, and one or more workers, all with status `Up`.

### Using the engine

In [2]:
from ndn_compute_client import NdnComputeClient

In [3]:
# Port 5214 is the one the driver container publishes (see the `docker-compose ps` output)
driver_url = 'http://localhost:5214'
client = NdnComputeClient(driver_url)

In [4]:
# Smoke test: a trivial addition to make sure the driver and worker are talking to each other
client.add(8, 9)

17

In [17]:
# Test to make sure result store and large transfers are working
import zlib

random_bytes = client.urandom()  # This may take a while
client_side_crc = zlib.crc32(random_bytes)
# Does it match what was computed worker-side? (See docker logs)
print(client_side_crc)

3354918877


In [None]:
# Create dataset from remote file
dataset = client.create_dataset("appA/events.log.jsonl")

# First pass: transform the "id" field to its length, then cache to materialize results
dataset = dataset.transform({"id": lambda value: len(value)}).cache()

# Second pass: add one to the transformed "id", then cache again
# (materializes results, but using previous cache)
dataset = dataset.transform({"id": lambda value: value + 1}).cache()

[[10, 10, 10, 10]]
[[10, 10, 10, 10]]
