## Dot product
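This is the standard way to compute a dot product in parallel: each process computes the dot product of its own local portion of the two vectors, and the partial results are then summed across all processes with an MPI reduction.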

In [1]:
n = 4 # number of processes
from ipyparallel import Cluster
cluster = await Cluster(engines="mpi").start_and_connect(n=n, activate=True)

Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>


  0%|          | 0/4 [00:00<?, ?engine/s]

In [2]:
%%px
from mpi4py import MPI
import numpy as np

# Every engine runs this cell; rank identifies the engine, size is the total count.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Global length of the vectors x and y.
n = 1000

# Block distribution of the n entries over the ranks. Plain `n // size` would
# silently drop the last `n % size` entries when n is not a multiple of size,
# so the first `n % size` ranks each take one extra entry. The local sizes
# then always add up to exactly n.
base_count, remainder = divmod(n, size)
n_local = base_count + (1 if rank < remainder else 0)

# Local pieces of the global vectors: x is all 2s, y is all 4s (float64).
x_local = np.full(n_local, 2.0, dtype='d')
y_local = np.full(n_local, 4.0, dtype='d')

# Partial dot product over the entries owned by this rank.
local_dot = np.dot(x_local, y_local)

# Sum the partial results over all ranks; allreduce hands the total back to
# every rank, so each one can print the global value.
global_dot = comm.allreduce(local_dot, op=MPI.SUM)

print(f"Rank {rank}: local_dot = {local_dot}, global_dot = {global_dot}")

[stdout:1] Rank 1: local_dot = 2000.0, global_dot = 8000.0


[stdout:0] Rank 0: local_dot = 2000.0, global_dot = 8000.0


[stdout:2] Rank 2: local_dot = 2000.0, global_dot = 8000.0


[stdout:3] Rank 3: local_dot = 2000.0, global_dot = 8000.0
