In [1]:
# Legacy import path (IPython < 4, before ipyparallel became its own package): from IPython.parallel import Client
from ipyparallel import Client 

# Start the engines from a terminal before running this cell:  ipcluster start -n 4 --profile=mpi
# (the outputs recorded below were produced with 2 engines)

c = Client(profile='mpi')
%pxconfig --block
print ( c.ids ) 

c[:].apply_sync(lambda : "Hello, World")

[0, 1]


['Hello, World', 'Hello, World']

In [2]:
%%px 

import sys
sys.stdout.flush()

import numpy as np
from mpi4py import MPI
from math   import ceil 

# Module-level MPI handles used as globals by every function defined in the
# %%px cells below: the world communicator, the number of ranks in it, and
# this engine's own rank.
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

# One line per engine: "<number of ranks> <own rank>".
print (size, rank)

[stdout:0] 2 0
[stdout:1] 2 1


In [3]:
%%px

def scatter_int (N):
    """Split ``N`` items as evenly as possible over all ranks.

    The partition follows ``np.array_split`` semantics: the first
    ``N % size`` ranks receive ``N // size + 1`` items, the remaining ranks
    receive ``N // size``.  The per-rank sizes are computed on rank 0 only and
    handed out with ``comm.scatter``, so the value of ``N`` on rank 0 is the
    one that counts.

    Parameters
    ----------
    N : int
        Total number of items to distribute (must be >= 0).

    Returns
    -------
    numpy.int32
        Number of items assigned to the calling rank.

    Raises
    ------
    ValueError
        If ``N`` is negative.
    """
    if N < 0:
        raise ValueError("N must be non-negative")

    chunk_sizes = None
    if rank == 0:
        # Pure arithmetic: no need to allocate an N-element array just to
        # measure the lengths of its chunks.
        base, extra = divmod(N, size)
        chunk_sizes = np.full(size, base, dtype=np.int32)
        chunk_sizes[:extra] += 1

    # The send object is only significant on the root rank.
    return comm.scatter(chunk_sizes, root=0)

# for N in [5,6,7,8,9]: 
#     N_loc = scatter_int (N)
#     print (N_loc)

In [4]:
%%px 

def gen_array (dtype = np.int32, dim = 1, N1=1, N2 = 1, N3 = 1, N4 = 1 ):
    """Build a deterministic test array on rank 0.

    Element values depend only on the element's indices and the extents:

    * ``dim == 1``: ``v[ix] = ix``
    * ``dim == 2``: ``v[ix, iy] = ix + N2 * iy``
    * ``dim == 3``: ``v[ix, iy, iz] = iy + N2 * ix + N3 * iz``
    * ``dim == 4``: ``v[ix, iy, iz, it] = iy + N2 * ix + N3 * iz + N4 * it``

    Parameters
    ----------
    dtype : numpy dtype
        Element type of the returned array.
    dim : int
        Number of dimensions (1 to 4).
    N1, N2, N3, N4 : int
        Extents along axes 0..3; only the first ``dim`` of them are used.

    Returns
    -------
    numpy.ndarray or None
        The array of shape ``(N1, ..)[:dim]`` on rank 0; ``None`` on every
        other rank, and ``None`` as well when ``dim`` is not 1, 2, 3 or 4.
    """
    if rank != 0:
        return None

    if dim == 1:
        return np.arange(N1, dtype=dtype)

    # np.ogrid yields one broadcastable index vector per axis, so the formulas
    # below are evaluated for the whole array at once instead of element by
    # element in nested Python loops.
    if dim == 2:
        ix, iy = np.ogrid[:N1, :N2]
        values = ix + N2 * iy
    elif dim == 3:
        ix, iy, iz = np.ogrid[:N1, :N2, :N3]
        values = iy + N2 * ix + N3 * iz
    elif dim == 4:
        ix, iy, iz, it = np.ogrid[:N1, :N2, :N3, :N4]
        values = iy + N2 * ix + N3 * iz + N4 * it
    else:
        return None

    return values.astype(dtype)


In [5]:
%%px 

# Demo of the generator: with dim = 2 only N1 and N2 determine the result;
# N3 and N4 are accepted but not used by gen_array for this dimensionality.
# Rank 0 prints the 3 x 7 array, every other rank prints None.
v = gen_array (dim = 2, N1 = 3, N2= 7 , N3=4 , N4= 4,  dtype=np.int32 ) 
print (v)

[stdout:0] 
[[ 0  7 14 21 28 35 42]
 [ 1  8 15 22 29 36 43]
 [ 2  9 16 23 30 37 44]]
[stdout:1] None


In [6]:
%%px 

# ---------------------------------------------------------------------------
# Row-wise (axis 0) distribution of NumPy arrays over MPI ranks.
#
# All public scatter_/gather_/allgather_ functions share the three private
# helpers below.  The data is always partitioned along the first axis; the
# trailing axes are kept whole, so the same code serves 1D to 4D arrays.
# ---------------------------------------------------------------------------

def _mpi_datatype(dtype):
    """Return ``(numpy dtype, MPI datatype)`` for a supported element type.

    Only ``np.float64`` (sent as ``MPI.DOUBLE``) and ``np.int32`` (sent as
    ``MPI.INT``) are supported.  Anything else raises ``TypeError`` instead of
    silently skipping the communication.
    """
    np_dtype = np.dtype(dtype)
    if np_dtype == np.float64:
        return np_dtype, MPI.DOUBLE
    if np_dtype == np.int32:
        return np_dtype, MPI.INT
    raise TypeError("unsupported dtype %s: expected np.float64 or np.int32" % np_dtype)


def _counts_and_displacements(rows_per_rank, row_elems):
    """Translate per-rank row counts into element counts and offsets.

    Both results are integer arrays: MPI counts/displacements are integers,
    and passing floating point values relies on implicit conversion.
    """
    counts = np.asarray(rows_per_rank, dtype=int) * row_elems
    displacements = np.concatenate(([0], np.cumsum(counts)[:-1]))
    return counts, displacements


def _scatter_rows(vec_global, dtype):
    """Scatter ``vec_global`` (significant on rank 0 only) along axis 0."""
    np_dtype, mpi_type = _mpi_datatype(dtype)

    sendbuf = counts = displacements = layout = None
    if rank == 0 and vec_global is not None:
        # Make sure the buffer really holds contiguous elements of the type
        # announced to MPI.
        sendbuf = np.ascontiguousarray(vec_global, dtype=np_dtype)
        trailing = sendbuf.shape[1:]
        # Same partition as np.array_split(..., size, axis=0): the first
        # `extra` ranks get one additional row.
        base, extra = divmod(sendbuf.shape[0], size)
        rows_per_rank = [base + 1] * extra + [base] * (size - extra)
        counts, displacements = _counts_and_displacements(
            rows_per_rank, int(np.prod(trailing, dtype=int)))
        layout = (rows_per_rank, trailing)

    # Only the layout (a few integers) is broadcast, not the data itself.
    layout = comm.bcast(layout, root=0)
    if layout is None:
        # Raised on every rank, so no rank is left waiting in a collective.
        raise ValueError("rank 0 must supply the array to scatter")

    rows_per_rank, trailing = layout
    vec_local = np.zeros((rows_per_rank[rank],) + tuple(trailing), dtype=np_dtype)
    comm.Scatterv([sendbuf, counts, displacements, mpi_type], vec_local, root=0)
    return vec_local


def _gather_rows(vec_local, dtype, on_all_ranks):
    """Concatenate the local blocks along axis 0 on rank 0 or on all ranks."""
    np_dtype, mpi_type = _mpi_datatype(dtype)
    sendbuf = np.ascontiguousarray(vec_local, dtype=np_dtype)
    trailing = sendbuf.shape[1:]

    # A single allgather gives every rank the full list of row counts; the
    # global row count is simply their sum.
    rows_per_rank = comm.allgather(sendbuf.shape[0])
    counts, displacements = _counts_and_displacements(
        rows_per_rank, int(np.prod(trailing, dtype=int)))

    if on_all_ranks or rank == 0:
        vec_global = np.zeros((sum(rows_per_rank),) + trailing, dtype=np_dtype)
    else:
        vec_global = None

    recv_spec = [vec_global, counts, displacements, mpi_type]
    # No explicit Barrier: the collective itself provides the needed ordering.
    if on_all_ranks:
        comm.Allgatherv(sendbuf, recv_spec)
    else:
        comm.Gatherv(sendbuf, recv_spec, root=0)
    return vec_global


def scatter_1D_array (vec_global,dtype = np.float64):
    """Distribute a 1D array from rank 0 over all ranks.

    Parameters
    ----------
    vec_global : array_like or None
        Full array on rank 0 (converted to ``dtype`` if necessary); ignored
        on the other ranks.
    dtype : np.float64 or np.int32
        Element type used for the transfer and for the result.

    Returns
    -------
    numpy.ndarray
        The contiguous block of entries owned by the calling rank.

    Raises
    ------
    TypeError
        If ``dtype`` is neither float64 nor int32.
    ValueError
        If rank 0 passes ``None``.
    """
    return _scatter_rows(vec_global, dtype)


def gather_1D_array ( vec_local,dtype = np.float64 ):
    """Reassemble the per-rank 1D blocks on rank 0.

    Returns the concatenated array on rank 0 and ``None`` on all other ranks.
    Raises ``TypeError`` if ``dtype`` is neither float64 nor int32.
    """
    return _gather_rows(vec_local, dtype, on_all_ranks=False)


def allgather_1D_array ( vec_local, dtype = np.float64 ):
    """Reassemble the per-rank 1D blocks on every rank.

    Returns the concatenated array on all ranks.
    Raises ``TypeError`` if ``dtype`` is neither float64 nor int32.
    """
    return _gather_rows(vec_local, dtype, on_all_ranks=True)


def scatter_2D_array ( vec_global, dtype=np.float64 ):
    """Distribute a 2D array from rank 0 over all ranks, split by rows.

    ``vec_global`` is significant on rank 0 only.  Each rank receives an
    array of shape ``(local_rows, N2)`` with element type ``dtype``
    (float64 or int32, otherwise ``TypeError``).
    """
    return _scatter_rows(vec_global, dtype)


def gather_2D_array ( vec_local , dtype=np.float64):
    """Stack the per-rank 2D blocks along axis 0 on rank 0.

    Returns the full array on rank 0 and ``None`` on all other ranks.
    """
    return _gather_rows(vec_local, dtype, on_all_ranks=False)


def allgather_2D_array ( vec_local , dtype=np.float64):
    """Stack the per-rank 2D blocks along axis 0 on every rank."""
    return _gather_rows(vec_local, dtype, on_all_ranks=True)


def scatter_3D_array ( vec_global, dtype = np.float64 ):
    """Distribute a 3D array from rank 0 over all ranks, split along axis 0.

    ``vec_global`` is significant on rank 0 only.  Each rank receives an
    array of shape ``(local_rows, N2, N3)`` with element type ``dtype``
    (float64 or int32, otherwise ``TypeError``).
    """
    return _scatter_rows(vec_global, dtype)


def gather_3D_array ( vec_local, dtype = np.float64  ):
    """Stack the per-rank 3D blocks along axis 0 on rank 0.

    Returns the full array on rank 0 and ``None`` on all other ranks.
    """
    return _gather_rows(vec_local, dtype, on_all_ranks=False)


def allgather_3D_array ( vec_local , dtype = np.float64  ):
    """Stack the per-rank 3D blocks along axis 0 on every rank."""
    return _gather_rows(vec_local, dtype, on_all_ranks=True)


def scatter_4D_array ( vec_global, dtype = np.float64 ):
    """Distribute a 4D array from rank 0 over all ranks, split along axis 0.

    ``vec_global`` is significant on rank 0 only.  Each rank receives an
    array of shape ``(local_rows, N2, N3, N4)`` with element type ``dtype``
    (float64 or int32, otherwise ``TypeError``).
    """
    return _scatter_rows(vec_global, dtype)


def gather_4D_array ( vec_local, dtype = np.float64 ):
    """Stack the per-rank 4D blocks along axis 0 on rank 0.

    Returns the full array on rank 0 and ``None`` on all other ranks.
    """
    return _gather_rows(vec_local, dtype, on_all_ranks=False)


def allgather_4D_array ( vec_local, dtype = np.float64 ):
    """Stack the per-rank 4D blocks along axis 0 on every rank."""
    return _gather_rows(vec_local, dtype, on_all_ranks=True)




In [7]:
%%px

# 1D -- Scatter, Gather, Allgather - np.int32 VERSION 
# Round trip: the input exists on rank 0 only, is scattered along axis 0 and
# then reassembled on rank 0 (gather) and on every rank (allgather).
vec_in_glob      = gen_array            ( dim = 1, N1 = 7, dtype = np.int32 ) 
vec_local        = scatter_1D_array     ( vec_in_glob,     dtype = np.int32 ) 
vec_out_glob     = gather_1D_array      ( vec_local,       dtype = np.int32 ) 
vec_out_glob_all = allgather_1D_array   ( vec_local,       dtype = np.int32 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 

[stdout:0] 
True
True
[stdout:1] 
True
False


In [8]:
%%px

# 1D -- Scatter, Gather, Allgather - np.float64 VERSION 
# Round trip: the input exists on rank 0 only, is scattered along axis 0 and
# then reassembled on rank 0 (gather) and on every rank (allgather).
vec_in_glob      = gen_array            ( dim = 1, N1 = 7, dtype = np.float64 ) 
vec_local        = scatter_1D_array     ( vec_in_glob,     dtype = np.float64 ) 
vec_out_glob     = gather_1D_array      ( vec_local,       dtype = np.float64 ) 
vec_out_glob_all = allgather_1D_array   ( vec_local,       dtype = np.float64 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 

[stdout:0] 
True
True
[stdout:1] 
True
False


In [9]:
%%px

# 2D -- Scatter, Gather, Allgather - np.float64 VERSION 
# Round trip: the input exists on rank 0 only, is scattered along axis 0 and
# then reassembled on rank 0 (gather) and on every rank (allgather).
vec_in_glob      = gen_array    ( dim = 2, N1 = 7, N2 = 5, dtype = np.float64 ) 
vec_local        = scatter_2D_array     ( vec_in_glob,     dtype = np.float64 ) 
vec_out_glob     = gather_2D_array      ( vec_local,       dtype = np.float64 ) 
vec_out_glob_all = allgather_2D_array   ( vec_local,       dtype = np.float64 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 


[stdout:0] 
True
True
[stdout:1] 
True
False


In [10]:
%%px

# 2D -- Scatter, Gather, Allgather - np.int32 VERSION 
# Round trip: the input exists on rank 0 only, is scattered along axis 0 and
# then reassembled on rank 0 (gather) and on every rank (allgather).
vec_in_glob      = gen_array    ( dim = 2, N1 = 7, N2 = 5, dtype = np.int32 ) 
vec_local        = scatter_2D_array     ( vec_in_glob,     dtype = np.int32 ) 
vec_out_glob     = gather_2D_array      ( vec_local,       dtype = np.int32 ) 
vec_out_glob_all = allgather_2D_array   ( vec_local,       dtype = np.int32 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 



[stdout:0] 
True
True
[stdout:1] 
True
False


In [11]:
%%px

# 3D -- Scatter, Gather, Allgather - np.float64 VERSION 
# Round trip: the input exists on rank 0 only, is scattered along axis 0 and
# then reassembled on rank 0 (gather) and on every rank (allgather).
vec_in_glob      = gen_array    ( dim = 3, N1 = 7, N2 = 5, N3 = 5, dtype = np.float64 ) 
vec_local        = scatter_3D_array             ( vec_in_glob,     dtype = np.float64 ) 
vec_out_glob     = gather_3D_array              ( vec_local,       dtype = np.float64 ) 
vec_out_glob_all = allgather_3D_array           ( vec_local,       dtype = np.float64 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 



[stdout:0] 
True
True
[stdout:1] 
True
False


In [12]:
%%px

# 3D -- Scatter, Gather, Allgather - np.int32 VERSION 
# Round trip: the input exists on rank 0 only, is scattered along axis 0 and
# then reassembled on rank 0 (gather) and on every rank (allgather).
vec_in_glob      = gen_array    ( dim = 3, N1 = 7, N2 = 5, N3 = 5, dtype = np.int32 ) 
vec_local        = scatter_3D_array     ( vec_in_glob,     dtype = np.int32 ) 
vec_out_glob     = gather_3D_array      ( vec_local,       dtype = np.int32 ) 
vec_out_glob_all = allgather_3D_array   ( vec_local,       dtype = np.int32 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 

[stdout:0] 
True
True
[stdout:1] 
True
False


In [13]:
%%px

# 4D -- Scatter, Gather, Allgather - np.float64 VERSION 
# Round trip: the input exists on rank 0 only, is scattered along axis 0 and
# then reassembled on rank 0 (gather) and on every rank (allgather).
vec_in_glob      = gen_array    ( dim = 4, N1 = 7, N2 = 5, N3 = 5, N4 = 3, dtype = np.float64 ) 
vec_local        = scatter_4D_array             ( vec_in_glob,     dtype = np.float64 ) 
vec_out_glob     = gather_4D_array              ( vec_local,       dtype = np.float64 ) 
vec_out_glob_all = allgather_4D_array           ( vec_local,       dtype = np.float64 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 


[stdout:0] 
True
True
[stdout:1] 
True
False


In [18]:
%%px

# 4D -- Scatter, Gather, Allgather - np.int32 VERSION 
# Same round trip as the cells above, on a larger input
# (100 x 100 x 50 x 3 elements) that exists on rank 0 only.
vec_in_glob      = gen_array    ( dim = 4, N1 = 100, N2 = 100, N3 = 50, N4 = 3, dtype = np.int32 ) 
# print (vec_in_glob)
vec_local        = scatter_4D_array             ( vec_in_glob,     dtype = np.int32 ) 
vec_out_glob     = gather_4D_array              ( vec_local,       dtype = np.int32 ) 
vec_out_glob_all = allgather_4D_array           ( vec_local,       dtype = np.int32 ) 
# print (vec_in_glob)
# print (vec_local)
# print (vec_out_glob)
# Off-root both operands are None (gen_array and the gather return None
# there); the recorded output shows this comparison as True on every rank.
print ( np.array_equal (vec_in_glob, vec_out_glob) ) 
print ( np.array_equal (vec_in_glob, vec_out_glob_all) ) # True only on root: vec_in_glob is None on the other ranks 



[stdout:0] 
True
True
[stdout:1] 
True
False


In [15]:
%%px
# Axis-swap demo on a small 4D array of shape (1, 2, 3, 4).
v      = gen_array    ( dim = 4, N1 = 1, N2 =2, N3 = 3, N4 = 4, dtype = np.int32 ) 
# v is an array on rank 0 only (gen_array returns None elsewhere), hence the guard.
if rank ==0 :
    print(v.shape)
    # Exchange axes 1 and 3: shape (1, 2, 3, 4) becomes (1, 4, 3, 2).
    v1 = np.swapaxes (v,1,3)
    print(v1.shape)



[stdout:0] 
(1, 2, 3, 4)
(1, 4, 3, 2)
