한국전산유체공학회 제 14차 CFD 단기강좌 (2024.05.29.-2024.05.30.)

파이선 병렬프로그래밍: 수치 해석 예제 실습
===================================================


### 한국과학기술정보연구원 강지훈

***

### 필요 패키지

  - mpi4py
  - numpy
  - random
  - scikit-learn
  - matplotlib

***


# 1. 벡터와 행렬 연산

## 1.1. 행렬/벡터 만들기

In [47]:
import numpy as np

n = 10

A = np.random.rand(n, n)
B = np.random.rand(n, n)
v = np.random.rand(n)
w = np.random.rand(n)

np.save("A", A)
np.save("B", B)
np.save("v", v)
np.save("w", w)



## 1.2. 벡터 내적

1. 순차코드
   
   <img src = "images/image01.png">

2. 병렬코드 - 등분할

  <img src = "images/image02.png">

In [48]:
%%writefile v.py
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if rank == 0:
    v = np.load("v.npy")
    w = np.load("w.npy")
    n = v.size
else :
    v = None
    w = None
    n = 0

n = comm.bcast(n, root = 0)


##### n_row 크기 정하기 
n_row = int(n / size)# FIX ME

v_row = np.empty(n_row, dtype = np.float64)
w_row = np.empty(n_row, dtype = np.float64)

##### Scatter 함수 호출
comm.Scatter(v, v_row, root = 0) # FIX ME
comm.Scatter(w, w_row, root = 0) # FIX ME

##### 프로세스별 Local sum 
s = np.dot(v_row,w_row) # FIX ME

##### reduce를 이용한 Global sum
s_all = comm.allreduce(s, MPI.SUM) # FIX ME

#if rank == 1:
print(rank, s_all)


Overwriting v.py


In [49]:
! mpiexec -np 2 python v.py

0 2.2197261999963946
1 2.2197261999963946


3. 병렬코드 - 비등분할

    <img src = "images/image03.png">

In [50]:
%%writefile v_var.py
import numpy as np
from mpi4py import MPI

def para_range(n, size, rank) :
    iwork = divmod(n, size) 
    ista = rank * iwork[0] + min(rank, iwork[1])
    iend = ista + iwork[0] - 1
    if iwork[1] > rank :
        iend = iend + 1
    return ista, iend

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if rank == 0:
    v = np.load("v.npy")
    w = np.load("w.npy")
    n = v.size
else :
    v = None
    w = None
    n = 0

n = comm.bcast(n, root = 0)

##### 프로세스별 범위 할당
ista, iend = para_range(n, size, rank) # FIX ME
n_row =  (iend - ista + 1) # FIX ME

n_rows = comm.gather(n_row, root = 0)

v_row = np.empty(n_row, dtype = np.float64)
w_row = np.empty(n_row, dtype = np.float64)

##### Scatter
comm.Scatterv((v, n_rows), v_row, root = 0) #FIX ME
comm.Scatterv((w, n_rows), w_row, root = 0) #FIX ME

s = np.dot(v_row,w_row)

##### reduce를 이용한 Global sum
s_all = comm.reduce(s, MPI.SUM, root = 0) #FIX ME

if rank == 0:
    print(n_rows)
    print(s_all)


Overwriting v_var.py


In [51]:
! mpirun -np 3 python v_var.py

[4, 3, 3]
2.2197261999963946


4. para_range 저장

In [52]:
%%writefile tools.py

def para_range(n, size, rank) :
    iwork = divmod(n, size) 
    ista = rank * iwork[0] + min(rank, iwork[1])
    iend = ista + iwork[0] - 1
    if iwork[1] > rank :
        iend = iend + 1
    return ista, iend


Overwriting tools.py


## 1.3. 행렬-벡터곱

1. 순차코드
   
    <img src = "images/image04.png">

In [53]:
A = np.load("A.npy")
v = np.load("v.npy")

b = np.matmul(A,v)
print (b)


[2.45150902 3.01583514 2.74146952 3.12538633 2.40105885 2.3072745
 2.67861769 2.66861108 1.99255701 2.50945972]


2. 행렬의 행 등분할

    <img src = "images/image05.png">

In [54]:
%%writefile Av.py

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

rank = comm.Get_rank()
size = comm.Get_size()

if rank == 0 :
    A = np.load("A.npy")
    v = np.load("v.npy")
    n = v.size
    n = comm.bcast(n, root = 0)
else :
    A = None
    n = 0
    n = comm.bcast(n, root = 0)
    v = np.empty(n, dtype = np.float64)

n_row = int(n / size)

A_row = np.empty((n_row, n), dtype = np.float64)

##### 행렬의 행 분할
comm.Scatter(A, A_row, root = 0) #FIX ME

comm.Bcast(v, root = 0)

##### 분할된 행렬과의 연산
b = np.matmul(A_row,v) #FIX ME

print(b, rank)

Overwriting Av.py


In [55]:
! mpirun -np 2 python Av.py

[2.45150902 3.01583514 2.74146952 3.12538633 2.40105885] 0
[2.3072745  2.67861769 2.66861108 1.99255701 2.50945972] 1


3. 행렬의 행 비등분할

    <img src = "images/image06.png">

In [56]:
%%writefile Avar.py

# Matrix A의 Row decomposition

import numpy as np
from mpi4py import MPI
from tools import para_range

comm = MPI.COMM_WORLD

rank = comm.Get_rank()
size = comm.Get_size()

if rank == 0 :
    A = np.load("A.npy")
    v = np.load("v.npy")
    n = v.size
    n = comm.bcast(n, root = 0)

else :
    A = None
    n = 0
    n = comm.bcast(n, root = 0)
    v = np.empty(n, dtype = np.float64)

ista, iend = para_range(n, size, rank)

n_row = (iend - ista + 1)

A_row = np.empty((n_row, n), dtype = np.float64)

##### 행렬의 행 분할 및 각 프로세스별 크기 지정
n_rows = comm.gather(n_row * n, root = 0) #FIX ME

comm.Scatterv((A, n_rows), A_row, root = 0) #FIX ME
comm.Bcast(v, root = 0)

b = np.matmul(A_row,v)

print(b, rank)

Overwriting Avar.py


In [57]:
! mpirun -np 3 python Avar.py

[2.40105885 2.3072745  2.67861769] 1
[2.66861108 1.99255701 2.50945972] 2
[2.45150902 3.01583514 2.74146952 3.12538633] 0


4. 행렬/벡터의 행 비등분할

    <img src = "images/image07.png">

In [58]:
%%writefile Av_var.py

# Matrix A의 Row decomposition

from tools import para_range
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

rank = comm.Get_rank()
size = comm.Get_size()

if rank == 0 :
    A = np.load("A.npy")
    v = np.load("v.npy")
    n = v.size
    n = comm.bcast(n, root = 0)

else :
    A = None
    v = None
    n = 0
    n = comm.bcast(n, root = 0)

ista, iend = para_range(n, size, rank)

n_row = (iend - ista + 1)

A_row = np.empty((n_row, n), dtype = np.float64)
v_row = np.empty(n_row, dtype = np.float64)

##### 행렬의 행 분할 및 각 프로세스별 크기 지정
n_chunks = comm.gather(n_row * n, root = 0) #FIX ME
n_rows = comm.allgather(n_row) #FIX ME

comm.Scatterv([A, n_chunks], A_row, root = 0) #FIX ME
comm.Scatterv([v, n_rows], v_row, root = 0) #FIX ME

##### 분할된 벡터 곱 범위 지정
vsta_list = []
vend_list = []

for i in range(size) :
    vsta_list.append(sum(n_rows[:i])) #FIX ME
    vend_list.append(sum(n_rows[:i])+n_rows[i]) #FIX ME

##### Local MV (최초 자신의 벡터부분)
b = np.matmul(A_row[:,vsta_list[rank]:vend_list[rank]], v_row) #FIX ME

##### 송수신 프로세스 지정
inext = rank + 1 if rank < size - 1 else 0 #FIX ME
iprev = rank - 1 if rank > 0 else size - 1 #FIX ME

for i in range(size - 1) :
    iloc = iprev - i if iprev >= i else iprev - i + size
    v_recv = np.empty(n_rows[iloc], dtype = np.float64)
    ##### 통신
    comm.Sendrecv(v_row, inext, 1, v_recv, iprev, 1) #FIX ME
    v_row = np.copy(v_recv)
    b += np.matmul(A_row[:,vsta_list[iloc]:vend_list[iloc]], v_row)

print(b, rank)

Overwriting Av_var.py


In [59]:
! mpirun -np 3 python Av_var.py

[2.40105885 2.3072745  2.67861769] 1
[2.66861108 1.99255701 2.50945972] 2
[2.45150902 3.01583514 2.74146952 3.12538633] 0


## 3. 행렬-행렬 곱

1. 순차코드

    <img src = "images/image08.png">

In [60]:
A = np.load("A.npy")
B = np.load("B.npy")

C = np.matmul(A, B)
print (C)


[[2.6826069  2.65309421 1.95489435 2.15408649 2.39436101 2.80342462
  1.41995297 2.79304207 2.41664541 1.96724264]
 [2.97310408 3.50137103 3.0442885  2.79891636 2.55238991 3.38425573
  1.38250451 3.78127846 3.09987314 2.54323953]
 [3.17897934 3.20706715 2.60027798 2.93007947 3.08054083 2.57694642
  1.52038116 3.08103388 3.03599842 2.99806167]
 [3.36838944 3.56373124 3.50546146 3.49957622 3.67368927 3.80750494
  1.53323986 3.97391478 3.49488622 3.1345249 ]
 [2.53074187 2.74557879 2.01868724 2.44577043 2.54219079 3.15861836
  1.46197454 2.49602078 2.27413066 2.14442767]
 [2.34483477 2.4647671  1.92909959 1.93603212 2.01298781 2.34926266
  1.11137097 2.94240057 1.9732404  1.92400967]
 [3.50843192 3.01114581 2.38027008 2.13631684 2.35315996 3.13212192
  1.60384757 3.50379224 2.75963474 2.60156954]
 [3.55933885 2.68755534 2.91479657 2.68343096 3.13622667 4.24653505
  1.34100071 3.98939321 3.14327613 2.59644317]
 [2.9762361  2.43331644 1.82262527 1.66563843 2.05569196 2.51507972
  1.33625425

2. 행렬A의 행분할

    <img src = "images/image09.png">

In [61]:
%%writefile AB.py

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

rank = comm.Get_rank()
size = comm.Get_size()

if rank == 0 :
    A = np.load("A.npy")
    B = np.load("B.npy")
    n = A[0].size
    n = comm.bcast(n, root = 0)

else :
    n = 0
    n = comm.bcast(n, root = 0)
    A = None
    B = np.empty((n, n), dtype = np.float64)

n_row = int(n / size)

A_row = np.empty((n_row, n), dtype = np.float64)

##### 행렬 A의 분할
comm.Scatter(A, A_row, root = 0) #FIX ME
comm.Bcast(B, root = 0)

C = np.matmul(A_row,B)

print(C, rank)

Overwriting AB.py


In [62]:
! mpirun -np 2 python AB.py

[[2.34483477 2.4647671  1.92909959 1.93603212 2.01298781 2.34926266
  1.11137097 2.94240057 1.9732404  1.92400967]
 [3.50843192 3.01114581 2.38027008 2.13631684 2.35315996 3.13212192
  1.60384757 3.50379224 2.75963474 2.60156954]
 [3.55933885 2.68755534 2.91479657 2.68343096 3.13622667 4.24653505
  1.34100071 3.98939321 3.14327613 2.59644317]
 [2.9762361  2.43331644 1.82262527 1.66563843 2.05569196 2.51507972
  1.33625425 2.66109537 2.53598422 1.9066874 ]
 [2.99319349 3.07641558 2.20794542 2.29301705 2.79072711 3.03242235
  1.46365289 2.87137779 2.18693529 2.71765557]] 1
[[2.6826069  2.65309421 1.95489435 2.15408649 2.39436101 2.80342462
  1.41995297 2.79304207 2.41664541 1.96724264]
 [2.97310408 3.50137103 3.0442885  2.79891636 2.55238991 3.38425573
  1.38250451 3.78127846 3.09987314 2.54323953]
 [3.17897934 3.20706715 2.60027798 2.93007947 3.08054083 2.57694642
  1.52038116 3.08103388 3.03599842 2.99806167]
 [3.36838944 3.56373124 3.50546146 3.49957622 3.67368927 3.80750494
  1.53323

3. 행렬A의 행분할 (비등분할)

    <img src = "images/image10.png">

In [63]:
%%writefile AvarB.py

# Matrix A의 Row decomposition

import numpy as np
from mpi4py import MPI
from tools import para_range

comm = MPI.COMM_WORLD

rank = comm.Get_rank()
size = comm.Get_size()

if rank == 0 :
    A = np.load("A.npy")
    B = np.load("B.npy")
    n = A[0].size
    n = comm.bcast(n, root = 0)
else :
    n = 0
    n = comm.bcast(n, root = 0)
    A = None
    B = np.empty((n, n), dtype = np.float64)

##### 행렬 A의 분할 범위
ista, iend = para_range(n, size, rank) #FIX ME

n_row = (iend - ista + 1) #FIX ME

A_row = np.empty((n_row, n), dtype = np.float64)
n_list = comm.gather(n_row * n, root = 0)

##### 행렬 A의 분할
comm.Scatterv([A, n_list], A_row, root = 0) #FIX ME
comm.Bcast(B, root = 0)

C_rows = np.matmul(A_row,B)

print(C_rows, rank)

Overwriting AvarB.py


In [64]:
! mpirun -np 3 python AvarB.py

[[3.55933885 2.68755534 2.91479657 2.68343096 3.13622667 4.24653505
  1.34100071 3.98939321 3.14327613 2.59644317]
 [2.9762361  2.43331644 1.82262527 1.66563843 2.05569196 2.51507972
  1.33625425 2.66109537 2.53598422 1.9066874 ]
 [2.99319349 3.07641558 2.20794542 2.29301705 2.79072711 3.03242235
  1.46365289 2.87137779 2.18693529 2.71765557]] 2
[[2.6826069  2.65309421 1.95489435 2.15408649 2.39436101 2.80342462
  1.41995297 2.79304207 2.41664541 1.96724264]
 [2.97310408 3.50137103 3.0442885  2.79891636 2.55238991 3.38425573
  1.38250451 3.78127846 3.09987314 2.54323953]
 [3.17897934 3.20706715 2.60027798 2.93007947 3.08054083 2.57694642
  1.52038116 3.08103388 3.03599842 2.99806167]
 [3.36838944 3.56373124 3.50546146 3.49957622 3.67368927 3.80750494
  1.53323986 3.97391478 3.49488622 3.1345249 ]] 0
[[2.53074187 2.74557879 2.01868724 2.44577043 2.54219079 3.15861836
  1.46197454 2.49602078 2.27413066 2.14442767]
 [2.34483477 2.4647671  1.92909959 1.93603212 2.01298781 2.34926266
  1.11

4. 행렬A의 행분할, 행렬 B의 열분할

    <img src = "images/image11.png">

In [65]:
%%writefile ABvar.py

import numpy as np
from mpi4py import MPI
from tools import para_range

comm = MPI.COMM_WORLD

rank = comm.Get_rank()
size = comm.Get_size()

if rank == 0 :
    A = np.load("A.npy")
    B = np.load("B.npy")
    BT = np.transpose(B).copy()
    n = A[0].size
    n = comm.bcast(n, root = 0)

else :
    n = 0
    n = comm.bcast(n, root = 0)
    A = None
    BT = None

ista, iend = para_range(n, size, rank)

n_row = (iend - ista + 1)

A_row = np.empty((n_row, n), dtype = np.float64)
BT_row = np.empty((n_row, n), dtype = np.float64)
n_rows = comm.allgather(n_row * n)

##### 행렬 A, B의 분할
comm.Scatterv([A, n_rows], A_row, root = 0) #FIX ME
comm.Scatterv([BT, n_rows], BT_row, root = 0) #FIX ME
B_col = np.transpose(BT_row)

inext = rank + 1 if rank < size - 1 else 0
iprev = rank - 1 if rank > 0 else size - 1

C_unordered_rows = np.matmul(A_row, B_col)

for i in range(size - 1) :
    iloc = iprev - i if iprev >= i else iprev - i + size
    B_recv = np.empty(n_rows[iloc], dtype = np.float64)

    ##### 분할된 행렬 B를 송수신하고 A의 분할된 부분과 곱하여 C에 저장
    comm.Sendrecv(B_col, inext, 1, B_recv, iprev, 1) #FIX ME
    B_col = np.copy(B_recv)
    B_col = np.reshape(B_col, (n, int(n_rows[iloc]/n))) #FIX ME
    C_block = np.matmul(A_row, B_col) #FIX ME
    C_unordered_rows = np.append(C_unordered_rows, C_block, axis = 1) #FIX ME

print(C_unordered_rows, rank)

Overwriting ABvar.py


In [66]:
! mpirun -np 3 python ABvar.py

[[2.6826069  2.65309421 1.95489435 2.15408649 3.02642898 2.58987778
  2.32959485 2.13631834 2.55437644 1.55412349]
 [2.97310408 3.50137103 3.0442885  2.79891636 3.84402881 3.02797521
  3.05487209 2.61902644 2.63315789 1.629111  ]
 [3.17897934 3.20706715 2.60027798 2.93007947 3.18416452 2.60633353
  2.68287179 2.53311158 2.84829833 2.02024325]
 [3.36838944 3.56373124 3.50546146 3.49957622 3.89414878 2.96433186
  3.13460905 3.37265242 3.42959369 2.23982589]] 0
[[2.54219079 3.15861836 1.46197454 1.92804629 1.22457906 2.87358428
  2.5712001  3.24680317 2.44887285 2.05770618]
 [2.01298781 2.34926266 1.11137097 1.64714964 1.66399601 2.87381109
  2.71268562 2.42672795 2.1558859  1.90324371]
 [2.35315996 3.13212192 1.60384757 2.22968472 1.85614949 3.63360494
  3.51445881 3.45687792 2.64769939 2.29748307]] 1
[[3.98939321 3.14327613 2.59644317 3.18970825 2.77908999 2.69501872
  2.70993843 2.19433999 4.4221759  3.42229425]
 [2.66109537 2.53598422 1.9066874  1.83118671 2.6975719  1.80984507
  1.90

5. 직접 해보기
    - 올바른 크기의 C를 미리 선언하고 적절한 위치에 C_block 을 배치
    - 3의 C_rows와 같은 결과를 얻음
