<a href="https://colab.research.google.com/github/ZIZOUTOUMOU/ZIZOUTOUMOU/blob/main/cal_paralle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
pip install mpi4py

Collecting mpi4py
  Downloading mpi4py-4.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (16 kB)
Downloading mpi4py-4.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_5_x86_64.whl (1.4 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.4/1.4 MB[0m [31m47.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mpi4py
Successfully installed mpi4py-4.1.1


In [8]:
pip install mpi


Collecting mpi
  Downloading mpi-1.0.0-py3-none-any.whl.metadata (2.4 kB)
Downloading mpi-1.0.0-py3-none-any.whl (106 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/106.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.5/106.5 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mpi
Successfully installed mpi-1.0.0


In [4]:
import numpy as np
import time

N = 800  # fixed size

def matmul_sequential(A, B):
    C = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            s = 0
            for k in range(N):
                s += A[i, k] * B[k, j]
            C[i, j] = s
    return C

if __name__ == "__main__":
    A = np.random.rand(N, N)
    B = np.random.rand(N, N)

    t0 = time.time()
    C = matmul_sequential(A, B)
    print("Sequential time =", time.time() - t0)


KeyboardInterrupt: 

In [5]:
import numpy as np
import threading
import time

N = 800  # fixed size

def worker(A, B, C, start, end):
    for i in range(start, end):
        for j in range(N):
            C[i, j] = np.dot(A[i, :], B[:, j])

def matmul_threaded(A, B, nthreads):
    C = np.zeros((N, N))
    threads = []

    chunk = N // nthreads

    for t in range(nthreads):
        s = t * chunk
        e = N if t == nthreads - 1 else (t + 1) * chunk
        thr = threading.Thread(target=worker, args=(A, B, C, s, e))
        threads.append(thr)
        thr.start()

    for thr in threads:
        thr.join()

    return C


if __name__ == "__main__":
    threads = int(input("Number of threads: "))

    A = np.random.rand(N, N)
    B = np.random.rand(N, N)

    t0 = time.time()
    C = matmul_threaded(A, B, threads)
    print("Threaded time =", time.time() - t0)


Number of threads: 6
Threaded time = 3.4852042198181152


In [9]:
import numpy as np
from mpi4py import MPI
import time

N = 800  # fixed

def matmul_mpi():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    if rank == 0:
        A = np.random.rand(N, N)
        B = np.random.rand(N, N)
    else:
        A = None
        B = np.empty((N, N))

    comm.Bcast(B, root=0)

    rows = N // size
    start = rank * rows
    end = N if rank == size - 1 else (rank + 1) * rows

    A_local = np.zeros((end - start, N))

    if rank == 0:
        for p in range(size):
            s = p * rows
            e = N if p == size - 1 else (p + 1) * rows
            if p == 0:
                A_local[:] = A[s:e, :]
            else:
                comm.Send(A[s:e, :], dest=p)
    else:
        comm.Recv(A_local, source=0)

    C_local = A_local @ B

    if rank == 0:
        C = np.zeros((N, N))
        C[start:end, :] = C_local
        for p in range(1, size):
            s = p * rows
            e = N if p == size - 1 else (p + 1) * rows
            comm.Recv(C[s:e, :], source=p)
        return C
    else:
        comm.Send(C_local, dest=0)
        return None

if __name__ == "__main__":
    t0 = time.time()
    C = matmul_mpi()

    if MPI.COMM_WORLD.Get_rank() == 0:
        print("MPI time =", time.time() - t0)


MPI time = 0.08471179008483887


In [1]:
pip install cupy-cuda12x



In [10]:
import numpy as np
import time

N = 80  # fixed size

def matmul_gpu(A, B):
    import cupy as cp
    A_gpu = cp.asarray(A)
    B_gpu = cp.asarray(B)
    C_gpu = A_gpu @ B_gpu
    return cp.asnumpy(C_gpu)

if __name__ == "__main__":
    A = np.random.rand(N, N)
    B = np.random.rand(N, N)

    t0 = time.time()
    C = matmul_gpu(A, B)
    print("GPU time =", time.time() - t0)


GPU time = 5.054694890975952
