In [1]:
from numba import njit, prange, set_num_threads
from functools import wraps
import numpy as np
import timeit
import time

set_num_threads(4)


def timeme(function):
    """
    Decorator to measure and print the execution time of a function.

    Example:
    >>> @timeme
    >>> def example_function():
    >>>    # Some code here
    >>>
    >>> example_function()  # Will print the execution time upon completion.
    """
    @wraps(function)
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = function(*args, **kwargs)
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"{function.__name__} took {elapsed_time:.4f} seconds to run.")
        return result

    return wrapper

### Calculando multiplicação de matrizes

In [2]:
def sequential_matrix_multiplication(matrix_A, matrix_B):
    """
    Performs matrix multiplication between two numpy arrays.
    """
    if matrix_A.shape[1] != matrix_B.shape[0]:
        raise ValueError(
            "The number of columns of the first matrix must be equal to the number of rows of the second matrix."
        )

    rows_a = matrix_A.shape[0]
    cols_a = matrix_A.shape[1]
    rows_b = matrix_B.shape[0]
    cols_b = matrix_B.shape[1]

    if cols_a != rows_b:
        raise ValueError("O número de colunas da matriz A deve ser igual ao número de linhas da matriz B.")

    result_matrix = np.zeros((rows_a, cols_b))

    for row_idx in prange(rows_a):
        for col_idx in range(cols_b):
            for shared_dim in range(cols_a):
                result_matrix[row_idx][col_idx] += matrix_A[row_idx][shared_dim] * matrix_B[shared_dim][col_idx]

    return result_matrix


@njit(parallel=True)
def parallel_matrix_multiplication(matrix_A, matrix_B):

    if matrix_A.shape[1] != matrix_B.shape[0]:
        raise ValueError(
            "The number of columns of the first matrix must be equal to the number of rows of the second matrix."
        )

    rows_a = matrix_A.shape[0]
    cols_a = matrix_A.shape[1]
    rows_b = matrix_B.shape[0]
    cols_b = matrix_B.shape[1]

    if cols_a != rows_b:
        raise ValueError("O número de colunas da matriz A deve ser igual ao número de linhas da matriz B.")

    result_matrix = np.zeros((rows_a, cols_b))

    for row_idx in prange(rows_a):
        for col_idx in range(cols_b):
            for shared_dim in range(cols_a):
                result_matrix[row_idx][col_idx] += matrix_A[row_idx][shared_dim] * matrix_B[shared_dim][col_idx]

    return result_matrix

In [3]:
n = 500
matrix_A = np.random.randint(0, 100, size=(n, n))
matrix_B = np.random.randint(0, 100, size=(n, n))

# Medindo o tempo de execução da solução sequencial
@timeme
def run_sequential():
    sequential_matrix_multiplication(matrix_A, matrix_B)


# Medindo o tempo de execução da solução paralela
@timeme
def run_parallel():
    parallel_matrix_multiplication(matrix_A, matrix_B)

run_parallel()
run_sequential()

run_parallel took 1.1014 seconds to run.
run_sequential took 75.7429 seconds to run.


### Calculando o tempo de execução com número de threads diferentes

In [4]:
def run_parallel_with_threads(n_threads):
    set_num_threads(n_threads)
    run_parallel()


def test_parallel_function(function, threads_to_run):
    for n_threads in threads_to_run:
        print(f"Running with {n_threads} threads...")
        function(n_threads)

test_parallel_function(run_parallel_with_threads, [1, 2, 4, 8, 9, 16])

Running with 1 threads...
run_parallel took 0.1350 seconds to run.
Running with 2 threads...
run_parallel took 0.0664 seconds to run.
Running with 4 threads...
run_parallel took 0.0510 seconds to run.
Running with 8 threads...
run_parallel took 0.0608 seconds to run.
Running with 9 threads...
run_parallel took 0.0312 seconds to run.
Running with 16 threads...
run_parallel took 0.0269 seconds to run.


### Calculando o Speedup

In [5]:
def speedup(sequential_time, parallel_time):
    return sequential_time / parallel_time

times_to_run = 10
sequential_time = timeit.timeit(run_sequential, number=times_to_run) / times_to_run
parallel_time = timeit.timeit(run_parallel, number=times_to_run) / times_to_run

print(f"Speedup: {speedup(sequential_time, parallel_time)}")

run_sequential took 75.6771 seconds to run.
run_sequential took 75.3748 seconds to run.
run_sequential took 76.4751 seconds to run.
run_sequential took 76.3233 seconds to run.
run_sequential took 76.7240 seconds to run.
run_sequential took 75.7890 seconds to run.
run_sequential took 76.3227 seconds to run.
run_sequential took 75.6754 seconds to run.
run_sequential took 74.2315 seconds to run.
run_sequential took 74.7499 seconds to run.
run_parallel took 0.0619 seconds to run.
run_parallel took 0.0488 seconds to run.
run_parallel took 0.0332 seconds to run.
run_parallel took 0.0328 seconds to run.
run_parallel took 0.0329 seconds to run.
run_parallel took 0.0329 seconds to run.
run_parallel took 0.0300 seconds to run.
run_parallel took 0.0256 seconds to run.
run_parallel took 0.0845 seconds to run.
run_parallel took 0.0800 seconds to run.
Speedup: 1635.263377615618


In [6]:
import platform
import psutil
import cpuinfo
import os

def get_cpu_info():
    cpu_freq = psutil.cpu_freq()
    cpu_count = psutil.cpu_count(logical=False)
    cpu_cores = os.cpu_count()
    return f"CPU: {platform.processor()} | Clock: {cpu_freq.current:.2f}MHz | Cores: {cpu_count} | Threads: {cpu_cores}"

def get_cache_info():
    cpu_info = cpuinfo.get_cpu_info()
    cache_sizes = {
        "L1": cpu_info.get("l1_data_cache_size"),
        "L2": cpu_info.get("l2_cache_size"),
        "L3": cpu_info.get("l3_cache_size")
    }
    cache_info = ", ".join([f"{level} {size}" for level, size in cache_sizes.items() if size])
    return f"Cache: {cache_info}"

def get_memory_info():
    virtual_memory = psutil.virtual_memory()
    total_memory = virtual_memory.total / (1024 ** 3)  # Convert to GB
    return f"Memória RAM: {total_memory:.2f}GB"

def get_os_info():
    os_info = f"Sistema Operacional: {platform.system()} {platform.release()} ({platform.version()})"
    return os_info

def print_system_info():
    print(get_cpu_info())
    print(get_cache_info())
    print(get_memory_info())
    print(get_os_info())


print_system_info()

CPU: Intel64 Family 6 Model 154 Stepping 3, GenuineIntel | Clock: 2300.00MHz | Cores: 14 | Threads: 20
Cache: L2 4194304, L3 25165824
Memória RAM: 15.69GB
Sistema Operacional: Windows 10 (10.0.22621)


# Produto Escalar

In [17]:
set_num_threads(4)

def dot_product(u, v):
    result = 0
    for i in range(u.shape[0]):
        result += u[i] * v[i]
    return result


@njit(parallel=True)
def dot_product_parallel(u, v):
    result = 0
    for i in prange(u.shape[0]):
        result += u[i] * v[i]
    return result

In [18]:
u = np.random.rand(100)
v = np.random.rand(100)

@timeme
def run_dot_sequential():
    dot_product(u, v)


@timeme
def run_dot_parallel():
    dot_product_parallel(u, v)


run_dot_sequential()
run_dot_parallel()

run_dot_sequential took 0.0000 seconds to run.
run_dot_parallel took 0.3875 seconds to run.


In [19]:
def run_dot_parallel_with_threads(n_threads):
    set_num_threads(n_threads)
    run_dot_parallel()


test_parallel_function(run_parallel_with_threads, [1, 2, 4, 8, 9, 16])

Running with 1 threads...
run_parallel took 0.1598 seconds to run.
Running with 2 threads...
run_parallel took 0.0828 seconds to run.
Running with 4 threads...
run_parallel took 0.0527 seconds to run.
Running with 8 threads...
run_parallel took 0.0404 seconds to run.
Running with 9 threads...
run_parallel took 0.0420 seconds to run.
Running with 16 threads...
run_parallel took 0.0364 seconds to run.


In [20]:
sequential_time = timeit.timeit(run_dot_sequential, number=times_to_run) / times_to_run
parallel_time = timeit.timeit(run_dot_parallel, number=times_to_run) / times_to_run

print(f"Speedup: {speedup(sequential_time, parallel_time)}")

run_dot_sequential took 0.0001 seconds to run.
run_dot_sequential took 0.0001 seconds to run.
run_dot_sequential took 0.0001 seconds to run.
run_dot_sequential took 0.0001 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0005 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0000 seconds to run.
Speedup: 0.4518975443330027


In [21]:
print_system_info()

CPU: Intel64 Family 6 Model 154 Stepping 3, GenuineIntel | Clock: 2300.00MHz | Cores: 14 | Threads: 20
Cache: L2 4194304, L3 25165824
Memória RAM: 15.69GB
Sistema Operacional: Windows 10 (10.0.22621)
