In [1]:
from numba import njit, prange, set_num_threads
from functools import wraps
import numpy as np
import timeit
import time

set_num_threads(4)


def timeme(function):
    """
    Decorator to measure and print the execution time of a function.

    This decorator wraps the provided function and measures its execution time
    using the time.perf_counter() function. Upon completion, it prints the
    function name and the elapsed time in seconds with 4 decimal places.

    Args:
    function (Callable): The function to be timed.

    Returns:
    Callable: The wrapped function with added timing functionality.

    Example:
    >>> @timeme
    >>> def example_function():
    >>>    # Some code here
    >>>
    >>> example_function()  # Will print the execution time upon completion.
    """
    @wraps(function)
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = function(*args, **kwargs)
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"{function.__name__} took {elapsed_time:.4f} seconds to run.")
        return result

    return wrapper

### Calculando multiplicação de matrizes

In [2]:
def sequential_matrix_multiplication(matrix_A, matrix_B):
    """
    Performs matrix multiplication between two numpy arrays.

    Args:
    matrix_A (numpy.ndarray): The first matrix, with dimensions (rows_A, cols_A).
    matrix_B (numpy.ndarray): The second matrix, with dimensions (rows_B, cols_B).

    Returns:
    numpy.ndarray: The resulting matrix after multiplication, with dimensions (rows_A, cols_B).

    Raises:
    ValueError: If the number of columns in matrix_A does not match the number of rows in matrix_B.

    Example:

    >>> import numpy as np
    >>>
    >>> matrix_A = np.array([
    >>>    [1, 2, 3],
    >>>    [4, 5, 6]
    >>> ])
    >>>
    >>> matrix_B = np.array([
    >>>    [7, 8],
    >>>    [9, 10],
    >>>    [11, 12]
    >>> ])
    >>>
    >>> result_matrix = matrix_multiply(matrix_A, matrix_B)

    result_matrix would be:
    array([[ 58,  64],
           [139, 154]])
    """
    if matrix_A.shape[1] != matrix_B.shape[0]:
        raise ValueError(
            "The number of columns of the first matrix must be equal to the number of rows of the second matrix."
        )

    rows_a = matrix_A.shape[0]
    cols_a = matrix_A.shape[1]
    rows_b = matrix_B.shape[0]
    cols_b = matrix_B.shape[1]

    if cols_a != rows_b:
        raise ValueError("O número de colunas da matriz A deve ser igual ao número de linhas da matriz B.")

    result_matrix = np.zeros((rows_a, cols_b))

    for row_idx in prange(rows_a):
        for col_idx in range(cols_b):
            for shared_dim in range(cols_a):
                result_matrix[row_idx][col_idx] += matrix_A[row_idx][shared_dim] * matrix_B[shared_dim][col_idx]

    return result_matrix


@njit(parallel=True)
def parallel_matrix_multiplication(matrix_A, matrix_B):

    if matrix_A.shape[1] != matrix_B.shape[0]:
        raise ValueError(
            "The number of columns of the first matrix must be equal to the number of rows of the second matrix."
        )

    rows_a = matrix_A.shape[0]
    cols_a = matrix_A.shape[1]
    rows_b = matrix_B.shape[0]
    cols_b = matrix_B.shape[1]

    if cols_a != rows_b:
        raise ValueError("O número de colunas da matriz A deve ser igual ao número de linhas da matriz B.")

    result_matrix = np.zeros((rows_a, cols_b))

    for row_idx in prange(rows_a):
        for col_idx in range(cols_b):
            for shared_dim in range(cols_a):
                result_matrix[row_idx][col_idx] += matrix_A[row_idx][shared_dim] * matrix_B[shared_dim][col_idx]

    return result_matrix

In [3]:
n = 500
matrix_A = np.random.randint(0, 100, size=(n, n))
matrix_B = np.random.randint(0, 100, size=(n, n))

# Medindo o tempo de execução da solução sequencial
@timeme
def run_sequential():
    sequential_matrix_multiplication(matrix_A, matrix_B)


# Medindo o tempo de execução da solução paralela
@timeme
def run_parallel():
    parallel_matrix_multiplication(matrix_A, matrix_B)

run_parallel()
run_sequential()

### Calculando o tempo de execução com número de threads diferentes

In [6]:
def run_parallel_with_threads(n_threads):
    set_num_threads(n_threads)
    run_parallel()

run_parallel_with_threads(1)
run_parallel_with_threads(2)
run_parallel_with_threads(4)
run_parallel_with_threads(8)
run_parallel_with_threads(9)
run_parallel_with_threads(16)

run_parallel took 0.0988 seconds to run.
run_parallel took 0.0530 seconds to run.
run_parallel took 0.0273 seconds to run.
run_parallel took 0.0279 seconds to run.
run_parallel took 0.0265 seconds to run.
run_parallel took 0.0185 seconds to run.


### Calculando o Speedup

In [7]:
def speedup(sequential_time, parallel_time):
    return sequential_time / parallel_time

sequential_time = timeit.timeit(run_sequential, number=10) / 10
parallel_time = timeit.timeit(run_parallel, number=10) / 10

print(f"Speedup: {speedup(sequential_time, parallel_time)}")

run_sequential took 55.5915 seconds to run.
run_sequential took 54.0645 seconds to run.
run_sequential took 53.5986 seconds to run.
run_sequential took 53.0502 seconds to run.
run_sequential took 53.2894 seconds to run.
run_sequential took 52.7536 seconds to run.
run_sequential took 53.4580 seconds to run.
run_sequential took 53.0444 seconds to run.
run_sequential took 53.2295 seconds to run.
run_sequential took 54.2504 seconds to run.
run_parallel took 0.0154 seconds to run.
run_parallel took 0.0147 seconds to run.
run_parallel took 0.0157 seconds to run.
run_parallel took 0.0151 seconds to run.
run_parallel took 0.0178 seconds to run.
run_parallel took 0.0153 seconds to run.
run_parallel took 0.0148 seconds to run.
run_parallel took 0.0165 seconds to run.
run_parallel took 0.0155 seconds to run.
run_parallel took 0.0145 seconds to run.
Speedup: 3449.312169184997


In [11]:
import platform
import psutil
import cpuinfo
import os

def get_cpu_info():
    cpu_freq = psutil.cpu_freq()
    cpu_count = psutil.cpu_count(logical=False)
    cpu_cores = os.cpu_count()
    return f"CPU: {platform.processor()} | Clock: {cpu_freq.current:.2f}MHz | Cores: {cpu_count} | Threads: {cpu_cores}"

def get_cache_info():
    cpu_info = cpuinfo.get_cpu_info()
    cache_sizes = {
        "L1": cpu_info.get("l1_data_cache_size"),
        "L2": cpu_info.get("l2_cache_size"),
        "L3": cpu_info.get("l3_cache_size")
    }
    cache_info = ", ".join([f"{level} {size}" for level, size in cache_sizes.items() if size])
    return f"Cache: {cache_info}"

def get_memory_info():
    virtual_memory = psutil.virtual_memory()
    total_memory = virtual_memory.total / (1024 ** 3)  # Convert to GB
    return f"Memória RAM: {total_memory:.2f}GB"

def get_os_info():
    os_info = f"Sistema Operacional: {platform.system()} {platform.release()} ({platform.version()})"
    return os_info

def print_system_info():
    print(get_cpu_info())
    print(get_cache_info())
    print(get_memory_info())
    print(get_os_info())


print_system_info()

CPU: Intel64 Family 6 Model 154 Stepping 3, GenuineIntel | Clock: 2300.00MHz | Cores: 14 | Threads: 20
Cache: L2 4194304, L3 25165824
Memória RAM: 15.69GB
Sistema Operacional: Windows 10 (10.0.22621)


# Produto Escalar

In [18]:
set_num_threads(4)

def dot_product(u, v):
    return sum(u_i * v_i for u_i, v_i in zip(u, v))


@njit(parallel=True)
def dot_product_parallel(u, v, num_threads=4):
    result = 0
    for i in prange(u.shape[0]):
        result += u[i] * v[i]
    return result

In [19]:
u = np.random.rand(100)
v = np.random.rand(100)

@timeme
def run_dot_sequential():
    dot_product(u, v)


@timeme
def run_dot_parallel():
    dot_product_parallel(u, v)


run_dot_sequential()
run_dot_parallel()

run_dot_sequential took 0.0001 seconds to run.
run_dot_parallel took 0.2663 seconds to run.


In [21]:
def run_dot_parallel_with_threads(n_threads):
    set_num_threads(n_threads)
    run_dot_parallel()

run_dot_parallel_with_threads(1)
run_dot_parallel_with_threads(2)
run_dot_parallel_with_threads(4)
run_dot_parallel_with_threads(8)
run_dot_parallel_with_threads(16)

run_dot_parallel took 0.0003 seconds to run.
run_dot_parallel took 0.0006 seconds to run.
run_dot_parallel took 0.0005 seconds to run.
run_dot_parallel took 0.0009 seconds to run.
run_dot_parallel took 0.0014 seconds to run.
CPU: Intel64 Family 6 Model 154 Stepping 3, GenuineIntel | Clock: 2300.00MHz | Cores: 14 | Threads: 20
Cache: L2 4194304, L3 25165824
Memória RAM: 15.69GB
Sistema Operacional: Windows 10 (10.0.22621)


In [24]:
sequential_time = timeit.timeit(run_dot_sequential, number=10) / 10
parallel_time = timeit.timeit(run_dot_parallel, number=10) / 10

print(f"Speedup: {speedup(sequential_time, parallel_time)}")

run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_sequential took 0.0000 seconds to run.
run_dot_parallel took 0.0005 seconds to run.
run_dot_parallel took 0.0003 seconds to run.
run_dot_parallel took 0.0002 seconds to run.
run_dot_parallel took 0.0001 seconds to run.
run_dot_parallel took 0.0000 seconds to run.
run_dot_parallel took 0.0000 seconds to run.
run_dot_parallel took 0.0000 seconds to run.
run_dot_parallel took 0.0000 seconds to run.
run_dot_parallel took 0.0000 seconds to run.
run_dot_parallel took 0.0000 seconds to run.
Speedup: 0.31305791595286303


In [25]:
print_system_info()

CPU: Intel64 Family 6 Model 154 Stepping 3, GenuineIntel | Clock: 2300.00MHz | Cores: 14 | Threads: 20
Cache: L2 4194304, L3 25165824
Memória RAM: 15.69GB
Sistema Operacional: Windows 10 (10.0.22621)
