<a href="https://colab.research.google.com/github/diegomrodrigues/project_euler/blob/main/Euler_1_Tensorflow_DIstributed_Strategy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

# Define um limite para o nosso problema
limite = 1000

# Cria um tensor com todos os números naturais até 'limite-1' (999)
numeros = tf.range(limite)

# Cria uma máscara booleana para filtrar múltiplos de 3 ou 5
mascara = (numeros % 3 == 0) | (numeros % 5 == 0)

# Aplica a máscara para obter apenas os múltiplos de 3 ou 5
multiplos = tf.boolean_mask(numeros, mascara)

# Calcula a soma dos elementos filtrados
soma = tf.reduce_sum(multiplos)

# Executa o cálculo
print("A soma dos múltiplos de 3 ou 5 abaixo de 1000 é:", soma.numpy())

A soma dos múltiplos de 3 ou 5 abaixo de 1000 é: 233168


In [2]:
import tensorflow as tf

@tf.function
def soma_multiplos(limite):
    # Cria um tensor com todos os números naturais até 'limite-1'
    numeros = tf.range(limite)

    # Cria uma máscara booleana para filtrar múltiplos de 3 ou 5
    mascara = (numeros % 3 == 0) | (numeros % 5 == 0)

    # Aplica a máscara para obter apenas os múltiplos de 3 ou 5
    multiplos = tf.boolean_mask(numeros, mascara)

    # Calcula a soma dos elementos filtrados
    return tf.reduce_sum(multiplos)

# Executa a função tf.function para o problema específico
limite = 1000
soma = soma_multiplos(limite)
print("A soma dos múltiplos de 3 ou 5 abaixo de 1000 é:", soma.numpy())


A soma dos múltiplos de 3 ou 5 abaixo de 1000 é: 233168


In [3]:
import tensorflow as tf

@tf.function
def soma_multiplos_otimizada(limite):
    # Cria um tensor com todos os números naturais até 'limite-1'
    numeros = tf.range(limite)

    # Calcula a soma diretamente usando operações vetoriais
    # A condição produz um tensor de 0s e 1s, que são multiplicados pelos números e somados
    return tf.reduce_sum(numeros * tf.cast((numeros % 3 == 0) | (numeros % 5 == 0), tf.int32))

# Executa a função tf.function otimizada para o problema específico
limite = 1000
soma = soma_multiplos_otimizada(limite)
print("A soma dos múltiplos de 3 ou 5 abaixo de 1000 é:", soma.numpy())


A soma dos múltiplos de 3 ou 5 abaixo de 1000 é: 233168


In [7]:
import tensorflow as tf

# Define a estratégia de distribuição para múltiplas GPUs
strategy = tf.distribute.MirroredStrategy()

print('Número de dispositivos: {}'.format(strategy.num_replicas_in_sync))

# Cria o cálculo dentro do escopo da estratégia
with strategy.scope():
    @tf.function
    def soma_multiplos_otimizada(limite):
        # Cria um tensor com todos os números naturais até 'limite-1'
        numeros = tf.range(limite)

        # Calcula a soma diretamente usando operações vetoriais
        return tf.reduce_sum(numeros * tf.cast((numeros % 3 == 0) | (numeros % 5 == 0), tf.int32))

    # Define o limite para o problema específico
    limite = 10**9

# Executa a função otimizada em múltiplas GPUs
soma = soma_multiplos_otimizada(limite)
print("A soma dos múltiplos de 3 ou 5 abaixo de 1000 é:", soma.numpy())


Número de dispositivos: 1
A soma dos múltiplos de 3 ou 5 abaixo de 1000 é: 631780268


In [5]:
import tensorflow as tf

LIMIT = 10**16

strategy = tf.distribute.MirroredStrategy()
print('Número de dispositivos: {}'.format(strategy.num_replicas_in_sync))

with strategy.scope():
    @tf.function(jit_compile=True)
    def sum_multiples_optimized(limit):
        """
        Calcula a soma de todos os múltiplos de 3 ou 5 abaixo do limite especificado.

        Args:
            limit (int): O limite superior (exclusivo) até o qual calcular a soma.

        Returns:
            tf.Tensor: A soma dos múltiplos de 3 ou 5 abaixo do limite.
        """
        numbers = tf.range(limit)
        is_multiple_of_3_or_5 = tf.math.logical_or(tf.math.mod(numbers, 3) == 0,
                                                   tf.math.mod(numbers, 5) == 0)
        multiples = numbers * tf.cast(is_multiple_of_3_or_5, tf.int64)
        return tf.reduce_sum(multiples)

sum_result = sum_multiples_optimized(LIMIT)
print(f"A soma dos múltiplos de 3 ou 5 abaixo de {LIMIT} é: {sum_result.numpy()}")

Número de dispositivos: 1


FailedPreconditionError: Number of physical blocks (70368744177664) does not fit in an i32 in tiling scheme: dims_in_elems = {1, 70368744177664, 143}, tile_sizes = {1, 1, 16}, num_threads = {1, 1, 32}, indexing_order = strided, vector_size = 1, thread_id_virtual_scaling = 1, tiling_dimensions = {1, 2} [Op:__inference_sum_multiples_optimized_119]

In [6]:
import tensorflow as tf

# Define a estratégia de distribuição para múltiplas GPUs
strategy = tf.distribute.MirroredStrategy()

print('Número de dispositivos: {}'.format(strategy.num_replicas_in_sync))

# Define o limite para o problema específico
LIMIT = 10**16

# Cria o cálculo dentro do escopo da estratégia
with strategy.scope():
    @tf.function(jit_compile=True)
    def sum_multiples_optimized(limit):
        """
        Calcula a soma de todos os múltiplos de 3 ou 5 abaixo do limite especificado.

        Args:
            limit (int): O limite superior (exclusivo) até o qual calcular a soma.

        Returns:
            tf.Tensor: A soma dos múltiplos de 3 ou 5 abaixo do limite.
        """
        numbers = tf.range(limit)
        is_multiple_of_3_or_5 = tf.math.logical_or(tf.math.mod(numbers, 3) == 0,
                                                   tf.math.mod(numbers, 5) == 0)
        multiples = numbers * tf.cast(is_multiple_of_3_or_5, tf.int64)
        return tf.reduce_sum(multiples)

# Executa a função otimizada em múltiplas GPUs
sum_result = sum_multiples_optimized(LIMIT)
print(f"A soma dos múltiplos de 3 ou 5 abaixo de {LIMIT} é: {sum_result.numpy()}")

Número de dispositivos: 1


FailedPreconditionError: Number of physical blocks (70368744177664) does not fit in an i32 in tiling scheme: dims_in_elems = {1, 70368744177664, 143}, tile_sizes = {1, 1, 16}, num_threads = {1, 1, 32}, indexing_order = strided, vector_size = 1, thread_id_virtual_scaling = 1, tiling_dimensions = {1, 2} [Op:__inference_sum_multiples_optimized_144]

In [None]:
import tensorflow as tf

# Define a estratégia de distribuição para múltiplas GPUs
strategy = tf.distribute.MirroredStrategy()

print('Número de dispositivos: {}'.format(strategy.num_replicas_in_sync))

# Define o limite para o problema específico
LIMIT = 10**16
CHUNK_SIZE = 10**12  # Define o tamanho do bloco para cada GPU processar

# Cria o cálculo dentro do escopo da estratégia
with strategy.scope():
    @tf.function(jit_compile=True)
    def compute_chunk(start, end):
        """ Computa a soma dos múltiplos de 3 ou 5 em um intervalo específico. """
        sum_multiples = tf.constant(0, dtype=tf.int64)
        for number in tf.range(start, end):
            if tf.math.logical_or(number % 3 == 0, number % 5 == 0):
                sum_multiples += number
        return sum_multiples

    def sum_multiples_optimized(limit):
        """ Distribui o cálculo da soma dos múltiplos de 3 ou 5 por múltiplas GPUs. """
        num_chunks = (limit + CHUNK_SIZE - 1) // CHUNK_SIZE
        results = []

        for chunk_index in range(num_chunks):
            start = chunk_index * CHUNK_SIZE
            end = tf.minimum((chunk_index + 1) * CHUNK_SIZE, limit)
            results.append(strategy.run(compute_chunk, args=(start, end)))

        # Soma os resultados de todos os chunks
        total_sum = tf.reduce_sum(strategy.experimental_local_results(results))
        return total_sum

# Executa a função otimizada
sum_result = sum_multiples_optimized(LIMIT)
print(f"A soma dos múltiplos de 3 ou 5 abaixo de {LIMIT} é: {sum_result.numpy()}")


Número de dispositivos: 1




In [1]:
import tensorflow as tf

# Define the distribution strategy for multiple GPUs
strategy = tf.distribute.MirroredStrategy()

print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# Define the limit for the specific problem
LIMIT = 10**16
BATCH_SIZE = 10**8  # Adjustable based on available memory and GPU count

# Utilizing vectorized operations for batch processing
@tf.function(jit_compile=True)
def compute_multiples_sum(batch_start, batch_end):
    numbers = tf.range(batch_start, batch_end, dtype=tf.int64)
    mask = tf.logical_or(tf.equal(numbers % 3, 0), tf.equal(numbers % 5, 0))
    selected_numbers = tf.boolean_mask(numbers, mask)
    return tf.reduce_sum(selected_numbers)

# Distributing the batch computation across multiple GPUs
def distributed_sum_multiples(limit):
    num_batches = (limit + BATCH_SIZE - 1) // BATCH_SIZE
    total_sum = tf.constant(0, dtype=tf.int64)

    for batch_index in tf.range(num_batches):
        batch_start = batch_index * BATCH_SIZE
        batch_end = tf.minimum((batch_index + 1) * BATCH_SIZE, limit)
        batch_sum = strategy.run(compute_multiples_sum, args=(batch_start, batch_end))
        total_sum += strategy.reduce(tf.distribute.ReduceOp.SUM, batch_sum, axis=None)

    return total_sum

# Execute the optimized function
sum_result = distributed_sum_multiples(LIMIT)
print(f"The sum of multiples of 3 or 5 below {LIMIT} is: {sum_result.numpy()}")


Number of devices: 1


InvalidArgumentError: Requires start <= limit when delta > 0: 1900000000/1874919424

Stack trace for op definition: 
File "/usr/lib/python3.10/threading.py", line 973, in _bootstrap
File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
File "<ipython-input-1-56c8d46c0f38>", line 15, in compute_multiples_sum

	 [[{{node range}}]]
	tf2xla conversion failed while converting __inference_compute_multiples_sum_67[_XlaMustCompile=true,config_proto=6001324581131673121,executor_type=11160318154034397263]. Run with TF_DUMP_GRAPH_PREFIX=/path/to/dump/dir and --vmodule=xla_compiler=2 to obtain a dump of the compiled functions. [Op:__inference_compute_multiples_sum_67]

In [3]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("Number of GPUs available:", len(tf.config.list_physical_devices('GPU')))

# Initialize TensorFlow distribution strategy
strategy = tf.distribute.MirroredStrategy()

# Define the limit for the computation
LIMIT = 10**16  # Reduced for demonstration, adjust as needed based on memory constraints
BATCH_SIZE = 10**5  # This size is reasonable for demonstration purposes

@tf.function
def compute_multiples_sum(batch_start, batch_end):
    numbers = tf.range(batch_start, batch_end, dtype=tf.int64)
    mask = tf.logical_or(tf.equal(numbers % 3, 0), tf.equal(numbers % 5, 0))
    selected_numbers = tf.boolean_mask(numbers, mask)
    return tf.reduce_sum(selected_numbers)

# Using TensorFlow distribution strategy
with strategy.scope():
    def distributed_sum_multiples(limit):
        num_batches = (limit + BATCH_SIZE - 1) // BATCH_SIZE
        total_sum = tf.constant(0, dtype=tf.int64)

        for batch_index in tf.range(num_batches):
            batch_start = batch_index * BATCH_SIZE
            batch_end = tf.minimum((batch_index + 1) * BATCH_SIZE, limit)
            batch_sum = strategy.run(compute_multiples_sum, args=(batch_start, batch_end))
            total_sum += strategy.reduce(tf.distribute.ReduceOp.SUM, batch_sum, axis=None)

        return total_sum

# Execute the optimized function
sum_result = distributed_sum_multiples(LIMIT)
print(f"The sum of multiples of 3 or 5 below {LIMIT} is: {sum_result.numpy()}")


TensorFlow version: 2.15.0
Number of GPUs available: 1


ResourceExhaustedError: {{function_node __wrapped__Range_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[100000000000] and type int64 on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node Range}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:Range] name: 

In [6]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("Number of GPUs available:", len(tf.config.list_physical_devices('GPU')))

# Initialize TensorFlow distribution strategy
strategy = tf.distribute.MirroredStrategy()

# Define the limit for the computation
LIMIT = 10**10  # Adjust as needed
BATCH_SIZE = 10**5  # Reduce if necessary

def compute_multiples_sum(batch_start, batch_end):
    # Explicitly place the range operation on CPU
    with tf.device('/CPU:0'):
        numbers = tf.range(batch_start, batch_end, dtype=tf.int32)  # Using tf.int32 to save memory
    mask = tf.logical_or(tf.equal(numbers % 3, 0), tf.equal(numbers % 5, 0))
    selected_numbers = tf.boolean_mask(numbers, mask)
    return tf.reduce_sum(selected_numbers)

# Using TensorFlow distribution strategy
with strategy.scope():
    @tf.function
    def distributed_sum_multiples(limit):
        num_batches = (limit + BATCH_SIZE - 1) // BATCH_SIZE
        total_sum = tf.constant(0, dtype=tf.int32)  # Using tf.int32 to save memory

        for batch_index in tf.range(num_batches):
            batch_start = batch_index * BATCH_SIZE
            batch_end = tf.minimum((batch_index + 1) * BATCH_SIZE, limit)
            batch_sum = strategy.run(compute_multiples_sum, args=(batch_start, batch_end))
            total_sum += strategy.reduce(tf.distribute.ReduceOp.SUM, batch_sum, axis=None)

        return total_sum

# Execute the optimized function
sum_result = distributed_sum_multiples(LIMIT)
print(f"The sum of multiples of 3 or 5 below {LIMIT} is: {sum_result.numpy()}")


TensorFlow version: 2.15.0
Number of GPUs available: 1


InvalidArgumentError: Graph execution error:

Detected at node while/range defined at (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 973, in _bootstrap

  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner

  File "<ipython-input-4-6e1f0a01843c>", line 16, in compute_multiples_sum

Detected at node while/range defined at (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 973, in _bootstrap

  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner

  File "<ipython-input-4-6e1f0a01843c>", line 16, in compute_multiples_sum

2 root error(s) found.
  (0) INVALID_ARGUMENT:  Requires start <= limit when delta > 0: 1410100000/1410065408
	 [[{{node while/range}}]]
	 [[while/body/_1/while/range/_26]]
  (1) INVALID_ARGUMENT:  Requires start <= limit when delta > 0: 1410100000/1410065408
	 [[{{node while/range}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_sum_multiples_2230]

In [None]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("Number of GPUs available:", len(tf.config.list_physical_devices('GPU')))

# Initialize TensorFlow distribution strategy
strategy = tf.distribute.MirroredStrategy()

# Define the limit for the computation
LIMIT = 10**12  # Adjust as needed
BATCH_SIZE = 10**5  # Reduce if necessary, consider dynamic adjustment

def compute_multiples_sum(batch_start, batch_end):
    # Place the range operation explicitly on CPU to save GPU memory
    with tf.device('/CPU:0'):
        numbers = tf.range(batch_start, batch_end, dtype=tf.int32)  # Using tf.int32
    mask = tf.logical_or(tf.equal(numbers % 3, 0), tf.equal(numbers % 5, 0))
    selected_numbers = tf.boolean_mask(numbers, mask)
    return tf.reduce_sum(selected_numbers)

# Using TensorFlow distribution strategy
with strategy.scope():
    @tf.function
    def distributed_sum_multiples(limit):
        num_batches = (limit + BATCH_SIZE - 1) // BATCH_SIZE
        total_sum = tf.constant(0, dtype=tf.int32)

        for batch_index in tf.range(num_batches):
            batch_start = batch_index * BATCH_SIZE
            batch_end = tf.minimum((batch_index + 1) * BATCH_SIZE, limit)
            if batch_start < batch_end:  # Ensure batch_start is less than batch_end
                batch_sum = strategy.run(compute_multiples_sum, args=(batch_start, batch_end))
                total_sum += strategy.reduce(tf.distribute.ReduceOp.SUM, batch_sum, axis=None)

        return total_sum

# Execute the optimized function
sum_result = distributed_sum_multiples(LIMIT)
print(f"The sum of multiples of 3 or 5 below {LIMIT} is: {sum_result.numpy()}")


TensorFlow version: 2.15.0
Number of GPUs available: 1


In [None]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("Number of GPUs available:", len(tf.config.list_physical_devices('GPU')))

# Initialize TensorFlow distribution strategy
strategy = tf.distribute.MirroredStrategy()

# Define the limit for the computation
LIMIT = 10**12  # Adjust as needed
BATCH_SIZE = 10**5  # Reduce if necessary, consider dynamic adjustment

def compute_multiples_sum(batch_start, batch_end):
    # Execute range operation on CPU to manage memory use more efficiently
    with tf.device('/CPU:0'):
        numbers = tf.range(batch_start, batch_end, dtype=tf.int32)  # Using tf.int32

    # Execute mathematical operations on GPU for better performance
    with tf.device('/GPU:0'):
        mask = tf.logical_or(tf.equal(numbers % 3, 0), tf.equal(numbers % 5, 0))
        selected_numbers = tf.boolean_mask(numbers, mask)
        return tf.reduce_sum(selected_numbers)

# Using TensorFlow distribution strategy
with strategy.scope():
    @tf.function
    def distributed_sum_multiples(limit):
        num_batches = (limit + BATCH_SIZE - 1) // BATCH_SIZE
        total_sum = tf.constant(0, dtype=tf.int32)

        for batch_index in tf.range(num_batches):
            batch_start = batch_index * BATCH_SIZE
            batch_end = tf.minimum((batch_index + 1) * BATCH_SIZE, limit)
            if batch_start < batch_end:  # Ensure batch_start is less than batch_end
                batch_sum = strategy.run(compute_multiples_sum, args=(batch_start, batch_end))
                total_sum += strategy.reduce(tf.distribute.ReduceOp.SUM, batch_sum, axis=None)

        return total_sum

# Execute the optimized function
sum_result = distributed_sum_multiples(LIMIT)
print(f"The sum of multiples of 3 or 5 below {LIMIT} is: {sum_result.numpy()}")


TensorFlow version: 2.15.0
Number of GPUs available: 1


In [None]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("Number of GPUs available:", len(tf.config.list_physical_devices('GPU')))

# Initialize TensorFlow distribution strategy
strategy = tf.distribute.MirroredStrategy()

# Define the limit for the computation
LIMIT = 10**12
BATCH_SIZE = 10**5

def prepare_data(batch_start, batch_end):
    # This function runs on CPU for data preparation
    with tf.device('/CPU:0'):
        numbers = tf.range(batch_start, batch_end, dtype=tf.int32)
        mask = tf.logical_or(tf.equal(numbers % 3, 0), tf.equal(numbers % 5, 0))
        selected_numbers = tf.boolean_mask(numbers, mask)
    return selected_numbers

def sum_numbers(selected_numbers):
    # This function runs on GPU for computation
    with tf.device('/GPU:0'):
        return tf.reduce_sum(selected_numbers)

# Using TensorFlow distribution strategy
with strategy.scope():
    @tf.function
    def distributed_sum_multiples(limit):
        num_batches = (limit + BATCH_SIZE - 1) // BATCH_SIZE
        total_sum = tf.constant(0, dtype=tf.int32)

        for batch_index in tf.range(num_batches):
            batch_start = batch_index * BATCH_SIZE
            batch_end = tf.minimum((batch_index + 1) * BATCH_SIZE, limit)
            if batch_start < batch_end:  # Ensure batch_start is less than batch_end
                selected_numbers = strategy.run(prepare_data, args=(batch_start, batch_end))
                batch_sum = strategy.run(sum_numbers, args=(selected_numbers,))
                total_sum += strategy.reduce(tf.distribute.ReduceOp.SUM, batch_sum, axis=None)

        return total_sum

# Execute the optimized function
sum_result = distributed_sum_multiples(LIMIT)
print(f"The sum of multiples of 3 or 5 below {LIMIT} is: {sum_result.numpy()}")


TensorFlow version: 2.15.0
Number of GPUs available: 1
