# Bitcoin Puzzle 71 - GPU Optimized Solver (Kaggle)

**Versão otimizada para GPU com algoritmos genéticos avançados**

Este notebook foi otimizado para execução em Kaggle com foco em:
- Aceleração GPU com PyTorch
- Algoritmos genéticos inteligentes
- Busca adaptativa em espaço de chaves
- Detecção automática de ambiente

In [None]:
# PARTE 1: Configuração Inicial e Detecção de Ambiente
import os
import sys
import subprocess
import importlib
import platform
from pathlib import Path

class EnvironmentDetector:
    """Probe the runtime for platform, hosted-notebook and CUDA information."""

    @staticmethod
    def detect_environment():
        """Collect a summary of the current execution environment.

        Returns:
            dict: ``platform`` (value of ``platform.system()``), ``is_kaggle``
            and ``is_colab`` (whether the ``KAGGLE_KERNEL_RUN_TYPE`` /
            ``COLAB_GPU`` environment variables are set), ``has_gpu``,
            ``gpu_count`` and, only when a GPU is reported, ``gpu_name``.
        """
        env_info = {
            'platform': platform.system(),
            'is_kaggle': 'KAGGLE_KERNEL_RUN_TYPE' in os.environ,
            'is_colab': 'COLAB_GPU' in os.environ,
            'has_gpu': False,
            'gpu_count': 0
        }

        try:
            import torch
            env_info['has_gpu'] = torch.cuda.is_available()
            env_info['gpu_count'] = torch.cuda.device_count()
            if env_info['has_gpu']:
                env_info['gpu_name'] = torch.cuda.get_device_name(0)
        except Exception:
            # Best effort: torch may be missing (it is installed later) or the
            # CUDA runtime may be unusable. Only ordinary errors are ignored so
            # that KeyboardInterrupt / SystemExit still propagate.
            pass

        # Force GPU mode on Kaggle even when PyTorch does not detect a GPU.
        # NOTE(review): this reports hardware PyTorch cannot see; code that
        # later allocates on 'cuda:*' will fail if no GPU is really attached.
        if env_info['is_kaggle'] and not env_info['has_gpu']:
            print("‚ö†Ô∏è Ambiente Kaggle detectado, mas GPU n√£o detectada pelo PyTorch.")
            print("üîÑ For√ßando configura√ß√£o para 2 GPUs (padr√£o do Kaggle)...")
            env_info['has_gpu'] = True
            env_info['gpu_count'] = 2
            env_info['gpu_name'] = "Tesla P100"  # GPU commonly offered on Kaggle

        return env_info

# Detect the environment once and display it
env = EnvironmentDetector.detect_environment()
print(f"Ambiente: {env}")

# Third-party packages installed on demand. ``hashlib`` is intentionally not
# listed: it ships with the standard library and cannot be installed via pip.
required_packages = ['torch', 'numpy', 'ecdsa', 'base58']

for package in required_packages:
    try:
        importlib.import_module(package)
    except ImportError:
        pip_install = [sys.executable, '-m', 'pip', 'install']
        if package == 'torch':
            # torch is fetched together with torchvision from the CUDA 11.8 wheel index
            pip_install += ['torch', 'torchvision', '--index-url', 'https://download.pytorch.org/whl/cu118']
        else:
            pip_install.append(package)
        subprocess.check_call(pip_install)

In [None]:
# PARTE 2: Importações e Configurações GPU
import torch
import torch.nn as nn
import torch.multiprocessing as mp
import numpy as np
import hashlib
import time
import random
from typing import List, Tuple, Optional
import logging
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

# Logging setup: timestamped INFO-level messages
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Multi-GPU settings
IS_KAGGLE = 'KAGGLE_KERNEL_RUN_TYPE' in os.environ
FORCE_GPU = IS_KAGGLE  # On Kaggle, assume GPUs exist even if PyTorch does not report them
GPU_COUNT = torch.cuda.device_count()
DEVICES = []

if FORCE_GPU and GPU_COUNT == 0:
    # Kaggle session without a detected GPU: pretend the usual 2 GPUs are present.
    # NOTE(review): these devices are not backed by detected hardware.
    GPU_COUNT = 2
    print(f"üñ•Ô∏è Ambiente Kaggle detectado! For√ßando configura√ß√£o para {GPU_COUNT} GPUs")
    MAIN_DEVICE = torch.device('cuda:0')
    DEVICES.extend(torch.device(f'cuda:{idx}') for idx in range(GPU_COUNT))
elif torch.cuda.is_available():
    # Real CUDA devices: register each one and report its name and memory
    DEVICES.extend(torch.device(f'cuda:{idx}') for idx in range(GPU_COUNT))
    MAIN_DEVICE = torch.device('cuda:0')
    print(f"üñ•Ô∏è Usando {GPU_COUNT} GPUs:")
    for i in range(GPU_COUNT):
        print(f"   GPU {i}: {torch.cuda.get_device_name(i)} - {torch.cuda.get_device_properties(i).total_memory / 1e9:.1f} GB")
    torch.cuda.empty_cache()  # Start from a clean allocator cache
else:
    # CPU fallback: a single CPU device is the only entry
    MAIN_DEVICE = torch.device('cpu')
    DEVICES = [MAIN_DEVICE]
    print(f"‚ö†Ô∏è Nenhuma GPU detectada, usando CPU")
    print(f"‚ÑπÔ∏è Lembre-se: este c√≥digo est√° otimizado para GPUs do Kaggle")

# Parallelism settings: at most 16 threads, otherwise twice the CPU core count.
# os.cpu_count() returns None when the count cannot be determined, so fall
# back to a single core instead of failing on ``None * 2``.
NUM_THREADS = min(16, (os.cpu_count() or 1) * 2)
print(f"üßµ Usando {NUM_THREADS} threads para opera√ß√µes paralelas")

# Puzzle 71 settings. The per-device base sizes are larger when CUDA is
# available and are multiplied by the number of GPUs (at least 1) below.
DEFAULT_BATCH_SIZE = 15000 if torch.cuda.is_available() else 1000
DEFAULT_POPULATION_SIZE = 7500 if torch.cuda.is_available() else 500

PUZZLE_71_CONFIG = dict(
    target_address='1BGCfpwRDma3ViBsbQY5eZnRx8XcJpzSKV',
    bit_length=71,
    min_range=1 << 70,
    max_range=(1 << 71) - 1,
    batch_size=max(1, GPU_COUNT) * DEFAULT_BATCH_SIZE,  # scales with the GPU count
    population_size=max(1, GPU_COUNT) * DEFAULT_POPULATION_SIZE,  # scales with the GPU count
)

# Echo the effective configuration
print(f"Configura√ß√£o do Puzzle 71: {PUZZLE_71_CONFIG}")
print(f"Espa√ßo de busca: 2^70 = {2**70:,} chaves poss√≠veis")
print(f"Batch size aumentado para: {PUZZLE_71_CONFIG['batch_size']:,} (adaptado para {GPU_COUNT} GPUs)")
print(f"Popula√ß√£o aumentada para: {PUZZLE_71_CONFIG['population_size']:,} (adaptada para {GPU_COUNT} GPUs)")

## Execução Multi-GPU - Instruções para Kaggle

Este notebook foi otimizado para usar múltiplas GPUs no ambiente Kaggle, mas pode apresentar problemas em alguns casos devido à forma como o PyTorch gerencia dispositivos. Se você encontrar erros relacionados a dispositivos incompatíveis, considere:

1. **Reiniciar o kernel**: Às vezes, um simples reinício do kernel resolve problemas de memória GPU.

2. **Forçar dispositivo único**: Caso esteja tendo problemas com múltiplas GPUs, você pode forçar o uso de apenas uma GPU definindo:
   ```python
   GPU_COUNT = 1
   DEVICES = [torch.device('cuda:0')]
   ```

3. **Seções de execução**: Execute o notebook por partes, com as células de configuração primeiro, seguidas pelas células de execução.

4. **Verificar disponibilidade**: Certifique-se de que o Kaggle realmente disponibilizou as GPUs para sua sessão.

In [None]:
# PARTE 3: Utilitários Bitcoin Otimizados para GPU
class BitcoinUtils:
    """Static helpers that derive secp256k1 public keys and Bitcoin addresses.

    Curve points are ``(x, y)`` tuples of Python integers. ``(0, 0)`` is the
    sentinel for the point at infinity and is also used as the placeholder
    for keys whose computation failed.

    All curve arithmetic is done with exact Python integers on the host:
    secp256k1 coordinates are 256-bit values, and a float64 tensor only keeps
    53 bits of precision, so tensor arithmetic cannot represent them.
    """

    @staticmethod
    def private_key_to_public_key_batch_gpu(private_keys_tensor, device_id=0):
        """Derive the public key of every private key in a batch.

        Args:
            private_keys_tensor: Either a ``(high, low)`` tuple of tensors in
                which each key equals ``high * 2**32 + low``, or a single
                tensor holding one key per element.
            device_id: Offset added to the round-robin device index that is
                handed to the worker calls.

        Returns:
            list: ``(x, y)`` tuples in the same order as the input keys. An
            empty list is returned for an empty batch or when the conversion
            fails (the error is logged).
        """
        try:
            # secp256k1 field prime and generator point
            p = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F
            gx = 0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798
            gy = 0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8

            # GPU_COUNT is 0 on CPU-only hosts; never divide by it or size a
            # thread pool with it directly.
            worker_count = max(1, GPU_COUNT)

            if isinstance(private_keys_tensor, tuple):
                # Split format: rebuild the integers from their high/low halves
                high_tensor, low_tensor = private_keys_tensor
                batch_size = high_tensor.shape[0]
                private_keys = []
                if batch_size:
                    chunk_size = max(1, batch_size // worker_count)
                    with ThreadPoolExecutor(max_workers=max(1, min(NUM_THREADS, batch_size))) as executor:
                        futures = [
                            executor.submit(
                                BitcoinUtils._process_key_chunk,
                                high_tensor[start:start + chunk_size],
                                low_tensor[start:start + chunk_size]
                            )
                            for start in range(0, batch_size, chunk_size)
                        ]
                        # Collect in submission order so the key order is preserved
                        for future in futures:
                            private_keys.extend(future.result())
            else:
                # Plain tensor: one key per element
                private_keys = [
                    int(private_keys_tensor[i].item())
                    for i in range(private_keys_tensor.shape[0])
                ]

            if not private_keys:
                return []

            # Split the batch into roughly one chunk per worker
            chunk_len = len(private_keys) // worker_count + 1
            chunks = [
                private_keys[start:start + chunk_len]
                for start in range(0, len(private_keys), chunk_len)
            ]

            public_keys = []
            with ThreadPoolExecutor(max_workers=worker_count) as executor:
                futures = [
                    executor.submit(
                        BitcoinUtils._compute_public_keys_on_gpu,
                        chunk, p, gx, gy, (device_id + index) % worker_count
                    )
                    for index, chunk in enumerate(chunks)
                ]
                # Submission order keeps public_keys[i] aligned with private_keys[i]
                for future in futures:
                    public_keys.extend(future.result())

            return public_keys
        except Exception as e:
            logger.error(f"Erro na convers√£o GPU: {e}")
            logger.error(f"Detalhes: {str(e)}")
            return []

    @staticmethod
    def _process_key_chunk(high_chunk, low_chunk):
        """Rebuild integer keys from paired high/low tensor chunks.

        Each half is converted to ``int`` before combining, so the result is
        exact even when the halves arrive as floating-point values.

        Returns:
            list: ``high * 2**32 + low`` for each pair of elements.
        """
        return [
            int(high.item()) * (2**32) + int(low.item())
            for high, low in zip(high_chunk, low_chunk)
        ]

    @staticmethod
    def _compute_public_keys_on_gpu(private_keys_chunk, p, gx, gy, device_idx=0):
        """Compute the public keys for one chunk of private keys.

        Despite the historical name, the arithmetic runs on the host with
        exact Python integers (see the class docstring). ``device_idx`` is
        kept for interface compatibility and is only used in log messages.

        Args:
            private_keys_chunk: Sequence of integer private keys.
            p: Field prime of the curve.
            gx: x coordinate of the generator point.
            gy: y coordinate of the generator point.
            device_idx: Index of the device this chunk was assigned to.

        Returns:
            list: One ``(x, y)`` tuple per key, in input order. If the
            computation fails the error is logged and the missing entries
            are filled with ``(0, 0)``.
        """
        results = []

        try:
            keys = [int(key) for key in private_keys_chunk]
            xs, ys = BitcoinUtils._batch_ec_multiply(keys, p, gx, gy, device_idx)
            results.extend(zip(xs, ys))
        except Exception as e:
            logger.error(f"Erro no processamento de lote em GPU {device_idx}: {e}")
            # Pad with null points so the output length matches the input
            results.extend([(0, 0)] * (len(private_keys_chunk) - len(results)))

        return results

    @staticmethod
    def _batch_ec_multiply(private_keys, p, gx, gy, device):
        """Multiply the generator point by every scalar in a batch.

        Uses double-and-add. The doublings of the generator (``2**i * G``)
        are the same for every key, so they are computed once per batch.

        Args:
            private_keys: Sequence of non-negative integers, or any object
                exposing ``tolist()`` (such as a tensor) that yields them.
            p: Field prime of the curve.
            gx: x coordinate of the generator point.
            gy: y coordinate of the generator point.
            device: Accepted for interface compatibility; not used.

        Returns:
            tuple: ``(xs, ys)``, two lists of integers with one entry per
            key. A zero scalar yields ``(0, 0)``.

        Raises:
            ValueError: If any scalar is negative.
        """
        to_list = getattr(private_keys, 'tolist', None)
        raw_keys = to_list() if callable(to_list) else private_keys
        scalars = [int(key) for key in raw_keys]
        if any(key < 0 for key in scalars):
            raise ValueError("Chaves privadas devem ser inteiros nao negativos")

        # powers[i] holds 2**i * G, shared by every key in the batch
        max_bits = max((key.bit_length() for key in scalars), default=0)
        powers = []
        point = (gx, gy)
        for bit in range(max_bits):
            if bit:
                point = BitcoinUtils._double_ec_point(point[0], point[1], p)
            powers.append(point)

        result_x = []
        result_y = []
        for key in scalars:
            acc_x, acc_y = 0, 0  # start from the point at infinity
            for bit in range(key.bit_length()):
                if (key >> bit) & 1:
                    acc_x, acc_y = BitcoinUtils._add_ec_points(
                        acc_x, acc_y, powers[bit][0], powers[bit][1], p
                    )
            result_x.append(acc_x)
            result_y.append(acc_y)

        return result_x, result_y

    @staticmethod
    def _add_ec_points(x1, y1, x2, y2, p):
        """Add two affine points on a short-Weierstrass curve with a = 0.

        ``(0, 0)`` is treated as the point at infinity. Coordinates must be
        integers reduced modulo ``p``.

        Returns:
            tuple: The sum as ``(x, y)``, or ``(0, 0)`` for infinity.
        """
        if x1 == 0 and y1 == 0:
            return x2, y2
        if x2 == 0 and y2 == 0:
            return x1, y1
        if x1 == x2:
            if y1 == y2:
                return BitcoinUtils._double_ec_point(x1, y1, p)
            # Same x with a different y: the points are inverses of each other
            return 0, 0

        # Slope of the chord; the inverse uses Fermat's little theorem
        slope = ((y2 - y1) * pow((x2 - x1) % p, p - 2, p)) % p

        x3 = (slope * slope - x1 - x2) % p
        y3 = (slope * (x1 - x3) - y1) % p
        return x3, y3

    @staticmethod
    def _double_ec_point(x, y, p):
        """Double an affine point on a short-Weierstrass curve with a = 0.

        ``(0, 0)`` is treated as the point at infinity. A point whose y
        coordinate is zero has a vertical tangent, so its double is infinity.

        Returns:
            tuple: The doubled point as ``(x, y)``, or ``(0, 0)`` for infinity.
        """
        if x == 0 and y == 0:
            return 0, 0
        if y % p == 0:
            return 0, 0

        # Curve parameter a of secp256k1 is 0
        a = 0

        # Slope of the tangent; the inverse uses Fermat's little theorem
        slope = ((3 * x * x + a) * pow((2 * y) % p, p - 2, p)) % p

        x3 = (slope * slope - 2 * x) % p
        y3 = (slope * (x - x3) - y) % p
        return x3, y3

    @staticmethod
    def public_key_to_address(public_key):
        """Encode a public key as a compressed P2PKH mainnet address.

        Args:
            public_key: ``(x, y)`` tuple of integers.

        Returns:
            str or None: The Base58Check address, or ``None`` when the
            conversion fails (the error is logged).
        """
        try:
            pub_x, pub_y = public_key

            # Compressed SEC format: parity prefix followed by the 32-byte x
            prefix = "02" if pub_y % 2 == 0 else "03"
            pub_bytes = bytes.fromhex(f"{prefix}{pub_x:064x}")

            # HASH160 = RIPEMD160(SHA256(pubkey)).
            # NOTE: hashlib.new('ripemd160') raises ValueError on builds whose
            # OpenSSL does not provide that digest; the error is then logged.
            sha256_hash = hashlib.sha256(pub_bytes).digest()
            ripemd160 = hashlib.new('ripemd160')
            ripemd160.update(sha256_hash)
            hash160 = ripemd160.digest()

            # Version byte 0x00 selects mainnet
            versioned_hash = b'\x00' + hash160

            # Checksum: first 4 bytes of the double SHA256
            checksum = hashlib.sha256(hashlib.sha256(versioned_hash).digest()).digest()[:4]
            address_bytes = versioned_hash + checksum

            # Base58 encoding
            alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
            num = int.from_bytes(address_bytes, 'big')
            digits = []
            while num > 0:
                num, remainder = divmod(num, 58)
                digits.append(alphabet[remainder])

            # Every leading zero byte is encoded as a leading '1'
            leading_zeros = len(address_bytes) - len(address_bytes.lstrip(b'\x00'))
            return '1' * leading_zeros + ''.join(reversed(digits))
        except Exception as e:
            logger.error(f"Erro na convers√£o de endere√ßo: {e}")
            return None

    @staticmethod
    def batch_addresses_check(public_keys, target_address, num_workers=None):
        """Convert public keys to addresses in parallel and look for a target.

        Args:
            public_keys: Iterable of ``(x, y)`` tuples.
            target_address: Address string to search for.
            num_workers: Thread count; defaults to ``NUM_THREADS``.

        Returns:
            tuple: ``(matches, results)`` where ``matches`` lists the public
            keys whose address equals ``target_address`` and ``results`` lists
            every ``(address, public_key)`` pair in input order.
        """
        if num_workers is None:
            num_workers = NUM_THREADS

        public_keys = list(public_keys)
        with ThreadPoolExecutor(max_workers=num_workers) as executor:
            addresses = list(executor.map(BitcoinUtils.public_key_to_address, public_keys))
        results = list(zip(addresses, public_keys))

        matches = [pub_key for addr, pub_key in results if addr == target_address]
        return matches, results

# Confirmation message shown once the BitcoinUtils cell has been executed
print("BitcoinUtils carregado com otimiza√ß√µes multi-GPU avan√ßadas")

In [None]:
# PART 2b: Manual device verification
# Optional cell: run it to inspect the detected devices or to override them

# Report what PyTorch can see
print("\nüñ•Ô∏è Verifica√ß√£o de dispositivos:\n")
if not torch.cuda.is_available():
    print("‚ö†Ô∏è Nenhuma GPU dispon√≠vel, usando apenas CPU")
else:
    print(f"GPUs detectadas: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        # Name plus total / allocated / reserved memory in GB for each GPU
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f"   Mem√≥ria total: {torch.cuda.get_device_properties(i).total_memory / 1e9:.1f} GB")
        print(f"   Mem√≥ria alocada: {torch.cuda.memory_allocated(i) / 1e9:.1f} GB")
        print(f"   Mem√≥ria reservada: {torch.cuda.memory_reserved(i) / 1e9:.1f} GB")

# Show the configuration currently in effect
print(f"\nConfigura√ß√µes atuais:\nGPU_COUNT = {GPU_COUNT}\nDEVICES = {DEVICES}\nMAIN_DEVICE = {MAIN_DEVICE}")

# Manual override (uncomment to use)
# GPU_COUNT = 1  # Force the use of a single GPU
# DEVICES = [torch.device('cuda:0')]  # Use only the first GPU
# MAIN_DEVICE = torch.device('cuda:0')

In [None]:
# PARTE 4: Gerador Inteligente de Chaves
class SmartKeyGenerator:
    def __init__(self, min_range, max_range, device='cpu'):
        self.min_range = min_range
        self.max_range = max_range
        self.device = device
        self.explored_regions = set()
        # Constantes para divis√£o de n√∫meros grandes
        self.SPLIT_VALUE = 2**32  # Usar para dividir o n√∫mero em partes alta e baixa
        # Contador para balanceamento de carga entre GPUs
        self.gpu_counter = 0
        # Cache para valores Fibonacci para evitar rec√°lculo
        self.fibonacci_cache = {0: 0, 1: 1}
        # √öltimos valores de Fibonacci calculados (para otimiza√ß√£o)
        self.last_fibonacci_values = []
        self._precompute_fibonacci()
        
    def _precompute_fibonacci(self, limit=100):
        """Pr√©-calcular valores de Fibonacci para uso posterior"""
        # Limpar cache anterior
        self.last_fibonacci_values = []
        
        # Assegurar que os primeiros 2 valores existam
        if 0 not in self.fibonacci_cache:
            self.fibonacci_cache[0] = 0
        if 1 not in self.fibonacci_cache:
            self.fibonacci_cache[1] = 1
        
        # Pr√©-calcular valores at√© o limite
        for i in range(2, limit+1):  # +1 para incluir o pr√≥prio limite
            if i not in self.fibonacci_cache:
                self.fibonacci_cache[i] = self.fibonacci_cache[i-1] + self.fibonacci_cache[i-2]
            self.last_fibonacci_values.append(self.fibonacci_cache[i])
            
        # Adicionar valores mais pr√≥ximos do range de busca
        n = limit
        
        # Verificar se n j√° est√° no cache (n√£o deveria ser necess√°rio, mas por seguran√ßa)
        if n not in self.fibonacci_cache:
            self.fibonacci_cache[n] = self.fibonacci_cache[n-1] + self.fibonacci_cache[n-2]
            
        while self.fibonacci_cache[n] < self.max_range:
            n += 1
            self.fibonacci_cache[n] = self.fibonacci_cache[n-1] + self.fibonacci_cache[n-2]
            self.last_fibonacci_values.append(self.fibonacci_cache[n])
            
            # Limitar o n√∫mero de valores calculados
            if len(self.last_fibonacci_values) > 200:
                # Manter apenas os 100 valores mais altos
                self.last_fibonacci_values = self.last_fibonacci_values[-100:]
    
    def get_next_device(self):
        """Return the next device in round-robin order.

        Falls back to ``MAIN_DEVICE`` when CUDA is unavailable, when no GPU is
        configured, or when selecting a device raises an error.
        """
        gpu_usable = torch.cuda.is_available() and GPU_COUNT != 0 and bool(DEVICES)
        if not gpu_usable:
            return MAIN_DEVICE

        try:
            slot = self.gpu_counter % len(DEVICES)
            chosen = DEVICES[slot]
            # Advance the position, wrapping at the number of devices
            self.gpu_counter = (self.gpu_counter + 1) % len(DEVICES)
            return chosen
        except Exception as e:
            logger.warning(f"Erro ao obter dispositivo: {e}. Usando dispositivo principal.")
            return MAIN_DEVICE
        
    def generate_smart_batch(self, batch_size, strategy='adaptive'):
        """Gera lote de chaves com estrat√©gia inteligente"""
        if strategy == 'adaptive':
            return self._adaptive_strategy(batch_size)
        elif strategy == 'fibonacci':
            return self._fibonacci_strategy(batch_size)
        elif strategy == 'quantum_inspired':
            return self._quantum_inspired_strategy(batch_size)
        else:
            return self._random_strategy(batch_size)
    
    def generate_smart_multi_batch(self, batch_size, strategies=None):
        """Generate one batch by running several strategies in parallel.

        The batch is divided among the strategies; when ``batch_size`` is not
        a multiple of the strategy count, the first strategies each produce
        one extra key so the requested total is still requested in full.

        Args:
            batch_size: Total number of keys requested.
            strategies: Strategy names to combine. ``None`` or an empty
                sequence selects all four built-in strategies.

        Returns:
            tuple: ``(high, low)`` tensors on ``MAIN_DEVICE``, truncated to at
            most ``batch_size`` rows.
        """
        if not strategies:
            strategies = ['adaptive', 'fibonacci', 'quantum_inspired', 'random']
        strategies = list(strategies)

        # Spread the remainder over the first strategies instead of dropping it
        base_share, remainder = divmod(batch_size, len(strategies))
        shares = [
            base_share + (1 if position < remainder else 0)
            for position in range(len(strategies))
        ]

        # GPU_COUNT is 0 on CPU-only hosts and a pool needs at least one worker
        worker_count = max(1, min(len(strategies), GPU_COUNT))

        with ThreadPoolExecutor(max_workers=worker_count) as executor:
            futures = [
                executor.submit(self._generate_strategy_batch, strategy, share)
                for strategy, share in zip(strategies, shares)
            ]
            # Collect in submission order so the layout is deterministic
            results = [future.result() for future in futures]

        # Move every part to MAIN_DEVICE before concatenating, because
        # torch.cat requires all tensors to be on the same device
        moved_results = [
            (high_tensor.to(MAIN_DEVICE), low_tensor.to(MAIN_DEVICE))
            for high_tensor, low_tensor in results
        ]

        combined_high = torch.cat([part[0] for part in moved_results], dim=0)
        combined_low = torch.cat([part[1] for part in moved_results], dim=0)

        # Never return more rows than requested
        return combined_high[:batch_size], combined_low[:batch_size]
    
    def _generate_strategy_batch(self, strategy, batch_size):
        """Helper para gerar lotes em paralelo"""
        try:
            device = self.get_next_device()
            
            if strategy == 'adaptive':
                keys = self._adaptive_strategy_core(batch_size)
            elif strategy == 'fibonacci':
                keys = self._fibonacci_strategy_core(batch_size)
            elif strategy == 'quantum_inspired':
                keys = self._quantum_inspired_strategy_core(batch_size)
            else:
                keys = self._random_strategy_core(batch_size)
                
            # Converter para tensores no dispositivo correto
            try:
                return self._keys_to_tensor(keys, device)
            except Exception as e:
                logger.warning(f"Erro ao converter tensores para dispositivo {device}: {e}")
                # Tentar com o dispositivo principal como fallback
                return self._keys_to_tensor(keys, MAIN_DEVICE)
        except Exception as e:
            logger.error(f"Erro na gera√ß√£o de lote: {e}")
            # Gerar lote aleat√≥rio como fallback
            fallback_keys = [random.randint(self.min_range, self.max_range) for _ in range(batch_size)]
            return self._keys_to_tensor(fallback_keys, MAIN_DEVICE)
    
    def _adaptive_strategy(self, batch_size):
        """Estrat√©gia adaptativa baseada em padr√µes"""
        keys = self._adaptive_strategy_core(batch_size)
        # Converter em formato seguro para tensores
        return self._keys_to_tensor(keys, self.device)
    
    def _adaptive_strategy_core(self, batch_size):
        """N√∫cleo da estrat√©gia adaptativa (apenas gera√ß√£o de chaves sem convers√£o para tensor)"""
        keys = []
        
        # 40% pr√≥ximo ao meio do range
        mid_point = (self.min_range + self.max_range) // 2
        mid_keys = int(batch_size * 0.4)
        for _ in range(mid_keys):
            offset = random.randint(-2**35, 2**35)
            key = max(self.min_range, min(self.max_range, mid_point + offset))
            keys.append(key)
        
        # 30% em pontos de interesse (m√∫ltiplos de n√∫meros especiais)
        special_keys = int(batch_size * 0.3)
        special_numbers = [7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
        for _ in range(special_keys):
            base = random.choice(special_numbers)
            multiplier = random.randint(self.min_range // base, self.max_range // base)
            key = base * multiplier
            if self.min_range <= key <= self.max_range:
                keys.append(key)
        
        # 30% completamente aleat√≥rio
        random_keys = batch_size - len(keys)
        for _ in range(random_keys):
            key = random.randint(self.min_range, self.max_range)
            keys.append(key)
            
        return keys
    
    def _fibonacci_strategy(self, batch_size):
        """Estrat√©gia baseada em sequ√™ncia de Fibonacci"""
        keys = self._fibonacci_strategy_core(batch_size)
        # Converter em formato seguro para tensores
        return self._keys_to_tensor(keys, self.device)
    
    def _fibonacci_strategy_core(self, batch_size):
        """Build ``batch_size`` candidate keys derived from cached Fibonacci numbers.

        Every key is produced by one of four equally likely recipes:

        1. a cached Fibonacci value times a random multiplier in 1..10000,
        2. the sum of two cached Fibonacci values,
        3. a cached Fibonacci value plus a random offset in +/- 1,000,000,
        4. a uniform random draw from the configured range.

        Recipes 1-3 fold their result back towards
        ``[self.min_range, self.max_range]``; anything still outside that
        interval afterwards is replaced by a uniform random key.

        Args:
            batch_size: number of keys to generate.

        Returns:
            list[int]: ``batch_size`` keys, all inside the configured range.
        """
        # Ask for a refill when the Fibonacci cache holds fewer than 50 values.
        if len(self.last_fibonacci_values) < 50:
            self._precompute_fibonacci(100)

        def fold_upwards(value):
            """Lift a too-small value by min_range, then wrap any overshoot."""
            if value < self.min_range:
                value += self.min_range
            if value > self.max_range:
                value = self.min_range + (value % (self.max_range - self.min_range))
            return value

        generated = []
        for _ in range(batch_size):
            recipe = random.randint(1, 4)

            if recipe == 1:
                # Multiple of a Fibonacci number, reduced modulo max_range + 1.
                base = random.choice(self.last_fibonacci_values)
                factor = random.randint(1, 10000)
                candidate = fold_upwards((base * factor) % (self.max_range + 1))
            elif recipe == 2:
                # Sum of two (possibly identical) Fibonacci numbers.
                first = random.choice(self.last_fibonacci_values)
                second = random.choice(self.last_fibonacci_values)
                candidate = fold_upwards((first + second) % (self.max_range + 1))
            elif recipe == 3:
                # Neighbourhood of a Fibonacci number.
                base = random.choice(self.last_fibonacci_values)
                candidate = base + random.randint(-1000000, 1000000)
                if candidate < self.min_range:
                    candidate = self.min_range + (candidate % (self.max_range - self.min_range))
                if candidate > self.max_range:
                    candidate = self.max_range - (candidate % (self.max_range - self.min_range))
            else:
                # Plain uniform draw.
                candidate = random.randint(self.min_range, self.max_range)

            # Last line of defence: never emit a key outside the range.
            if not (self.min_range <= candidate <= self.max_range):
                candidate = random.randint(self.min_range, self.max_range)
            generated.append(candidate)

        return generated
    
    def _quantum_inspired_strategy(self, batch_size):
        """Generate keys with the "quantum-inspired" strategy and return them as tensors.

        Delegates key generation to ``_quantum_inspired_strategy_core`` and
        converts the resulting list with ``_keys_to_tensor`` on ``self.device``.
        """
        return self._keys_to_tensor(
            self._quantum_inspired_strategy_core(batch_size),
            self.device,
        )
    
    def _quantum_inspired_strategy_core(self, batch_size):
        """Generate keys as noisy random blends of a few reference points.

        First up to 8 "base states" (reference keys) are chosen: the middle of
        the range, a cached Fibonacci value modulo ``max_range``, a small prime
        times 10**9 clamped into the range, or a uniform random key. Each
        output key is then a random convex combination of two base states
        (computed in floating point) plus integer noise in +/- 2**24, clamped
        to ``[self.min_range, self.max_range]``.

        Args:
            batch_size: number of keys to generate.

        Returns:
            list[int]: ``batch_size`` keys inside the configured range.
        """
        lo, hi = self.min_range, self.max_range

        def clamp(value):
            return max(lo, min(hi, value))

        # Reference points: one per ten requested keys (plus one), capped at 8.
        anchors = []
        for _ in range(min(8, batch_size // 10 + 1)):
            kind = random.randint(1, 4)
            if kind == 1:
                anchor = (lo + hi) // 2
            elif kind == 2 and self.last_fibonacci_values:
                anchor = random.choice(self.last_fibonacci_values) % hi
            elif kind == 3:
                anchor = clamp(random.choice([7, 11, 13, 17, 19, 23, 29, 31]) * 10**9)
            else:
                # Also covers kind 2 when the Fibonacci cache is empty.
                anchor = random.randint(lo, hi)
            anchors.append(anchor)

        samples = []
        for _ in range(batch_size):
            first = random.choice(anchors)
            second = random.choice(anchors)

            # Random weights that sum to one.
            w_first = random.random()
            w_second = 1 - w_first
            blended = int(w_first * first + w_second * second)

            # Small random perturbation around the blended point.
            jitter = random.randint(-2**24, 2**24)
            samples.append(clamp(blended + jitter))

        return samples
    
    def _random_strategy(self, batch_size):
        """Generate uniformly random keys and return them as tensors.

        Delegates key generation to ``_random_strategy_core`` and converts the
        resulting list with ``_keys_to_tensor`` on ``self.device``.
        """
        return self._keys_to_tensor(self._random_strategy_core(batch_size), self.device)
    
    def _random_strategy_core(self, batch_size):
        """Return ``batch_size`` keys drawn uniformly from [min_range, max_range]."""
        draws = []
        for _ in range(batch_size):
            draws.append(random.randint(self.min_range, self.max_range))
        return draws
    
    def _keys_to_tensor(self, keys, device):
        """Split keys into (high, low) words and return them as two int64 tensors.

        Each key is decomposed as ``key == high * self.SPLIT_VALUE + low`` so
        that values too wide for a single int64 can still be stored.

        Args:
            keys: iterable of integer keys.
            device: torch device on which both tensors are created.

        Returns:
            tuple: ``(high_tensor, low_tensor)``, both ``torch.int64``.
        """
        words = [divmod(key, self.SPLIT_VALUE) for key in keys]
        upper = [quotient for quotient, _ in words]
        lower = [remainder for _, remainder in words]

        return (
            torch.tensor(upper, dtype=torch.int64, device=device),
            torch.tensor(lower, dtype=torch.int64, device=device),
        )


In [None]:
# PARTE 5: Algoritmo Genético Otimizado para Multi-GPU
class GeneticAlgorithm:
    """Genetic algorithm over large integer keys.

    Keys may be wider than 64 bits, so the population is stored as a pair of
    int64 tensors ``(high, low)`` with ``key == high * SPLIT_VALUE + low``.

    The class relies on names defined elsewhere in the file: ``GPU_COUNT``,
    ``DEVICES``, ``MAIN_DEVICE``, ``logger`` and ``BitcoinUtils``.
    """

    # Fixed-point denominator used by crossover(). random.random() yields
    # multiples of 2**-53, so ``alpha * 2**53`` is an exact integer weight.
    _WEIGHT_SCALE = 2**53

    def __init__(self, population_size, mutation_rate=0.1, crossover_rate=0.8, device='cpu'):
        """Store the hyper-parameters; the population itself is created later.

        Args:
            population_size: number of individuals kept per generation.
            mutation_rate: probability that mutate() alters an individual.
            crossover_rate: probability that crossover() blends two parents.
            device: torch device on which the population tensors live.
        """
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.main_device = device
        self.population = None  # tuple of int64 tensors (high, low) once initialized
        self.fitness_scores = None
        self.generation = 0
        # Word size used to split large keys into two int64-safe parts.
        self.SPLIT_VALUE = 2**32
        # Round-robin cursor used by get_next_device().
        self.gpu_counter = 0

    def get_next_device(self):
        """Return the next device in round-robin order, or MAIN_DEVICE as fallback."""
        if not torch.cuda.is_available() or GPU_COUNT == 0 or not DEVICES:
            return MAIN_DEVICE

        try:
            device = DEVICES[self.gpu_counter % len(DEVICES)]
            self.gpu_counter = (self.gpu_counter + 1) % len(DEVICES)
            return device
        except Exception as e:
            logger.warning(f"Erro ao obter dispositivo: {e}. Usando dispositivo principal.")
            return MAIN_DEVICE

    # ------------------------------------------------------------------
    # Population initialization
    # ------------------------------------------------------------------
    def initialize_population(self, min_range, max_range):
        """Create a diverse initial population inside [min_range, max_range].

        Uses the multi-GPU path when more than one GPU is configured and CUDA
        is available, otherwise the single-device path.

        Returns:
            tuple: the new population as ``(high_tensor, low_tensor)``.
        """
        if GPU_COUNT > 1 and torch.cuda.is_available():
            return self._initialize_multi_gpu_population(min_range, max_range)
        return self._initialize_single_population(min_range, max_range)

    @staticmethod
    def _generate_keys(min_range, max_range, count):
        """Return ``count`` seed keys: ~50% uniform, ~25% near the midpoint, rest near the edges."""
        keys = [random.randint(min_range, max_range) for _ in range(count // 2)]

        mid_point = (min_range + max_range) // 2
        for _ in range(count // 4):
            offset = random.randint(-2**35, 2**35)
            keys.append(max(min_range, min(max_range, mid_point + offset)))

        # Edge windows are clipped so they stay valid for ranges narrower than 2**35.
        low_edge_end = min(max_range, min_range + 2**35)
        high_edge_start = max(min_range, max_range - 2**35)
        for _ in range(count - len(keys)):
            if random.random() < 0.5:
                keys.append(random.randint(min_range, low_edge_end))
            else:
                keys.append(random.randint(high_edge_start, max_range))

        return keys

    def _initialize_single_population(self, min_range, max_range):
        """Create the whole population on ``self.main_device`` and reset fitness to zero."""
        keys = self._generate_keys(min_range, max_range, self.population_size)
        self.population = self._keys_to_tensor(keys, self.main_device)
        self.fitness_scores = torch.zeros(self.population_size, device=self.main_device)
        return self.population

    def _initialize_multi_gpu_population(self, min_range, max_range):
        """Create the population in per-GPU slices and merge them on the main device.

        Every worker's result is collected before merging. A slice whose
        worker fails is regenerated on ``self.main_device``, and the remainder
        of ``population_size / GPU_COUNT`` is generated there as well, so the
        merged population always has exactly ``population_size`` individuals.
        """
        if GPU_COUNT <= 0:
            return self._initialize_single_population(min_range, max_range)

        pop_per_gpu = self.population_size // GPU_COUNT

        with ThreadPoolExecutor(max_workers=GPU_COUNT) as executor:
            futures = [
                executor.submit(
                    self._generate_subpopulation,
                    min_range,
                    max_range,
                    pop_per_gpu,
                    torch.device(f'cuda:{i}'),
                )
                for i in range(GPU_COUNT)
            ]

        high_tensors = []
        low_tensors = []
        for i, future in enumerate(futures):
            try:
                high_part, low_part = future.result()
            except Exception as e:
                # Device errors surface here (not at submit time); fall back to the main device.
                logger.warning(f"Erro ao configurar dispositivo cuda:{i}: {e}")
                high_part, low_part = self._generate_subpopulation(
                    min_range, max_range, pop_per_gpu, self.main_device
                )
            # torch.cat needs all parts on the same device.
            high_tensors.append(high_part.to(self.main_device))
            low_tensors.append(low_part.to(self.main_device))

        # Individuals lost to the integer division above.
        missing = self.population_size - pop_per_gpu * GPU_COUNT
        if missing > 0:
            high_part, low_part = self._generate_subpopulation(
                min_range, max_range, missing, self.main_device
            )
            high_tensors.append(high_part)
            low_tensors.append(low_part)

        self.population = (
            torch.cat(high_tensors, dim=0)[:self.population_size],
            torch.cat(low_tensors, dim=0)[:self.population_size],
        )
        self.fitness_scores = torch.zeros(self.population_size, device=self.main_device)
        return self.population

    def _generate_subpopulation(self, min_range, max_range, pop_size, device):
        """Generate ``pop_size`` seed keys and return them as (high, low) tensors on ``device``."""
        return self._keys_to_tensor(self._generate_keys(min_range, max_range, pop_size), device)

    # ------------------------------------------------------------------
    # Representation helpers
    # ------------------------------------------------------------------
    def _keys_to_tensor(self, keys, device):
        """Convert integer keys into a ``(high_tensor, low_tensor)`` pair of int64 tensors."""
        words = [divmod(key, self.SPLIT_VALUE) for key in keys]
        high_tensor = torch.tensor([h for h, _ in words], dtype=torch.int64, device=device)
        low_tensor = torch.tensor([l for _, l in words], dtype=torch.int64, device=device)
        return (high_tensor, low_tensor)

    def _tensor_to_keys(self, tensor_pair):
        """Rebuild the Python integer keys from a ``(high_tensor, low_tensor)`` pair."""
        high_tensor, low_tensor = tensor_pair
        # One bulk transfer per tensor instead of one .item() call per element.
        return [
            high * self.SPLIT_VALUE + low
            for high, low in zip(high_tensor.tolist(), low_tensor.tolist())
        ]

    @staticmethod
    def _as_int_list(values):
        """Return ``values`` (tensor or sequence) as a plain Python list."""
        return values.tolist() if hasattr(values, "tolist") else list(values)

    # ------------------------------------------------------------------
    # Fitness evaluation
    # ------------------------------------------------------------------
    def evaluate_fitness_batch(self, target_address):
        """Evaluate the fitness of the whole population against ``target_address``.

        Returns:
            torch.Tensor: one score per individual; all zeros if evaluation fails.
        """
        try:
            if GPU_COUNT > 1 and torch.cuda.is_available():
                return self._evaluate_fitness_multi_gpu(target_address)
            return self._evaluate_fitness_single_device(target_address)
        except Exception as e:
            logger.error(f"Erro na avalia√ß√£o de fitness: {e}")
            return torch.zeros(self.population_size, device=self.main_device)

    def _evaluate_fitness_single_device(self, target_address):
        """Evaluate fitness on ``self.main_device``; returns zeros on any error."""
        try:
            keys = self._tensor_to_keys(self.population)

            # NOTE(review): float64 has a 53-bit mantissa, so keys wider than
            # 53 bits are rounded here before reaching BitcoinUtils - confirm
            # that this helper is meant to receive approximate keys.
            temp_tensor = torch.tensor(keys, dtype=torch.float64, device=self.main_device)

            public_keys = BitcoinUtils.private_key_to_public_key_batch_gpu(temp_tensor)
            matches, results = BitcoinUtils.batch_addresses_check(public_keys, target_address)

            fitness_scores = []
            for i, (addr, _) in enumerate(results):
                if matches and addr == target_address:
                    fitness_scores.append(1000000.0)  # solution found
                    logger.info(f"üéâ CHAVE ENCONTRADA! {keys[i]}")
                else:
                    # Yields 0.0 for a missing address.
                    fitness_scores.append(self._calculate_address_similarity(addr, target_address))

            self.fitness_scores = torch.tensor(fitness_scores, device=self.main_device)
            return self.fitness_scores

        except Exception as e:
            logger.error(f"Erro na avalia√ß√£o de fitness (single): {e}")
            return torch.zeros(self.population_size, device=self.main_device)

    def _evaluate_fitness_multi_gpu(self, target_address):
        """Evaluate fitness by splitting the keys into chunks, one worker per chunk.

        Scores are merged in population order; each chunk's scores are padded
        or truncated to the chunk length so the result stays aligned with the
        population. Returns zeros on any error.
        """
        try:
            keys = self._tensor_to_keys(self.population)

            chunk_size = len(keys) // GPU_COUNT + 1
            chunks = [keys[i:i + chunk_size] for i in range(0, len(keys), chunk_size)]

            with ThreadPoolExecutor(max_workers=GPU_COUNT) as executor:
                futures = [
                    executor.submit(
                        self._evaluate_chunk_fitness,
                        chunk,
                        target_address,
                        i % GPU_COUNT,
                    )
                    for i, chunk in enumerate(chunks)
                ]

            all_fitness = []
            solution_key = None
            for chunk, future in zip(chunks, futures):
                chunk_fitness, found_key = future.result()
                if found_key:
                    solution_key = found_key

                scores = list(chunk_fitness)[:len(chunk)]
                scores.extend([0.0] * (len(chunk) - len(scores)))
                all_fitness.extend(scores)

            # Make sure the winning key carries the "solved" score.
            if solution_key is not None:
                for i, key in enumerate(keys):
                    if key == solution_key:
                        all_fitness[i] = 1000000.0
                        break

            self.fitness_scores = torch.tensor(all_fitness, device=self.main_device)
            return self.fitness_scores

        except Exception as e:
            logger.error(f"Erro na avalia√ß√£o de fitness (multi-GPU): {e}")
            return torch.zeros(self.population_size, device=self.main_device)

    def _evaluate_chunk_fitness(self, key_chunk, target_address, device_idx):
        """Score one chunk of keys on GPU ``device_idx`` (CPU if CUDA is unavailable).

        Returns:
            tuple: ``(scores, found_key)`` where ``found_key`` is the matching
            key or ``None``. On error, all scores are 0.0 and the key is None.
        """
        device = torch.device(f'cuda:{device_idx}' if torch.cuda.is_available() else 'cpu')

        try:
            # NOTE(review): same float64 rounding concern as in the single-device path.
            temp_tensor = torch.tensor(key_chunk, dtype=torch.float64, device=device)
            public_keys = BitcoinUtils.private_key_to_public_key_batch_gpu(temp_tensor, device_idx)

            fitness_scores = []
            found_key = None

            for i, pub_key in enumerate(public_keys):
                if not pub_key:
                    fitness_scores.append(0.0)
                    continue

                address = BitcoinUtils.public_key_to_address(pub_key)
                if address == target_address:
                    fitness_scores.append(1000000.0)  # solution found
                    found_key = key_chunk[i]
                    logger.info(f"üéâ CHAVE ENCONTRADA (GPU {device_idx})! {found_key}")
                else:
                    fitness_scores.append(self._calculate_address_similarity(address, target_address))

            return fitness_scores, found_key

        except Exception as e:
            logger.error(f"Erro na avalia√ß√£o de chunk (GPU {device_idx}): {e}")
            return [0.0] * len(key_chunk), None

    def _calculate_address_similarity(self, addr1, addr2):
        """Return the fraction of positions at which the two addresses agree.

        The count of equal characters at equal positions is divided by the
        longer address length; 0.0 is returned when either address is empty.
        """
        if not addr1 or not addr2:
            return 0.0

        matches = sum(1 for a, b in zip(addr1, addr2) if a == b)
        return matches / max(len(addr1), len(addr2))

    # ------------------------------------------------------------------
    # Genetic operators
    # ------------------------------------------------------------------
    def selection(self, tournament_size=5):
        """Tournament selection.

        For each of the ``population_size`` slots, ``tournament_size`` random
        individuals compete and the one with the highest fitness wins. All
        tournaments are drawn and resolved in one batched tensor operation.

        Returns:
            tuple: ``(high_tensor, low_tensor)`` of the selected individuals.
        """
        high_tensor, low_tensor = self.population
        size = self.population_size
        if size <= 0:
            return high_tensor[:0].clone(), low_tensor[:0].clone()

        contenders = torch.randint(0, size, (size, tournament_size), device=self.main_device)
        best_slot = torch.argmax(self.fitness_scores[contenders], dim=1, keepdim=True)
        winners = contenders.gather(1, best_slot).squeeze(1)

        return high_tensor[winners], low_tensor[winners]

    def crossover(self, parent1_high, parent1_low, parent2_high, parent2_low):
        """Arithmetic crossover of two parents given as (high, low) words.

        With probability ``crossover_rate`` the children are complementary
        convex combinations of the parents, otherwise copies of them. The
        blend uses exact integer fixed-point arithmetic: float arithmetic
        would round ~70-bit keys to 53 bits and could leave the parents'
        interval.

        Returns:
            tuple: ``((child1_high, child1_low), (child2_high, child2_low))``.
        """
        parent1 = int(parent1_high) * self.SPLIT_VALUE + int(parent1_low)
        parent2 = int(parent2_high) * self.SPLIT_VALUE + int(parent2_low)

        if random.random() > self.crossover_rate:
            child1, child2 = parent1, parent2
        else:
            scale = self._WEIGHT_SCALE
            weight = int(random.random() * scale)  # exact: see _WEIGHT_SCALE
            rest = scale - weight
            child1 = (weight * parent1 + rest * parent2) // scale
            child2 = (rest * parent1 + weight * parent2) // scale

        return divmod(child1, self.SPLIT_VALUE), divmod(child2, self.SPLIT_VALUE)

    def mutate(self, ind_high, ind_low, min_range, max_range):
        """Mutate one individual with probability ``mutation_rate``.

        One of three mutations is applied: a single bit flip, a small
        arithmetic step (+/- 2**20), or a gaussian step whose standard
        deviation is 1% of the range. The result is always clamped to
        ``[min_range, max_range]``.

        Returns:
            tuple: ``(high, low)`` words of the (possibly unchanged) individual.
        """
        if random.random() > self.mutation_rate:
            return ind_high, ind_low

        individual = int(ind_high) * self.SPLIT_VALUE + int(ind_low)
        mutation_type = random.choice(['bit_flip', 'arithmetic', 'gaussian'])

        if mutation_type == 'bit_flip':
            # Flipping bit b moves the key by exactly 2**b, so bits at or above
            # the bit length of the range width can never land inside the range.
            span_bits = max(1, (max_range - min_range).bit_length())
            candidate = individual ^ (1 << random.randrange(span_bits))
        elif mutation_type == 'arithmetic':
            candidate = individual + random.randint(-2**20, 2**20)
        else:  # gaussian
            std_dev = (max_range - min_range) * 0.01
            candidate = individual + int(random.gauss(0, std_dev))

        result = max(min_range, min(max_range, candidate))
        return divmod(result, self.SPLIT_VALUE)

    # ------------------------------------------------------------------
    # Evolution
    # ------------------------------------------------------------------
    def evolve(self, min_range, max_range):
        """Advance the population by one generation and return it."""
        if GPU_COUNT > 1 and torch.cuda.is_available():
            return self._evolve_multi_gpu(min_range, max_range)
        return self._evolve_single_device(min_range, max_range)

    def _select_parent_lists(self):
        """Run selection and return the parents as two even-length Python lists."""
        selected_high, selected_low = self.selection()
        highs = self._as_int_list(selected_high)
        lows = self._as_int_list(selected_low)
        # With an odd count, pair the last parent with itself.
        if len(highs) % 2 == 1:
            highs.append(highs[-1])
            lows.append(lows[-1])
        return highs, lows

    def _breed_pairs(self, highs, lows, min_range, max_range):
        """Apply crossover and mutation to consecutive parent pairs; a trailing odd parent is skipped."""
        new_high = []
        new_low = []
        for i in range(0, len(highs) - 1, 2):
            (child1_high, child1_low), (child2_high, child2_low) = self.crossover(
                highs[i], lows[i], highs[i + 1], lows[i + 1]
            )
            child1_high, child1_low = self.mutate(child1_high, child1_low, min_range, max_range)
            child2_high, child2_low = self.mutate(child2_high, child2_low, min_range, max_range)
            new_high.extend([child1_high, child2_high])
            new_low.extend([child1_low, child2_low])
        return new_high, new_low

    def _apply_elitism(self, new_high, new_low):
        """Install the next generation: children plus the top 10% of the current population."""
        elite_size = self.population_size // 10
        elite_indices = torch.topk(self.fitness_scores, elite_size).indices

        high_tensor, low_tensor = self.population
        keep = self.population_size - elite_size
        next_high = new_high[:keep] + high_tensor[elite_indices].tolist()
        next_low = new_low[:keep] + low_tensor[elite_indices].tolist()

        self.population = (
            torch.tensor(next_high[:self.population_size], dtype=torch.int64, device=self.main_device),
            torch.tensor(next_low[:self.population_size], dtype=torch.int64, device=self.main_device),
        )
        self.generation += 1
        return self.population

    def _evolve_single_device(self, min_range, max_range):
        """One generation: selection, crossover, mutation and elitism in the calling thread."""
        highs, lows = self._select_parent_lists()
        new_high, new_low = self._breed_pairs(highs, lows, min_range, max_range)
        return self._apply_elitism(new_high, new_low)

    def _evolve_multi_gpu(self, min_range, max_range):
        """One generation with the breeding work split across ``GPU_COUNT`` worker threads.

        All parent pairs are distributed (the last worker takes any shorter
        remainder), so no individual is dropped when ``population_size`` is
        not a multiple of ``2 * GPU_COUNT``.
        """
        highs, lows = self._select_parent_lists()

        workers = max(1, GPU_COUNT)
        total_pairs = len(highs) // 2
        step = 2 * (-(-total_pairs // workers))  # parents per worker (ceil division)

        futures = []
        if step > 0:
            with ThreadPoolExecutor(max_workers=workers) as executor:
                for gpu_id, start in enumerate(range(0, len(highs), step)):
                    futures.append(executor.submit(
                        self._evolve_population_chunk,
                        highs[start:start + step],
                        lows[start:start + step],
                        min_range,
                        max_range,
                        gpu_id,
                    ))

        new_high = []
        new_low = []
        for future in futures:
            chunk_high, chunk_low = future.result()
            new_high.extend(chunk_high)
            new_low.extend(chunk_low)

        return self._apply_elitism(new_high, new_low)

    def _evolve_population_chunk(self, selected_high, selected_low, min_range, max_range, device_id):
        """Breed one chunk of selected parents.

        ``selected_high`` / ``selected_low`` may be tensors or lists.
        ``device_id`` is kept for interface compatibility only: the breeding
        operates on Python integers and does not touch any device.

        Returns:
            tuple: ``(new_high, new_low)`` lists of child words.
        """
        return self._breed_pairs(
            self._as_int_list(selected_high),
            self._as_int_list(selected_low),
            min_range,
            max_range,
        )

# Notebook feedback: confirms that this cell (the GeneticAlgorithm definition) was executed.
print("GeneticAlgorithm carregado com otimiza√ß√µes multi-GPU avan√ßadas")

In [None]:
# PARTE 6: Execução Automatizada Multi-GPU

class BitcoinPuzzleSolver:
    """Drive the key search by combining batched key generation with a genetic algorithm.

    The solver generates batches of candidate private keys through
    ``SmartKeyGenerator``, derives public keys and checks addresses through
    ``BitcoinUtils``, and periodically runs a short ``GeneticAlgorithm`` search.
    Private keys are handled as ``(high, low)`` pairs and recombined as
    ``high * 2**32 + low``.
    """

    # Fitness above this value is treated as an exact match of the target address.
    SOLUTION_FITNESS_THRESHOLD = 0.9999

    def __init__(self, target_address, min_range, max_range, batch_size=None, population_size=None):
        """Set up generators, statistics and the checkpoint directory.

        Args:
            target_address: Address the search is trying to match.
            min_range: Lower bound of the private-key search range.
            max_range: Upper bound of the private-key search range.
            batch_size: Keys per batch. When ``None`` it defaults to
                ``DEFAULT_BATCH_SIZE`` scaled by the number of GPUs (at least 1).
            population_size: Genetic population size. When ``None`` it defaults
                to ``DEFAULT_POPULATION_SIZE`` scaled the same way.
        """
        self.target_address = target_address
        self.min_range = min_range
        self.max_range = max_range

        # Scale the defaults with the number of GPUs; max(1, ...) covers CPU-only runs.
        gpu_factor = max(1, GPU_COUNT)
        self.batch_size = DEFAULT_BATCH_SIZE * gpu_factor if batch_size is None else batch_size
        self.population_size = (
            DEFAULT_POPULATION_SIZE * gpu_factor if population_size is None else population_size
        )

        # Bookkeeping of explored regions and promising candidates.
        self.explored_regions = set()
        self.best_candidates = []

        # Statistics.
        self.total_keys_checked = 0
        self.total_batches = 0
        self.start_time = None
        self.last_report_time = None
        self.report_interval = 10  # seconds between progress reports

        # Key generator and genetic algorithm both live on the main device.
        self.key_generator = SmartKeyGenerator(min_range, max_range, MAIN_DEVICE)
        self.genetic_algorithm = GeneticAlgorithm(self.population_size, device=MAIN_DEVICE)

        # Directory for checkpoints (created if missing).
        self.checkpoint_dir = Path('./checkpoints')
        self.checkpoint_dir.mkdir(exist_ok=True)

        logger.info(f"Inicializando solver para o endere√ßo: {target_address}")
        # Integer log2 avoids floating-point issues with very large bounds.
        min_pow = self._floor_log2(min_range)
        max_pow = self._floor_log2(max_range)

        logger.info(f"Range: 2^{min_pow} a 2^{max_pow}")
        logger.info(f"Batch size: {self.batch_size:,} | Popula√ß√£o: {self.population_size:,}")
        logger.info(f"GPUs dispon√≠veis: {GPU_COUNT}")

    @staticmethod
    def _floor_log2(value):
        """Return ``floor(log2(value))`` for ``value > 1`` and ``0`` otherwise."""
        if value <= 1:
            return 0
        return int(value).bit_length() - 1

    def run_genetic_search(self, max_generations=10, require_solution=False):
        """Run the genetic algorithm for up to ``max_generations`` generations.

        Args:
            max_generations: Maximum number of generations to evaluate.
            require_solution: When ``False`` (default, original behaviour) the
                best candidate seen is returned even if it is not a solution.
                When ``True`` the method returns ``None`` unless a candidate
                exceeded ``SOLUTION_FITNESS_THRESHOLD``.

        Returns:
            The matching key as soon as one exceeds the threshold; otherwise the
            best candidate (or ``None``), depending on ``require_solution``.
        """
        logger.info(f"Iniciando busca gen√©tica por at√© {max_generations} gera√ß√µes")

        # Start from a fresh population.
        self.genetic_algorithm.initialize_population(self.min_range, self.max_range)

        best_fitness = 0
        best_key = None

        for generation in range(max_generations):
            # Score the current population against the target address.
            fitness_scores = self.genetic_algorithm.evaluate_fitness_batch(self.target_address)

            max_fitness = torch.max(fitness_scores).item()
            if max_fitness > best_fitness:
                best_fitness = max_fitness
                best_idx = torch.argmax(fitness_scores).item()

                # Rebuild the full key from its (high, low) parts.
                high, low = self.genetic_algorithm.population
                best_high = high[best_idx].item()
                best_low = low[best_idx].item()
                best_key = best_high * (2**32) + best_low

                logger.info(f"Gera√ß√£o {generation}: Melhor fitness = {best_fitness:.6f} | Chave: {best_key}")

                # A near-perfect fitness is treated as a solution.
                if max_fitness > self.SOLUTION_FITNESS_THRESHOLD:
                    logger.info(f"üéâ CHAVE ENCONTRADA: {best_key}")
                    return best_key

            # Evolve unless this was the last generation.
            if generation < max_generations - 1:
                self.genetic_algorithm.evolve(self.min_range, self.max_range)

        # No candidate crossed the threshold: only hand back the best guess
        # when the caller did not ask for a confirmed solution.
        return None if require_solution else best_key

    def run_multi_strategy_search(self, num_batches=100, strategies=None):
        """Search ``num_batches`` batches of generated keys for the target address.

        Args:
            num_batches: Number of batches to generate and check.
            strategies: Generation strategy names handed to the key generator;
                defaults to adaptive, fibonacci, quantum_inspired and random.

        Returns:
            The private key whose address equals the target, or ``None`` when
            all batches were checked without a match.
        """
        # The declaration must precede the first read of DEVICES in this scope:
        # using a name before its `global` statement is a SyntaxError.
        global DEVICES

        self.start_time = time.time()
        self.last_report_time = self.start_time

        if strategies is None:
            strategies = ['adaptive', 'fibonacci', 'quantum_inspired', 'random']

        logger.info(f"Iniciando busca multi-estrat√©gia por {num_batches} lotes")
        logger.info(f"Usando estrat√©gias: {strategies}")

        # Report the hardware that is actually visible to PyTorch.
        if torch.cuda.is_available():
            logger.info(f"Utilizando {torch.cuda.device_count()} GPU(s)")
            for i in range(torch.cuda.device_count()):
                logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        else:
            logger.info("Modo CPU: Nenhuma GPU dispon√≠vel")

        # Rebuild the device list when it disagrees with the configured GPU count.
        if GPU_COUNT > 0 and len(DEVICES) != GPU_COUNT:
            logger.warning(f"Inconsist√™ncia detectada: GPU_COUNT={GPU_COUNT}, mas DEVICES tem {len(DEVICES)} elementos")
            logger.warning("Corrigindo configura√ß√£o de dispositivos...")
            DEVICES = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())]
            if not DEVICES:
                DEVICES = [MAIN_DEVICE]

        for batch_num in range(num_batches):
            # Generate a batch using all strategies at once.
            try:
                keys_tensors = self.key_generator.generate_smart_multi_batch(
                    self.batch_size, strategies
                )
            except RuntimeError as e:
                if "Expected all tensors to be on the same device" in str(e):
                    logger.warning("Detectado erro de dispositivos inconsistentes. Tentando abordagem alternativa...")
                    # Fallback: a single randomly chosen strategy instead of several.
                    keys_tensors = self.key_generator.generate_smart_batch(
                        self.batch_size, strategy=random.choice(strategies)
                    )
                else:
                    # Any other runtime error is not ours to handle.
                    raise

            # The generator already returns the (high, low) tuple format.
            private_keys = keys_tensors

            public_keys = BitcoinUtils.private_key_to_public_key_batch_gpu(private_keys)
            matches, results = BitcoinUtils.batch_addresses_check(public_keys, self.target_address)

            self.total_keys_checked += self.batch_size
            self.total_batches += 1

            if matches:
                # Locate the matching entry and rebuild its private key.
                high, low = private_keys
                for i, (addr, _) in enumerate(results):
                    if addr == self.target_address:
                        private_key = high[i].item() * (2**32) + low[i].item()
                        logger.info(f"üéâ CHAVE ENCONTRADA: {private_key}")
                        return private_key

            # Throttled progress report.
            current_time = time.time()
            if current_time - self.last_report_time >= self.report_interval:
                self._report_progress(batch_num, num_batches)
                self.last_report_time = current_time

            # Every 10 batches, run a short complementary genetic search.
            if batch_num > 0 and batch_num % 10 == 0:
                logger.info("Executando busca gen√©tica complementar...")
                # Only a confirmed solution may end the search; a merely
                # "best so far" candidate must not be reported as the key.
                key = self.run_genetic_search(max_generations=3, require_solution=True)
                if key is not None:
                    return key

        logger.info("Busca conclu√≠da sem encontrar a chave.")
        return None

    def _report_progress(self, batch_num, total_batches):
        """Log completion percentage, keys checked, throughput and elapsed time."""
        elapsed = time.time() - self.start_time
        keys_per_second = self.total_keys_checked / elapsed if elapsed > 0 else 0
        percent_complete = (batch_num + 1) / total_batches * 100 if total_batches > 0 else 0

        logger.info(f"Progresso: {percent_complete:.2f}% | Lote: {batch_num+1}/{total_batches}")
        logger.info(f"Chaves verificadas: {self.total_keys_checked:,} | Velocidade: {keys_per_second:.2f} chaves/s")
        logger.info(f"Tempo decorrido: {elapsed:.2f}s")

# Build the solver from the Puzzle 71 configuration and run the search.
puzzle_solver = BitcoinPuzzleSolver(**{
    field: PUZZLE_71_CONFIG[field]
    for field in ('target_address', 'min_range', 'max_range', 'batch_size', 'population_size')
})

print("üîç Iniciando busca automatizada pela chave privada do Puzzle 71...")
print(f"üéØ Endere√ßo alvo: {PUZZLE_71_CONFIG['target_address']}")
print(f"üìä Usando {GPU_COUNT} GPU(s) e {NUM_THREADS} threads")

# Use more batches when a GPU is available.
if torch.cuda.is_available():
    num_batches = 1000
else:
    num_batches = 100

# Run the multi-strategy search with the default strategies.
found_key = puzzle_solver.run_multi_strategy_search(num_batches=num_batches)

if not found_key:
    print("\n‚ö†Ô∏è Chave n√£o encontrada nesta execu√ß√£o. Tente aumentar o n√∫mero de lotes ou alterar estrat√©gias.")
    print(f"üìä Total de chaves verificadas: {puzzle_solver.total_keys_checked:,}")
else:
    print(f"\nüéâüéâüéâ CHAVE ENCONTRADA: {found_key}")
    print(f"üîë Endere√ßo Bitcoin: {PUZZLE_71_CONFIG['target_address']}")

print("\n‚úÖ Execu√ß√£o conclu√≠da!")


In [None]:

import os
import time
import torch
from concurrent.futures import ThreadPoolExecutor

# Ask CUDA to launch kernels synchronously so errors surface at the failing call.
# NOTE(review): this is set after `import torch`; confirm it still takes effect
# if CUDA was already initialised earlier in the session.
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

# Environment flags (adjust to your setup).
IS_KAGGLE = os.environ.get("KAGGLE_URL_BASE") is not None  # True when running on Kaggle
GPU_COUNT = torch.cuda.device_count()
FORCE_GPU = False  # Set to True to take the dual-GPU path outside Kaggle as well

# Puzzle 71 settings (fill in before running the search).
# NOTE(review): the bounds below span 2^30 .. 2^63 - 1; confirm they match the
# key range intended for this puzzle.
PUZZLE_71_CONFIG = dict(
    target_address='ENDERE√áO_ALVO_AQUI',
    min_range=0x4000_0000,
    max_range=0x7FFF_FFFF_FFFF_FFFF,
    batch_size=1024,
    population_size=1000,
)

# Placeholder para a classe BitcoinPuzzleSolver (importe ou defina antes de usar)
class BitcoinPuzzleSolver:
    """Placeholder solver: stores its configuration and exposes no-op search methods.

    Both search methods iterate the requested number of times without doing
    any work and return ``None``.
    """

    def __init__(self, target_address, min_range, max_range, batch_size=None, population_size=None, device=None):
        """Store the search parameters and resolve the device to use.

        A falsy ``device`` selects ``cuda:0`` when CUDA is available and the
        CPU otherwise.
        """
        self.target_address = target_address
        self.min_range, self.max_range = min_range, max_range
        self.batch_size, self.population_size = batch_size, population_size
        if not device:
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.device = device

    def run_genetic_search(self, max_generations=10):
        """Iterate ``max_generations`` times (no work yet) and return ``None``."""
        for _generation in range(max_generations):
            # The genetic algorithm logic for the assigned GPU goes here.
            continue
        return None

    def run_multi_strategy_search(self, num_batches=100, strategies=None):
        """Iterate ``num_batches`` times (no work yet) and return ``None``."""
        for _batch in range(num_batches):
            # The multi-strategy search logic for the assigned GPU goes here.
            continue
        return None

def run_kaggle_optimized():
    """Run a genetic solver and a multi-strategy solver side by side.

    Two ``BitcoinPuzzleSolver`` instances are created from ``PUZZLE_71_CONFIG``:
    one configured for ``cuda:0`` with half the batch size and twice the
    population, one configured for ``cuda:1`` with twice the batch size and half
    the population. Each runs in its own worker thread.

    Returns:
        The first truthy key returned by either solver, or ``None`` when both
        finish (or fail) without producing one. A failing solver is reported
        once and does not stop the other.
    """
    # Local import: as_completed is not among the names this cell imports.
    from concurrent.futures import as_completed

    print("üöÄ Iniciando busca otimizada para ambiente Kaggle (Dual-GPU)...")

    shared_config = {
        'target_address': PUZZLE_71_CONFIG['target_address'],
        'min_range': PUZZLE_71_CONFIG['min_range'],
        'max_range': PUZZLE_71_CONFIG['max_range'],
    }

    genetic_config = {
        **shared_config,
        'batch_size': PUZZLE_71_CONFIG['batch_size'] // 2,
        'population_size': PUZZLE_71_CONFIG['population_size'] * 2,
        'device': torch.device("cuda:0"),
    }

    multi_config = {
        **shared_config,
        'batch_size': PUZZLE_71_CONFIG['batch_size'] * 2,
        'population_size': PUZZLE_71_CONFIG['population_size'] // 2,
        'device': torch.device("cuda:1"),
    }

    with ThreadPoolExecutor(max_workers=2) as executor:
        genetic_solver = BitcoinPuzzleSolver(**genetic_config)
        multi_solver = BitcoinPuzzleSolver(**multi_config)

        print("üß¨ Iniciando solver gen√©tico na GPU 0...")
        print("üîç Iniciando solver multi-estrat√©gia na GPU 1...")

        future_genetic = executor.submit(genetic_solver.run_genetic_search, 50)
        future_multi = executor.submit(
            multi_solver.run_multi_strategy_search, 500, ['adaptive', 'fibonacci', 'quantum_inspired']
        )

        # Per-future message fragments: (label used on error, label used on success).
        labels = {
            future_genetic: ("gen√©tico (GPU 0)", "Solver gen√©tico"),
            future_multi: ("multi-estrat√©gia (GPU 1)", "Solver multi-estrat√©gia"),
        }

        result = None
        # as_completed yields each future exactly once, as soon as it finishes.
        # This avoids both the polling delay and re-reporting the same failure
        # on every poll while the other solver is still running.
        for future in as_completed(labels):
            error_label, found_label = labels[future]
            try:
                key = future.result()
            except Exception as e:
                # Best effort: report the failure and keep waiting for the other solver.
                print(f"‚ùå Erro no solver {error_label}: {e}")
                continue
            if key:
                print(f"üéØ {found_label} encontrou a chave!")
                result = key
                break

        print("\n‚è±Ô∏è Finalizando todos os solvers...")
        # Leaving the `with` block waits for any solver that is still running.
        return result

if __name__ == "__main__":
    # The dual-GPU path is taken on Kaggle with more than one GPU, or whenever
    # FORCE_GPU is set; otherwise only the standard-mode notice is printed.
    if not ((IS_KAGGLE and GPU_COUNT > 1) or FORCE_GPU):
        print("‚öôÔ∏è Executando em modo padr√£o (n√£o-Kaggle ou single-GPU)...")
    else:
        print(f"üñ•Ô∏è Ambiente Kaggle detectado com {GPU_COUNT} GPUs")
        print("‚ñ∂Ô∏è Iniciando execu√ß√£o otimizada para dual-GPU...\n")
        found_key = run_kaggle_optimized()
        if not found_key:
            print("\n‚ö†Ô∏è Chave n√£o encontrada. Tente executar novamente.")
        else:
            print(f"\nüéâüéâüéâ CHAVE ENCONTRADA: {found_key}")
            print(f"üîë Endere√ßo Bitcoin: {PUZZLE_71_CONFIG['target_address']}")
    print("\n‚úÖ Execu√ß√£o final conclu√≠da!")
