## ⚠️ IMPORTANTE: Execute APENAS no Google Colab

Se você abriu este notebook localmente no VS Code, ele **não funcionará**! Este notebook requer GPU e só funciona no Google Colab.

**Para usar no Colab:**
1. Faça upload no Google Drive
2. Abra com Google Colab
3. Execute as células em ordem

In [None]:
# üîç DIAGN√ìSTICO: Verificar se est√° no Colab e se resultados do Windows existem
import os

def running_in_colab():
    """Return True when the ``google.colab`` package can be imported."""
    try:
        import google.colab  # noqa: F401  (imported only to probe availability)
    except ImportError:
        # Only a missing package means "not on Colab". A bare ``except`` would
        # also hide KeyboardInterrupt and unrelated failures.
        return False
    return True


print("="*70)
print("VERIFICA√á√ÉO DE AMBIENTE")
print("="*70)

# Check whether the notebook is running on Colab
if running_in_colab():
    print("‚úÖ Executando no Google Colab")
else:
    print("‚ùå N√ÉO est√° no Colab! Este notebook s√≥ funciona no Colab com GPU.")
    print("   Abra este arquivo no Google Colab, n√£o no VS Code local.")

def enter_repo(repo_dir='k-means-1d'):
    """Make ``repo_dir`` the working directory unless it already is.

    Returns:
        'inside'  - the current directory is already named ``repo_dir``
                    (e.g. this cell is re-run after the clone cell's ``%cd``);
        'entered' - ``repo_dir`` was found below the current directory and
                    the process changed into it;
        'missing' - the repository could not be located.
    """
    if os.path.basename(os.getcwd()) == repo_dir:
        return 'inside'
    if os.path.isdir(repo_dir):
        os.chdir(repo_dir)
        return 'entered'
    return 'missing'


# Show the current working directory
print(f"\nüìÇ Diret√≥rio atual: {os.getcwd()}")

# Locate the repository; being inside it already counts as "found"
repo_state = enter_repo()
if repo_state == 'missing':
    print("‚ö†Ô∏è  Reposit√≥rio k-means-1d n√£o encontrado")
    print("   Execute a c√©lula de clone primeiro!")
else:
    print("‚úÖ Reposit√≥rio k-means-1d encontrado")
    if repo_state == 'entered':
        print(f"   Mudou para: {os.getcwd()}")

# List every entry of results/ together with its size in bytes
print(f"\nüìÅ Conte√∫do de results/:")
if not os.path.exists('results'):
    print("   ‚ùå Pasta results/ n√£o existe")
else:
    entries = sorted(os.listdir('results'))
    if not entries:
        print("   (pasta vazia)")
    for entry in entries:
        n_bytes = os.path.getsize(f'results/{entry}')
        print(f"   - {entry} ({n_bytes} bytes)")

def head_lines(path, limit=5):
    """Return up to ``limit`` stripped lines from the start of a text file.

    The file is decoded as UTF-8 (a leading BOM is dropped) and undecodable
    bytes are replaced instead of raising, because this is only a diagnostic
    preview of a CSV that was produced on another machine. Only the first
    ``limit`` lines are read.
    """
    with open(path, 'r', encoding='utf-8-sig', errors='replace') as handle:
        # zip() stops as soon as range() is exhausted, so no extra line is read
        return [line.strip() for _, line in zip(range(limit), handle)]


# Check for the CSV produced by the Windows runs
windows_csv = 'results/resultados_windows.csv'
if os.path.exists(windows_csv):
    print(f"\n‚úÖ ARQUIVO DO WINDOWS ENCONTRADO!")
    print(f"   Tamanho: {os.path.getsize(windows_csv)} bytes")

    # Preview the first lines
    print(f"   Primeiras linhas:")
    for line in head_lines(windows_csv):
        print(f"     {line}")
else:
    print(f"\n‚ùå Arquivo {windows_csv} N√ÉO encontrado!")
    print("   Poss√≠veis causas:")
    print("   1. Voc√™ n√£o executou 'git pull origin master'")
    print("   2. Voc√™ n√£o est√° na pasta k-means-1d")
    print("   3. O arquivo n√£o foi commitado corretamente")

    print("\nüîß SOLU√á√ÉO: Execute isso:")
    print("   !git pull origin master")
    print("   !ls -lh results/")

print("\n" + "="*70)

## 1️⃣ Setup - Clonar Repositório e Verificar GPU

In [None]:
# Clonar reposit√≥rio do GitHub
!git clone https://github.com/gabrielamds/k-means-1d.git
%cd k-means-1d

# Verificar GPU dispon√≠vel
!nvidia-smi --query-gpu=name,memory.total,compute_cap --format=csv

## 📋 Coletar Informações do Ambiente

Esta célula coleta informações detalhadas do ambiente de execução para o relatório acadêmico.

In [None]:
import json
import subprocess
import os

def run_tool(args):
    """Run ``args`` and return its stdout, or '' when the tool cannot be started.

    The exit status is ignored on purpose: callers only parse stdout and treat
    missing output as "not available".
    """
    try:
        return subprocess.run(args, capture_output=True, text=True).stdout
    except OSError:
        # Tool not installed (e.g. mpirun before the OpenMPI install cell ran)
        return ''


def first_line(text):
    """Return the first non-blank line of ``text`` stripped, or ''."""
    for line in text.splitlines():
        if line.strip():
            return line.strip()
    return ''


def parse_gpu_line(csv_line):
    """Split one ``name, memory`` nvidia-smi CSV row into ``(name, memory)``."""
    name, _, memory = csv_line.partition(',')
    return name.strip() or 'N/A', memory.strip() or 'N/A'


def parse_cuda_version(nvcc_output):
    """Return the text between ``release `` and the next comma, or 'N/A'."""
    if 'release' not in nvcc_output:
        return 'N/A'
    return nvcc_output.split('release ')[-1].split(',')[0]


def parse_lscpu(lscpu_output):
    """Return ``cpu_model`` / ``cpu_cores`` taken from ``lscpu`` output.

    Field names are compared exactly, so ``BIOS Model name`` and
    ``NUMA node0 CPU(s)`` cannot overwrite the wanted values.
    """
    cpu = {'cpu_model': 'N/A', 'cpu_cores': 'N/A'}
    for line in lscpu_output.splitlines():
        field, _, value = line.partition(':')
        field = field.strip()
        if field == 'Model name':
            cpu['cpu_model'] = value.strip()
        elif field == 'CPU(s)':
            cpu['cpu_cores'] = value.strip()
    return cpu


def parse_ram_total(free_output):
    """Return the second column of the second line of ``free -h``, or 'N/A'."""
    rows = free_output.splitlines()
    columns = rows[1].split() if len(rows) > 1 else []
    return columns[1] if len(columns) > 1 else 'N/A'


def parse_gcc_version(banner):
    """Return the version number from the first line of ``gcc --version``.

    Banners look like ``gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0``; the
    parenthesised distro tag is skipped so the result is ``11.4.0`` rather
    than ``(Ubuntu``.
    """
    if 'gcc' not in banner:
        return 'N/A'
    _, closing, after_tag = banner.partition(')')
    candidates = after_tag if closing else banner.split('gcc', 1)[1]
    for token in candidates.split():
        if token[0].isdigit():
            return token
    return 'N/A'


# Collect environment information; every entry falls back to 'N/A' when the
# corresponding tool is missing or prints nothing.
env_info = {}

# GPU (first GPU only when several are listed)
gpu_row = first_line(run_tool(
    ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader']))
env_info['gpu_name'], env_info['gpu_memory'] = parse_gpu_line(gpu_row)

# CUDA version
env_info['cuda_version'] = parse_cuda_version(run_tool(['nvcc', '--version']))

# CPU
env_info.update(parse_lscpu(run_tool(['lscpu'])))

# RAM
env_info['ram_total'] = parse_ram_total(run_tool(['free', '-h']))

# GCC version
env_info['gcc_version'] = parse_gcc_version(first_line(run_tool(['gcc', '--version'])))

# OpenMPI version
env_info['mpi_version'] = first_line(run_tool(['mpirun', '--version'])) or 'N/A'

# Persist the collected environment description as JSON
os.makedirs('results', exist_ok=True)
with open('results/ambiente.json', 'w') as json_file:
    json.dump(env_info, json_file, indent=2)

# Echo the same information as an aligned two-column listing
separator = "="*60
print(separator)
print("INFORMA√á√ïES DO AMBIENTE")
print(separator)
for field in env_info:
    print(f"{field:20s}: {env_info[field]}")
print(separator)
print("‚úì Salvo em: results/ambiente.json")

## 2️⃣ Gerar Datasets

Gera os mesmos dados usados no Windows (seeds 42, 43, 44):

In [None]:
# Gerar os 3 datasets
%cd data
!python3 generate_data.py --N 10000 --K 4 --output dados_pequeno --seed 42
!python3 generate_data.py --N 100000 --K 8 --output dados_medio --seed 43
!python3 generate_data.py --N 1000000 --K 16 --output dados_grande --seed 44

# Confirmar que foram criados
print("\nDatasets criados:")
!ls -lh *.csv
%cd ..

## 3️⃣ Compilar CUDA

In [None]:
%%bash
# Build the pure-CUDA k-means binary.
# Abort on the first failing command so the success message below is only
# printed when nvcc actually produced the executable.
set -e
cd cuda
nvcc -O2 -arch=sm_75 kmeans_1d_cuda.cu -o kmeans_1d_cuda
echo "‚úì CUDA compilado com sucesso"
ls -lh kmeans_1d_cuda

---

# 📊 PARTE 1: CUDA PURO

## Benchmark CUDA - 3 Datasets

In [None]:
%%bash
# Runs the pure-CUDA binary once per dataset. The last argument (256) is the
# threads-per-block value; 50 and 1e-6 are presumably the iteration limit and
# the convergence tolerance (not visible from this notebook - confirm).
echo "========================================="
echo "BENCHMARK: CUDA"
echo "========================================="

run_cuda() {
    # $1 = banner line, $2 = dataset file stem inside data/
    echo ""
    echo "$1"
    cuda/kmeans_1d_cuda "data/${2}.csv" "data/${2}_centroides_init.csv" 50 1e-6 256
}

run_cuda "--- Dataset PEQUENO (10K, K=4) ---" dados_pequeno
run_cuda "--- Dataset M√âDIO (100K, K=8) ---" dados_medio
run_cuda "--- Dataset GRANDE (1M, K=16) ---" dados_grande

## Testar Diferentes Configurações (Threads per Block)

In [None]:
%%bash
# Sweep the threads-per-block argument on the large dataset and keep only the
# lines of the program output that mention "Tempo".
echo "=== Variando Threads per Block (Dataset Grande) ==="
dataset=data/dados_grande.csv
centroids=data/dados_grande_centroides_init.csv
for tpb in 128 256 512 1024
do
    printf '\nThreads/Block: %s\n' "$tpb"
    cuda/kmeans_1d_cuda "$dataset" "$centroids" 50 1e-6 "$tpb" | grep "Tempo"
done

## 📊 Análise Detalhada: Impacto do Block Size

Testa sistematicamente diferentes block sizes para análise acadêmica.

In [None]:
import subprocess
import re
import csv
import os
import time

def parse_cuda_output(output):
    """Extract ``(tempo_ms, sse)`` from the text printed by the CUDA binary.

    ``tempo_ms`` is the number preceding ``ms`` on the first line containing
    both ``Tempo`` and ``ms``, or ``None`` when no such line exists.
    ``sse`` prefers a ``SSE final: <number>`` entry (the format the other
    benchmark cells of this notebook parse) and falls back to the first number
    on the first line mentioning ``SSE``; it is ``0.0`` when nothing is found.
    Numbers may use scientific notation; a lone ``.`` is never accepted.
    """
    number = r'\d+(?:\.\d+)?(?:[eE][+-]?\d+)?'
    tempo = None
    sse = None

    final_sse = re.search(rf'SSE final:\s*({number})', output)
    if final_sse:
        sse = float(final_sse.group(1))

    for line in output.split('\n'):
        if tempo is None and 'Tempo' in line and 'ms' in line:
            found = re.search(rf'({number})\s*ms', line)
            if found:
                tempo = float(found.group(1))
        if sse is None and 'SSE' in line:
            found = re.search(number, line)
            if found:
                sse = float(found.group(0))

    return tempo, (sse if sse is not None else 0.0)


print("="*60)
print("AN√ÅLISE: IMPACTO DO BLOCK SIZE NO CUDA")
print("="*60)
print("‚ö†Ô∏è  Esta c√©lula S√ì funciona no Google Colab com GPU!")
print("="*60)

# ---------------------------------------------------------------
# Mandatory checks
# ---------------------------------------------------------------

can_proceed = True

# 1. GPU
try:
    gpu_check = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
except OSError:
    # nvidia-smi is not installed / not executable
    print("‚ùå nvidia-smi n√£o dispon√≠vel (n√£o est√° no Colab?)")
    can_proceed = False
else:
    if gpu_check.returncode == 0:
        print("‚úì GPU detectada")
    else:
        print("‚ùå GPU n√£o encontrada")
        can_proceed = False

# 2. CUDA executable
if os.path.exists('cuda/kmeans_1d_cuda'):
    print("‚úì Execut√°vel CUDA compilado")
else:
    print("‚ùå Execut√°vel CUDA n√£o encontrado")
    print("   Execute a c√©lula de compila√ß√£o primeiro!")
    can_proceed = False

# 3. Dataset
if os.path.exists('data/dados_grande.csv') and os.path.exists('data/dados_grande_centroides_init.csv'):
    print("‚úì Datasets dispon√≠veis")
else:
    print("‚ùå Datasets n√£o encontrados")
    print("   Execute a c√©lula de gera√ß√£o de datasets primeiro!")
    can_proceed = False

print("="*60)

# ---------------------------------------------------------------
# Run the analysis only when every requirement is met
# ---------------------------------------------------------------

if not can_proceed:
    print("\n‚ùå N√ÉO √â POSS√çVEL EXECUTAR A AN√ÅLISE")
    print("\nüìã Corrija os problemas acima e tente novamente.")
    print("\nLEMBRETE: Este notebook s√≥ funciona no Google Colab!")
else:
    print("\n‚úì Todos os requisitos OK! Iniciando an√°lise...\n")

    block_sizes = [64, 128, 256, 512, 1024]
    results = []

    for bs in block_sizes:
        print(f"üìä Block size {bs:4d}...", end=" ", flush=True)

        # Argument list instead of a shell string: no shell in between, so a
        # timeout terminates the benchmark process itself.
        cmd = [
            "./cuda/kmeans_1d_cuda",
            "data/dados_grande.csv",
            "data/dados_grande_centroides_init.csv",
            "50", "1e-6", str(bs),
        ]

        try:
            # stderr is merged into stdout; subprocess.run kills and reaps the
            # child by itself when the timeout expires.
            completed = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                timeout=30,
            )
        except subprocess.TimeoutExpired:
            print(f"‚ùå TIMEOUT")
            continue
        except OSError as e:
            print(f"‚ùå ERRO: {str(e)[:50]}")
            continue

        stdout = completed.stdout

        if completed.returncode != 0:
            print(f"‚ùå ERRO (c√≥digo {completed.returncode})")
            # Always show the tail of the output, even when it is short
            tail = [line for line in stdout.split('\n') if line.strip()][-5:]
            if tail:
                print(f"   √öltimas linhas:")
                for line in tail:
                    print(f"     {line[:80]}")
            continue

        tempo, sse = parse_cuda_output(stdout)

        if tempo is not None:
            print(f"‚úì {tempo:6.2f} ms")
            results.append({'blocksize': bs, 'tempo_ms': tempo, 'sse': sse})
        else:
            print(f"‚ùå Tempo n√£o encontrado no output")
            # Show a sample of the output to help debugging
            print(f"   Tamanho output: {len(stdout)} bytes")
            if len(stdout) > 0:
                sample = stdout[:300].replace('\n', ' ')
                print(f"   In√≠cio: {sample[:100]}...")

    # ---------------------------------------------------------------
    # Report only valid results
    # ---------------------------------------------------------------

    if len(results) >= 3:  # at least 3 valid measurements
        os.makedirs('results', exist_ok=True)
        with open('results/cuda_blocksize.csv', 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=['blocksize', 'tempo_ms', 'sse'])
            writer.writeheader()
            writer.writerows(results)

        print("\n" + "="*60)
        print(f"RESULTADOS V√ÅLIDOS ({len(results)}/{len(block_sizes)}):")
        print("="*60)
        print(f"{'Block Size':<15} {'Tempo (ms)':<15} {'SSE'}")
        print("-"*60)
        for r in results:
            print(f"{r['blocksize']:<15} {r['tempo_ms']:<15.2f} {r['sse']:.2f}")

        best = min(results, key=lambda x: x['tempo_ms'])
        worst = max(results, key=lambda x: x['tempo_ms'])
        # A reported time of 0 ms must not crash the summary
        if best['tempo_ms'] > 0:
            speedup = worst['tempo_ms'] / best['tempo_ms']
        else:
            speedup = float('inf')

        print("="*60)
        print(f"‚úì Melhor:  {best['blocksize']} threads ‚Üí {best['tempo_ms']:.2f} ms")
        print(f"‚úì Pior:    {worst['blocksize']} threads ‚Üí {worst['tempo_ms']:.2f} ms")
        print(f"‚úì Ganho:   {speedup:.2f}x")
        print(f"‚úì Salvo:   results/cuda_blocksize.csv")
        print("="*60)
    else:
        print("\n" + "="*60)
        print(f"‚ùå AN√ÅLISE INCOMPLETA ({len(results)}/{len(block_sizes)} v√°lidos)")
        print("="*60)
        if len(results) > 0:
            print("\nResultados parciais obtidos:")
            for r in results:
                print(f"  - Block {r['blocksize']}: {r['tempo_ms']:.2f} ms")
        print("\n‚ö†Ô∏è  Arquivo N√ÉO foi salvo (necess√°rio m√≠nimo 3 medi√ß√µes)")
        print("\nüîç Poss√≠veis causas:")
        print("   - Execut√°vel travando durante execu√ß√£o")
        print("   - Mem√≥ria GPU insuficiente")
        print("   - Arquivos CSV corrompidos")
        print("\nüí° Tente executar manualmente:")
        print("   !./cuda/kmeans_1d_cuda data/dados_grande.csv data/dados_grande_centroides_init.csv 50 1e-6 256")

---

# 📊 PARTE 2: HYBRID (OpenMP + CUDA)

## Compilar OpenMP + CUDA

In [None]:
%%bash
# Build the OpenMP + CUDA hybrid binary.
# Abort on the first failing command so the success message is only printed
# when nvcc actually succeeded.
set -e
cd hybrid
nvcc -O2 -arch=sm_75 -Xcompiler -fopenmp kmeans_1d_omp_cuda.cu -o kmeans_1d_omp_cuda
echo "√¢≈ì‚Äú OpenMP + CUDA compilado"
ls -lh kmeans_1d_omp_cuda

## Benchmark OpenMP + CUDA

In [None]:
%%bash
# Benchmarks the OpenMP + CUDA binary on every dataset with 1, 2, 4 and 8
# OpenMP threads, printing only the lines that mention "Tempo".
echo "========================================="
echo "BENCHMARK: HYBRID (OpenMP + CUDA)"
echo "========================================="

for spec in "pequeno:10K:4" "medio:100K:8" "grande:1M:16"; do
    # Split "name:N:K" with parameter expansion
    tag=${spec%%:*}
    rest=${spec#*:}
    n_label=${rest%%:*}
    k_label=${rest#*:}

    echo ""
    echo "=== Dataset: ${tag^^} (N=$n_label, K=$k_label) ==="

    # 1, 2, 4, 8
    for ((nthreads = 1; nthreads <= 8; nthreads *= 2)); do
        echo "  OpenMP $nthreads threads + CUDA:"
        OMP_NUM_THREADS=$nthreads hybrid/kmeans_1d_omp_cuda \
            "data/dados_${tag}.csv" "data/dados_${tag}_centroides_init.csv" \
            50 1e-6 256 | grep "Tempo"
    done
done

---

# 📊 PARTE 4: HYBRID (MPI + CUDA)

In [None]:
%%bash
# Benchmarks the OpenMP + MPI binary on every dataset for three
# (MPI processes x OpenMP threads) layouts, printing only the "Tempo" lines.
echo "========================================="
echo "BENCHMARK: HYBRID (OpenMP + MPI)"
echo "========================================="

# Each layout is "<processes>:<threads>:<label printed before the run>"
layouts=(
    "1:2:1 processo MPI x 2 threads OpenMP"
    "2:1:2 processos MPI x 1 thread OpenMP"
    "2:2:2 processos MPI x 2 threads OpenMP"
)

for entry in "pequeno:10K:4" "medio:100K:8" "grande:1M:16"; do
    IFS=':' read -r tag n_label k_label <<< "$entry"
    echo ""
    echo "=== Dataset: ${tag^^} (N=$n_label, K=$k_label) ==="

    for layout in "${layouts[@]}"; do
        IFS=':' read -r procs nthreads label <<< "$layout"
        echo "  $label:"
        OMP_NUM_THREADS=$nthreads mpirun -np "$procs" --allow-run-as-root --oversubscribe \
            hybrid/kmeans_1d_omp_mpi \
            "data/dados_${tag}.csv" "data/dados_${tag}_centroides_init.csv" \
            50 1e-6 | grep "Tempo"
    done
done

## 3. Benchmark OpenMP + MPI (automático com salvamento)

In [None]:
%%bash
# Build the OpenMP + MPI (CPU-only) binary.
# Abort on the first failing command so the success message is only printed
# when mpicc actually succeeded.
set -e
cd hybrid
mpicc -O2 -fopenmp kmeans_1d_omp_mpi.c -o kmeans_1d_omp_mpi -lm
echo "√¢≈ì‚Äú OpenMP + MPI compilado"
ls -lh kmeans_1d_omp_mpi

## 2. Compilar OpenMP + MPI

---

# 📊 PARTE 3: HYBRID (OpenMP + MPI) - CPU apenas

## 1. Instalar OpenMPI

In [None]:
%%bash
# Benchmarks the OpenMP + MPI binary on every dataset, trying several
# combinations of MPI processes and OpenMP threads.
echo "========================================="
echo "BENCHMARK: HYBRID (OpenMP + MPI)"
echo "========================================="

run_layout() {
    # $1 = MPI processes, $2 = OpenMP threads, $3 = dataset name
    OMP_NUM_THREADS=$2 mpirun -np "$1" --allow-run-as-root --oversubscribe \
        hybrid/kmeans_1d_omp_mpi \
        "data/dados_${3}.csv" "data/dados_${3}_centroides_init.csv" \
        50 1e-6 | grep "Tempo"
}

for entry in "pequeno:10K:4" "medio:100K:8" "grande:1M:16"; do
    IFS=':' read -r tag n_label k_label <<< "$entry"
    echo ""
    echo "=== Dataset: ${tag^^} (N=$n_label, K=$k_label) ==="

    # Different MPI x OpenMP combinations
    echo ""
    echo "  1 processo MPI x 2 threads OpenMP:"
    run_layout 1 2 "$tag"

    echo "  2 processos MPI x 1 thread OpenMP:"
    run_layout 2 1 "$tag"

    echo "  2 processos MPI x 2 threads OpenMP:"
    run_layout 2 2 "$tag"
done

In [None]:
import subprocess
import re
import csv
import os

def parse_tempo_sse(output):
    """Return ``(tempo_ms, sse)`` parsed from benchmark output, or ``None``.

    The time is taken from ``Tempo: <n> ms`` / ``Tempo total: <n> ms`` and the
    SSE from ``SSE final: <n>`` (``0.0`` when absent). ``None`` is returned
    when no time is found. Numbers may use scientific notation; malformed
    tokens such as ``...`` are not matched, so ``float()`` cannot raise.
    """
    number = r'\d+(?:\.\d+)?(?:[eE][+-]?\d+)?'
    tempo_match = re.search(rf'Tempo(?:\s+total)?:\s*({number})\s*ms', output)
    if not tempo_match:
        return None
    sse_match = re.search(rf'SSE final:\s*({number})', output)
    sse = float(sse_match.group(1)) if sse_match else 0.0
    return float(tempo_match.group(1)), sse


print("========================================")
print("EXECUTANDO BENCHMARKS: OpenMP+MPI")
print("========================================")

# Create the results directory if needed
os.makedirs('results', exist_ok=True)

# Collected measurements
resultados = []

datasets = [
    ("pequeno", "10K", "4"),
    ("medio", "100K", "8"),
    ("grande", "1M", "16")
]

configs = [
    ("2t1p", 1, 2),  # 1 process, 2 threads
    ("1t2p", 2, 1),  # 2 processes, 1 thread
    ("2t2p", 2, 2),  # 2 processes, 2 threads
]

for name, N, K in datasets:
    print(f"\n=== Dataset: {name.upper()} (N={N}, K={K}) ===")

    for config_name, nprocs, nthreads in configs:
        print(f"  Config {config_name} ({nprocs}p x {nthreads}t): ", end="", flush=True)

        cmd = [
            "mpirun", "-np", str(nprocs),
            "--allow-run-as-root", "--oversubscribe",
            "hybrid/kmeans_1d_omp_mpi",
            f"data/dados_{name}.csv",
            f"data/dados_{name}_centroides_init.csv",
            "50", "1e-6"
        ]

        env = os.environ.copy()
        env["OMP_NUM_THREADS"] = str(nthreads)

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, env=env, timeout=60)
        except (OSError, subprocess.TimeoutExpired) as e:
            # mpirun missing / not executable, or the run exceeded 60 s
            print(f"ERRO: {e}")
            continue

        parsed = parse_tempo_sse(result.stdout)
        if parsed is None:
            print("ERRO: n√É¬£o encontrou tempo no output")
            print(result.stdout)
            # mpirun reports its own failures on stderr; show them as well
            if result.stderr.strip():
                print(result.stderr)
            continue

        tempo, sse = parsed
        print(f"{tempo:.2f} ms")
        resultados.append({
            'dataset': name,
            'implementacao': 'OpenMP+MPI',
            'config': config_name,
            'tempo_ms': tempo,
            'sse': sse
        })

# Save the measurements as CSV
if resultados:
    csv_file = 'results/resultados_openmp_mpi.csv'
    with open(csv_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['dataset', 'implementacao', 'config', 'tempo_ms', 'sse'])
        writer.writeheader()
        writer.writerows(resultados)

    print(f"\n√¢≈ì‚Äú Resultados salvos em: {csv_file}")
    print(f"√¢≈ì‚Äú Total de {len(resultados)} medi√É¬ß√É¬µes coletadas")
else:
    print("\n√¢≈°¬†√Ø¬∏¬è  Nenhum resultado foi coletado!")

In [None]:
# Refresh the apt package index, then install the openmpi-bin and
# libopenmpi-dev packages (both in quiet mode).
!apt-get update -qq
!apt-get install -y openmpi-bin libopenmpi-dev -qq
# NOTE(review): this message is printed unconditionally, even if apt-get failed.
print("√¢≈ì‚Äú OpenMPI instalado")

## Compilar MPI + CUDA

In [None]:
%%bash
# Build the MPI + CUDA hybrid binary against the system OpenMPI headers/libs.
# Abort on the first failing command so the success message is only printed
# when nvcc actually succeeded.
set -e
cd hybrid
nvcc -O2 -arch=sm_75 -I/usr/lib/x86_64-linux-gnu/openmpi/include \
    kmeans_1d_mpi_cuda.cu -o kmeans_1d_mpi_cuda \
    -L/usr/lib/x86_64-linux-gnu/openmpi/lib -lmpi
echo "√¢≈ì‚Äú MPI + CUDA compilado"
ls -lh kmeans_1d_mpi_cuda

## Benchmark MPI + CUDA

In [None]:
%%bash
# Benchmarks the MPI + CUDA binary on every dataset with 1, 2 and 4 MPI
# processes, printing only the lines that mention "Tempo".
echo "========================================="
echo "BENCHMARK: HYBRID (MPI + CUDA)"
echo "========================================="

for entry in "pequeno:10K:4" "medio:100K:8" "grande:1M:16"; do
    IFS=':' read -r tag n_label k_label <<< "$entry"
    echo ""
    echo "=== Dataset: ${tag^^} (N=$n_label, K=$k_label) ==="

    # 1, 2, 4
    for ((nprocs = 1; nprocs <= 4; nprocs *= 2)); do
        echo "  MPI $nprocs processos + CUDA:"
        mpirun -np "$nprocs" --allow-run-as-root --oversubscribe hybrid/kmeans_1d_mpi_cuda \
            "data/dados_${tag}.csv" "data/dados_${tag}_centroides_init.csv" \
            50 1e-6 256 | grep "Tempo"
    done
done

---

# 📊 COMPARAÇÃO FINAL

In [None]:
import subprocess
import re
import pandas as pd

def extract_time(cmd, timeout=None):
    """Run a shell command and return the time in milliseconds it reports.

    Args:
        cmd: Command line executed with ``shell=True`` so that callers can
            prefix environment assignments such as ``OMP_NUM_THREADS=2``.
            It must come from trusted notebook code, never from user input.
        timeout: Optional limit in seconds; ``None`` (the default) waits
            indefinitely, as before.

    Returns:
        The first ``Tempo...: <number> ms`` value found in stdout as a float,
        or ``None`` when the command timed out or printed no such value.
    """
    try:
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        return None
    # Require a well-formed number so float() cannot raise on tokens like "..."
    match = re.search(r'Tempo.*?:\s*(\d+(?:\.\d+)?)\s*ms', result.stdout)
    return float(match.group(1)) if match else None

print("="*70)
print("RESUMO COMPARATIVO - Dataset GRANDE (1M pontos, K=16)")
print("="*70 + "\n")

# Arguments shared by every run on the large dataset
data_args = "data/dados_grande.csv data/dados_grande_centroides_init.csv 50 1e-6"
mpirun_flags = "--allow-run-as-root --oversubscribe"
impl_col = 'Implementa√É¬ß√É¬£o'

results = []

# Pure CUDA
print("Testando CUDA...")
time_cuda = extract_time(f"cuda/kmeans_1d_cuda {data_args} 256")
results.append(('CUDA (256 TPB)', time_cuda))

# OpenMP + CUDA
# The thread count is passed only through OMP_NUM_THREADS and the last
# argument stays 256, matching how the other cells of this notebook invoke
# this binary (the previous command line here inserted an extra "{t}"
# argument before 256).
for t in [1, 2, 4, 8]:
    print(f"Testando OpenMP({t}t) + CUDA...")
    time_hybrid = extract_time(
        f"OMP_NUM_THREADS={t} hybrid/kmeans_1d_omp_cuda {data_args} 256"
    )
    results.append((f'OpenMP({t}t) + CUDA', time_hybrid))

# OpenMP + MPI (CPU only): (threads, processes)
for t, p in [(2, 1), (1, 2), (2, 2)]:
    print(f"Testando OpenMP({t}t) + MPI({p}p)...")
    time_hybrid = extract_time(
        f"OMP_NUM_THREADS={t} mpirun -np {p} {mpirun_flags} "
        f"hybrid/kmeans_1d_omp_mpi {data_args}"
    )
    results.append((f'OpenMP({t}t) + MPI({p}p)', time_hybrid))

# MPI + CUDA
for p in [1, 2, 4]:
    print(f"Testando MPI({p}p) + CUDA...")
    time_hybrid = extract_time(
        f"mpirun -np {p} {mpirun_flags} hybrid/kmeans_1d_mpi_cuda {data_args} 256"
    )
    results.append((f'MPI({p}p) + CUDA', time_hybrid))

# Build the table, dropping runs whose time could not be parsed
df = pd.DataFrame(results, columns=[impl_col, 'Tempo (ms)'])
df = df[df['Tempo (ms)'].notna()]

print("\n" + "="*70)
print(df.to_string(index=False))
print("="*70)

# Report the fastest implementation
if not df.empty:
    fastest = df.loc[df['Tempo (ms)'].idxmin()]
    print(f"\n√∞≈∏¬è‚Ä† Mais r√É¬°pido: {fastest[impl_col]} com {fastest['Tempo (ms)']:.2f} ms")

---

## ✅ Benchmark Completo!

Todos os testes foram executados com:
- ✅ CUDA puro (GPU)
- ✅ OpenMP + CUDA (CPU multi-thread + GPU)
- ✅ MPI + CUDA (Distribuído + GPU)

**Próximos passos:**
1. Analisar os resultados
2. Comparar com resultados do Windows (Serial, OpenMP, MPI)
3. Gerar gráficos de speedup
4. Documentar no relatório

---

# 💾 SALVAR TODOS OS RESULTADOS

Execute esta célula para consolidar todos os resultados em um único arquivo CSV que o script de análise irá usar automaticamente.

---

# ✅ VALIDAÇÃO DE RESULTADOS

Verifica a consistência do SSE entre todas as implementações.

In [None]:
import pandas as pd
import numpy as np
import os

REQUIRED_COLUMNS = ('dataset', 'implementacao', 'sse')


def load_results(paths):
    """Load and merge result CSVs into a single DataFrame.

    Files lacking any of REQUIRED_COLUMNS are skipped with a message instead
    of failing later with a KeyError. Exact duplicate rows are dropped because
    the consolidated Colab file already embeds the OpenMP+MPI rows that are
    also stored in their own CSV, so loading both would count them twice.
    """
    frames = []
    for path in paths:
        frame = pd.read_csv(path)
        missing = [col for col in REQUIRED_COLUMNS if col not in frame.columns]
        if missing:
            print(f"Ignorando {path}: colunas ausentes {missing}")
            continue
        frames.append(frame)
    if not frames:
        return pd.DataFrame(columns=list(REQUIRED_COLUMNS))
    merged = pd.concat(frames, ignore_index=True)
    return merged.drop_duplicates().reset_index(drop=True)


def summarize_sse(df_ds):
    """Return ``(summary, variation_percent)`` for one dataset's rows.

    ``summary`` has one row per implementation with mean/std/min/max of the
    SSE; the std of a single measurement is reported as 0.0 instead of NaN.
    ``variation_percent`` is the spread between the largest and smallest mean
    SSE, relative to the smallest. Callers must pass rows with ``sse > 0``.
    """
    summary = df_ds.groupby('implementacao')['sse'].agg(['mean', 'std', 'min', 'max']).reset_index()
    summary['std'] = summary['std'].fillna(0.0)
    lowest = summary['mean'].min()
    highest = summary['mean'].max()
    return summary, ((highest - lowest) / lowest) * 100


print("="*70)
print("VALIDA√É‚Ä°√É∆íO: CONSIST√É≈†NCIA DE SSE ENTRE IMPLEMENTA√É‚Ä°√É‚Ä¢ES")
print("="*70)

# Keep only the result files that actually exist
candidate_files = [
    'results/resultados_colab.csv',
    'results/resultados_windows.csv',
    'results/resultados_openmp_mpi.csv',
]
results_files = [path for path in candidate_files if os.path.exists(path)]

if not results_files:
    print("\n√¢≈°¬†√Ø¬∏¬è  Nenhum arquivo de resultados encontrado!")
    print("Execute as c√É¬©lulas de benchmark antes da valida√É¬ß√É¬£o.")
else:
    df_all = load_results(results_files)

    # Validate each dataset separately
    for dataset in ['pequeno', 'medio', 'grande']:
        # Only rows of this dataset with a valid (> 0) SSE
        df_ds = df_all[(df_all['dataset'] == dataset) & (df_all['sse'] > 0)]

        if df_ds.empty:
            continue

        print(f"\n{'='*70}")
        print(f"Dataset: {dataset.upper()}")
        print('='*70)

        sse_summary, variacao = summarize_sse(df_ds)

        print(f"\n{'Implementa√É¬ß√É¬£o':<20} {'SSE M√É¬©dio':<15} {'Desvio Padr√É¬£o':<15}")
        print('-'*70)
        for _, row in sse_summary.iterrows():
            print(f"{row['implementacao']:<20} {row['mean']:<15.2f} {row['std']:<15.6f}")

        print(f"\n{'='*70}")
        print(f"Varia√É¬ß√É¬£o entre implementa√É¬ß√É¬µes: {variacao:.6f}%")

        if variacao < 0.001:
            print("√¢≈ì‚Ä¶ VALIDADO: Todas implementa√É¬ß√É¬µes convergem para mesmo resultado")
        elif variacao < 0.1:
            print("√¢≈°¬†√Ø¬∏¬è  ACEIT√É¬ÅVEL: Pequenas diferen√É¬ßas num√É¬©ricas (toler√É¬¢ncia OK)")
        else:
            print("√¢¬ù≈í ATEN√É‚Ä°√É∆íO: Diferen√É¬ßas significativas detectadas!")

print(f"\n{'='*70}")
print("√¢≈ì‚Äú Valida√É¬ß√É¬£o completa")

In [None]:
import subprocess
import re
import csv
import os
import pandas as pd

def parse_benchmark_output(output):
    """Return ``(tempo_ms, sse)`` parsed from benchmark output, or ``None``.

    The time comes from ``Tempo: <n> ms`` / ``Tempo total: <n> ms`` and the
    SSE from ``SSE final: <n>`` (``0.0`` when absent). Only well-formed
    numbers are matched, so a stray token such as ``.`` cannot make
    ``float()`` raise and abort the whole consolidation run.
    """
    number = r'\d+(?:\.\d+)?(?:[eE][+-]?\d+)?'
    tempo_match = re.search(rf'Tempo(?:\s+total)?:\s*({number})\s*ms', output)
    if not tempo_match:
        return None
    sse_match = re.search(rf'SSE final:\s*({number})', output)
    sse = float(sse_match.group(1)) if sse_match else 0.0
    return float(tempo_match.group(1)), sse


def run_benchmark(dataset, implementacao, config, cmd, env=None):
    """Run one benchmark command and record its measurement.

    Prints the measured time (or ``ERRO`` when no time could be parsed) and,
    on success, appends one row to the module-level ``all_results`` list.
    """
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, env=env)
    parsed = parse_benchmark_output(result.stdout)
    if parsed is None:
        print("ERRO")
        return
    tempo, sse = parsed
    print(f"{tempo:.2f} ms")
    all_results.append({
        'dataset': dataset,
        'implementacao': implementacao,
        'config': config,
        'tempo_ms': tempo,
        'sse': sse,
    })


print("="*60)
print("EXECUTAR E CONSOLIDAR TODOS OS BENCHMARKS")
print("="*60)

# Create the results directory
os.makedirs('results', exist_ok=True)

all_results = []

datasets = [
    ("pequeno", "10K", "4"),
    ("medio", "100K", "8"),
    ("grande", "1M", "16")
]

# ============================================
# 1. PURE CUDA
# ============================================
print("\n[1/4] CUDA...")
for name, _n, _k in datasets:
    print(f"  {name.upper()}: ", end="", flush=True)
    run_benchmark(
        name, 'CUDA', '-',
        f"cuda/kmeans_1d_cuda data/dados_{name}.csv data/dados_{name}_centroides_init.csv 50 1e-6 256",
    )

# ============================================
# 2. OpenMP + CUDA
# ============================================
print("\n[2/4] OpenMP+CUDA...")
for name, _n, _k in datasets:
    print(f"  {name.upper()}")
    for threads in [1, 2, 4, 8]:
        print(f"    {threads}t: ", end="", flush=True)
        env = os.environ.copy()
        env['OMP_NUM_THREADS'] = str(threads)
        run_benchmark(
            name, 'OpenMP+CUDA', f'{threads}t',
            f"hybrid/kmeans_1d_omp_cuda data/dados_{name}.csv data/dados_{name}_centroides_init.csv 50 1e-6 256",
            env=env,
        )

# ============================================
# 3. MPI + CUDA
# ============================================
print("\n[3/4] MPI+CUDA...")
for name, _n, _k in datasets:
    print(f"  {name.upper()}: ", end="", flush=True)
    run_benchmark(
        name, 'MPI+CUDA', '1p',
        f"mpirun -np 1 --allow-run-as-root hybrid/kmeans_1d_mpi_cuda data/dados_{name}.csv data/dados_{name}_centroides_init.csv 50 1e-6 256",
    )

# ============================================
# 4. Load OpenMP+MPI measurements (if present)
# ============================================
print("\n[4/4] OpenMP+MPI...")
if os.path.exists('results/resultados_openmp_mpi.csv'):
    df_omp_mpi = pd.read_csv('results/resultados_openmp_mpi.csv')
    print(f"  √¢≈ì‚Äú Carregado: {len(df_omp_mpi)} medi√É¬ß√É¬µes")
    all_results.extend(df_omp_mpi.to_dict('records'))
else:
    print("  √¢≈°¬†√Ø¬∏¬è  Arquivo n√É¬£o encontrado. Execute a c√É¬©lula de OpenMP+MPI primeiro!")

# ============================================
# SAVE RESULTS
# ============================================
if all_results:
    df_final = pd.DataFrame(all_results)
    output_file = 'results/resultados_colab.csv'
    df_final.to_csv(output_file, index=False)

    print(f"\n{'='*60}")
    print(f"√¢≈ì‚Ä¶ RESULTADOS CONSOLIDADOS")
    print(f"{'='*60}")
    print(f"Arquivo: {output_file}")
    print(f"Total: {len(df_final)} medi√É¬ß√É¬µes")
    print(f"\nResumo por implementa√É¬ß√É¬£o:")
    print(df_final.groupby('implementacao').size())
    print(f"\n{'='*60}")
    print("√¢≈ì‚Ä¶ Agora execute a pr√É¬≥xima c√É¬©lula para an√É¬°lise completa")
    print(f"{'='*60}")
else:
    print("\n√¢≈°¬†√Ø¬∏¬è  NENHUM RESULTADO COLETADO!")


---

# 📈 ANÁLISE ACADÊMICA COMPLETA

Executa o script de análise completa que gera todos os gráficos e relatório detalhado.

In [None]:
# Run the repository's analysis script through the shell
!python scripts/analise_academica.py

---

# 📊 VISUALIZAR RESULTADOS NO COLAB

Visualize todos os gráficos e o relatório diretamente no notebook.

In [None]:
from IPython.display import Image, display, Markdown
import os

# Separator line reused by every header printed in this cell.
linha = "=" * 70

print(linha)
print("VISUALIZANDO GR√É¬ÅFICOS GERADOS")
print(linha)

# (file name inside results/, caption printed above the image)
graficos = [
    ('01_speedup_comparativo.png', '√∞≈∏‚Äú≈† Speedup Comparativo - Todas Implementa√É¬ß√É¬µes'),
    ('02_throughput.png', '√¢≈°¬° Throughput (Milh√É¬µes de Pontos/Segundo)'),
    ('03_openmp_scaling.png', '√∞≈∏‚Äù‚Äû Escalabilidade OpenMP'),
    ('04_mpi_scaling.png', '√∞≈∏≈í¬ê Escalabilidade MPI'),
    ('05_cuda_blocksize.png', '√∞≈∏≈Ω¬Ø CUDA: Impacto do Block Size'),
    ('06_hibridas_comparacao.png', '√∞≈∏‚Äù‚Ç¨ Compara√É¬ß√É¬£o de Implementa√É¬ß√É¬µes H√É¬≠bridas')
]

# Render each chart found on disk; report the ones that are missing.
for arquivo, titulo in graficos:
    caminho = 'results/' + arquivo
    if not os.path.exists(caminho):
        print(f"\n√¢≈°¬†√Ø¬∏¬è  Gr√É¬°fico n√É¬£o encontrado: {arquivo}")
        continue

    print("\n" + linha)
    print(titulo)
    print(linha)
    display(Image(filename=caminho, width=900))

print("\n" + linha)
print("√¢≈ì‚Äú Visualiza√É¬ß√É¬£o completa!")
print(linha)

## 📄 Visualizar Relatório Completo

In [None]:
from IPython.display import Markdown, display
import os

# Markdown report rendered by this cell.
relatorio_path = 'results/RELATORIO_COMPLETO.md'

if not os.path.exists(relatorio_path):
    # No report on disk yet: point the user at the cell that generates it.
    print("√¢≈°¬†√Ø¬∏¬è  Relat√É¬≥rio n√É¬£o encontrado!")
    print("Execute a c√É¬©lula de an√É¬°lise completa primeiro.")
else:
    separador = "=" * 70

    print(separador)
    print("RELAT√É‚ÄúRIO COMPLETO - K-MEANS 1D PARALELO")
    print(separador)
    print()

    with open(relatorio_path, mode='r', encoding='utf-8') as arq:
        conteudo = arq.read()

    # Show the file contents as rendered Markdown rather than raw text.
    display(Markdown(conteudo))

    print("\n" + separador)
    print("√¢≈ì‚Äú Relat√É¬≥rio exibido com sucesso!")
    print(separador)

## 📊 Resumo Rápido dos Resultados

In [None]:
import json
import os

import pandas as pd

# Implementations listed in the summary, in display order.
IMPLEMENTACOES = ['Serial', 'OpenMP', 'MPI', 'OpenMP+MPI', 'CUDA', 'OpenMP+CUDA', 'MPI+CUDA']


def resumo_dataset_grande(df_grande):
    """Build the summary lines for the rows of the 'grande' dataset.

    Args:
        df_grande: DataFrame with the columns 'implementacao', 'config',
            'tempo_ms' and 'sse'. Rows whose 'tempo_ms' is NaN are ignored.

    Returns:
        A list of strings to be printed in order: a header, one line per
        implementation in IMPLEMENTACOES that has at least one row (its
        fastest row, plus the speedup against the fastest 'Serial' row when
        such a row with a positive time exists), and a footer naming the
        fastest row overall. Returns an empty list when no row has a time.
    """
    # Drop rows without a time and renumber so idxmin/loc address one row.
    validos = df_grande.dropna(subset=['tempo_ms']).reset_index(drop=True)
    if validos.empty:
        return []

    # Serial baseline, computed once. min() of an empty selection is NaN,
    # which makes tem_baseline False instead of leaving the name undefined.
    t_serial = validos.loc[validos['implementacao'] == 'Serial', 'tempo_ms'].min()
    tem_baseline = bool(pd.notna(t_serial) and t_serial > 0)

    linhas = [
        "\n√∞≈∏¬è‚Ä† DATASET GRANDE (1M pontos) - Melhor configura√É¬ß√É¬£o:",
        "=" * 70,
    ]

    for impl in IMPLEMENTACOES:
        df_impl = validos[validos['implementacao'] == impl]
        if df_impl.empty:
            continue

        best_idx = df_impl['tempo_ms'].idxmin()
        tempo = df_impl.loc[best_idx, 'tempo_ms']
        # str(): the 's' format code rejects non-string values such as an
        # integer thread count or a NaN read from the CSV.
        config = str(df_impl.loc[best_idx, 'config'])
        sse = df_impl.loc[best_idx, 'sse']

        if tem_baseline:
            speedup = t_serial / tempo
            linhas.append(f"{impl:15s} | {config:8s} | {tempo:8.2f} ms | {speedup:5.2f}x | SSE: {sse:,.0f}")
        else:
            linhas.append(f"{impl:15s} | {config:8s} | {tempo:8.2f} ms | SSE: {sse:,.0f}")

    # Fastest row overall, regardless of implementation.
    best_overall = validos.loc[validos['tempo_ms'].idxmin()]
    linhas.append("\n" + "=" * 70)
    linhas.append(f"√∞≈∏¬•‚Ä° CAMPE√É∆íO ABSOLUTO: {best_overall['implementacao']} ({best_overall['config']})")
    linhas.append(f"   Tempo: {best_overall['tempo_ms']:.2f} ms")
    if tem_baseline:
        linhas.append(f"   Speedup: {t_serial/best_overall['tempo_ms']:.2f}x vs Serial")
    linhas.append("=" * 70)
    return linhas


print("="*70)
print("RESUMO R√É¬ÅPIDO - MELHORES RESULTADOS")
print("="*70)

# Load whichever result files are present (Colab run and/or Windows run).
results_files = [
    pd.read_csv(caminho_csv)
    for caminho_csv in ('results/resultados_colab.csv', 'results/resultados_windows.csv')
    if os.path.exists(caminho_csv)
]

if results_files:
    df = pd.concat(results_files, ignore_index=True)

    # Only the 'grande' dataset is summarised here.
    df_grande = df[df['dataset'] == 'grande'].copy()
    for linha_resumo in resumo_dataset_grande(df_grande):
        print(linha_resumo)

    # Environment description, when the file is present.
    if os.path.exists('results/ambiente.json'):
        with open('results/ambiente.json', 'r', encoding='utf-8') as f:
            env = json.load(f)

        print("\n√∞≈∏‚Äú‚Äπ AMBIENTE:")
        print("="*70)
        print(f"GPU:  {env.get('gpu_name', 'N/A')} ({env.get('gpu_memory', 'N/A')})")
        # str(): a null/non-string cpu_model in the JSON cannot be sliced.
        print(f"CPU:  {str(env.get('cpu_model', 'N/A'))[:50]}...")
        print(f"CUDA: {env.get('cuda_version', 'N/A')}")
        print("="*70)

else:
    print("\n√¢≈°¬†√Ø¬∏¬è  Nenhum arquivo de resultados encontrado!")
    print("Execute os benchmarks primeiro.")

print("\n√¢≈ì‚Äú An√É¬°lise r√É¬°pida conclu√É¬≠da!")

---

# 📥 DOWNLOAD DOS RESULTADOS

Use esta célula para baixar todos os gráficos e o relatório completo para seu computador.

In [None]:
from google.colab import files
import os

# Artifacts offered for download, grouped under the progress message that is
# printed before each group starts.
grupos_download = [
    ("Baixando gr√É¬°ficos...", [
        'results/01_speedup_comparativo.png',
        'results/02_throughput.png',
        'results/03_openmp_scaling.png',
        'results/04_mpi_scaling.png',
        'results/05_cuda_blocksize.png',
        'results/06_hibridas_comparacao.png',
    ]),
    ("\nBaixando relat√É¬≥rio...", [
        'results/RELATORIO_COMPLETO.md',
    ]),
    ("\nBaixando dados brutos...", [
        'results/resultados_colab.csv',
        'results/ambiente.json',
        'results/cuda_blocksize.csv',
    ]),
]

ausentes = []
for mensagem, caminhos in grupos_download:
    print(mensagem)
    for caminho in caminhos:
        # files.download raises FileNotFoundError for a missing path, which
        # would abort every remaining download; skip and report instead.
        if os.path.exists(caminho):
            files.download(caminho)
        else:
            ausentes.append(caminho)
            print(f"  [ausente] {caminho}")

if ausentes:
    print(f"\n{len(ausentes)} arquivo(s) ausente(s): execute as etapas anteriores e repita o download.")
else:
    print("\n√¢≈ì‚Ä¶ Download completo! Arquivos prontos para o trabalho acad√É¬™mico.")