# Análise de Resultados de Benchmark

Este notebook analisa todos os CSVs de resultados e gera um resumo estatístico.

## Objetivo
Calcular estatísticas (média, desvio padrão, min, max) agrupadas por:
- Sistema (gRPC ou RabbitMQ)
- Operação (list, upload, download)
- Tamanho do arquivo (KB)
- Número de clientes


In [None]:
import csv
import os
import statistics
from pathlib import Path
from collections import defaultdict
import pandas as pd


## Configuração


In [None]:
# Directory configuration. The paths are relative ('..'), so they resolve
# against the working directory the notebook is run from.
project_root = Path('..')
results_dir = project_root / 'results'
output_file = results_dir / 'results_summary.csv'

# Print the absolute locations so the resolved paths are visible
print(f"üìÅ Diret√≥rio de resultados: {results_dir.absolute()}")
print(f"üìÑ Arquivo de sa√≠da: {output_file.absolute()}")


## Leitura dos Arquivos CSV


In [None]:
def read_csv_files(results_dir):
    """Read every ``benchmark_*.csv`` file in *results_dir* and keep the successful rows.

    A row is kept only when it has a non-empty ``timestamp`` and a ``success``
    value equal to ``"true"`` (case-insensitive). A row whose numeric fields
    cannot be parsed is skipped on its own, so one malformed line no longer
    discards the remaining rows of its file.

    Args:
        results_dir: Directory (``str`` or ``Path``) holding the benchmark CSVs.

    Returns:
        A list of dicts with the keys ``system``, ``operation``,
        ``file_size_kb`` (int), ``clients`` (int) and ``rtt_ms`` (float).
        The list is empty when the directory does not exist or contains no
        matching file.
    """
    all_results = []
    results_path = Path(results_dir)

    if not results_path.exists():
        print(f"‚ùå Diret√≥rio {results_dir} n√£o encontrado!")
        return []

    # Sorted so the files are processed in a reproducible order.
    csv_files = sorted(results_path.glob("benchmark_*.csv"))

    if not csv_files:
        print(f"‚ö†Ô∏è  Nenhum arquivo CSV encontrado em {results_dir}")
        return []

    print(f"üìä Encontrados {len(csv_files)} arquivo(s) CSV")

    for csv_file in csv_files:
        skipped = 0
        try:
            # newline='' lets the csv module handle line endings itself
            # (required for quoted fields that contain embedded newlines).
            with open(csv_file, 'r', newline='', encoding='utf-8') as f:
                for row in csv.DictReader(f):
                    # Skip blank or incomplete rows
                    if not row.get('timestamp') or not row.get('success'):
                        continue

                    # Only successful operations are aggregated
                    if row['success'].lower() != 'true':
                        continue

                    # Parse per row: a bad value must only cost this row,
                    # not the rest of the file.
                    try:
                        record = {
                            'system': row.get('system', ''),
                            'operation': row.get('operation', ''),
                            'file_size_kb': int(row.get('file_size_kb', 0)),
                            'clients': int(row.get('clients', 0)),
                            'rtt_ms': float(row.get('rtt_ms', 0)),
                        }
                    except (TypeError, ValueError):
                        # ValueError: blank/non-numeric text; TypeError: short
                        # row where DictReader filled the field with None.
                        skipped += 1
                        continue

                    all_results.append(record)
        except (OSError, UnicodeError, csv.Error) as e:
            # Best effort: report the unreadable file and move on to the next.
            print(f"‚ö†Ô∏è  Erro ao ler {csv_file.name}: {e}")

        if skipped:
            print(f"   {csv_file.name}: {skipped} linha(s) malformada(s) ignorada(s)")

    print(f"‚úÖ Total de {len(all_results)} resultados v√°lidos processados")
    return all_results

# Read all benchmark CSVs found in results_dir
results = read_csv_files(results_dir)


## Cálculo de Estatísticas


In [None]:
def calculate_statistics(results):
    """Aggregate RTT samples per (system, operation, file_size_kb, clients).

    Args:
        results: Iterable of dicts carrying the keys ``system``,
            ``operation``, ``file_size_kb``, ``clients`` and ``rtt_ms``.

    Returns:
        A list with one dict per distinct group, ordered by the group key,
        holding ``mean_ms``, ``stddev_ms``, ``min_ms``, ``max_ms`` and
        ``count``. ``stddev_ms`` is ``0.0`` for groups with a single sample.
    """
    group_fields = ('system', 'operation', 'file_size_kb', 'clients')

    # Bucket the RTT samples under their group key
    samples_by_group = defaultdict(list)
    for entry in results:
        group = tuple(entry[field] for field in group_fields)
        samples_by_group[group].append(entry['rtt_ms'])

    # Walk the groups in key order so the output comes out already sorted
    rows = []
    for group in sorted(samples_by_group):
        samples = samples_by_group[group]
        system, operation, file_size_kb, clients = group

        # The sample standard deviation is undefined for fewer than 2 values
        spread = statistics.stdev(samples) if len(samples) >= 2 else 0.0

        rows.append({
            'system': system,
            'operation': operation,
            'file_size_kb': file_size_kb,
            'clients': clients,
            'mean_ms': statistics.mean(samples),
            'stddev_ms': spread,
            'min_ms': min(samples),
            'max_ms': max(samples),
            'count': len(samples),
        })

    return rows

# Compute the grouped statistics and report how many distinct groups were found
print("üìà Calculando estat√≠sticas...")
summary = calculate_statistics(results)
print(f"‚úÖ {len(summary)} combina√ß√µes √∫nicas calculadas")


## Visualização dos Dados


In [None]:
# Convert the summary to a DataFrame for display. The columns are listed
# explicitly so that an empty summary still produces a frame with the expected
# columns; otherwise later lookups such as df_summary['system'] raise KeyError.
df_summary = pd.DataFrame(
    summary,
    columns=['system', 'operation', 'file_size_kb', 'clients',
             'mean_ms', 'stddev_ms', 'min_ms', 'max_ms', 'count'],
)

# Show the first rows
print("üìä Primeiras 10 linhas do resumo:")
display(df_summary.head(10))


In [None]:
# Overall statistics banner
print("‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ")
print("  Estat√≠sticas Gerais")
print("‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ")

# Distinct systems and operations present in the summary
systems = df_summary['system'].unique()
operations = df_summary['operation'].unique()

print(f"Sistemas: {', '.join(sorted(systems))}")
print(f"Opera√ß√µes: {', '.join(sorted(operations))}")
print(f"Total de combina√ß√µes: {len(df_summary)}")
# Summing the per-group 'count' column gives the number of rows that were aggregated
print(f"Total de resultados processados: {df_summary['count'].sum():,}")


In [None]:
# Per-system summary: 'mean_ms' is the plain average of the per-combination
# means (every combination weighs the same, regardless of its sample count),
# while 'count' is the total number of samples for the system.
print("\nüìä Resumo por Sistema:")
display(df_summary.groupby('system').agg({
    'mean_ms': 'mean',
    'count': 'sum'
}).round(3))


In [None]:
# Per-operation summary: 'mean_ms' is the plain average of the per-combination
# means (not weighted by sample count), while 'count' is the total number of
# samples for the operation.
print("\nüìä Resumo por Opera√ß√£o:")
display(df_summary.groupby('operation').agg({
    'mean_ms': 'mean',
    'count': 'sum'
}).round(3))


## Salvando o CSV de Resumo


In [None]:
def write_summary_csv(summary, output_file):
    """Write the aggregated statistics to *output_file* as CSV.

    The file gets a header row followed by one line per summary entry. The
    four millisecond columns are formatted with three decimal places. The
    ``count`` key of each entry is not written.

    Args:
        summary: Iterable of dicts carrying at least the keys ``system``,
            ``operation``, ``file_size_kb``, ``clients``, ``mean_ms``,
            ``stddev_ms``, ``min_ms`` and ``max_ms``.
        output_file: Destination path (``str`` or ``Path``). Missing parent
            directories are created.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened for writing.
    """
    fieldnames = ['system', 'operation', 'file_size_kb', 'clients',
                  'mean_ms', 'stddev_ms', 'min_ms', 'max_ms']

    # Without this, open() raises FileNotFoundError whenever the destination
    # directory does not exist yet.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for row in summary:
            writer.writerow({
                'system': row['system'],
                'operation': row['operation'],
                'file_size_kb': row['file_size_kb'],
                'clients': row['clients'],
                'mean_ms': f"{row['mean_ms']:.3f}",
                'stddev_ms': f"{row['stddev_ms']:.3f}",
                'min_ms': f"{row['min_ms']:.3f}",
                'max_ms': f"{row['max_ms']:.3f}"
            })

    print(f"‚úÖ Resumo salvo em: {output_file}")
    print(f"üìä Total de {len(summary)} combina√ß√µes √∫nicas")

# Save the summary CSV to output_file
write_summary_csv(summary, output_file)
