Este notebook deve ser executado no Google Colab, pois foi nesse ambiente que o código com OpenMP foi compilado e executado.

In [None]:
from sklearn.datasets import load_iris
import numpy as np

# Dump the Iris feature matrix and its class labels to plain-text files
# (one sample per line) so the C benchmark can read them back with fscanf.
data = load_iris()
for out_path, values, number_format in (
    ("iris_data.txt", data.data, "%.4f"),
    ("iris_labels.txt", data.target, "%d"),
):
    np.savetxt(out_path, values, fmt=number_format)

In [None]:
%%writefile benchmark_openmp_dunn.c
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include <math.h>
#include <float.h>
#include <string.h>

#define MAX_POINTS 200
#define MAX_CLUSTERS 10
#define DIM 4

float euclidean_distance(float *a, float *b, int dim) {
    float sum = 0.0f;
    for (int i = 0; i < dim; i++) {
        float diff = a[i] - b[i];
        sum += diff * diff;
    }
    return sqrtf(sum);
}

/*
 * Computes the Dunn index of a labelled point set:
 *
 *     min distance between points of different clusters
 *     --------------------------------------------------
 *     max distance between points of the same cluster
 *
 * data          : n * dim floats, point p occupies data[p*dim .. p*dim+dim-1]
 * labels        : n cluster labels, one per point
 * n, dim        : number of points and components per point
 * out_clusters  : receives the distinct labels in order of first appearance
 * out_counts    : receives the number of points carrying each distinct label
 * out_diameters : receives the largest intra-cluster distance of each cluster
 * out_delta     : receives the smallest inter-cluster distance
 *
 * The three output arrays must have room for MAX_CLUSTERS entries; only the
 * entries of clusters actually found are written.
 *
 * Returns the Dunn index. Returns NAN (and stores NAN in *out_delta) when
 * the index is undefined because fewer than two clusters were found, or when
 * more than MAX_CLUSTERS distinct labels are present (the output arrays are
 * then only partially filled). If every cluster has zero diameter the final
 * division is by zero and the result is not a finite number.
 */
float dunn_index_openmp(float *data, int *labels, int n, int dim,
                        int *out_clusters, int *out_counts, float *out_diameters, float *out_delta) {
    float delta_min = FLT_MAX;
    float diameter_max = 0.0f;
    int unique_labels[MAX_CLUSTERS];
    int num_clusters = 0;

    /* Discover the distinct labels and count the members of each one. */
    for (int i = 0; i < n; i++) {
        int idx = -1;
        for (int j = 0; j < num_clusters; j++) {
            if (labels[i] == unique_labels[j]) {
                idx = j;
                break;
            }
        }
        if (idx < 0) {
            if (num_clusters == MAX_CLUSTERS) {
                /* One more label would overflow unique_labels and the
                 * caller's output arrays, so give up instead. */
                *out_delta = NAN;
                return NAN;
            }
            idx = num_clusters++;
            unique_labels[idx] = labels[i];
            out_clusters[idx] = labels[i];
            out_counts[idx] = 0;
            out_diameters[idx] = 0.0f;
        }
        out_counts[idx]++;
    }

    /*
     * Single pass over every unordered pair of points: a pair with equal
     * labels contributes to that cluster's diameter, any other pair to the
     * inter-cluster minimum. The work is shared over points rather than over
     * clusters, which keeps all threads busy even with very few clusters and
     * only needs a plain (rectangular) worksharing loop.
     */
    #pragma omp parallel
    {
        /* Per-thread partial results, merged once at the end. */
        float local_delta = FLT_MAX;
        float local_diameter[MAX_CLUSTERS];
        for (int c = 0; c < num_clusters; c++) {
            local_diameter[c] = 0.0f;
        }

        /* The inner loop shrinks as p grows, so iterations are dealt out
         * round-robin to balance the load between threads. */
        #pragma omp for schedule(static, 1) nowait
        for (int p = 0; p < n; p++) {
            /* Cluster slot of point p; always found because every label was
             * registered in the discovery loop above. */
            int cp = 0;
            while (unique_labels[cp] != labels[p]) {
                cp++;
            }
            for (int q = p + 1; q < n; q++) {
                float dist = euclidean_distance(&data[p * dim], &data[q * dim], dim);
                if (labels[q] == labels[p]) {
                    if (dist > local_diameter[cp]) local_diameter[cp] = dist;
                } else if (dist < local_delta) {
                    local_delta = dist;
                }
            }
        }

        #pragma omp critical
        {
            if (local_delta < delta_min) delta_min = local_delta;
            for (int c = 0; c < num_clusters; c++) {
                if (local_diameter[c] > out_diameters[c]) out_diameters[c] = local_diameter[c];
            }
        }
    }

    /* Largest diameter over all clusters. */
    for (int c = 0; c < num_clusters; c++) {
        if (out_diameters[c] > diameter_max) diameter_max = out_diameters[c];
    }

    if (num_clusters < 2) {
        /* No inter-cluster pair exists, so the index is undefined. */
        *out_delta = NAN;
        return NAN;
    }

    *out_delta = delta_min;
    return delta_min / diameter_max;
}

/*
 * Benchmark driver.
 *
 * Reads the Iris samples from iris_data.txt / iris_labels.txt, then, for each
 * prefix size in `sizes`, times dunn_index_openmp on the first `size` samples.
 * Results are printed to stdout and appended as one row per size to
 * resultado_benchmark.tsv (columns: tamanho, tempo, indice_dunn, delta_min).
 * Prefixes in which every sample carries the same label are skipped, because
 * the Dunn index needs at least two clusters.
 *
 * NOTE: any cell that loads the results must open this exact file name.
 *
 * Returns 0 on success, 1 if a file cannot be opened or parsed.
 */
int main() {
    enum { N_SAMPLES = 150 };  /* number of samples read from the input files */

    float data[MAX_POINTS * DIM];
    int labels[MAX_POINTS];
    int sizes[] = {30, 60, 90, 120, 150};
    int num_sizes = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int dim = DIM;

    FILE *f_data = fopen("iris_data.txt", "r");
    FILE *f_label = fopen("iris_labels.txt", "r");
    if (!f_data || !f_label) {
        printf("Erro ao abrir arquivos iris_data.txt ou iris_labels.txt.\n");
        /* One of the two may have opened successfully; do not leak it. */
        if (f_data) fclose(f_data);
        if (f_label) fclose(f_label);
        return 1;
    }

    /* Read one label and `dim` features per sample; stop at the first
     * parse error so both files are closed on a single exit path. */
    int read_failed = 0;
    for (int i = 0; i < N_SAMPLES && !read_failed; i++) {
        if (fscanf(f_label, "%d", &labels[i]) != 1) {
            fprintf(stderr, "Erro ao ler label %d\n", i);
            read_failed = 1;
            break;
        }
        for (int j = 0; j < dim; j++) {
            if (fscanf(f_data, "%f", &data[i * dim + j]) != 1) {
                fprintf(stderr, "Erro ao ler dado (%d, %d)\n", i, j);
                read_failed = 1;
                break;
            }
        }
    }
    fclose(f_data);
    fclose(f_label);
    if (read_failed) return 1;

    FILE *log_file = fopen("resultado_benchmark.tsv", "w");
    if (!log_file) {
        fprintf(stderr, "Erro ao criar arquivo resultado_benchmark.tsv.\n");
        return 1;
    }
    fprintf(log_file, "tamanho\ttempo\tindice_dunn\tdelta_min\n");

    printf("Tamanho\tTempo_OpenMP(s)\tDunn_Index\n");
    for (int s = 0; s < num_sizes; s++) {
        int size = sizes[s];

        /* Never look past the samples that were actually loaded. */
        if (size > N_SAMPLES) continue;

        /* At least two distinct labels exist iff some label differs from
         * the first one. */
        int has_diff = 0;
        for (int i = 1; i < size; i++) {
            if (labels[i] != labels[0]) {
                has_diff = 1;
                break;
            }
        }
        if (!has_diff) continue;

        int clusters[MAX_CLUSTERS] = {0};
        int counts[MAX_CLUSTERS] = {0};
        float diameters[MAX_CLUSTERS] = {0.0f};
        float delta_min = 0.0f;

        double start = omp_get_wtime();
        float dunn = dunn_index_openmp(data, labels, size, dim, clusters, counts, diameters, &delta_min);
        double end = omp_get_wtime();

        printf("%d\t%.6f\t\t%.4f\n", size, end - start, dunn);
        fprintf(log_file, "%d\t%.6f\t%.4f\t%.4f\n", size, end - start, dunn, delta_min);

        /* counts[] was zero-initialised, so the first zero marks the end of
         * the clusters that were found. */
        printf("Clusters:\n");
        for (int i = 0; i < MAX_CLUSTERS && counts[i] > 0; i++) {
            printf("  Label %d: %d pontos, diâmetro = %.4f\n", clusters[i], counts[i], diameters[i]);
        }
        printf("Distância mínima entre clusters = %.4f\n", delta_min);
        printf("--------------------------------------\n");
    }

    fclose(log_file);
    return 0;
}

In [None]:
# Build the C benchmark with OpenMP enabled (-fopenmp), -O2 optimisation and
# the math library (-lm), then run the resulting binary. The program prints
# its results and also writes them to resultado_benchmark.tsv.
!gcc -fopenmp benchmark_openmp_dunn.c -o bench_openmp -O2 -lm
!./bench_openmp

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the results written by the C benchmark. The file name has to match the
# one the C program opens for writing (resultado_benchmark.tsv).
df = pd.read_csv("resultado_benchmark.tsv", sep="\t")

# Dual-axis figure: execution time on the left axis, Dunn index on the right.
fig, ax1 = plt.subplots(figsize=(10, 6))

# Execution time
ax1.plot(df["tamanho"], df["tempo"], marker='o', label="Tempo (s)", color="tab:blue")
ax1.set_xlabel("Tamanho do Dataset")
ax1.set_ylabel("Tempo de execução (s)", color="tab:blue")
ax1.tick_params(axis='y', labelcolor="tab:blue")

# Dunn index
ax2 = ax1.twinx()
ax2.plot(df["tamanho"], df["indice_dunn"], marker='x', label="Índice de Dunn", color="tab:red")
ax2.set_ylabel("Índice de Dunn", color="tab:red")
ax2.tick_params(axis='y', labelcolor="tab:red")

# The two lines live on different axes, so merge their handles into a single
# legend; without this the label= arguments above are never shown.
time_handles, time_labels = ax1.get_legend_handles_labels()
dunn_handles, dunn_labels = ax2.get_legend_handles_labels()
ax1.legend(time_handles + dunn_handles, time_labels + dunn_labels, loc="upper left")

plt.title("Benchmark OpenMP - Tempo vs Índice de Dunn")
fig.tight_layout()
plt.grid(True)
plt.savefig("benchmark_dunn_openmp.png")
plt.show()