In [1]:
import (
    "fmt"
    "log"
    "os"

    "gonum.org/v1/gonum/floats"
    "github.com/go-gota/gota/dataframe"
    "github.com/go-gota/gota/series"
)

In [2]:
type centroid []float64

In [3]:
irisFile, err := os.Open("iris.csv")
if err != nil {
    log.Fatal(err)
}
irisDF := dataframe.ReadCSV(irisFile)
irisFile.Close()

In [4]:
speciesNames := []string{
    "Iris-setosa",
    "Iris-versicolor",
    "Iris-virginica",
}

In [5]:
centroids := make(map[string]centroid)

clusters := make(map[string]dataframe.DataFrame)

for _, species := range speciesNames {

    filter := dataframe.F{
        Colname:    "species",
        Comparator: series.Eq,
        Comparando: species,
    }
    filtered := irisDF.Filter(filter)

    clusters[species] = filtered

    summaryDF := filtered.Describe()

    var c centroid
    for _, feature := range summaryDF.Names() {

        if feature == "column" || feature == "species" {
            continue
        }
        c = append(c, summaryDF.Col(feature).Float()[0])
    }

    centroids[species] = c
}

In [6]:
func dfFloatRow(df dataframe.DataFrame, names []string, idx int) []float64 {
    var row []float64
    for _, name := range names {
        row = append(row, df.Col(name).Float()[idx])
    }
    return row
}

In [7]:
labels := irisDF.Col("species").Records()
floatColumns := []string{
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
}

var silhouette float64

for idx, label := range labels {

    var a float64

    for i := 0; i < clusters[label].Nrow(); i++ {

        current := dfFloatRow(irisDF, floatColumns, idx)
        other := dfFloatRow(clusters[label], floatColumns, i)

        a += floats.Distance(current, other, 2) / float64(clusters[label].Nrow())
    }

    var otherCluster string
    var distanceToCluster float64
    for _, species := range speciesNames {

        if species == label {
            continue
        }

        distanceForThisCluster := floats.Distance(centroids[label], centroids[species], 2)

        if distanceToCluster == 0.0 || distanceForThisCluster < distanceToCluster {
            otherCluster = species
            distanceToCluster = distanceForThisCluster
        }
    }

    var b float64

    for i := 0; i < clusters[otherCluster].Nrow(); i++ {

        current := dfFloatRow(irisDF, floatColumns, idx)
        other := dfFloatRow(clusters[otherCluster], floatColumns, i)

        b += floats.Distance(current, other, 2) / float64(clusters[otherCluster].Nrow())
    }

    if a > b {
        silhouette += ((b - a) / a) / float64(len(labels))
    }
    silhouette += ((b - a) / b) / float64(len(labels))
}

_, _ = fmt.Printf("\nAverage Silhouette Coefficient: %0.2f\n\n", silhouette)


Average Silhouette Coefficient: 0.51

