# Logistic Regression for Iris Classification 

In [19]:
import "github.com/go-gota/gota/dataframe"

In [20]:
// getDataframe opens the CSV file at the given path and parses it into a
// gota DataFrame.
//
// It returns an error when the file cannot be opened or when parsing
// fails. ReadCSV has no separate error result; it reports failures through
// the Err field of the DataFrame it returns, so that field is surfaced here
// as the error result instead of being silently dropped.
func getDataframe(file string) (dataframe.DataFrame, error) {
    f, err := os.Open(file)
    if err != nil {
        return dataframe.DataFrame{}, err
    }
    defer f.Close()

    df := dataframe.ReadCSV(f)
    if df.Err != nil {
        return dataframe.DataFrame{}, df.Err
    }
    return df, nil
}

In [22]:
%%
// Load the raw iris dataset; on failure print the error and stop this cell.
df, err := getDataframe("../data/iris.csv")
if err != nil {
    fmt.Println(err)
    return
}

// Print the DataFrame itself, followed by the result of df.Describe().
fmt.Println(df)
fmt.Println(df.Describe())

[150x5] DataFrame

    Sepal Length Sepal Width Petal Length Petal Width Species
 0: 5.100000     3.500000    1.400000     0.200000    Iris-setosa
 1: 4.900000     3.000000    1.400000     0.200000    Iris-setosa
 2: 4.700000     3.200000    1.300000     0.200000    Iris-setosa
 3: 4.600000     3.100000    1.500000     0.200000    Iris-setosa
 4: 5.000000     3.600000    1.400000     0.200000    Iris-setosa
 5: 5.400000     3.900000    1.700000     0.400000    Iris-setosa
 6: 4.600000     3.400000    1.400000     0.300000    Iris-setosa
 7: 5.000000     3.400000    1.500000     0.200000    Iris-setosa
 8: 4.400000     2.900000    1.400000     0.200000    Iris-setosa
 9: 4.900000     3.100000    1.500000     0.100000    Iris-setosa
    ...          ...         ...          ...         ...
    <float>      <float>     <float>      <float>     <string>

[8x6] DataFrame

    column   Sepal Length Sepal Width Petal Length Petal Width Species
 0: mean     5.843333     3.054000    3.758667   

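Kita akan mengubah masalah ini menjadi klasifikasi biner: Iris-setosa dan bukan Iris-setosa. Kolom Species diubah menjadi 1.0 untuk Iris-setosa dan 0.0 untuk yang bukan, dan keempat fitur numerik dinormalisasi dengan min-max scaling. Langkah yang sama diulang untuk Iris-versicolor, dengan hanya menyertakan baris Iris-versicolor dan Iris-virginica.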

In [95]:
// norm reads ../data/iris.csv and writes a cleaned copy to
// 01-clean-iris-<file>.csv in the current directory.
//
// The header row is copied unchanged. For every data row that is kept, the
// four numeric columns are min-max scaled with the hard-coded bounds below
// and formatted with four decimals, and the Species column is replaced by
// "1.0" when it equals "Iris-"+file and by "0.0" otherwise.
//
// Row selection depends on file:
//   - "setosa": every row is kept.
//   - "versicolor": only Iris-versicolor and Iris-virginica rows are kept.
//   - anything else: only the header is written.
//
// Any I/O or parse error terminates the program through log.Fatal.
func norm(file string) {
    // Per-column bounds used for scaling, in CSV column order.
    bounds := []struct{ min, max float64 }{
        {4.3, 7.9},
        {2.0, 4.4},
        {1.0, 6.9},
        {0.1, 2.5},
    }

    // Open the iris dataset file.
    in, err := os.Open("../data/iris.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer in.Close()

    // Every record, header included, must have exactly five fields.
    reader := csv.NewReader(in)
    reader.FieldsPerRecord = 5

    rawCSVData, err := reader.ReadAll()
    if err != nil {
        log.Fatal(err)
    }

    // The output gets its own handle so it can be closed explicitly, with
    // its error checked, once everything has been flushed.
    out, err := os.Create("01-clean-iris-" + file + ".csv")
    if err != nil {
        log.Fatal(err)
    }

    w := csv.NewWriter(out)

    for idx, record := range rawCSVData {
        // The header row is passed through as-is.
        if idx == 0 {
            if err := w.Write(record); err != nil {
                log.Fatal(err)
            }
            continue
        }

        species := record[4]
        keep := file == "setosa" ||
            (file == "versicolor" && (species == "Iris-versicolor" || species == "Iris-virginica"))
        if !keep {
            continue
        }

        outRecord := make([]string, 5)
        for i, v := range record[:4] {
            value, err := strconv.ParseFloat(v, 64)
            if err != nil {
                log.Fatal(err)
            }
            b := bounds[i]
            outRecord[i] = strconv.FormatFloat((value-b.min)/(b.max-b.min), 'f', 4, 64)
        }

        // Binary label: 1.0 for the target species, 0.0 for everything else.
        if species == "Iris-"+file {
            outRecord[4] = "1.0"
        } else {
            outRecord[4] = "0.0"
        }

        if err := w.Write(outRecord); err != nil {
            log.Fatal(err)
        }
    }

    // Push any buffered records to the output file and surface write errors.
    w.Flush()
    if err := w.Error(); err != nil {
        log.Fatal(err)
    }

    // A failed Close can mean the data never reached disk, so it is fatal too.
    if err := out.Close(); err != nil {
        log.Fatal(err)
    }
}

In [96]:
%%
// Build both cleaned datasets: 01-clean-iris-setosa.csv and 01-clean-iris-versicolor.csv.
norm("setosa")
norm("versicolor")

## Membagi Data menjadi Training dan Test Set

In [97]:
func splitData(file string) {
    df, err := getDataframe("01-clean-iris-"+file+".csv")
    if err != nil {
      fmt.Println(err)
      return
    }
    
    //df = df.Select([]int{0,2})
    
    trainingNum := (4 * df.Nrow()) / 5
    
    // Shuffling data
    allIdx := make([]int, df.Nrow())
    for i := range allIdx {
      allIdx[i] = i
    }
    rand.Shuffle(len(allIdx), func(i, j int) { allIdx[i], allIdx[j] = allIdx[j], allIdx[i] })
    
    // Gunakan indeks acak untuk split
    trainingIdx := allIdx[:trainingNum]
    testIdx := allIdx[trainingNum:]
    
    trainingDF := df.Subset(trainingIdx)
    testDF := df.Subset(testIdx)
    
    setMap := map[int]dataframe.DataFrame{
        0: trainingDF,
        1: testDF,
    }
    
    for idx, setName := range []string{"01-train-iris-"+file+".csv", "01-test-iris-"+file+".csv"} {
        f, err := os.Create(setName)
        if err != nil {
            log.Fatal(err)
        }
        defer f.Close()
        opt := []dataframe.WriteOption{dataframe.WriteHeader(false)}
        w := bufio.NewWriter(f)
        if err := setMap[idx].WriteCSV(w, opt...); err != nil {
            log.Fatal(err)
        }
    }
}

In [98]:
%%
// Write the train/test CSV pair for each of the two cleaned datasets.
splitData("setosa")
splitData("versicolor")

## Melatih dan Menguji Model Regresi Logistik

In [11]:
import (
    "fmt"
    "github.com/cdipaolo/goml/base"
    "github.com/cdipaolo/goml/linear"
) 

// mSetosa is the logistic model used by isSetosa; main assigns it from
// train("setosa", ...).
var mSetosa *linear.Logistic

// mVersicolor is the logistic model used by isVersicolor; main assigns it
// from train("versicolor", ...).
var mVersicolor *linear.Logistic

// ConfusionMatrix holds the evaluation counts and derived metrics produced
// by tryValues for a binary classifier on a test set.
type ConfusionMatrix struct {
	// Number of test labels equal to 1.0 and 0.0 respectively.
	positive, negative int

	// Correct predictions, split by actual class.
	truePositive, trueNegative int

	// Wrong predictions, named after the predicted class.
	falsePositive, falseNegative int

	// Metrics derived from the counts above.
	recall, precision, accuracy float64
}

// String renders the matrix as a tab-indented, multi-line report: the six
// counts first, then a blank line, then recall, precision and accuracy
// with two decimals each.
func (cm ConfusionMatrix) String() string {
	const layout = "\tPositives: %d\n" +
		"\tNegatives: %d\n" +
		"\tTrue Positives: %d\n" +
		"\tTrue Negatives: %d\n" +
		"\tFalse Positives: %d\n" +
		"\tFalse Negatives: %d\n" +
		"\n" +
		"\tRecall: %.2f\n" +
		"\tPrecision: %.2f\n" +
		"\tAccuracy: %.2f\n"

	return fmt.Sprintf(layout,
		cm.positive, cm.negative,
		cm.truePositive, cm.trueNegative,
		cm.falsePositive, cm.falseNegative,
		cm.recall, cm.precision, cm.accuracy,
	)
}

// tryValues trains a logistic regression model on xTrain/yTrain with the
// given hyper-parameters (batch gradient ascent) and evaluates it on
// xTest/yTest.
//
// A test row counts as predicted positive when the model output is at
// least decisionBoundary. Labels are expected to be 0.0 or 1.0; other
// label values are not counted in positive/negative.
//
// It returns the confusion matrix with derived metrics and the trained
// model. An error is returned when xTest and yTest differ in length, when
// training fails, or when a prediction fails. A metric whose denominator
// is zero (for example recall with no positive test rows) is left at 0
// instead of becoming NaN.
func tryValues(learningRate float64, regularization float64, iterations int, decisionBoundary float64, xTrain, xTest [][]float64, yTrain, yTest []float64) (*ConfusionMatrix, *linear.Logistic, error) {
	// Each test row is paired with yTest[i] below, so the lengths must agree.
	if len(xTest) != len(yTest) {
		return nil, nil, fmt.Errorf("test set size mismatch: %d feature rows, %d labels", len(xTest), len(yTest))
	}

	cm := ConfusionMatrix{}
	for _, y := range yTest {
		switch y {
		case 1.0:
			cm.positive++
		case 0.0:
			cm.negative++
		}
	}

	// Instantiate and learn the model; its progress output is discarded.
	model := linear.NewLogistic(base.BatchGA, learningRate, regularization, iterations, xTrain, yTrain)
	model.Output = ioutil.Discard
	if err := model.Learn(); err != nil {
		return nil, nil, err
	}

	// Evaluate the model on the test data.
	for i := range xTest {
		prediction, err := model.Predict(xTest[i])
		if err != nil {
			return nil, nil, err
		}
		y := int(yTest[i])
		positive := prediction[0] >= decisionBoundary

		switch {
		case y == 1 && positive:
			cm.truePositive++
		case y == 1:
			cm.falseNegative++
		case y == 0 && positive:
			cm.falsePositive++
		case y == 0:
			cm.trueNegative++
		}
	}

	// Calculate evaluation metrics, skipping any with a zero denominator.
	if cm.positive > 0 {
		cm.recall = float64(cm.truePositive) / float64(cm.positive)
	}
	if predicted := cm.truePositive + cm.falsePositive; predicted > 0 {
		cm.precision = float64(cm.truePositive) / float64(predicted)
	}
	if total := cm.positive + cm.negative; total > 0 {
		cm.accuracy = float64(cm.truePositive+cm.trueNegative) / float64(total)
	}
	return &cm, model, nil
}

// train loads ./01-train-iris-<file>.csv and ./01-test-iris-<file>.csv,
// fits a logistic model through tryValues using db as the decision
// boundary, prints the model and its confusion matrix, and returns the
// model. On any error the error is printed and nil is returned.
func train(file string, db float64) *linear.Logistic {
    // Fixed hyper-parameters handed to tryValues.
    const (
        learningRate   = 0.0001
        regularization = 0.0
        iterations     = 1000
    )

    trainPath := "./01-train-iris-" + file + ".csv"
    testPath := "./01-test-iris-" + file + ".csv"

    xTrain, yTrain, err := base.LoadDataFromCSV(trainPath)
    if err != nil {
        fmt.Println(err)
        return nil
    }

    xTest, yTest, err := base.LoadDataFromCSV(testPath)
    if err != nil {
        fmt.Println(err)
        return nil
    }

    matrix, fitted, err := tryValues(learningRate, regularization, iterations, db, xTrain, xTest, yTrain, yTest)
    if err != nil {
        fmt.Println(err)
        return nil
    }

    fmt.Printf("with Model: %s\n\n", fitted)
    fmt.Printf("with Confusion Matrix:\n%s\n\n", matrix)

    return fitted
}

// isSetosa reports whether the mSetosa model's output for d is at least
// db. If the prediction fails, the error is printed and false is returned.
func isSetosa(d []float64, db float64) bool {
    prediction, err := mSetosa.Predict(d)
    if err == nil {
        return prediction[0] >= db
    }

    fmt.Println(err)
    return false
}

// isVersicolor reports whether the mVersicolor model's output for d is at
// least db. If the prediction fails, the error is printed and false is
// returned.
func isVersicolor(d []float64, db float64) bool {
    prediction, err := mVersicolor.Predict(d)
    if err == nil {
        return prediction[0] >= db
    }

    fmt.Println(err)
    return false
}

// predic returns the species name predicted for one measurement vector.
//
// The input is passed through norm first. The setosa check (boundary 0.5)
// runs before the versicolor check (boundary 0.465); when neither matches
// the result is "Iris-virginica".
func predic(data []float64) string {
    data = norm(data)

    switch {
    case isSetosa(data, 0.5):
        return "Iris-setosa"
    case isVersicolor(data, 0.465):
        return "Iris-versicolor"
    default:
        return "Iris-virginica"
    }
}

// norm min-max scales a measurement vector with the hard-coded per-column
// bounds below and returns the result as a new slice.
//
// The input slice is left untouched, so normalising the same slice twice
// does not scale it twice. Only the first four values have bounds; any
// further values are copied through unchanged rather than causing an
// index-out-of-range panic.
func norm(data []float64) []float64 {
    // Per-column bounds, in the order the values appear in data.
    bounds := [...]struct{ min, max float64 }{
        {4.3, 7.9},
        {2.0, 4.4},
        {1.0, 6.9},
        {0.1, 2.5},
    }

    scaled := make([]float64, len(data))
    copy(scaled, data)

    for i := 0; i < len(scaled) && i < len(bounds); i++ {
        b := bounds[i]
        scaled[i] = (scaled[i] - b.min) / (b.max - b.min)
    }

    return scaled
}

// main trains the setosa and versicolor models, then prints the predicted
// species for three sample measurement vectors.
func main() {
    mSetosa = train("setosa", 0.5)
    mVersicolor = train("versicolor", 0.465)

    // train returns nil after printing its error; predicting with a nil
    // model would dereference a nil pointer, so stop here instead.
    if mSetosa == nil || mVersicolor == nil {
        return
    }

    samples := [][]float64{
        {5.1, 3.5, 1.4, 0.2},
        {7.0, 3.2, 4.7, 1.4},
        {6.3, 3.3, 6.0, 2.5},
    }
    for _, sample := range samples {
        fmt.Println(predic(sample))
    }
}

with Model: h(θ,x) = 1 / (1 + exp(-θx))
θx = 0.069 + -0.62503(x[1]) + 0.56786(x[2]) + -1.13825(x[3]) + -1.15268(x[4])

with Confusion Matrix:
	Positives: 10
	Negatives: 20
	True Positives: 10
	True Negatives: 20
	False Positives: 0
	False Negatives: 0

	Recall: 1.00
	Precision: 1.00
	Accuracy: 1.00


with Model: h(θ,x) = 1 / (1 + exp(-θx))
θx = 0.319 + -0.14578(x[1]) + -0.02283(x[2]) + -0.20026(x[3]) + -0.36193(x[4])

with Confusion Matrix:
	Positives: 9
	Negatives: 11
	True Positives: 9
	True Negatives: 10
	False Positives: 1
	False Negatives: 0

	Recall: 1.00
	Precision: 0.90
	Accuracy: 0.95


Iris-setosa
[0.46914450852605266]
Iris-versicolor
[0.4240442293612896]
Iris-virginica
