# Machine Learning in Go

### Reading in CSV data

In [1]:
import (
    "encoding/csv"
    "fmt"
    "os"
    "io"
    "strconv"
    "log"
)

In [2]:
{
    // Open the iris data set. Nothing below can work without the file, so
    // stop on failure instead of carrying on with a nil *os.File.
    f, err := os.Open("iris.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    // Wrap the file in a CSV reader. A negative FieldsPerRecord turns off the
    // per-record field-count check, so rows may have differing lengths.
    r := csv.NewReader(f)
    r.FieldsPerRecord = -1

    // [I] Alternative: read one record at a time and collect the rows.
    /*
    var rawCSV [][]string

    for {
        record, err := r.Read()
        if err == io.EOF {
            break
        }
        if err != nil {
            log.Fatal(err)
        }
        rawCSV = append(rawCSV, record)
    }
    */

    // [II] Read every record in a single call.
    rawCSV, err := r.ReadAll()
    if err != nil {
        // rawCSV is not usable when ReadAll reports an error.
        log.Fatal(err)
    }

    fmt.Println(rawCSV)
}

[[5.1 3.5 1.4 0.2 Iris-setosa] [4.9 3.0 1.4 0.2 Iris-setosa] [4.7 3.2 1.3 0.2 Iris-setosa] [4.6 3.1 1.5 0.2 Iris-setosa] [5.0 3.6 1.4 0.2 Iris-setosa] [5.4 3.9 1.7 0.4 Iris-setosa] [4.6 3.4 1.4 0.3 Iris-setosa] [5.0 3.4 1.5 0.2 Iris-setosa] [4.4 2.9 1.4 0.2 Iris-setosa] [4.9 3.1 1.5 0.1 Iris-setosa] [5.4 3.7 1.5 0.2 Iris-setosa] [4.8 3.4 1.6 0.2 Iris-setosa] [4.8 3.0 1.4 0.1 Iris-setosa] [4.3 3.0 1.1 0.1 Iris-setosa] [5.8 4.0 1.2 0.2 Iris-setosa] [5.7 4.4 1.5 0.4 Iris-setosa] [5.4 3.9 1.3 0.4 Iris-setosa] [5.1 3.5 1.4 0.3 Iris-setosa] [5.7 3.8 1.7 0.3 Iris-setosa] [5.1 3.8 1.5 0.3 Iris-setosa] [5.4 3.4 1.7 0.2 Iris-setosa] [5.1 3.7 1.5 0.4 Iris-setosa] [4.6 3.6 1.0 0.2 Iris-setosa] [5.1 3.3 1.7 0.5 Iris-setosa] [4.8 3.4 1.9 0.2 Iris-setosa] [5.0 3.0 1.6 0.2 Iris-setosa] [5.0 3.4 1.6 0.4 Iris-setosa] [5.2 3.5 1.5 0.2 Iris-setosa] [5.2 3.4 1.4 0.2 Iris-setosa] [4.7 3.2 1.6 0.2 Iris-setosa] [4.8 3.1 1.6 0.2 Iris-setosa] [5.4 3.4 1.5 0.4 Iris-setosa] [5.2 4.1 1.5 0.1 Iris-setosa] [5.5 4.2 

### Handling unexpected fields

In [3]:
{
    // Open the data set that contains malformed rows. Stop on failure:
    // reading from a nil file would make the loop below spin forever,
    // because every Read would return the same non-EOF error.
    f, err := os.Open("iris_unexpected.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    r := csv.NewReader(f)
    // Each record should have 5 fields; other counts are reported by Read
    // as a *csv.ParseError.
    r.FieldsPerRecord = 5

    // rawCSV collects only the rows that parsed cleanly.
    var rawCSV [][]string

    for {
        record, err := r.Read()
        if err == io.EOF {
            break
        }

        if err != nil {
            // A malformed row is reported and skipped; the reader can
            // continue with the next line.
            if _, ok := err.(*csv.ParseError); ok {
                fmt.Println("####", err)
                continue
            }
            // Anything else comes from the underlying reader and is not
            // tied to a single row, so retrying would not help.
            log.Fatal(err)
        }

        // The row has the expected number of fields.
        rawCSV = append(rawCSV, record)
    }

    fmt.Println(rawCSV)
}

#### line 8, column 0: wrong number of fields in line
[[5.1 3.5 1.4 0.2 Iris-setosa] [4.9 3.0 1.4 0.2 Iris-setosa] [4.7 3.2 1.3 0.2 Iris-setosa] [4.6 3.1 1.5 0.2 Iris-setosa] [5.0 3.6 1.4 0.2 Iris-setosa] [5.4 3.9 1.7 0.4 Iris-setosa] [4.6 3.4 1.4 0.3 Iris-setosa] [4.4 2.9 1.4 0.2 Iris-setosa] [4.9 3.1 1.5 0.1 Iris-setosa] [5.4 3.7 1.5 0.2 Iris-setosa] [4.8 3.4 1.6 0.2 Iris-setosa] [blah 3.0 blah2 0.1 ] [4.3 3.0 1.1 0.1 Iris-setosa] [5.8 4.0 1.2 0.2 Iris-setosa] [5.7 4.4 1.5 0.4 Iris-setosa] [5.4 3.9 1.3 0.4 Iris-setosa] [5.1 3.5 1.4 0.3 Iris-setosa] [5.7 3.8 1.7 0.3 Iris-setosa] [5.1 3.8 1.5 0.3 Iris-setosa] [5.4 3.4 1.7 0.2 Iris-setosa] [5.1 3.7 1.5 0.4 Iris-setosa] [4.6 3.6 1.0 0.2 Iris-setosa] [5.1 3.3 1.7 0.5 Iris-setosa] [4.8 3.4 1.9 0.2 Iris-setosa] [5.0 3.0 1.6 0.2 Iris-setosa] [5.0 3.4 1.6 0.4 Iris-setosa] [5.2 3.5 1.5 0.2 Iris-setosa] [5.2 3.4 1.4 0.2 Iris-setosa] [4.7 3.2 1.6 0.2 Iris-setosa] [4.8 3.1 1.6 0.2 Iris-setosa] [5.4 3.4 1.5 0.4 Iris-setosa] [5.2 4.1 1.5 0.1 Iris-s

### Handling unexpected types

In [4]:
// CSVRec is one row of the iris CSV after type conversion: four numeric
// columns followed by a species label.
type CSVRec struct {
    // Numeric columns, listed in the order they appear in a record
    // (indices 0 through 3).
    SepalLength, SepalWidth, PetalLength, PetalWidth float64

    // Species is the string value taken from the fifth column.
    Species string

    // ParseError is non-nil when one of the columns could not be converted.
    ParseError error
}

{
    // Open the data set that contains malformed rows. Stop on failure:
    // reading from a nil file would make the loop below spin forever,
    // because every Read would return the same non-EOF error.
    f, err := os.Open("iris_unexpected.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    r := csv.NewReader(f)
    // Each record should have 5 fields; other counts are reported by Read
    // as a *csv.ParseError.
    r.FieldsPerRecord = 5

    // csvData collects only the rows whose every column converted cleanly.
    var csvData []CSVRec

    for {
        record, err := r.Read()
        if err == io.EOF {
            break
        }

        if err != nil {
            // A malformed row is reported and skipped; the reader can
            // continue with the next line.
            if _, ok := err.(*csv.ParseError); ok {
                fmt.Println("####", err)
                continue
            }
            // Anything else comes from the underlying reader and is not
            // tied to a single row, so retrying would not help.
            log.Fatal(err)
        }

        // Typed representation of the current row.
        var csvRec CSVRec

        for idx, val := range record {
            // Column 4 is the only string column.
            if idx == 4 {
                if val == "" {
                    fmt.Printf("### Unexpected type in column %d\n", idx)
                    csvRec.ParseError = fmt.Errorf("empty string value in column %d", idx)
                    break
                }

                csvRec.Species = val
                continue
            }

            // Every other column must parse as a float.
            floatVal, convErr := strconv.ParseFloat(val, 64)
            if convErr != nil {
                fmt.Printf("### Unexpected type in column %d\n", idx)
                csvRec.ParseError = fmt.Errorf("could not parse float %q in column %d: %v", val, idx, convErr)
                break
            }

            switch idx {
            case 0:
                csvRec.SepalLength = floatVal
            case 1:
                csvRec.SepalWidth = floatVal
            case 2:
                csvRec.PetalLength = floatVal
            case 3:
                csvRec.PetalWidth = floatVal
            }
        }

        // Keep the row only if no column failed to convert.
        if csvRec.ParseError == nil {
            csvData = append(csvData, csvRec)
        }
    }
}

#### line 8, column 0: wrong number of fields in line
### Unexpected type in column 0


### CSV with data frames

import "github.com/kniren/gota/dataframe"

In [5]:
import "github.com/kniren/gota/dataframe"

In [8]:
{
    // Open the iris data set; stop on failure rather than handing a nil
    // file to the data frame reader.
    irisFile, err := os.Open("iris.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer irisFile.Close()

    // Build a data frame directly from the CSV stream.
    // NOTE(review): the recorded output lists 149 rows and shows the first
    // data row (5.1, 3.5, 1.4, 0.2, Iris-setosa) as column names, so ReadCSV
    // appears to consume the first line as a header. If iris.csv has no
    // header row, a load option such as dataframe.HasHeader(false) may be
    // needed; confirm against the gota version in use.
    irisDF := dataframe.ReadCSV(irisFile)
    fmt.Println(irisDF)
}

[149x5] DataFrame

    5.1      3.5      1.4      0.2      Iris-setosa
 0: 4.900000 3.000000 1.400000 0.200000 Iris-setosa
 1: 4.700000 3.200000 1.300000 0.200000 Iris-setosa
 2: 4.600000 3.100000 1.500000 0.200000 Iris-setosa
 3: 5.000000 3.600000 1.400000 0.200000 Iris-setosa
 4: 5.400000 3.900000 1.700000 0.400000 Iris-setosa
 5: 4.600000 3.400000 1.400000 0.300000 Iris-setosa
 6: 5.000000 3.400000 1.500000 0.200000 Iris-setosa
 7: 4.400000 2.900000 1.400000 0.200000 Iris-setosa
 8: 4.900000 3.100000 1.500000 0.100000 Iris-setosa
 9: 5.400000 3.700000 1.500000 0.200000 Iris-setosa
    ...      ...      ...      ...      ...        
    <float>  <float>  <float>  <float>  <string>   

