# Demo For the GoWorkshop

## Linear Regression

### first we need to load the data
For this we will be using dataframe-go Library

In [64]:
import (
	"context"
	"fmt"
	"io/ioutil"
	"log"
	"strings"
	"github.com/rocketlaunchr/dataframe-go"
	"github.com/rocketlaunchr/dataframe-go/imports"
)

var ctx = context.Background()

func loadGopherData() *dataframe.DataFrame {
	content, err := ioutil.ReadFile("gopher_locations 1.csv")
	if err != nil {
		log.Fatal(err)
	}

	// Convert the content to a string
	csvString := string(content)
	df, err := imports.LoadFromCSV(ctx, strings.NewReader(csvString), imports.CSVLoadOptions{InferDataTypes: true, NilValue: &[]string{"NA"}[0]})
	if err != nil {
		panic(err)
	}

	return df
}

var df = loadGopherData()

In [65]:

func displayTheData(df *dataframe.DataFrame) {
 // Print the DataFrame
 fmt.Print(df.Table())
}

In [66]:
func main(){
    displayTheData(df)
	//Get the number of rows in the DataFrame
	numRows := df.NRows()

	// Get the first column by index
	X := df.Series[0]

	// Print the values of the first column
	for i := 0; i < int(numRows); i++ {
		value := X.Value(i)
		fmt.Println(value)
	}
}
    

+-------+--------------------+--------------------+---------------+---------------+----------------+----------+----------------+----------+
|       |    LOCATIONNAME    | AVERAGETEMPERATURE | NROFPREDATORS | NRFOODSOURCES | NRWATERSOURCES | NRHUMANS | VEGETATIONTYPE | LANDSIZE |
+-------+--------------------+--------------------+---------------+---------------+----------------+----------+----------------+----------+
|  0:   |    Flooded Loch    |        5.84        |      17       |       3       |       0        |    22    |    Wetland     |    64    |
|  1:   |   Arid Wasteland   |       35.24        |      15       |       3       |       1        |    49    |     Desert     |   667    |
|  2:   |    Dark Jungle     |        36.8        |      19       |      13       |       10       |    3     |     Forest     |   660    |
|  3:   |  Green Highlands   |       12.75        |      19       |       4       |       2        |    2     |     Hills      |   309    |
|  4:   |    Marshy 

In [55]:
import (
	"context"
	"fmt"
	"io/ioutil"
	"log"
	"strconv"
	"strings"
	"github.com/gonum/stat"
	"gonum.org/v1/gonum/mat"
	"github.com/rocketlaunchr/dataframe-go"
	"github.com/rocketlaunchr/dataframe-go/imports"
)


func main() {

	// Extract the columns
	averageTemperature := df.Series[1]
	predatorDensity := df.Series[2]
	foodAvailability := df.Series[3]
	waterSources := df.Series[4]
	humanActivity := df.Series[5]
	vegetation := df.Series[6]
	landArea := df.Series[7]

	// Prepare X and y
	// For this example, let's use AverageTemperature as the target (y)
	// and PredatorDensity, FoodAvailability, WaterSources, HumanActivity, Vegetation, and LandArea as features (X)
	var X [][]float64
	var y []float64

	numRows := df.NRows()

	for i := 0; i < int(numRows); i++ {
		row := []float64{}

		// Convert categorical variables to numerical (simplified encoding)
		predatorDensityValue := map[string]float64{"low": 1, "medium": 2, "high": 3}[fmt.Sprintf("%v", predatorDensity.Value(i))]
		foodAvailabilityValue := map[string]float64{"low": 1, "medium": 2, "high": 3}[fmt.Sprintf("%v", foodAvailability.Value(i))]
		waterSourcesValue := map[string]float64{"near": 1, "medium": 2, "far": 3}[fmt.Sprintf("%v", waterSources.Value(i))]
		humanActivityValue := map[string]float64{"low": 1, "medium": 2, "high": 3}[fmt.Sprintf("%v", humanActivity.Value(i))]
		vegetationValue := map[string]float64{"forest": 1, "cliff": 2, "grassland": 3}[fmt.Sprintf("%v", vegetation.Value(i))]

		landAreaValue, err := strconv.ParseFloat(fmt.Sprintf("%v", landArea.Value(i)), 64)
		if err != nil {
			log.Fatalf("Error converting LandArea to float64: %v", err)
		}

		row = append(row, predatorDensityValue, foodAvailabilityValue, waterSourcesValue, humanActivityValue, vegetationValue, landAreaValue)
		X = append(X, row)

		averageTemperatureValue, err := strconv.ParseFloat(fmt.Sprintf("%v", averageTemperature.Value(i)), 64)
		if err != nil {
			log.Fatalf("Error converting AverageTemperature to float64: %v", err)
		}
		y = append(y, averageTemperatureValue)
	}

	// Convert X and y to matrices for gonum
	nSamples, nFeatures := len(X), len(X[0])
	Xmat := mat.NewDense(nSamples, nFeatures, nil)
	for i := range X {
		for j := range X[i] {
			Xmat.Set(i, j, X[i][j])
		}
	}

	ymat := mat.NewVecDense(nSamples, y)

	// Add a column of ones to X for the intercept term
	XWithIntercept := mat.NewDense(nSamples, nFeatures+1, nil)
	for i := 0; i < nSamples; i++ {
		XWithIntercept.Set(i, 0, 1) // intercept term
		for j := 0; j < nFeatures; j++ {
			XWithIntercept.Set(i, j+1, Xmat.At(i, j))
		}
	}

	// Compute (X^T * X)
	var XT mat.Dense
	XT.Mul(XWithIntercept.T(), XWithIntercept)

	// Compute (X^T * y)
	var XTy mat.VecDense
	XTy.MulVec(XWithIntercept.T(), ymat)

	// Solve for beta (coefficients)
	var beta mat.VecDense
	err := beta.SolveVec(&XT, &XTy)
	if err != nil {
		log.Fatal(err)
	}

	// Print the coefficients
	fmt.Printf("Coefficients: %v\n", mat.Formatted(&beta, mat.Prefix("             "), mat.Squeeze()))
}

Coefficients: ⎡  12.537097419536728⎤
             ⎢ 0.40869494110716387⎥
             ⎢-0.21431649816529333⎥
             ⎢-0.09737403641504794⎥
             ⎢  0.6476661883408436⎥
             ⎢ -0.8911282337266179⎥
             ⎣0.006035263591470038⎦


In [63]:
import (
    "github.com/sjwhitworth/golearn/base"
    "github.com/sjwhitworth/golearn/linear_model"
    )

func main(){
    dataGrid := base.ConvertDataFrameToInstances(df, 0)
    // Create a linear regression model
    model := linear_models.NewLinearSVC()

    // // Fit the model to the data
    // err = model.Fit(dataGrid)
    // if err != nil {
    //     // Handle error
    //     return
    // }
    
    
}

ERROR: failed to run "/usr/local/go/bin/go get": exit status 1

In [76]:
import (
 "fmt"
 "math"
"gonum.org/v1/gonum/stat"
)
// resultXY --> sum((x-meanX)*(y-meanY))
// resultXX --> sum((x-meanX)^2)
func sumXYandXX(arrayX []float64, arrayY []float64, meanX float64, meanY float64) (float64, float64) {
 resultXX := 0.0
 resultXY := 0.0
for x := 0; x < len(arrayX); x++ {
  for y := 0; y < len(arrayY); y++ {
   if x == y {
    resultXY += (arrayX[x] - meanX) * (arrayY[y] - meanY)
   }
  }
  resultXX += (arrayX[x] - meanX) * (arrayX[x] - meanX)
 }
return resultXY, resultXX
}
// estimateBoB1 --> Function that calculates the regression coefficients b0 and b1
// y_predicted = b0 + b1*x_input
func estimateB0B1(x []float64, y []float64) (float64, float64) {
 var meanX float64
 var meanY float64
 var sumXY float64
 var sumXX float64
meanX = stat.Mean(x, nil) //mean of x
 meanY = stat.Mean(y, nil) //mean pf y
sumXY, sumXX = sumXYandXX(x, y, meanX, meanY)
// regression coefficients
 b1 := sumXY / sumXX    // b1 or slope
 b0 := meanY - b1*meanX // b0 or intercept
return b0, b1
}
func rmseCost(y_predicted []float64, y_test []float64) float64 {
 sz := len(y_test)
 var rmse float64 = 0.0
for i := 0; i < len(y_test); i++ {
  rmse = rmse + math.Abs(y_test[i]-y_predicted[i])*math.Abs(y_test[i]-y_predicted[i])
 }
 rmse = rmse / float64(sz)
 rmse = math.Sqrt(rmse)
return rmse
}
func testing(x_test []float64, b0 float64, b1 float64) []float64 {
 sz := len(x_test)
 res := make([]float64, sz)
 for i := 0; i < len(x_test); i++ {
  res[i] = b0 + b1*x_test[i]
  fmt.Println("for input value : ", x_test[i], ", prediction is :  ", res[i])
 }
 return res
}


func train_test_split(x_ []float64, y_ []float64, train_ratio float64) ([]float64, []float64, []float64, []float64) {
 if len(x_) != len(y_) {
  fmt.Printf("Size of X and Y are different :", len(x_), " and ", len(y_))
 }
 var length float64 = float64(len(x_))
 pos := int(length * train_ratio)
var test_x = x_[pos-1:]
 var test_y = y_[pos-1:]
 var train_x = x_[:pos-1]
 var train_y = y_[:pos-1]
return test_x, test_y, train_x, train_y
}

func rSquared(yActual []float64, yPredicted []float64) float64 {
    yMean := stat.Mean(yActual, nil)
    sumSqTotal := 0.0
    sumSqResidual := 0.0
    for i := range yActual {
        sumSqTotal += (yActual[i] - yMean) * (yActual[i] - yMean)
        sumSqResidual += (yActual[i] - yPredicted[i]) * (yActual[i] - yPredicted[i])
    }
    return 1.0 - (sumSqResidual / sumSqTotal)
}

func main() {
    // Extract the values from dataframe.Series objects into slices
    X_data := make([]float64, df.NRows())
    y_data := make([]float64, df.NRows())
    for i := 0; i < df.NRows(); i++ {
        X_val, ok := df.Series[2].Value(i).(float64)
        if !ok {
            // Handle the case when the value is not float64
            // Convert it to float64 or handle it appropriately
            X_val = float64(df.Series[2].Value(i).(int64))
        }
        X_data[i] = X_val

        y_val, ok := df.Series[5].Value(i).(float64)
        if !ok {
            // Handle the case when the value is not float64
            // Convert it to float64 or handle it appropriately
            y_val = float64(df.Series[5].Value(i).(int64))
        }
        y_data[i] = y_val
    }

    // Split the data
    test_x, test_y, train_x, train_y := train_test_split(X_data, y_data, 0.80)

    // Calculate coefficients
    b0, b1 := estimateB0B1(train_x, train_y)
    fmt.Printf("Los valores de b0=%v , b1=%v\n", b0, b1)

    // Test the model
    prediction := testing(test_x, b0, b1)

    // Calculate RMSE
    rmseValue := rmseCost(prediction, test_y)
    fmt.Println("RMSE:", rmseValue)

    rsquared := rSquared(test_y, prediction)
    fmt.Println("R-squared:", rsquared)
}


Los valores de b0=193.36695049679437 , b1=-9.87112808706571
for input value :  16 , prediction is :   35.428901103743016
for input value :  17 , prediction is :   25.557773016677316
for input value :  18 , prediction is :   15.686644929611589
for input value :  18 , prediction is :   15.686644929611589
for input value :  19 , prediction is :   5.81551684254589
for input value :  16 , prediction is :   35.428901103743016
for input value :  15 , prediction is :   45.300029190808715
for input value :  19 , prediction is :   5.81551684254589
for input value :  17 , prediction is :   25.557773016677316
for input value :  20 , prediction is :   -4.055611244519838
for input value :  15 , prediction is :   45.300029190808715
for input value :  16 , prediction is :   35.428901103743016
for input value :  16 , prediction is :   35.428901103743016
for input value :  17 , prediction is :   25.557773016677316
for input value :  17 , prediction is :   25.557773016677316
for input value :  18 , predi