In [2]:
import (
	"bufio"
	"encoding/csv"
	"fmt"
	"math"
	"os"
	"os/exec"
	"strconv"

	"github.com/e-XpertSolutions/go-iforest/iforest"
	"github.com/janpfeifer/gonb/cache"
	"github.com/janpfeifer/gonb/gonbui"
	"github.com/pa-m/sklearn/metrics"
	ms "github.com/pa-m/sklearn/model_selection"
	"gonum.org/v1/plot"
	"gonum.org/v1/plot/plotter"
	"gonum.org/v1/plot/plotutil"
	"gonum.org/v1/plot/vg"
	"gonum.org/v1/plot/vg/draw"
)

	

# UTILITIES

In [3]:


// loadData reads the CSV file at myfile and parses every field as a float64.
//
// It returns one []float64 per CSV record, in file order. If the file cannot
// be opened, the CSV cannot be read, or any field fails to parse as a float,
// a message is printed and nil is returned, so callers never receive a
// partially filled matrix.
func loadData(myfile string) [][]float64 {
	file, err := os.Open(myfile)
	if err != nil {
		fmt.Println("Error opening file:", err)
		return nil
	}
	defer file.Close()

	records, err := csv.NewReader(file).ReadAll()
	if err != nil {
		fmt.Println("Error reading CSV:", err)
		return nil
	}

	inputData := make([][]float64, len(records))
	for i, row := range records {
		inputData[i] = make([]float64, len(row))
		for j, value := range row {
			inputData[i][j], err = strconv.ParseFloat(value, 64)
			if err != nil {
				// i is the zero-based record index. Abort instead of returning
				// rows that are only partly converted.
				fmt.Printf("Error converting to float: line %d: %v\n", i, err)
				return nil
			}
		}
	}
	return inputData
}

// GetLabelsFromFile reads one integer label per line from filePath.
//
// It returns the labels in file order. If the file cannot be opened, a line
// is not a valid integer, or the scanner stops because of a read error (for
// example a line longer than the scanner's token limit), a message is printed
// and nil is returned.
func GetLabelsFromFile(filePath string) []int {
	file, err := os.Open(filePath)
	if err != nil {
		fmt.Println("Error opening file:", err)
		return nil
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)

	var labels []int
	for scanner.Scan() {
		label, err := strconv.Atoi(scanner.Text())
		if err != nil {
			// A malformed line invalidates the whole label set.
			fmt.Println("Error:", err)
			return nil
		}
		labels = append(labels, label)
	}
	// Scan returns false on both EOF and failure; only Err tells them apart.
	// Without this check a truncated label list would be returned silently.
	if err := scanner.Err(); err != nil {
		fmt.Println("Error reading file:", err)
		return nil
	}
	return labels
}

// ConvertToFloat converts predicted and true integer labels to float64 slices
// for use by the F1 and accuracy score metrics.
//
// Both returned slices have len(Labels_Pred) elements: the first holds the
// converted predictions, the second the converted true labels at the same
// positions. labels_Tru must therefore contain at least len(Labels_Pred)
// entries.
func ConvertToFloat(Labels_Pred []int, labels_Tru []int) ([]float64, []float64) {
	n := len(Labels_Pred)
	predAsFloat, truAsFloat := make([]float64, n), make([]float64, n)

	for idx, predicted := range Labels_Pred {
		predAsFloat[idx] = float64(predicted)
		truAsFloat[idx] = float64(labels_Tru[idx])
	}

	return predAsFloat, truAsFloat
}

// F1Score prints the macro, micro and weighted F1 scores of labelsPred
// against labelsTru, one line per averaging mode.
//
// The labels are converted with ConvertToFloat and wrapped in single-column
// matrices before being handed to metrics.F1Score.
func F1Score(labelsPred []int, labelsTru []int) {
	predVals, truVals := ConvertToFloat(labelsPred, labelsTru)

	yPred := mat.NewDense(len(predVals), 1, predVals)
	yTrue := mat.NewDense(len(truVals), 1, truVals)

	// Left nil: no per-sample weights are supplied.
	var weights []float64

	// Each entry pairs the output format with the averaging mode passed to
	// metrics.F1Score; the order fixes the order of the printed lines.
	reports := []struct {
		format  string
		average string
	}{
		{"F1 macro %.2f\n", "macro"},
		{"F1 micro %.2f\n", "micro"},
		{"F1 weighted %.2f\n", "weighted"},
	}
	for _, r := range reports {
		fmt.Printf(r.format, metrics.F1Score(yTrue, yPred, r.average, weights))
	}
}

// AccuracyScore prints the accuracy of labelsPred against labelsTru.
//
// The labels are converted with ConvertToFloat and wrapped in single-column
// matrices before being handed to metrics.AccuracyScore.
func AccuracyScore(labelsPred []int, labelsTru []int) {
	predVals, truVals := ConvertToFloat(labelsPred, labelsTru)

	yPred := mat.NewDense(len(predVals), 1, predVals)
	yTrue := mat.NewDense(len(truVals), 1, truVals)

	// A nil *mat.Dense is passed as the sample-weight argument.
	var noWeights *mat.Dense

	// The third argument is the normalize flag.
	// NOTE(review): presumably true yields a fraction rather than a count — confirm.
	score := metrics.AccuracyScore(yTrue, yPred, true, noWeights)
	fmt.Println("Accuracy Score is: ", score)
}

// prepareDataset runs the ./extract_splitDataset.sh script from the current
// working directory, passing dataset as its only argument.
//
// If the script cannot be started or exits with a non-zero status, the error
// and whatever the script wrote to stdout/stderr are printed, so that a failed
// preparation step does not go unnoticed.
func prepareDataset(dataset string) {
	cmd := exec.Command("./extract_splitDataset.sh", dataset)
	if out, err := cmd.CombinedOutput(); err != nil {
		fmt.Printf("Error running ./extract_splitDataset.sh %q: %v\n%s", dataset, err, out)
	}
}



# TRAIN TEST FUNCTION

In [4]:
// isoForestTrain_Test runs the Isolation Forest algorithm: it creates a forest,
// trains it on training_data, tests it on testing_data and prints the forest's
// anomaly bound.
//
// treesNumber, subsampleSize and outliers are passed unchanged to
// iforest.NewForest. The forest is returned after both stages have run.
func isoForestTrain_Test(training_data [][]float64, testing_data [][]float64, treesNumber int, subsampleSize int, outliers float64) *iforest.Forest {
	// Model initialization.
	model := iforest.NewForest(treesNumber, subsampleSize, outliers)

	// Training stage: build the trees.
	model.Train(training_data)

	// Testing stage: look for anomalies in the testing data.
	model.Test(testing_data)

	fmt.Println("threshold is", model.AnomalyBound)

	return model
}


# PREDICT FUNCTION

In [8]:
// predict loads "testing.csv" and "testing_labels.csv", asks the given forest
// to predict labels for the loaded data and prints accuracy and F1 scores of
// the predictions against the loaded labels.
//
// Nothing is scored when the data or labels are missing or empty, when
// prediction fails, or when the number of predictions differs from the number
// of labels; a message is printed instead.
func predict(forest *iforest.Forest) {
	newData := loadData("testing.csv")
	labels := GetLabelsFromFile("testing_labels.csv")
	if len(newData) == 0 || len(labels) == 0 {
		fmt.Println("Error: testing data or labels are missing or empty")
		return
	}

	// The second return value is not needed for the metrics below.
	predicted, _, err := forest.Predict(newData)
	if err != nil {
		fmt.Println("Error predicting labels:", err)
		return
	}

	// The metric helpers index both slices in lockstep, so a length mismatch
	// would panic or silently score a truncated set.
	if len(predicted) != len(labels) {
		fmt.Printf("Error: %d predictions for %d labels\n", len(predicted), len(labels))
		return
	}

	AccuracyScore(predicted, labels)
	F1Score(predicted, labels)
}

In [1]:
%%
// Invoke the dataset preparation script on 2020.06.21.csv.
prepareDataset("2020.06.21.csv")

ERROR: failed to run "/usr/local/go/bin/go build -o /tmp/gonb_423f33f1/gonb_423f33f1": exit status 1

# RUN ALGORITHM TRAIN-TEST

In [12]:

%%
// Load the training rows and their labels from the CSV files.
data:=loadData("training.csv")
labels:=GetLabelsFromFile("training_labels.csv")
// Obtain the forest through the gonb cache under the key "my_forest". The
// closure trains with 100 trees, subsample size 1000 and outliers=0.52, and
// passes the same data as both the training and the testing set.
// NOTE(review): presumably the closure only runs when the key is not cached yet — confirm against the gonb cache documentation.
var forest = cache.Cache("my_forest", func() * iforest.Forest {return isoForestTrain_Test(data,data,100,1000,0.52)})
// Score the labels stored on the forest against the loaded labels.
AccuracyScore(forest.Labels,labels)
F1Score(forest.Labels,labels)

Accuracy Score is:  0.44794886181208465
F1 macro 0.44
F1 micro 0.45
F1 weighted 0.44


In [None]:
%%
// Reset the cache entry stored under the key "my_forest".
// NOTE(review): presumably this forces the forest to be retrained on the next cache.Cache call — confirm.
cache.ResetKey("my_forest")

# PREDICT ON ANOTHER DATASET

In [None]:
%%
// Score the forest on the testing files via predict.
// NOTE(review): forest is declared inside another %% cell; verify it is actually in scope here before running.
predict(forest)

# K-Fold Cross Validation Functions

In [13]:
// Split holds the row indices that make up one cross-validation fold.
type Split struct {
	// TrainIndex lists the dataset row indices used for training.
	TrainIndex []int
	// TestIndex lists the dataset row indices used for testing.
	TestIndex []int
}

// KFold partitions the indices 0..datasetlen-1 into consecutive folds and
// returns a channel that yields one Split per fold, in order.
//
// For every Split, TestIndex is the fold's contiguous index range and
// TrainIndex is every other index in ascending order. Fold sizes differ by at
// most one element: the first datasetlen%K folds get one extra index. This
// guarantees that no fold has an empty test set.
//
// If K is larger than datasetlen, only datasetlen folds (of one index each)
// are produced. If K or datasetlen is not positive, the returned channel is
// already closed and yields nothing.
//
// The channel is buffered and completely filled before KFold returns, so a
// consumer that stops ranging early does not leave a goroutine blocked. Every
// Split owns its slices; they do not alias each other.
func KFold(K int, datasetlen int) (ch chan Split) {
	folds := K
	if folds > datasetlen {
		folds = datasetlen
	}
	if folds <= 0 {
		ch = make(chan Split)
		close(ch)
		return ch
	}

	base, extra := datasetlen/folds, datasetlen%folds

	ch = make(chan Split, folds)
	start := 0
	for f := 0; f < folds; f++ {
		size := base
		if f < extra {
			size++
		}
		end := start + size

		test := make([]int, 0, size)
		train := make([]int, 0, datasetlen-size)
		for i := 0; i < datasetlen; i++ {
			if i >= start && i < end {
				test = append(test, i)
			} else {
				train = append(train, i)
			}
		}

		ch <- Split{TrainIndex: train, TestIndex: test}
		start = end
	}
	close(ch)
	return ch
}
// getElements gathers the rows and labels selected by a train/test index pair.
//
// It returns, in order: the rows of dataArray at indexTrain, the rows of
// dataArray at indexTest, and the entries of labelsArray at indexTest. The
// returned row slices reference the same underlying rows as dataArray. No
// labels are returned for the training indices.
func getElements(indexTrain []int, indexTest []int, dataArray [][]float64, labelsArray []int) ([][]float64, [][]float64, []int) {
	trainRows := make([][]float64, 0, len(indexTrain))
	for _, rowIdx := range indexTrain {
		trainRows = append(trainRows, dataArray[rowIdx])
	}

	testRows := make([][]float64, 0, len(indexTest))
	testLabels := make([]int, 0, len(indexTest))
	for _, rowIdx := range indexTest {
		testRows = append(testRows, dataArray[rowIdx])
		testLabels = append(testLabels, labelsArray[rowIdx])
	}

	return trainRows, testRows, testLabels
}

 // CrossValidation runs a 5-fold cross validation of the isolation forest on
// the CSV file named by dataset.
//
// Labels are always read from "training_labels.csv", regardless of dataset.
// For each fold the forest is trained on the training rows and tested on the
// held-out rows (100 trees, subsample size 1000, outliers 0.5), then accuracy
// and F1 scores of the forest's labels against the held-out labels are printed.
//
// Nothing is run when no data could be loaded or when there are fewer labels
// than data rows; a message is printed instead.
func CrossValidation(dataset string) {
	data := loadData(dataset)
	labels := GetLabelsFromFile("training_labels.csv")

	if len(data) == 0 {
		fmt.Println("Error: no data loaded from", dataset)
		return
	}
	// getElements indexes labels by data row, so every row needs a label.
	if len(labels) < len(data) {
		fmt.Printf("Error: %d labels for %d data rows\n", len(labels), len(data))
		return
	}

	iteration := 1
	for s := range KFold(5, len(data)) {
		fmt.Println("Iteration N.", iteration)
		iteration++

		traindata, testdata, labeldata := getElements(s.TrainIndex, s.TestIndex, data, labels)
		forest := isoForestTrain_Test(traindata, testdata, 100, 1000, 0.5)
		AccuracyScore(forest.Labels, labeldata)
		F1Score(forest.Labels, labeldata)
	}
}
%%
/* data := loadData("training.csv")
fmt.Println(data)
//datasetlen:=len(data)
for s := range KFold(5,data){
	fmt.Println(s.TrainData)
	
} */


In [14]:
%%
// Run the cross validation on training.csv.
CrossValidation("training.csv")


%Iteration N. 1
12.969940887097108
threshold is 0.12396105242230762
Accuracy Score is:  0.5502485061254936
F1 macro 0.55
F1 micro 0.55
F1 weighted 0.55
%Iteration N. 2
12.969940887097108
threshold is 0.15165764478885746
Accuracy Score is:  0.3633054152444117
F1 macro 0.36
F1 micro 0.36
F1 weighted 0.36
%Iteration N. 3
12.969940887097108
threshold is 0.14009326292239582
Accuracy Score is:  0.39726927037304294
F1 macro 0.39
F1 micro 0.40
F1 weighted 0.39
%Iteration N. 4
12.969940887097108
threshold is 0.0970649542969606
Accuracy Score is:  0.31464199712981505
F1 macro 0.28
F1 micro 0.31
F1 weighted 0.28
%Iteration N. 5
12.969940887097108
threshold is 0.1402239916391459
Accuracy Score is:  0.4055329430651726
F1 macro 0.41
F1 micro 0.41
F1 weighted 0.41


In [None]:
%%
// Load and print the training labels. The loader defined in this notebook is
// GetLabelsFromFile; there is no GetLabels function.
x:=GetLabelsFromFile("training_labels.csv")
fmt.Print(x)