-
Notifications
You must be signed in to change notification settings - Fork 0
/
algorithms.go
293 lines (222 loc) · 9.46 KB
/
algorithms.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
package algorithms
import (
"fmt"
"../datasets"
"../diabetesdata"
"../logging"
"../metrics"
"../support"
"errors"
"os"
"strconv"
)
// default_kfold_count is the fold count used for k-fold evaluation unless
// KfoldCount is changed (n=10).
const default_kfold_count = 10

// KfoldCount is the number of folds currently in effect. It is initialised
// from default_kfold_count.
var KfoldCount = default_kfold_count
// Metrics holds the per-fold classification counts. It is indexed by the
// zero-based fold number and has room for 11 entries.
var Metrics [11]metrics.SessionMetrics
// algorithmDescriptions maps an algorithm index to its display name. The
// indices are the same numbers DoProcessAlgorithm switches on.
var algorithmDescriptions = []string{
	0: "None",
	1: "Remove Incomplete Records",
	2: "Replace Missing Values With Mean",
	3: "Replace Missing Values With Modal",
	4: "Replace Missing Values Based On Nearest Neighbours",
	5: "Replace Missing Values With Graduations",
	6: "K-Fold Cross Evaluation",
	7: "Hot Deck",
}
// GetAlgorithmDescription returns the entry of algorithmDescriptions at
// algoIndex, or the empty string when algoIndex lies outside the table.
func GetAlgorithmDescription(algoIndex int) string {
	if algoIndex < 0 || algoIndex >= len(algorithmDescriptions) {
		return ""
	}
	return algorithmDescriptions[algoIndex]
}
// DoProcessAlgorithm runs the missing-data algorithm selected by algorithm
// over dataset and returns the resulting records.
//
// algorithm is an index into algorithmDescriptions. An index outside that
// table returns dataset unchanged together with an "Invalid algorithm
// specified" error. Indices 1, 2, 3 and 7 delegate to the matching package
// helper and return its records and error. Every other valid index
// (0, 4, 5 and 6) returns dataset itself with a nil error.
func DoProcessAlgorithm(dataset []diabetesdata.PimaDiabetesRecord, algorithm int) ([]diabetesdata.PimaDiabetesRecord, error) {
	// Reject indices that do not name a known algorithm.
	if algorithm < 0 || algorithm >= len(algorithmDescriptions) {
		return dataset, errors.New("Invalid algorithm specified")
	}

	switch algorithm {
	case 1:
		return removeIncompleteRecords(dataset)
	case 2:
		return replaceMissingValuesWithMean(dataset)
	case 3:
		return replaceMissingValuesWithModal(dataset)
	case 7:
		return ReplaceUsingHotDeck(dataset)
	default:
		// Index 0 ("None") and the algorithms not wired in here pass the
		// input through untouched. No copy is made: callers receive the same
		// slice they supplied.
		// TODO: index 4 is meant to call ReplaceNearestNeighbours(dataset)
		// (the new N-neighbour algorithm) once it is enabled.
		return dataset, nil
	}
}
// anonymiseDiabetesRecord converts the eight measurement fields of data to
// float64 and returns them as a slice, in the order listed below. The
// TestedPositive field is not copied. The slice is allocated with length
// support.SizeOfPimaDiabetesRecord()-1.
func anonymiseDiabetesRecord(data diabetesdata.PimaDiabetesRecord) []float64 {
	vector := make([]float64, support.SizeOfPimaDiabetesRecord()-1)

	features := [...]float64{
		float64(data.NumberOfTimesPregnant),
		float64(data.PlasmaGlucoseConcentration),
		float64(data.DiastolicBloodPressure),
		float64(data.TricepsSkinfoldThickness),
		float64(data.SeriumInsulin),
		float64(data.BodyMassIndex),
		float64(data.DiabetesPedigreeFunction),
		float64(data.Age),
	}

	// Assign element by element (not copy) so that a too-short vector still
	// fails loudly with an index-out-of-range panic.
	for i, value := range features {
		vector[i] = value
	}
	return vector
}
// precision returns TP / (TP + FP) for the fold stored at Metrics[index],
// expressed as a percentage. When TP + FP is zero the ratio is undefined;
// 0 is returned in that case instead of NaN.
func precision(index int) float64 {
	m := &Metrics[index]
	denominator := m.TruePositiveCount + m.FalsePositiveCount
	if denominator == 0 {
		// 0/0 in float64 arithmetic would produce NaN in the report.
		return 0
	}
	return 100.0 * float64(m.TruePositiveCount) / float64(denominator)
}
// recall returns TP / (TP + FN) for the fold stored at Metrics[index],
// expressed as a percentage. When TP + FN is zero the ratio is undefined;
// 0 is returned in that case instead of NaN.
func recall(index int) float64 {
	m := &Metrics[index]
	denominator := m.FalseNegativeCount + m.TruePositiveCount
	if denominator == 0 {
		// 0/0 in float64 arithmetic would produce NaN in the report.
		return 0
	}
	return 100.0 * float64(m.TruePositiveCount) / float64(denominator)
}
func accuracy(index int) float64 {
return support.Percentage(float64(totalCorrect(index)), float64(totalCount(index)))
}
// totalCount returns TP + TN + FP + FN for the fold stored at Metrics[index].
func totalCount(index int) int {
	m := &Metrics[index]
	sum := m.TruePositiveCount
	sum += m.TrueNegativeCount
	sum += m.FalsePositiveCount
	sum += m.FalseNegativeCount
	return sum
}
// totalCorrect returns TP + TN for the fold stored at Metrics[index].
func totalCorrect(index int) int {
	m := &Metrics[index]
	return m.TruePositiveCount + m.TrueNegativeCount
}
// specificity returns TN / (TN + FP) for the fold stored at Metrics[index],
// expressed as a percentage. When TN + FP is zero the ratio is undefined;
// 0 is returned in that case instead of NaN.
func specificity(index int) float64 {
	m := &Metrics[index]
	denominator := m.TrueNegativeCount + m.FalsePositiveCount
	if denominator == 0 {
		// 0/0 in float64 arithmetic would produce NaN in the report.
		return 0
	}
	return 100.0 * float64(m.TrueNegativeCount) / float64(denominator)
}
// truePositiveRate passes TP and TP + FN for the fold stored at
// Metrics[index] to support.Percentage and returns the result.
func truePositiveRate(index int) float64 {
	m := &Metrics[index]
	hits := m.TruePositiveCount
	actualPositives := hits + m.FalseNegativeCount
	return support.Percentage(float64(hits), float64(actualPositives))
}
// falsePositiveRate passes FP and FP + TN for the fold stored at
// Metrics[index] to support.Percentage and returns the result.
func falsePositiveRate(index int) float64 {
	m := &Metrics[index]
	falseAlarms := m.FalsePositiveCount
	actualNegatives := falseAlarms + m.TrueNegativeCount
	return support.Percentage(float64(falseAlarms), float64(actualNegatives))
}
func showSessionMetrics(sessionhandle *os.File) {
var str string
for index := 0; index < KfoldCount; index++ {
str = fmt.Sprintf("Test Fold %02d: \n", index+1)
logging.DoWriteString(str, true, true)
sessionhandle.WriteString(str)
tp := Metrics[index].TruePositiveCount
tn := Metrics[index].TrueNegativeCount
fp := Metrics[index].FalsePositiveCount
fn := Metrics[index].FalseNegativeCount
str = fmt.Sprintf("(TP=%d, TN=%d, FP=%d, FN=%d)\n", tp, tn, fp, fn)
logging.DoWriteString(str, true, true)
sessionhandle.WriteString(str)
// Accuracy
totalCount := totalCount(index)
totalCorrect := totalCorrect(index)
str = fmt.Sprintf("Accuracy = %d out of %d (%.02f%%)\n", totalCorrect, totalCount, accuracy(index))
logging.DoWriteString(str, true, true) // console and log
sessionhandle.WriteString(str) // session file
// Precision
str = fmt.Sprintf("Precision : %.02f%%\n", precision(index))
logging.DoWriteString(str, true, true) // console and log
sessionhandle.WriteString(str) // session file
// Recall
str = fmt.Sprintf("Recall : %.02f%%\n", recall(index))
logging.DoWriteString(str, true, true) // console and log
sessionhandle.WriteString(str) // session file
// Specificity
str = fmt.Sprintf("Specificity : %0.2f%%\n", specificity(index))
logging.DoWriteString(str, true, true) // console and log
sessionhandle.WriteString(str) // session file
// True Positive Rate (TPR)
str = fmt.Sprintf("True Positive Rate (TPR) = %0.2f%%\n", truePositiveRate(index))
logging.DoWriteString(str, true, true)
sessionhandle.WriteString(str)
// False Positive Rate (FPR)
str = fmt.Sprintf("False Positive Rate (FPR) = %0.2f%%\n", falsePositiveRate(index))
logging.DoWriteString(str, true, true)
sessionhandle.WriteString(str)
logging.DoWriteString("\n", true, true)
sessionhandle.WriteString("\n")
}
}
// DoShowAlgorithmTestSummary writes the test results table to sessionhandle.
//
// When ApplyKFold is set, the per-fold metrics are written first via
// showSessionMetrics. A two-line column heading follows. Then, for every
// record in testdata, BuildSimilarityTable is called and the leading entries
// of SimilarityTable are written, one row per neighbour: the test record
// number (first row only), the training record index, the cosine similarity,
// the test record's own TestedPositive value (under the "Predicted Outcome"
// heading) and the training record's TestedPositive value (under "Calculated
// Outcome").
//
// The first SimilarityTable entry is treated as the classifier's prediction
// and the test record's TestedPositive value as the ground truth. Rows for a
// test record are suffixed with "FP" when the prediction is 1 but the truth
// is 0, and with "FN" when the prediction is 0 but the truth is 1.
//
// The function returns early, without reporting an error, as soon as
// BuildSimilarityTable leaves SimilarityTable empty. Write errors on
// sessionhandle are not checked.
func DoShowAlgorithmTestSummary(sessionhandle *os.File, testdata []diabetesdata.PimaDiabetesRecord) {
	// Per-fold accuracy measures come first when k-fold is in use.
	if ApplyKFold {
		showSessionMetrics(sessionhandle)
	}

	// Table column headings, first line.
	str := support.LeftAlignStringInColumn("Test Record", 15)
	str += support.LeftAlignStringInColumn("Best Match", 15)
	str += support.LeftAlignStringInColumn("Similarity", 12)
	str += support.LeftAlignStringInColumn("Predicted", 12)
	str += support.LeftAlignStringInColumn("Calculated", 12)
	str += "\n"
	sessionhandle.WriteString(str)

	// Table column headings, second line.
	str = support.LeftAlignStringInColumn("Number", 15)
	str += support.LeftAlignStringInColumn("Record", 15)
	str += support.LeftAlignStringInColumn("Measure", 12)
	str += support.LeftAlignStringInColumn("Outcome", 12)
	str += support.LeftAlignStringInColumn("Outcome", 12)
	str += "\n"
	sessionhandle.WriteString(str)

	for testIndex := range testdata {
		// Rebuild the similarity table for this test record.
		BuildSimilarityTable(testdata[testIndex])
		if len(SimilarityTable) == 0 {
			// The comparison table ended up empty; nothing more can be reported.
			return
		}

		// Never read more neighbours than the table actually holds.
		neighbours := support.GetNumberOfNeighbours()
		if neighbours > len(SimilarityTable) {
			neighbours = len(SimilarityTable)
		}

		// Ground truth comes from the test record; the prediction is the
		// outcome of the first (closest) entry in the similarity table.
		expectedOutcomeValue := testdata[testIndex].TestedPositive
		closestOutcomeValue := datasets.PimaTrainingData[SimilarityTable[0].Index].TestedPositive

		// Tag misclassifications; correct predictions stay blank.
		changeStatus := ""
		switch {
		case expectedOutcomeValue == 0 && closestOutcomeValue == 1:
			changeStatus = "FP" // predicted positive, actually negative
		case expectedOutcomeValue == 1 && closestOutcomeValue == 0:
			changeStatus = "FN" // predicted negative, actually positive
		}

		// Dump the closest records for this test record to the session file.
		for recIndex := 0; recIndex < neighbours; recIndex++ {
			trainingIndex := SimilarityTable[recIndex].Index

			// Only the first row of each group shows the test record number.
			var row string
			if recIndex == 0 {
				row = support.CentreStringInColumn(fmt.Sprintf("%-15s", strconv.Itoa(testIndex)), 15)
			} else {
				row = support.CentreStringInColumn(fmt.Sprintf("%-15s", " "), 15)
			}
			row += support.CentreStringInColumn(fmt.Sprintf("%-15s", strconv.Itoa(trainingIndex)), 15)
			row += support.CentreStringInColumn(fmt.Sprintf("%.8f", SimilarityTable[recIndex].CosineSimilarity), 12)
			row += support.CentreStringInColumn(strconv.Itoa(testdata[testIndex].TestedPositive), 12)
			row += support.CentreStringInColumn(strconv.Itoa(datasets.PimaTrainingData[trainingIndex].TestedPositive), 12)
			row += changeStatus // "FP", "FN" or blank
			row += "\n"
			sessionhandle.WriteString(row)
		}
	}
}
// end of file