# Regression on the GPUbenchmark dataset in Weka

### Import Weka and libraries

In [1]:
%maven nz.ac.waikato.cms.weka:weka-stable:3.8.3

import java.text.DecimalFormat;
import weka.core.*;
import weka.core.converters.ConverterUtils;
import weka.classifiers.bayes.*;
import weka.classifiers.trees.*;
import weka.classifiers.lazy.IBk;
import weka.classifiers.*;

### Read data

In [2]:
/**
 * Loads the GPU benchmark dataset from its ARFF file and marks the last
 * attribute as the class (target) attribute.
 *
 * @return the loaded dataset, or {@code null} if loading or class-index
 *         setup threw an exception (the stack trace is printed in that case)
 */
static Instances readData() {
    Instances loaded;
    try {
        // Parse the ARFF file into an in-memory dataset
        loaded = new ConverterUtils.DataSource("../data/GPUbenchmark.arff").getDataSet();
        // The target value is stored in the final column
        int lastAttribute = loaded.numAttributes() - 1;
        loaded.setClassIndex(lastAttribute);
    }
    catch (Exception ex) {
        // Report the problem and signal failure to the caller with null
        ex.printStackTrace();
        return null;
    }
    return loaded;
}

### Test regression

In [4]:
// Leave-one-out evaluation of a 3-nearest-neighbour (IBk) regressor:
// each instance is held out in turn, the model is trained on the rest,
// and the prediction for the held-out instance is compared to its actual
// class value. Accumulates mean absolute error, root-mean-squared error
// and mean absolute percentage error over all folds.
double avg_diff = 0.0;
double sq_diff = 0.0;
double avg_diff_perc = 0.0;
DecimalFormat df = new DecimalFormat("0.00");

try {
    // Read the file once; every fold works on its own copy so that removing
    // the held-out instance never affects the next fold.
    Instances full = readData();
    if (full == null) {
        throw new IllegalStateException("Dataset could not be loaded");
    }

    // Number of folds comes from the data instead of a hard-coded constant
    int n = full.numInstances();
    if (n < 2) {
        throw new IllegalStateException("Leave-one-out needs at least 2 instances, found " + n);
    }

    for (int i = 0; i < n; i++) {
        //Fresh copy of the dataset for this fold
        Instances data = new Instances(full);

        //Optional: remove unnecessary attributes
        //data.deleteAttributeAt(1);

        //Remove instance (held out as the test case for this fold)
        Instance inst = data.remove(i);

        Classifier cl = new IBk(3);
        cl.buildClassifier(data);

        //Actual and predicted benchmark values
        double estimated = cl.classifyInstance(inst);
        double actual = inst.classValue();

        //Diffs
        double diff = Math.abs(estimated - actual);
        sq_diff += Math.pow(estimated - actual, 2);
        avg_diff += diff;
        // NOTE: the relative error is Infinity/NaN when actual is 0
        double diff_perc = diff / actual * 100.0;
        avg_diff_perc += diff_perc;

        //Output
        System.out.println("Predicted: " + df.format(estimated) + " (actual " + actual + ") -> Diff " + df.format(diff) + " (" + df.format(diff_perc) + "%)");
    }

    // Turn the accumulated sums into averages over all folds
    avg_diff /= n;
    // Reported under the label "Squared diff", but this is the root of the mean squared error
    sq_diff = Math.sqrt(sq_diff / n);
    avg_diff_perc /= n;
    System.out.println("Average diff: " + df.format(avg_diff) + " Squared diff: " + df.format(sq_diff) + " (" + df.format(avg_diff_perc) + "%)");
}
catch (Exception ex) {
    ex.printStackTrace();
}

Predicted: 112,97 (actual 158.0) -> Diff 45,03 (28,50%)
Predicted: 123,63 (actual 126.0) -> Diff 2,37 (1,88%)
Predicted: 52,27 (actual 76.0) -> Diff 23,73 (31,23%)
Predicted: 69,17 (actual 98.7) -> Diff 29,53 (29,92%)
Predicted: 97,37 (actual 114.0) -> Diff 16,63 (14,59%)
Predicted: 102,40 (actual 98.9) -> Diff 3,50 (3,54%)
Predicted: 69,23 (actual 70.6) -> Diff 1,37 (1,94%)
Predicted: 57,43 (actual 67.2) -> Diff 9,77 (14,53%)
Predicted: 56,33 (actual 63.8) -> Diff 7,47 (11,70%)
Predicted: 51,67 (actual 60.9) -> Diff 9,23 (15,16%)
Predicted: 48,70 (actual 48.1) -> Diff 0,60 (1,25%)
Predicted: 37,80 (actual 44.9) -> Diff 7,10 (15,81%)
Predicted: 39,57 (actual 40.3) -> Diff 0,73 (1,82%)
Predicted: 39,60 (actual 39.5) -> Diff 0,10 (0,25%)
Predicted: 36,03 (actual 38.8) -> Diff 2,77 (7,13%)
Predicted: 40,30 (actual 37.4) -> Diff 2,90 (7,75%)
Predicted: 40,60 (actual 36.5) -> Diff 4,10 (11,23%)
Predicted: 48,77 (actual 34.6) -> Diff 14,17 (40,94%)
Predicted: 38,07 (actual 33.2) -> Diff 4,87