In [6]:
import csv
import random
import math
import operator
 
def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Read a ';'-separated file and randomly split its rows into two lists.

    The whole file is read, its first character is discarded, and the rest is
    cut into records on whitespace; each record is then split on ';'. Every
    field that parses as a float is converted in place; fields that do not
    parse are kept as strings.

    Args:
        filename: Path of the file to read.
        split: Threshold compared against ``random.random()``; a row goes to
            ``trainingSet`` when the random draw is below it, otherwise to
            ``testSet``.
        trainingSet: Optional list that receives the training rows. A new
            list is created when omitted.
        testSet: Optional list that receives the test rows. A new list is
            created when omitted.

    Returns:
        The ``(trainingSet, testSet)`` pair, i.e. the same list objects that
        were passed in (or the freshly created ones).
    """
    # Fresh lists per call: mutable default arguments would be shared between
    # calls and silently accumulate rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    with open(filename, 'r') as csvfile:
        # NOTE(review): the first character of the file is dropped, exactly as
        # in the original code -- presumably to skip a BOM or marker character;
        # confirm against the actual data file.
        rows = [record.split(';') for record in csvfile.read()[1:].split()]

    # Iterate over every record: str.split() never produces a trailing empty
    # token, so stopping one short would discard the last real row.
    for row in rows:
        # Convert per row (not by the width of the first row) so ragged rows
        # neither raise IndexError nor keep unconverted trailing fields.
        for col, value in enumerate(row):
            try:
                row[col] = float(value)
            except ValueError:
                pass  # non-numeric field: keep the original string
        if random.random() < split:
            trainingSet.append(row)
        else:
            testSet.append(row)

    return trainingSet, testSet
 
 
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` positions.

    Only positions where both values are ``float`` instances contribute to
    the distance; any other position (e.g. a string field) is skipped.
    """
    squared_sum = 0
    for idx in range(length):
        # Skip the position unless both sides hold floats.
        if not isinstance(instance1[idx], float):
            continue
        if not isinstance(instance2[idx], float):
            continue
        delta = instance1[idx] - instance2[idx]
        squared_sum += delta ** 2
    return math.sqrt(squared_sum)
 
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distances are computed with ``euclideanDistance`` over all positions
    except the last one of ``testInstance``. Rows are ordered by ascending
    distance; rows at equal distance keep their order from ``trainingSet``.
    """
    feature_count = len(testInstance) - 1
    scored = [
        (candidate, euclideanDistance(testInstance, candidate, feature_count))
        for candidate in trainingSet
    ]
    scored.sort(key=lambda pair: pair[1])
    # Index explicitly so asking for more neighbours than exist still raises.
    return [scored[rank][0] for rank in range(k)]
 
def getResponse(neighbors):
    """Return the most frequent last element among ``neighbors``.

    Each neighbour votes with its final element. When several values share
    the highest count, the one that was seen first wins.
    """
    tally = {}
    for row in neighbors:
        label = row[-1]
        tally[label] = tally.get(label, 0) + 1
    ranked = list(tally.items())
    # Stable sort: equal counts keep first-seen order.
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked[0][0]
 
def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose last element was predicted.

    Args:
        testSet: Sequence of rows; the last element of each row is compared
            with the prediction at the same index.
        predictions: Sequence of predicted values, indexed like ``testSet``.

    Returns:
        A float in the range 0.0-100.0. An empty ``testSet`` yields 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` has fewer entries than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = sum(
        1 for idx, row in enumerate(testSet) if row[-1] == predictions[idx]
    )
    return (correct / float(total)) * 100.0
    
def main():
    """Run the k-NN experiment on 'carbon_nanotubes.csv' and print the results.

    Loads and splits the data, predicts a value for every test row from its
    3 nearest training rows, prints each prediction next to the row's last
    element, and finally prints the accuracy percentage.
    """
    # prepare data
    split = 0.67
    trainingSet = []
    testSet = []
    loadDataset('carbon_nanotubes.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    k = 3
    predictions = []
    for row in testSet:
        result = getResponse(getNeighbors(trainingSet, row, k))
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(row[-1]))

    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')

# Entry point: run the k-NN experiment when this code is executed.
main()

Train set: 7105
Test set: 3615
> predicted=0.114944, actual=0.232369
> predicted=0.302196, actual=0.088712
> predicted=0.157373, actual=0.039796
> predicted=0.374735, actual=0.25597
> predicted=0.51733, actual=0.398581
> predicted=0.588946, actual=0.469452
> predicted=0.827503, actual=0.943402
> predicted=0.75629, actual=0.875088
> predicted=0.75629, actual=0.969157
> predicted=0.017014, actual=0.058651
> predicted=0.174486, actual=0.020882
> predicted=0.174486, actual=0.085676
> predicted=0.211887, actual=0.123074
> predicted=0.277407, actual=0.366505
> predicted=0.096337, actual=0.00759
> predicted=0.047346, actual=0.136027
> predicted=0.289735, actual=0.201163
> predicted=0.636027, actual=0.547346
> predicted=0.711887, actual=0.623074
> predicted=0.816373, actual=0.66211
> predicted=0.50759, actual=0.750882
> predicted=0.816373, actual=0.905442
> predicted=0.853688, actual=0.942351
> predicted=0.585676, actual=0.674486
> predicted=0.827159, actual=0.738691
> predicted=0.777407, actu

> predicted=0.513848, actual=0.654627
> predicted=0.769914, actual=0.718976
> predicted=0.859757, actual=0.808798
> predicted=0.321607, actual=0.372603
> predicted=0.411238, actual=0.462247
> predicted=0.603247, actual=0.55231
> predicted=0.69309, actual=0.642132
> predicted=0.808634, actual=0.757623
> predicted=0.872843, actual=0.783076
> predicted=0.693457, actual=0.834104
> predicted=0.872843, actual=0.923789
> predicted=0.590956, actual=0.641967
> predicted=0.757122, actual=0.616409
> predicted=0.757122, actual=0.667437
> predicted=0.757122, actual=0.706176
> predicted=0.89812, actual=0.847181
> predicted=0.987961, actual=0.936895
> predicted=0.872843, actual=0.96265
> predicted=0.705936, actual=0.65494
> predicted=0.770228, actual=0.680515
> predicted=0.590956, actual=0.731453
> predicted=0.770228, actual=0.821294
> predicted=0.885643, actual=0.936581
> predicted=0.911097, actual=0.860124
> predicted=0.975301, actual=0.92429
> predicted=0.086656, actual=0.005256
> predicted=0.0866

> predicted=0.727137, actual=0.878003
> predicted=0.76943, actual=0.920221
> predicted=0.839166, actual=0.970287
> predicted=0.816493, actual=0.866802
> predicted=0.94808, actual=0.897904
> predicted=0.773133, actual=0.874072
> predicted=0.773133, actual=0.92439
> predicted=0.886324, actual=0.936408
> predicted=0.94808, actual=0.978969
> predicted=0.943947, actual=0.994157
> predicted=0.187404, actual=0.033502
> predicted=0.226114, actual=0.136288
> predicted=0.379912, actual=0.290125
> predicted=0.251714, actual=0.072174
> predicted=0.021102, actual=0.110777
> predicted=0.30285, actual=0.212953
> predicted=0.367102, actual=0.457029
> predicted=0.49531, actual=0.405558
> predicted=0.649041, actual=0.559151
> predicted=0.598021, actual=0.687967
> predicted=0.212953, actual=0.033502
> predicted=0.251714, actual=0.097541
> predicted=0.341513, actual=0.187404
> predicted=0.290125, actual=0.136289
> predicted=0.290125, actual=0.226114
> predicted=0.418281, actual=0.328406
> predicted=0.5723

> predicted=0.905683, actual=0.966067
> predicted=0.969918, actual=0.999174
> predicted=0.106792, actual=0.019028
> predicted=0.089319, actual=0.001355
> predicted=0.106792, actual=0.150556
> predicted=0.054164, actual=0.010388
> predicted=0.1418, actual=0.098051
> predicted=0.0453, actual=0.001574
> predicted=0.080689, actual=0.124399
> predicted=0.194553, actual=0.150853
> predicted=0.238528, actual=0.282152
> predicted=0.220879, actual=0.30862
> predicted=0.440368, actual=0.352314
> predicted=0.097954, actual=0.054318
> predicted=0.13314, actual=0.176817
> predicted=0.212111, actual=0.255779
> predicted=0.343572, actual=0.299823
> predicted=0.440368, actual=0.370105
> predicted=0.484091, actual=0.413867
> predicted=0.089104, actual=0.045472
> predicted=0.168035, actual=0.12431
> predicted=0.361423, actual=0.405133
> predicted=0.335094, actual=0.378776
> predicted=0.343872, actual=0.387565
> predicted=0.45768, actual=0.414061
> predicted=0.42279, actual=0.466456
> predicted=0.545436,

> predicted=0.358867, actual=0.394726
> predicted=0.441174, actual=0.47704
> predicted=0.523528, actual=0.559403
> predicted=0.578365, actual=0.542479
> predicted=0.707024, actual=0.742808
> predicted=0.888434, actual=0.852608
> predicted=0.439016, actual=0.403205
> predicted=0.584613, actual=0.485458
> predicted=0.422125, actual=0.521304
> predicted=0.567889, actual=0.504473
> predicted=0.441174, actual=0.540342
> predicted=0.567889, actual=0.603686
> predicted=0.586742, actual=0.622564
> predicted=0.685936, actual=0.65005
> predicted=0.768152, actual=0.732265
> predicted=0.932677, actual=0.896829
> predicted=0.979286, actual=0.915925
> predicted=0.888434, actual=0.95175
> predicted=0.565628, actual=0.466435
> predicted=0.439016, actual=0.502272
> predicted=0.631027, actual=0.66687
> predicted=0.694388, actual=0.730145
> predicted=0.858867, actual=0.894726
> predicted=0.877792, actual=0.913685
> predicted=0.960122, actual=0.995987
> predicted=0.609859, actual=0.574007
> predicted=0.71

> predicted=0.26247, actual=0.317657
> predicted=0.401829, actual=0.37283
> predicted=0.340705, actual=0.39584
> predicted=0.661349, actual=0.606253
> predicted=0.684321, actual=0.629171
> predicted=0.658251, actual=0.713392
> predicted=0.14787, actual=0.093056
> predicted=0.24844, actual=0.1934
> predicted=0.355492, actual=0.300381
> predicted=0.488915, actual=0.433773
> predicted=0.483111, actual=0.427953
> predicted=0.401829, actual=0.456978
> predicted=0.771392, actual=0.716409
> predicted=0.690383, actual=0.745395
> predicted=0.823494, actual=0.768462
> predicted=0.817657, actual=0.87283
> predicted=0.122067, actual=0.176896
> predicted=0.277476, actual=0.332538
> predicted=0.442829, actual=0.387661
> predicted=0.436727, actual=0.381603
> predicted=0.488915, actual=0.544139
> predicted=0.483111, actual=0.538138
> predicted=0.567203, actual=0.622067
> predicted=0.670611, actual=0.615748
> predicted=0.638469, actual=0.6934
> predicted=0.667442, actual=0.722414
> predicted=0.803479, 

> predicted=0.990645, actual=0.964429
> predicted=0.342304, actual=0.36859
> predicted=0.470943, actual=0.419686
> predicted=0.494607, actual=0.443394
> predicted=0.617983, actual=0.566769
> predicted=0.644261, actual=0.593026
> predicted=0.691454, actual=0.742584
> predicted=0.740147, actual=0.791243
> predicted=0.766317, actual=0.817474
> predicted=0.919686, actual=0.86859
> predicted=0.917187, actual=0.866027
> predicted=0.940807, actual=0.991954
> predicted=0.393459, actual=0.444664
> predicted=0.522228, actual=0.495913
> predicted=0.468348, actual=0.519527
> predicted=0.621907, actual=0.570669
> predicted=0.569406, actual=0.620631
> predicted=0.774341, actual=0.67182
> predicted=0.644261, actual=0.695476
> predicted=0.720411, actual=0.746729
> predicted=0.692811, actual=0.743893
> predicted=0.846374, actual=0.795133
> predicted=0.770155, actual=0.821413
> predicted=0.767559, actual=0.818796
> predicted=0.793835, actual=0.845041
> predicted=0.947365, actual=0.89621
> predicted=0.91

> predicted=0.947893, actual=0.876735
> predicted=0.947893, actual=0.995685
> predicted=0.876489, actual=0.805172
> predicted=0.829473, actual=0.900725
> predicted=0.094126, actual=0.028794
> predicted=0.070043, actual=0.013398
> predicted=0.05279, actual=0.085477
> predicted=0.054582, actual=0.021947
> predicted=0.111205, actual=0.078621
> predicted=0.094126, actual=0.126718
> predicted=0.150799, actual=0.183413
> predicted=0.064978, actual=0.0083
> predicted=0.080296, actual=0.023638
> predicted=0.080296, actual=0.047737
> predicted=0.217837, actual=0.063242
> predicted=0.217837, actual=0.095776
> predicted=0.209154, actual=0.119838
> predicted=0.209154, actual=0.152469
> predicted=0.070043, actual=0.135272
> predicted=0.111205, actual=0.167832
> predicted=0.24871, actual=0.191971
> predicted=0.281278, actual=0.224592
> predicted=0.240123, actual=0.207495
> predicted=0.264191, actual=0.296813
> predicted=0.064978, actual=0.032371
> predicted=0.121681, actual=0.089034
> predicted=0.10

> predicted=0.548435, actual=0.497985
> predicted=0.641341, actual=0.590915
> predicted=0.013416, actual=0.040501
> predicted=0.141291, actual=0.090977
> predicted=0.133577, actual=0.083267
> predicted=0.211133, actual=0.160631
> predicted=0.280848, actual=0.230531
> predicted=0.311913, actual=0.362274
> predicted=0.532865, actual=0.482445
> predicted=0.552305, actual=0.501839
> predicted=0.579362, actual=0.528976
> predicted=0.649082, actual=0.59863
> predicted=0.57156, actual=0.621912
> predicted=0.769216, actual=0.718796
> predicted=0.711077, actual=0.761523
> predicted=0.114262, actual=0.164534
> predicted=0.183991, actual=0.23443
> predicted=0.393295, actual=0.443756
> predicted=0.420389, actual=0.47082
> predicted=0.412725, actual=0.463133
> predicted=0.509545, actual=0.559971
> predicted=0.703479, actual=0.652979
> predicted=0.722784, actual=0.672415
> predicted=0.749843, actual=0.699533
> predicted=0.792543, actual=0.819701
> predicted=0.839023, actual=0.889393
> predicted=0.31

> predicted=0.213029, actual=0.247879
> predicted=0.148713, actual=0.218527
> predicted=0.148713, actual=0.25327
> predicted=0.278654, actual=0.313346
> predicted=0.368249, actual=0.333477
> predicted=0.269318, actual=0.339019
> predicted=0.478293, actual=0.373833
> predicted=0.115985, actual=0.026131
> predicted=0.075671, actual=0.06096
> predicted=0.14139, actual=0.176052
> predicted=0.242395, actual=0.207509
> predicted=0.268032, actual=0.302771
> predicted=0.427147, actual=0.322719
> predicted=0.427147, actual=0.357396
> predicted=0.362752, actual=0.328027
> predicted=0.417774, actual=0.382955
> predicted=0.42329, actual=0.388497
> predicted=0.478293, actual=0.443599
> predicted=0.234676, actual=0.130655
> predicted=0.066583, actual=0.136063
> predicted=0.240063, actual=0.170721
> predicted=0.225401, actual=0.190718
> predicted=0.196187, actual=0.230993
> predicted=0.257119, actual=0.291978
> predicted=0.242395, actual=0.312303
> predicted=0.416408, actual=0.347066
> predicted=0.47

> predicted=0.845688, actual=0.78083
> predicted=0.746945, actual=0.811716
> predicted=0.897319, actual=0.843995
> predicted=0.874672, actual=0.907017
> predicted=0.476649, actual=0.50898
> predicted=0.603036, actual=0.635444
> predicted=0.688819, actual=0.656517
> predicted=0.666033, actual=0.698359
> predicted=0.729032, actual=0.761302
> predicted=0.814838, actual=0.782483
> predicted=0.887688, actual=0.855404
> predicted=0.961988, actual=0.908629
> predicted=0.99427, actual=0.941015
> predicted=0.918321, actual=0.950649
> predicted=0.918321, actual=0.971748
> predicted=0.615806, actual=0.583537
> predicted=0.636843, actual=0.669269
> predicted=0.581783, actual=0.646693
> predicted=0.743758, actual=0.679057
> predicted=0.709734, actual=0.742096
> predicted=0.80498, actual=0.772658
> predicted=0.826271, actual=0.858682
> predicted=0.835904, actual=0.8683
> predicted=0.921668, actual=0.889354
> predicted=0.887688, actual=0.952291
> predicted=0.89903, actual=0.984649
> predicted=0.56242

> predicted=0.688874, actual=0.716367
> predicted=0.858794, actual=0.831175
> predicted=0.880156, actual=0.907786
> predicted=0.971668, actual=0.944072
> predicted=0.557963, actual=0.530508
> predicted=0.621985, actual=0.545619
> predicted=0.496831, actual=0.573107
> predicted=0.609724, actual=0.63721
> predicted=0.686058, actual=0.658614
> predicted=0.673853, actual=0.701337
> predicted=0.750129, actual=0.72264
> predicted=0.737788, actual=0.786677
> predicted=0.765292, actual=0.814179
> predicted=0.865274, actual=0.89282
> predicted=0.941711, actual=0.91415
> predicted=0.880156, actual=0.929094
> predicted=0.880156, actual=0.956657
> predicted=0.922743, actual=0.978061
> predicted=0.67084, actual=0.64338
> predicted=0.783824, actual=0.75638
> predicted=0.798971, actual=0.771533
> predicted=0.869046, actual=0.820339
> predicted=0.896505, actual=0.847799
> predicted=0.835404, actual=0.862888
> predicted=0.884222, actual=0.911668
> predicted=0.926603, actual=0.89909
> predicted=0.858794

> predicted=0.717566, actual=0.654505
> predicted=0.735258, actual=0.766729
> predicted=0.816365, actual=0.784879
> predicted=0.789707, actual=0.821172
> predicted=0.92501, actual=0.893507
> predicted=0.649698, actual=0.618212
> predicted=0.699373, actual=0.667903
> predicted=0.704128, actual=0.672643
> predicted=0.659198, actual=0.722275
> predicted=0.816892, actual=0.753779
> predicted=0.807745, actual=0.776222
> predicted=0.811803, actual=0.780304
> predicted=0.861322, actual=0.829826
> predicted=0.789707, actual=0.884233
> predicted=0.789707, actual=0.915695
> predicted=0.920446, actual=0.888942
> predicted=0.942888, actual=0.974412
> predicted=0.929122, actual=0.992145
> predicted=0.694655, actual=0.663159
> predicted=0.704128, actual=0.76722
> predicted=0.89326, actual=0.798691
> predicted=0.771921, actual=0.803416
> predicted=0.82142, actual=0.852919
> predicted=0.965358, actual=0.933887
> predicted=0.78931, actual=0.757796
> predicted=0.843557, actual=0.812092
> predicted=0.816

> predicted=0.796168, actual=0.847355
> predicted=0.821604, actual=0.911305
> predicted=0.872846, actual=0.962459
> predicted=0.936975, actual=0.988131
> predicted=0.475272, actual=0.424067
> predicted=0.513799, actual=0.564963
> predicted=0.590872, actual=0.539666
> predicted=0.680676, actual=0.629522
> predicted=0.655226, actual=0.706499
> predicted=0.744978, actual=0.796132
> predicted=0.77061, actual=0.82176
> predicted=0.949935, actual=0.860313
> predicted=0.949935, actual=0.911558
> predicted=0.885812, actual=0.937008
> predicted=0.885812, actual=0.975577
> predicted=0.744597, actual=0.693305
> predicted=0.770339, actual=0.719176
> predicted=0.860132, actual=0.808924
> predicted=0.834816, actual=0.88598
> predicted=0.975765, actual=0.924585
> predicted=0.718916, actual=0.667708
> predicted=0.808606, actual=0.7574
> predicted=0.924205, actual=0.873
> predicted=0.988559, actual=0.898822
> predicted=0.860132, actual=0.950013
> predicted=0.847132, actual=0.898296
> predicted=0.924205

> predicted=0.060602, actual=0.087799
> predicted=0.145501, actual=0.11835
> predicted=0.189807, actual=0.162631
> predicted=0.047004, actual=0.074188
> predicted=0.080966, actual=0.053833
> predicted=0.043585, actual=0.098008
> predicted=0.179685, actual=0.125188
> predicted=0.023223, actual=0.10481
> predicted=0.213703, actual=0.131994
> predicted=0.094487, actual=0.148932
> predicted=0.25801, actual=0.176141
> predicted=0.183031, actual=0.155804
> predicted=0.227288, actual=0.200123
> predicted=0.234056, actual=0.20684
> predicted=0.251175, actual=0.278393
> predicted=0.203531, actual=0.257957
> predicted=0.339725, actual=0.285116
> predicted=0.247774, actual=0.302201
> predicted=0.383891, actual=0.329392
> predicted=0.142314, actual=0.016419
> predicted=0.033428, actual=0.060557
> predicted=0.067417, actual=0.040247
> predicted=0.084454, actual=0.111582
> predicted=0.135487, actual=0.162672
> predicted=0.213703, actual=0.186486
> predicted=0.220445, actual=0.193239
> predicted=0.24

> predicted=0.435514, actual=0.404449
> predicted=0.032468, actual=0.133191
> predicted=0.28439, actual=0.18366
> predicted=0.412151, actual=0.33077
> predicted=0.412151, actual=0.381166
> predicted=0.412151, actual=0.462573
> predicted=0.06729, actual=0.016976
> predicted=0.121581, actual=0.172009
> predicted=0.102147, actual=0.203037
> predicted=0.28439, actual=0.253392
> predicted=0.214777, actual=0.265207
> predicted=0.346541, actual=0.296172
> predicted=0.307755, actual=0.358229
> predicted=0.288305, actual=0.389122
> predicted=0.520814, actual=0.439533
> predicted=0.400625, actual=0.451093
> predicted=0.532517, actual=0.482116
> predicted=0.574935, actual=0.493493
> predicted=0.644716, actual=0.54393
> predicted=0.574935, actual=0.625408
> predicted=0.636961, actual=0.586519
> predicted=0.125561, actual=0.044072
> predicted=0.28439, actual=0.334681
> predicted=0.365618, actual=0.41603
> predicted=0.63317, actual=0.55182
> predicted=0.703095, actual=0.602241
> predicted=0.644716, 

> predicted=0.0857, actual=0.021758
> predicted=0.944702, actual=0.85518
> predicted=0.893183, actual=0.982816
> predicted=0.162485, actual=0.098121
> predicted=0.047309, actual=0.136891
> predicted=0.290712, actual=0.201127
> predicted=0.175288, actual=0.23913
> predicted=0.392853, actual=0.328661
> predicted=0.521472, actual=0.431915
> predicted=0.86716, actual=0.777666
> predicted=0.752316, actual=0.816871
> predicted=0.970526, actual=0.906375
> predicted=0.123845, actual=0.213393
> predicted=0.982296, actual=0.892606
> predicted=0.367277, actual=0.277552
> predicted=0.431854, actual=0.521825
> predicted=0.47006, actual=0.559829
> predicted=0.713426, actual=0.623685
> predicted=0.392913, actual=0.547522
> predicted=0.701601, actual=0.637348
> predicted=0.675666, actual=0.585842
> predicted=0.701601, actual=0.791357
> predicted=0.82923, actual=0.85518
> predicted=0.82923, actual=0.944702
> predicted=0.893183, actual=0.982816
> predicted=0.816871, actual=0.970526
> predicted=0.105629,