In [None]:
import csv
import random
import math
import operator
 
def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Read a ';'-separated file and randomly split its rows into two lists.

    The file content (minus its first character) is split on whitespace;
    every resulting token is one record, itself split on ';'.  Up to the
    first four fields of each record are converted to ``float`` when
    possible; fields that do not parse are left as strings.

    Args:
        filename: Path of the file to read.
        split: Probability threshold; a row goes to ``trainingSet`` when
            ``random.random() < split``, otherwise to ``testSet``.
        trainingSet: Optional list that receives training rows (appended
            in place).  A fresh list is created when omitted.
        testSet: Optional list that receives test rows (appended in
            place).  A fresh list is created when omitted.

    Returns:
        The tuple ``(trainingSet, testSet)`` -- the same list objects that
        were passed in, or the freshly created ones.
    """
    # Fresh lists per call: list literals used as defaults would be shared
    # between calls and keep accumulating rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    with open(filename, 'r') as csvfile:
        # NOTE(review): the first character of the file is discarded, as in
        # the original code -- presumably a BOM or similar marker; confirm
        # against the actual data file.
        content = csvfile.read()[1:]

    # str.split() with no argument never yields empty tokens, so every token
    # is a real record and none of them has to be skipped.
    for token in content.split():
        row = token.split(';')
        # Convert at most the first four fields; shorter rows are tolerated.
        for y in range(min(4, len(row))):
            try:
                row[y] = float(row[y])
            except ValueError:
                # Deliberate best effort: keep the raw string when the field
                # is not numeric.
                pass
        if random.random() < split:
            trainingSet.append(row)
        else:
            testSet.append(row)

    return trainingSet, testSet
 
 
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` positions.

    Only positions where both instances hold a ``float`` contribute to the
    result; every other position is skipped.
    """
    squaredSum = 0
    for idx in range(length):
        # Skip any position where either side is not a float instance.
        if not isinstance(instance1[idx], float) or not isinstance(instance2[idx], float):
            continue
        delta = instance1[idx] - instance2[idx]
        squaredSum += delta ** 2
    return math.sqrt(squaredSum)
 
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distances are computed with ``euclideanDistance`` over every position
    of ``testInstance`` except the last one.  Rows are ordered by ascending
    distance; rows at equal distance keep their order from ``trainingSet``.
    """
    featureCount = len(testInstance) - 1
    scored = [
        (candidate, euclideanDistance(testInstance, candidate, featureCount))
        for candidate in trainingSet
    ]
    ranked = sorted(scored, key=lambda pair: pair[1])
    # Index explicitly so that asking for more neighbours than there are
    # training rows still fails with IndexError.
    return [ranked[position][0] for position in range(k)]
 
def getResponse(neighbors):
    """Return the most frequent last-element value among ``neighbors``.

    When several values share the highest count, the one encountered first
    in ``neighbors`` is returned.
    """
    tally = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        tally[label] = tally.get(label, 0) + 1
    # Stable descending sort: tied labels keep their first-seen order.
    ranking = sorted(tally.items(), key=lambda entry: entry[1], reverse=True)
    winner = ranking[0]
    return winner[0]
 
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose last element equals its prediction.

    Args:
        testSet: Sequence of rows; the last element of each row is compared
            with the prediction at the same index.
        predictions: Sequence of predicted values, indexed like ``testSet``.

    Returns:
        A float between 0.0 and 100.0.  An empty ``testSet`` yields 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    if not testSet:
        # Nothing to score; accuracy is reported as 0.0 by convention.
        return 0.0
    correct = sum(
        1 for idx, row in enumerate(testSet) if row[-1] == predictions[idx]
    )
    return (correct / float(len(testSet))) * 100.0
    
def main(filename='carbon_nanotubes.csv', split=0.67, k=3):
    """Run the k-nearest-neighbours experiment and print its results.

    Loads the data file, splits it randomly into training and test rows,
    predicts a value for every test row from its ``k`` nearest training
    rows, prints each prediction next to the actual value, and finally
    prints the overall accuracy.

    Args:
        filename: Path of the ';'-separated data file to load.
        split: Threshold passed to ``loadDataset`` for the random
            train/test split.
        k: Number of neighbours consulted for each prediction.
    """
    # prepare data
    trainingSet = []
    testSet = []
    loadDataset(filename, split, trainingSet, testSet)

    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    # generate predictions
    predictions = []
    for instance in testSet:
        neighbors = getNeighbors(trainingSet, instance, k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(instance[-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
    

# Run the experiment only when executed as a script or notebook cell, not
# when this code is imported from another module.
if __name__ == '__main__':
    main()

Train set: 7118
Test set: 3602
> predicted='0.588946', actual='0.25597'
> predicted='0.039796', actual='0.374735'
> predicted='0.51733', actual='0.182779'
> predicted='0.969157', actual='0.302196'
> predicted='0.51733', actual='0.398581'
> predicted='0.017014', actual='0.68244'
> predicted='0.657521', actual='0.327502'
> predicted='0.51733', actual='0.732213'
> predicted='0.157373', actual='0.827503'
> predicted='0.232369', actual='0.898389'
> predicted='0.353687', actual='0.020882'
> predicted='0.547346', actual='0.211887'
> predicted='0.711887', actual='0.047346'
> predicted='0.469496', actual='0.136027'
> predicted='0.816373', actual='0.48165'
> predicted='0.366505', actual='0.701163'
> predicted='0.277407', actual='0.520882'
> predicted='0.469496', actual='0.558651'
> predicted='0.327159', actual='0.66211'
> predicted='0.085676', actual='0.750882'
> predicted='0.16211', actual='0.827159'
> predicted='0.442351', actual='0.777407'
> predicted='0.596337', actual='0.932119'
> predicted

> predicted='0.436895', actual='0.103562'
> predicted='0.795984', actual='0.129317'
> predicted='0.885643', actual='0.218976'
> predicted='0.628913', actual='0.29558'
> predicted='0.001174', actual='0.334508'
> predicted='0.052158', actual='0.385491'
> predicted='0.077763', actual='0.411097'
> predicted='0.808798', actual='0.475465'
> predicted='0.411238', actual='0.077905'
> predicted='0.552158', actual='0.218825'
> predicted='0.026424', actual='0.359757'
> predicted='0.257623', actual='0.39812'
> predicted='0.795984', actual='0.46265'
> predicted='0.180306', actual='0.513639'
> predicted='0.628913', actual='0.488273'
> predicted='0.270228', actual='0.603562'
> predicted='0.321294', actual='0.654627'
> predicted='0.193457', actual='0.52679'
> predicted='0.885643', actual='0.55231'
> predicted='0.269914', actual='0.603247'
> predicted='0.026424', actual='0.69309'
> predicted='0.052158', actual='0.718825'
> predicted='0.42429', actual='0.757623'
> predicted='0.167841', actual='0.501175'

> predicted='0.157279', actual='0.107081'
> predicted='0.001483', actual='0.386324'
> predicted='0.559878', actual='0.227137'
> predicted='0.029051', actual='0.362174'
> predicted='0.758254', actual='0.42439'
> predicted='0.121421', actual='0.455888'
> predicted='0.839166', actual='0.506167'
> predicted='0.133448', actual='0.467631'
> predicted='0.308269', actual='0.258254'
> predicted='0.006167', actual='0.339166'
> predicted='0.006167', actual='0.389187'
> predicted='0.633448', actual='0.350532'
> predicted='0.098957', actual='0.481922'
> predicted='0.827828', actual='0.443947'
> predicted='0.827828', actual='0.494157'
> predicted='0.909036', actual='0.575876'
> predicted='0.203119', actual='0.58761'
> predicted='0.285039', actual='0.618585'
> predicted='0.001483', actual='0.66865'
> predicted='0.08761', actual='0.420221'
> predicted='0.804698', actual='0.470287'
> predicted='0.16865', actual='0.501483'
> predicted='0.886324', actual='0.551746'
> predicted='0.260999', actual='0.59538

> predicted='0.158431', actual='0.491718'
> predicted='0.460548', actual='0.521'
> predicted='0.420339', actual='0.02651'
> predicted='0.724333', actual='0.05765'
> predicted='0.447857', actual='0.114435'
> predicted='0.173129', actual='0.233649'
> predicted='0.502881', actual='0.229976'
> predicted='0.711439', actual='0.378221'
> predicted='0.739041', actual='0.405683'
> predicted='0.10346', actual='0.436792'
> predicted='0.799541', actual='0.466067'
> predicted='0.641999', actual='0.581519'
> predicted='0.420339', actual='0.086963'
> predicted='0.480829', actual='0.147378'
> predicted='0.905683', actual='0.178564'
> predicted='0.568711', actual='0.235448'
> predicted='0.933059', actual='0.26659'
> predicted='0.290437', actual='0.350876'
> predicted='0.586963', actual='0.52651'
> predicted='0.554003', actual='0.614435'
> predicted='0.554003', actual='0.67491'
> predicted='0.096226', actual='0.762965'
> predicted='0.240769', actual='0.180298'
> predicted='0.905683', actual='0.239041'
>

> predicted='0.397435', actual='0.543809'
> predicted='0.375218', actual='0.708016'
> predicted='0.164464', actual='0.830053'
> predicted='0.618382', actual='0.432238'
> predicted='0.114591', actual='0.447186'
> predicted='0.007648', actual='0.48677'
> predicted='0.328137', actual='0.514104'
> predicted='0.196691', actual='0.528913'
> predicted='0.360249', actual='0.692976'
> predicted='0.067556', actual='0.732708'
> predicted='0.442127', actual='0.775401'
> predicted='0.216543', actual='0.88255'
> predicted='0.271161', actual='0.417364'
> predicted='0.151955', actual='0.484342'
> predicted='0.725148', actual='0.538907'
> predicted='0.940197', actual='0.605898'
> predicted='0.315726', actual='0.648082'
> predicted='0.501629', actual='0.6878'
> predicted='0.119826', actual='0.785162'
> predicted='0.561177', actual='0.895323'
> predicted='0.112323', actual='0.590993'
> predicted='0.114591', actual='0.633215'
> predicted='0.382644', actual='0.715357'
> predicted='0.449502', actual='0.7828

> predicted='0.179898', actual='0.513235'
> predicted='0.206798', actual='0.540151'
> predicted='0.228369', actual='0.561691'
> predicted='0.007928', actual='0.674453'
> predicted='0.056364', actual='0.722965'
> predicted='0.115449', actual='0.782029'
> predicted='0.196061', actual='0.862692'
> predicted='0.610174', actual='0.276744'
> predicted='0.551039', actual='0.30895'
> predicted='0.056231', actual='0.389595'
> predicted='0.163752', actual='0.497215'
> predicted='0.911189', actual='0.577785'
> predicted='0.265984', actual='0.599377'
> predicted='0.298212', actual='0.631589'
> predicted='0.0456', actual='0.712012'
> predicted='0.094035', actual='0.760592'
> predicted='0.486298', actual='0.819588'
> predicted='0.287492', actual='0.954143'
> predicted='0.706798', actual='0.373562'
> predicted='0.09391', actual='0.427294'
> predicted='0.15296', actual='0.486415'
> predicted='0.142274', actual='0.475717'
> predicted='0.841282', actual='0.507928'
> predicted='0.222965', actual='0.55636

> predicted='0.565156', actual='0.704226'
> predicted='0.419918', actual='0.7534'
> predicted='0.115748', actual='0.782396'
> predicted='0.303479', actual='0.776572'
> predicted='0.57627', actual='0.909214'
> predicted='0.294536', actual='0.961006'
> predicted='0.397008', actual='0.7303'
> predicted='0.643321', actual='0.837555'
> predicted='0.5302', actual='0.863613'
> predicted='0.08027', actual='0.886448'
> predicted='0.24844', actual='0.915372'
> predicted='0.303479', actual='0.970277'
> predicted='0.507149', actual='0.840705'
> predicted='0.675194', actual='0.86975'
> predicted='0.562216', actual='0.89584'
> predicted='0.257836', actual='0.924892'
> predicted='0.419918', actual='0.947633'
> predicted='0.617208', actual='0.950922'
> predicted='0.455056', actual='0.983111'
> predicted='0.065535', actual='0.014334'
> predicted='0.112807', actual='0.164003'
> predicted='0.349116', actual='0.015589'
> predicted='0.783456', actual='0.116754'
> predicted='0.349116', actual='0.066769'
> p

> predicted='0.472987', actual='0.10097'
> predicted='0.374498', actual='0.04169'
> predicted='0.746548', actual='0.080526'
> predicted='0.44317', actual='0.11008'
> predicted='0.815013', actual='0.148838'
> predicted='0.860789', actual='0.155654'
> predicted='0.511898', actual='0.1783'
> predicted='0.445547', actual='0.073668'
> predicted='0.730632', actual='0.103307'
> predicted='0.808137', actual='0.14207'
> predicted='0.459219', actual='0.164776'
> predicted='0.308137', actual='0.014196'
> predicted='0.787661', actual='0.121507'
> predicted='0.128354', actual='0.089609'
> predicted='0.562181', actual='0.189815'
> predicted='0.621507', actual='0.287661'
> predicted='0.41803', actual='0.046196'
> predicted='0.401937', actual='0.069053'
> predicted='0.774016', actual='0.107806'
> predicted='0.438664', actual='0.144226'
> predicted='0.888178', actual='0.18294'
> predicted='0.500522', actual='0.205663'
> predicted='0.616878', actual='0.244372'
> predicted='0.95689', actual='0.251245'
> 

> predicted='0.80714', actual='0.140477'
> predicted='0.683413', actual='0.228049'
> predicted='0.121681', actual='0.332877'
> predicted='0.039088', actual='0.372403'
> predicted='0.095776', actual='0.42899'
> predicted='0.183413', actual='0.516848'
> predicted='0.760674', actual='0.549432'
> predicted='0.939481', actual='0.60611'
> predicted='0.776195', actual='0.109543'
> predicted='0.491008', actual='0.15764'
> predicted='0.863778', actual='0.197287'
> predicted='0.920433', actual='0.253952'
> predicted='0.619838', actual='0.286548'
> predicted='0.992687', actual='0.326111'
> predicted='0.691971', actual='0.358654'
> predicted='0.18689', actual='0.398185'
> predicted='0.281278', actual='0.614754'
> predicted='0.889514', actual='0.222899'
> predicted='0.377757', actual='0.255527'
> predicted='0.90488', actual='0.238531'
> predicted='0.188627', actual='0.310553'
> predicted='0.049432', actual='0.382823'
> predicted='0.20405', actual='0.415422'
> predicted='0.276195', actual='0.487489'

> predicted='0.257582', actual='0.590915'
> predicted='0.34675', actual='0.013416'
> predicted='0.730509', actual='0.063843'
> predicted='0.42431', actual='0.090977'
> predicted='0.486312', actual='0.152979'
> predicted='0.556117', actual='0.222784'
> predicted='0.583176', actual='0.249843'
> predicted='0.893305', actual='0.432134'
> predicted='0.218971', actual='0.552305'
> predicted='0.05213', actual='0.718796'
> predicted='0.474625', actual='0.141291'
> predicted='0.8778', actual='0.211133'
> predicted='0.017379', actual='0.350713'
> predicted='0.040526', actual='0.373859'
> predicted='0.059961', actual='0.393295'
> predicted='0.796466', actual='0.463133'
> predicted='0.180083', actual='0.513416'
> predicted='0.873834', actual='0.540501'
> predicted='0.199531', actual='0.532865'
> predicted='0.893305', actual='0.559971'
> predicted='0.963012', actual='0.629678'
> predicted='0.319646', actual='0.652979'
> predicted='0.032866', actual='0.699533'
> predicted='0.389451', actual='0.72278

> predicted='0.635933', actual='0.968931'
> predicted='0.649223', actual='0.982602'
> predicted='0.055331', actual='0.020519'
> predicted='0.373833', actual='0.075671'
> predicted='0.812303', actual='0.110453'
> predicted='0.676052', actual='0.009598'
> predicted='0.498447', actual='0.16533'
> predicted='0.71998', actual='0.053282'
> predicted='0.487657', actual='0.154364'
> predicted='0.602696', actual='0.269318'
> predicted='0.714666', actual='0.047766'
> predicted='0.451048', actual='0.08249'
> predicted='0.198271', actual='0.163449'
> predicted='0.591991', actual='0.258655'
> predicted='0.28406', actual='0.31882'
> predicted='0.344569', actual='0.011426'
> predicted='0.399729', actual='0.066583'
> predicted='0.669035', actual='0.037285'
> predicted='0.81882', actual='0.152222'
> predicted='0.520519', actual='0.187087'
> predicted='0.879404', actual='0.213029'
> predicted='0.949166', actual='0.247879'
> predicted='0.954765', actual='0.25327'
> predicted='0.646712', actual='0.313346'

> predicted='0.510742', actual='0.843995'
> predicted='0.198359', actual='0.864966'
> predicted='0.047889', actual='0.897319'
> predicted='0.4234', actual='0.907017'
> predicted='0.078664', actual='0.562424'
> predicted='0.088272', actual='0.572143'
> predicted='0.926447', actual='0.593151'
> predicted='0.292133', actual='0.625537'
> predicted='0.139955', actual='0.656517'
> predicted='0.999354', actual='0.666033'
> predicted='0.364966', actual='0.698359'
> predicted='0.052691', actual='0.719295'
> predicted='0.544647', actual='0.877982'
> predicted='0.30498', actual='0.971748'
> predicted='0.819784', actual='0.636843'
> predicted='0.335904', actual='0.669269'
> predicted='0.646693', actual='0.679057'
> predicted='0.955703', actual='0.772658'
> predicted='0.159573', actual='0.826271'
> predicted='0.232442', actual='0.89903'
> predicted='0.11276', actual='0.961988'
> predicted='0.660988', actual='0.99427'
> predicted='0.326271', actual='0.659573'
> predicted='0.903893', actual='0.753376

> predicted='0.25638', actual='0.665407'
> predicted='0.895102', actual='0.741871'
> predicted='0.090987', actual='0.756771'
> predicted='0.869593', actual='0.280303'
> predicted='0.982548', actual='0.316455'
> predicted='0.741098', actual='0.407786'
> predicted='0.838874', actual='0.429094'
> predicted='0.789947', actual='0.456657'
> predicted='0.520631', actual='0.444072'
> predicted='0.174162', actual='0.507912'
> predicted='0.161769', actual='0.57184'
> predicted='0.366345', actual='0.699442'
> predicted='0.060804', actual='0.727037'
> predicted='0.201337', actual='0.790689'
> predicted='0.173853', actual='0.839663'
> predicted='0.188874', actual='0.854685'
> predicted='0.542847', actual='0.875973'
> predicted='0.237788', actual='0.903526'
> predicted='0.710617', actual='0.37792'
> predicted='0.065371', actual='0.39909'
> predicted='0.003971', actual='0.41415'
> predicted='0.129269', actual='0.463019'
> predicted='0.838874', actual='0.505591'
> predicted='0.19302', actual='0.52703'

> predicted='0.906387', actual='0.239721'
> predicted='0.595796', actual='0.262463'
> predicted='0.037423', actual='0.370756'
> predicted='0.812092', actual='0.145425'
> predicted='0.483559', actual='0.150226'
> predicted='0.53318', actual='0.199847'
> predicted='0.537423', actual='0.20409'
> predicted='0.591129', actual='0.257796'
> predicted='0.307384', actual='0.338888'
> predicted='0.010224', actual='0.343557'
> predicted='0.059927', actual='0.39326'
> predicted='0.064653', actual='0.397987'
> predicted='0.136293', actual='0.469627'
> predicted='0.834542', actual='0.501208'
> predicted='0.528436', actual='0.195102'
> predicted='0.915941', actual='0.249274'
> predicted='0.947428', actual='0.280761'
> predicted='0.663594', actual='0.330261'
> predicted='0.636293', actual='0.30296'
> predicted='0.019133', actual='0.352467'
> predicted='0.023603', actual='0.356936'
> predicted='0.078148', actual='0.411481'
> predicted='0.109655', actual='0.442988'
> predicted='0.13201', actual='0.46534

> predicted='0.667825', actual='0.001159'
> predicted='0.424157', actual='0.090823'
> predicted='0.488271', actual='0.154938'
> predicted='0.539512', actual='0.206179'
> predicted='0.398736', actual='0.065403'
> predicted='0.155093', actual='0.103943'
> predicted='0.603675', actual='0.270341'
> predicted='0.437005', actual='0.103672'
> predicted='0.462799', actual='0.129466'
> predicted='0.834816', actual='0.16815'
> predicted='0.52698', actual='0.193646'
> predicted='0.52698', actual='0.244891'
> predicted='0.616601', actual='0.283268'
> predicted='0.667825', actual='0.334492'
> predicted='0.06534', actual='0.398673'
> predicted='0.783154', actual='0.116488'
> predicted='0.475591', actual='0.142257'
> predicted='0.61668', actual='0.283347'
> predicted='0.129501', actual='0.462834'
> predicted='0.180688', actual='0.514021'
> predicted='0.334492', actual='0.001159'
> predicted='0.449876', actual='0.116542'
> predicted='0.539666', actual='0.206333'
> predicted='0.629522', actual='0.29618

> predicted='0.401991', actual='0.068623'
> predicted='0.772722', actual='0.105909'
> predicted='0.691019', actual='0.150174'
> predicted='0.803442', actual='0.136554'
> predicted='0.054811', actual='0.262568'
> predicted='0.456316', actual='0.249033'
> predicted='0.833916', actual='0.37472'
> predicted='0.487058', actual='0.361098'
> predicted='0.197874', actual='0.405282'
> predicted='0.109502', actual='0.442796'
> predicted='0.139981', actual='0.473206'
> predicted='0.333776', actual='0.667231'
> predicted='0.585503', actual='0.711528'
> predicted='0.282865', actual='0.742135'
> predicted='0.157036', actual='0.823718'
> predicted='0.269286', actual='0.810199'
> predicted='0.187594', actual='0.854421'
> predicted='0.269286', actual='0.935971'
> predicted='0.300036', actual='0.966525'
> predicted='0.252251', actual='0.044817'
> predicted='0.963126', actual='0.089015'
> predicted='0.993814', actual='0.119695'
> predicted='0.534663', actual='0.201232'
> predicted='0.772722', actual='0.3