In [3]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import os, argparse, math, random
from datetime import datetime
import errno
import sklearn.svm as svm 

start_time = datetime.now()

outputPath = "../../output/svm/2b/"

if not os.path.exists(os.path.dirname(outputPath)):
    try:
        os.makedirs(os.path.dirname(outputPath))
    except OSError as exc:  # Guard against race condition
        if exc.errno != errno.EEXIST:
            raise

inputPath = "../../dataset/prep/2b/bovw/"

train = ["train_bayou.bovw", "train_chalet.bovw", "train_creek.bovw"]
test = ["test_bayou.bovw", "test_chalet.bovw", "test_creek.bovw"]

In [4]:
testData = []
trainData = []


def fileHandle(fileName):
    print("Reading: ", fileName)
    wholeData = []
    file = open(fileName)
    for line in file:
        teLine = line.rstrip('\n ').split(' ')
        nLine = [float(i) for i in teLine]
        nLine = np.array(nLine)
#         nLine = np.insert(nLine, 0, 1, axis=0)
        wholeData.append(nLine)
    file.close()
    wholeData = np.array(wholeData)
#     print(wholeData)
    return wholeData

count = 0

for filename in train:
    val = fileHandle(inputPath + filename)
    print(val.shape)
    trainData.append(val)
    
for filename in test:
    val = fileHandle(inputPath + filename)
    testData.append(val)

testData = np.array(testData)
trainData = np.array(trainData)

print(testData.shape)
print(trainData.shape)

Reading:  ../../dataset/prep/2b/bovw/train_bayou.bovw
(50, 32)
Reading:  ../../dataset/prep/2b/bovw/train_chalet.bovw
(50, 32)
Reading:  ../../dataset/prep/2b/bovw/train_creek.bovw
(50, 32)
Reading:  ../../dataset/prep/2b/bovw/test_bayou.bovw
Reading:  ../../dataset/prep/2b/bovw/test_chalet.bovw
Reading:  ../../dataset/prep/2b/bovw/test_creek.bovw
(3, 50, 32)
(3, 50, 32)


In [14]:
def plot(mainList, obj, name):
#     mainList = trainDataOrig
    numFeature = 2
    nClass = 3
    
    
    minMax = np.zeros((numFeature,2))
    colors = ['#136906', '#fcbdfc', '#e5ff00', '#ff0000', '#3700ff', '#000000']

    # Resolution affects the time required to process.
    res = 200

    count = 0
    for i in range(nClass):
        for j in range(numFeature):
            if count == 0:
                minMax[j, 0] = np.ceil(np.amin(mainList[i][:, j]))
                minMax[j, 1] = np.ceil(np.amax(mainList[i][:, j]))
            else:
                minMax[j, 0] = min(minMax[j,0], np.ceil(np.amin(mainList[i][:, j])))
                minMax[j, 1] = max(minMax[j,1], np.ceil(np.amax(mainList[i][:, j])))
            count = 1

    dataRange = np.zeros((numFeature))
    for i in range(numFeature):
        dataRange[i] = 0.1*(minMax[i, 1] - minMax[i, 0])

    x = np.linspace(minMax[0, 0] - dataRange[0], minMax[0, 1] + dataRange[0], res)
    y = np.linspace(minMax[1,0] - dataRange[1], minMax[1,1] + dataRange[1], res)

    tellClassNum = []#np.zeros((np.size(x,0)*np.size(y,0), nClass))

    count = 0
    points = []
    for j in y:
        for i in x:
            dataPt = np.array([i, j])
            points.append(dataPt)
            
    decisions = obj.predict(points)

    for idx in range(1):
        fig1 = plt.figure(1)
        ax = fig1.gca()

        plotClass = []
        for cl in range(nClass):
            temp1 = []
            for fe in range(numFeature):
                temp1.append([])
            plotClass.append(temp1)

        class_colours = []
        classes = []
        count = 0
        for j in y:
            for i in x:
                classNum = decisions[count]
                plotClass[int(classNum)][0].append(i)
                plotClass[int(classNum)][1].append(j)
                count += 1

        plotname = name + ".png"
        
        ax.plot(plotClass[0][0], plotClass[0][1], c = colors[0],marker=".",  linestyle="None", label="Class 1 Prediction")
        ax.plot(plotClass[1][0], plotClass[1][1], c = colors[1],marker=".",  linestyle="None", label="Class 2 Prediction")
        ax.plot(plotClass[2][0], plotClass[2][1], c=colors[2],marker=".",  linestyle="None", label="Class 3 Prediction")
        ax.plot(mainList[0][:,0],mainList[0][:,1], c = colors[3], marker=".",  linestyle="None", label="Class 1 Data", ms='2')
        ax.plot(mainList[1][:,0],mainList[1][:,1], c = colors[4], marker=".",  linestyle="None", label="Class 2 Data", ms='2')
        ax.plot(mainList[2][:,0],mainList[2][:,1], c = colors[5], marker=".",  linestyle="None", label="Class 3 Data", ms='2')
        class_colours = [colors[3], colors[4], colors[5], colors[0], colors[1], colors[2]]
        classes = ["Class 1 Data", "Class 2 Data", "Class 3 Data", "Class 1 Prediction", "Class 2 Prediction", "Class 3 Prediction"]
        plotname = "123" + plotname
            
        ax.patch.set_visible(False)
        plt.title("Class 1 - Class 2 - Class 3")
        plotname = outputPath + plotname
        plt.xlabel('X')
        plt.ylabel('Y')
        recs = []
        for i in range(0,len(class_colours)):
            recs.append(mpatches.Rectangle((0,0),1,1,fc=class_colours[i]))
        plt.legend(recs, classes, loc='upper right')
        plt.savefig(plotname)
        print(plotname)
#         plt.show()
    
def ana(confusionMatrix):
    precision = []
    for i in range(3):
        s = 0
        for j in range(3):
            s += confusionMatrix[i][j]
        p = confusionMatrix[i][i]/s
        precision.append(p)

    recall = []
    for i in range(3):
        s = 0
        for j in range(3):
            s += confusionMatrix[j][i]
        r = confusionMatrix[i][i]/s
        recall.append(r)

    fMeasure = []

    for i in range(3):
        fMeasure.append((2*precision[i]*recall[i])/(precision[i]+recall[i]))

    s = 0
    correct = 0
    for i in range(3):
        correct += confusionMatrix[i][i]
        for j in range(3):
            s += confusionMatrix[i][j]
    accuracy = correct/s

    print("precision: \t",precision)
    print("recall: \t", recall)
    print("F Measure: \t", fMeasure)
    print("Accuracy: ", accuracy)
    
    print("Average precision: ", (precision[0]+precision[1]+precision[2])/3)
    print("Average recall: ", (recall[0]+recall[1]+recall[2])/3)
    print("Average fMeasure: ", (fMeasure[0]+fMeasure[1]+fMeasure[2])/3)

In [15]:
mainList = trainData
nClass = 3

X=mainList[0]
for i in range(1,nClass):
    X=np.concatenate((X,mainList[i]),axis=0)
# print(X.shape)
Y=np.zeros(len(mainList[0]),int)
for i in range(1,nClass):
    b=(np.zeros(len(mainList[i]),int)+i)
    Y=np.concatenate((Y,b))

In [16]:
clf = svm.SVC(kernel='linear')
clf.fit(X,Y)
# a=np.array([[1,1.2],[2,4]],float)
confusionMatrix = np.zeros((nClass,nClass),int)

for i in range(nClass):
    pr=clf.predict(testData[i])
    for j in pr:
        confusionMatrix[i][j]+=1

print(confusionMatrix)

# plot(trainData, clf, "_1a_svm_linear")

ana(confusionMatrix)

[[21 13 16]
 [19 21 10]
 [20  8 22]]
precision: 	 [0.42, 0.42, 0.44]
recall: 	 [0.35, 0.5, 0.4583333333333333]
F Measure: 	 [0.3818181818181818, 0.4565217391304348, 0.4489795918367347]
Accuracy:  0.4266666666666667
Average precision:  0.4266666666666667
Average recall:  0.4361111111111111
Average fMeasure:  0.4291065042617838


In [17]:
clf2 = svm.SVC(kernel='poly',gamma='auto',coef0=2,degree=2)
clf2.fit(X,Y)
# a=np.array([[1,1.2],[2,4]],float)
confusionMatrix = np.zeros((nClass,nClass),int)

for i in range(nClass):
    pr=clf2.predict(testData[i])
    for j in pr:
        confusionMatrix[i][j]+=1

print(confusionMatrix)

# plot(trainData, clf2, "_1a_svm_poly")

ana(confusionMatrix)

[[30 10 10]
 [20 21  9]
 [16 18 16]]
precision: 	 [0.6, 0.42, 0.32]
recall: 	 [0.45454545454545453, 0.42857142857142855, 0.45714285714285713]
F Measure: 	 [0.5172413793103449, 0.42424242424242425, 0.37647058823529417]
Accuracy:  0.44666666666666666
Average precision:  0.4466666666666667
Average recall:  0.44675324675324674
Average fMeasure:  0.43931813059602104


In [19]:
clf2 = svm.SVC(kernel='poly',gamma='auto',coef0=3,degree=3)
clf2.fit(X,Y)
# a=np.array([[1,1.2],[2,4]],float)
confusionMatrix = np.zeros((nClass,nClass),int)

for i in range(nClass):
    pr=clf2.predict(testData[i])
    for j in pr:
        confusionMatrix[i][j]+=1

print(confusionMatrix)

# plot(trainData, clf2, "_1a_svm_poly")

ana(confusionMatrix)

[[31 11  8]
 [19 24  7]
 [14 18 18]]
precision: 	 [0.62, 0.48, 0.36]
recall: 	 [0.484375, 0.4528301886792453, 0.5454545454545454]
F Measure: 	 [0.5438596491228069, 0.46601941747572817, 0.43373493975903615]
Accuracy:  0.4866666666666667
Average precision:  0.48666666666666664
Average recall:  0.4942199113779302
Average fMeasure:  0.4812046687858571


In [18]:
clf2 = svm.SVC(kernel='rbf',gamma='auto')
clf2.fit(X,Y)
# a=np.array([[1,1.2],[2,4]],float)
confusionMatrix = np.zeros((nClass,nClass),int)

for i in range(nClass):
    pr=clf2.predict(testData[i])
    for j in pr:
        confusionMatrix[i][j]+=1

print(confusionMatrix)

# plot(trainData, clf2, "_1a_svm_rbf")

ana(confusionMatrix)

[[46  4  0]
 [42  7  1]
 [46  4  0]]
precision: 	 [0.92, 0.14, 0.0]
recall: 	 [0.34328358208955223, 0.4666666666666667, 0.0]
F Measure: 	 [0.5, 0.2153846153846154, nan]
Accuracy:  0.35333333333333333
Average precision:  0.35333333333333333
Average recall:  0.26998341625207295
Average fMeasure:  nan


