In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn import svm

In [2]:
TrainLabelsDf = pd.read_csv('question-2-train-labels.csv', header=None)
TrainFeatureDf = pd.read_csv('question-2-train-features.csv',header = None)
TestLabelsDf = pd.read_csv('question-2-test-labels.csv', header = None)
TestFeatureDf = pd.read_csv('question-2-test-features.csv', header = None)

Xtrain = np.array(TrainFeatureDf)
Xtest = np.array(TestFeatureDf)

In [18]:
Mean = TrainLabelsDf.mean()
Ytrain = TrainLabelsDf > Mean
Ytrain = Ytrain.astype(int)
Ytrain = np.array(Ytrain)
Mean = TestLabelsDf.mean()
Ytest = TestLabelsDf > Mean
Ytest = Ytest.astype(int)
YCV = Ytest
Ytest = np.array(Ytest)

In [10]:
def sigmoid(z):
    a = 1/(1+np.e**(-z))
    a = a.T
    return a

def pi(X,w):
    z = w.T.dot(X.T)
    return np.array(sigmoid(z))

def likelihood(X,y,w):
    summa = 0
    for i in range(len(X)):
        z = np.dot(w.T,X[i])
        summa += (y[i]*(z)-math.log(1 + math.exp(z)))
    return summa

def cost_function(X,y,w):
    #Finding mean error
    num_of_observation = len(y)
    predictions = pi(X, w)
    cost0=[]
    cost1=[]
    for i in range(len(y)):
        if(y[i] == 1 and predictions[i] != 0 ):
            cost1.append(y[i]*math.log(predictions[i]))
            cost0.append(0)
        elif(y[i] == 1 and predictions[i] == 0 ):
            cost0.append(0)
            cost1.append(0)
        elif(y[i] == 0 and predictions[i] != 1):
            cost0.append((1-y[i])*math.log(1-predictions[i]))
            cost1.append(0)
        elif(y[i] == 0 and predictions[i] == 1):
            cost1.append(0)
            cost0.append(0)
            
        
    total_cost = np.array(cost1 + cost0)
    avr_cost = total_cost.sum() / num_of_observation
    return avr_cost

def update(X,y,w,lr):
    w = w.reshape(8,1)
    
    predictions = pi(X,w)
        
    Gradient = np.dot(X.T, (y - predictions ))
    
    Gradient = Gradient / len(X)
    
    Gradient *= lr
    
    w = w + Gradient
    
    return w
def mini_batches(X, y, batch_size): 
    mini_batches = [] 
    data = np.hstack((X, y)) 
    np.random.shuffle(data) 
    n_batches = data.shape[0] // batch_size 

    for i in range(n_batches + 1): 
        batch = data[i * batch_size:(i + 1)*batch_size, :] 
        X_mini = batch[:, :-1] 
        Y_mini = batch[:, -1].reshape((-1, 1)) 
        mini_batches.append((X_mini, Y_mini)) 
    
    if data.shape[0] % batch_size != 0: 
        batch = data[i * batch_size:data.shape[0]] 
        Xnew = batch[:, :-1] 
        Ynew = batch[:, -1].reshape((-1, 1)) 
        mini_batches.append((Xnew, Ynew)) 
    return mini_batches

def train(X, Y, weights, lr, iters):
    costhist = []
    for i in range(iters):
        weights = update(X, Y, weights, lr)
        cost = likelihood(X, Y, weights)
        costhist.append(cost)
        
        if i % 100 == 0:
            print ("iter: " + str(i) + " likelihood: "+str(cost))

    return weights, costhist

def batchtrain(X, Y, weights, lr, iters,batchsize):
    costhist = []
    costhist.append(likelihood(X, Y, weights))
    for i in range(iters):
        minibatches = mini_batches(X,Y,batchsize)
        for m in minibatches:
            xmini, ymini = m
            weights = update(xmini, ymini, weights, lr)
            cost = likelihood(xmini, ymini, weights)
            costhist.append(cost)
        
        if i % 100 == 0:
            print ("iter: " + str(i) + " likelihood: "+str(cost))

    return weights, costhist

BEST TO CHOOSE LR = 0.01 SINCE LIKELIHOOD FUNCTION BEHAVES WELL AND IT GIVES THE MAXIMUM LIKELIHOOD FUNCTION VALUE OF DIFFERENT LEARNING RATES(E.G. 1000TH ITERATION OF 0.0001 IS -9476 AND 0.01 IS -8400

In [56]:
weights= np.zeros(8)

trainedweights, costhist = train(Xtrain,Ytrain,weights,lr = 0.01, iters = 1000)

k = pi(Xtest,trainedweights)
k = k.round()
f = k == Ytest
count = 0
for i in range(len(f)):
    if f[i] == True: 
        count += 1 
accuracy = count / len(Ytest)
print('accuracy = ' + str(accuracy))

iter: 0 likelihood: [-9667.36978899]
iter: 100 likelihood: [-9038.68101705]
iter: 200 likelihood: [-8871.92331225]
iter: 300 likelihood: [-8752.15841195]
iter: 400 likelihood: [-8659.54057001]
iter: 500 likelihood: [-8586.02237404]
iter: 600 likelihood: [-8526.48053635]
iter: 700 likelihood: [-8477.31656788]
iter: 800 likelihood: [-8435.94673764]
iter: 900 likelihood: [-8400.49541878]
accuracy = 0.6589698046181173


FOR CONFUSION MATRIX BELOW, (0,0) IS TN, (1,0) IS FN, (1,1) TP, (0,1) FP


In [57]:
confus = confusion_matrix(Ytest, k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

[[1521  441]
 [ 711  705]]
precision = 0.6151832460732984
recall = 0.4978813559322034
NPV = 0.6814516129032258
FPR = 0.22477064220183487
FDR = 0.08228571428571428


In [58]:
weights= np.zeros(8)
trainedweights, costhist = batchtrain(Xtrain,Ytrain,weights,lr = 0.1, iters = 1000,batchsize=32)
k = pi(Xtest,trainedweights)
k = k.round()
f = k == Ytest
count = 0
for i in range(len(f)):
    if f[i] == True: 
        count += 1 
accuracy = count / len(Ytest)
print('accuracy = ' + str(accuracy))

iter: 0 likelihood: [-18.3582215]
iter: 100 likelihood: [-3.37904498]
iter: 200 likelihood: [-13.4211142]
iter: 300 likelihood: [-13.0051838]
iter: 400 likelihood: [-26.47654886]
iter: 500 likelihood: [-6.23720428]
iter: 600 likelihood: [-23.17760265]
iter: 700 likelihood: [-42.70704818]
iter: 800 likelihood: [-6.90346273]
iter: 900 likelihood: [-8.10285499]
accuracy = 0.7329780935464772


In [59]:
confus = confusion_matrix(Ytest, k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

[[1745  217]
 [ 685  731]]
precision = 0.7710970464135021
recall = 0.5162429378531074
NPV = 0.7181069958847737
FPR = 0.11060142711518858
FDR = 0.06442857142857143


In [33]:
clssvm = svm.SVC(kernel='linear',C=0.1)

In [19]:
TestCVf = TestFeatureDf.drop(labels=[3370,3371,3372,3373,3374,3375,3376,3377])
XtestCV = np.array(TestCVf)
TestCVl = YCV.drop(labels=[3370,3371,3372,3373,3374,3375,3376,3377])
YtestCV = np.array(TestCVl)
XCV = np.concatenate((Xtrain,XtestCV))
YCV = np.concatenate((Ytrain,YtestCV))

folds = []
foldlabels = []
for i in range(10):
    folds.append(XCV[(i*1737):((i+1)*1737)])
    foldlabels.append(YCV[(i*1737):((i+1)*1737)])

BELOW AND ABOVE CELL IS FOR 10 FOLD CV AT LINEAR KERNEL SVM

In [34]:
errors = []
foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8]))

clssvm.fit(foldtrain,foldlabelstrain )
print('accuracy = ' + str(clssvm.score(folds[9],foldlabels[9])))
errors.append(clssvm.score(folds[9],foldlabels[9]))
k = clssvm.predict(folds[9])

confus = confusion_matrix(foldlabels[9], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[8],foldlabels[8]))
k = clssvm.predict(folds[8])
print('accuracy = ' + str(clssvm.score(folds[8],foldlabels[8])))
confus = confusion_matrix(foldlabels[8], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))


foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[6],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[7],foldlabels[7]))
k = clssvm.predict(folds[7])
print('accuracy = ' + str(clssvm.score(folds[7],foldlabels[7])))
confus = confusion_matrix(foldlabels[7], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[6],foldlabels[6]))
k = clssvm.predict(folds[6])
print('accuracy = ' + str(clssvm.score(folds[6],foldlabels[6])))
confus = confusion_matrix(foldlabels[6], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[5],foldlabels[5]))
k = clssvm.predict(folds[5])
print('accuracy = ' + str(clssvm.score(folds[5],foldlabels[5])))
confus = confusion_matrix(foldlabels[5], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[4],foldlabels[4]))
k = clssvm.predict(folds[4])
print('accuracy = ' + str(clssvm.score(folds[4],foldlabels[4])))
confus = confusion_matrix(foldlabels[4], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))


foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[3],foldlabels[3]))

k = clssvm.predict(folds[3])
print('accuracy = ' + str(clssvm.score(folds[3],foldlabels[3])))
confus = confusion_matrix(foldlabels[3], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[2],foldlabels[2]))
print('accuracy = ' + str(clssvm.score(folds[2],foldlabels[2])))
k = clssvm.predict(folds[2])

confus = confusion_matrix(foldlabels[2], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[1],foldlabels[1]))

k = clssvm.predict(folds[1])
print('accuracy = ' + str(clssvm.score(folds[1],foldlabels[1])))
confus = confusion_matrix(foldlabels[1], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[1], folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[1], foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[0],foldlabels[0]))

k = clssvm.predict(folds[0])
print('accuracy = ' + str(clssvm.score(folds[0],foldlabels[0])))
confus = confusion_matrix(foldlabels[0], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

errors = np.array(errors)
meancverror = errors.sum()/10
print('MEAN ERROR = ' + str(meancverror))

  y = column_or_1d(y, warn=True)


accuracy = 0.7294185377086931
[[1058  106]
 [ 364  209]]
precision = 0.6634920634920635
recall = 0.3647469458987784
NPV = 0.7440225035161744
FPR = 0.09106529209621993
FDR = 0.03357142857142857


  y = column_or_1d(y, warn=True)


accuracy = 0.7697179044329303
[[627 197]
 [203 710]]
precision = 0.782800441014333
recall = 0.7776560788608982
NPV = 0.755421686746988
FPR = 0.23907766990291263
FDR = 0.02857142857142857


  y = column_or_1d(y, warn=True)


accuracy = 0.8324697754749568
[[483 153]
 [138 963]]
precision = 0.8629032258064516
recall = 0.8746594005449592
NPV = 0.7777777777777778
FPR = 0.24056603773584906
FDR = 0.020785714285714286


  y = column_or_1d(y, warn=True)


accuracy = 0.7069660333909038
[[676  70]
 [439 552]]
precision = 0.887459807073955
recall = 0.557013118062563
NPV = 0.6062780269058295
FPR = 0.0938337801608579
FDR = 0.03635714285714286


  y = column_or_1d(y, warn=True)


accuracy = 0.7012089810017271
[[1069   57]
 [ 462  149]]
precision = 0.7233009708737864
recall = 0.24386252045826515
NPV = 0.6982364467668191
FPR = 0.050621669626998225
FDR = 0.037071428571428575


  y = column_or_1d(y, warn=True)


accuracy = 0.7374784110535406
[[1088  127]
 [ 329  193]]
precision = 0.603125
recall = 0.36973180076628354
NPV = 0.7678193366266761
FPR = 0.10452674897119342
FDR = 0.03257142857142857


  y = column_or_1d(y, warn=True)


accuracy = 0.7478411053540587
[[731 223]
 [215 568]]
precision = 0.718078381795196
recall = 0.7254150702426565
NPV = 0.7727272727272727
FPR = 0.23375262054507337
FDR = 0.031285714285714285


  y = column_or_1d(y, warn=True)


accuracy = 0.6989061600460564
[[430 488]
 [ 35 784]]
precision = 0.6163522012578616
recall = 0.9572649572649573
NPV = 0.9247311827956989
FPR = 0.5315904139433552
FDR = 0.03735714285714286


  y = column_or_1d(y, warn=True)


accuracy = 0.7599309153713298
[[970 181]
 [236 350]]
precision = 0.6591337099811676
recall = 0.5972696245733788
NPV = 0.8043117744610282
FPR = 0.157254561251086
FDR = 0.029785714285714287


  y = column_or_1d(y, warn=True)


accuracy = 0.7852619458837076
[[1304  303]
 [  70   60]]
precision = 0.1652892561983471
recall = 0.46153846153846156
NPV = 0.9490538573508006
FPR = 0.18855009334163036
FDR = 0.026642857142857142
MEAN ERROR = 0.7469199769717904


In [22]:
#MEAN CROSS VALIDATION ERROR
meancverror

0.7469199769717904

In [24]:
k = clssvm.predict(Xtest)
k

array([1, 1, 1, ..., 0, 0, 0])

In [29]:
clssvm = svm.SVC(kernel='rbf')

BELOW CELL IS FOR RBF KERNEL SVM 10 FOLD CV

In [31]:
errors = []
foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8]))

clssvm.fit(foldtrain,foldlabelstrain )
print('accuracy = ' + str(clssvm.score(folds[9],foldlabels[9])))
errors.append(clssvm.score(folds[9],foldlabels[9]))
k = clssvm.predict(folds[9])

confus = confusion_matrix(foldlabels[9], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[8],foldlabels[8]))
k = clssvm.predict(folds[8])
print('accuracy = ' + str(clssvm.score(folds[8],foldlabels[8])))
confus = confusion_matrix(foldlabels[8], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))


foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[6],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[7],foldlabels[7]))
k = clssvm.predict(folds[7])
print('accuracy = ' + str(clssvm.score(folds[7],foldlabels[7])))
confus = confusion_matrix(foldlabels[7], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[5],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[6],foldlabels[6]))
k = clssvm.predict(folds[6])
print('accuracy = ' + str(clssvm.score(folds[6],foldlabels[6])))
confus = confusion_matrix(foldlabels[6], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[4],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[5],foldlabels[5]))
k = clssvm.predict(folds[5])
print('accuracy = ' + str(clssvm.score(folds[5],foldlabels[5])))
confus = confusion_matrix(foldlabels[5], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[3],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[3],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[4],foldlabels[4]))
k = clssvm.predict(folds[4])
print('accuracy = ' + str(clssvm.score(folds[4],foldlabels[4])))
confus = confusion_matrix(foldlabels[4], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))


foldtrain = np.concatenate((folds[0], folds[1],folds[2],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[2],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[3],foldlabels[3]))

k = clssvm.predict(folds[3])
print('accuracy = ' + str(clssvm.score(folds[3],foldlabels[3])))
confus = confusion_matrix(foldlabels[3], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[1],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[1],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[2],foldlabels[2]))
print('accuracy = ' + str(clssvm.score(folds[2],foldlabels[2])))
k = clssvm.predict(folds[2])

confus = confusion_matrix(foldlabels[2], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[0], folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[0], foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[1],foldlabels[1]))

k = clssvm.predict(folds[1])
print('accuracy = ' + str(clssvm.score(folds[1],foldlabels[1])))
confus = confusion_matrix(foldlabels[1], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

foldtrain = np.concatenate((folds[1], folds[2],folds[3],folds[4],folds[5],folds[6],folds[7],folds[8],folds[9]))
foldlabelstrain = np.concatenate((foldlabels[1], foldlabels[2],foldlabels[3],foldlabels[4],foldlabels[5],foldlabels[6],foldlabels[7],foldlabels[8],foldlabels[9]))

clssvm.fit(foldtrain,foldlabelstrain )
errors.append(clssvm.score(folds[0],foldlabels[0]))

k = clssvm.predict(folds[0])
print('accuracy = ' + str(clssvm.score(folds[0],foldlabels[0])))
confus = confusion_matrix(foldlabels[0], k)
print(confus)
precision = confus[1][1] / (confus[1][1] + confus[0][1])
print('precision = ' + str(precision))
recall = confus[1][1] / (confus[1][1] + confus[1][0])
print('recall = ' + str(recall))
NPR = confus[0][0] / (confus[0][0] + confus[1][0])
print('NPV = ' + str(NPR))
FPR = confus[0][1] / (confus[0][1] + confus[0][0])
print('FPR = ' + str(FPR))
FDR = (confus[0][1] + confus[1][0] )/ (len(Ytrain))
print('FDR = ' + str(FDR))

errors = np.array(errors)
meancverror = errors.sum()/10
print('MEAN ERROR = ' + str(meancverror))

  y = column_or_1d(y, warn=True)


accuracy = 0.8606793321819228
[[1034  130]
 [ 112  461]]
precision = 0.7800338409475466
recall = 0.8045375218150087
NPV = 0.9022687609075044
FPR = 0.11168384879725086
FDR = 0.017285714285714286


  y = column_or_1d(y, warn=True)


accuracy = 0.9107656879677605
[[740  84]
 [ 71 842]]
precision = 0.9092872570194385
recall = 0.9222343921139102
NPV = 0.9124537607891492
FPR = 0.10194174757281553
FDR = 0.01107142857142857


  y = column_or_1d(y, warn=True)


accuracy = 0.8929188255613126
[[610  26]
 [160 941]]
precision = 0.9731127197518097
recall = 0.8546775658492279
NPV = 0.7922077922077922
FPR = 0.040880503144654086
FDR = 0.013285714285714286


  y = column_or_1d(y, warn=True)


accuracy = 0.7748992515831894
[[731  15]
 [376 615]]
precision = 0.9761904761904762
recall = 0.62058526740666
NPV = 0.6603432700993677
FPR = 0.020107238605898123
FDR = 0.027928571428571428


  y = column_or_1d(y, warn=True)


accuracy = 0.6948762233736328
[[1126    0]
 [ 530   81]]
precision = 1.0
recall = 0.132569558101473
NPV = 0.6799516908212561
FPR = 0.0
FDR = 0.03785714285714286


  y = column_or_1d(y, warn=True)


accuracy = 0.8572251007484168
[[1064  151]
 [  97  425]]
precision = 0.7378472222222222
recall = 0.814176245210728
NPV = 0.9164513350559862
FPR = 0.1242798353909465
FDR = 0.017714285714285714


  y = column_or_1d(y, warn=True)


accuracy = 0.8635578583765112
[[746 208]
 [ 29 754]]
precision = 0.7837837837837838
recall = 0.9629629629629629
NPV = 0.9625806451612903
FPR = 0.2180293501048218
FDR = 0.016928571428571428


  y = column_or_1d(y, warn=True)


accuracy = 0.8324697754749568
[[634 284]
 [  7 812]]
precision = 0.7408759124087592
recall = 0.9914529914529915
NPV = 0.9890795631825273
FPR = 0.3093681917211329
FDR = 0.020785714285714286


  y = column_or_1d(y, warn=True)


accuracy = 0.7812320092112838
[[776 375]
 [  5 581]]
precision = 0.6077405857740585
recall = 0.9914675767918089
NPV = 0.9935979513444302
FPR = 0.32580364900086883
FDR = 0.027142857142857142


  y = column_or_1d(y, warn=True)


accuracy = 0.7265400115141047
[[1139  468]
 [   7  123]]
precision = 0.20812182741116753
recall = 0.9461538461538461
NPV = 0.993891797556719
FPR = 0.2912258867454885
FDR = 0.033928571428571426
MEAN ERROR = 0.8195164075993091
