In [62]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

In [63]:
data = pd.read_csv('vertebral_2C.csv')

In [64]:
data.head()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,Class Label
0,63.03,22.55,39.61,40.48,98.67,-0.25,AB
1,39.06,10.06,25.02,29.0,114.41,4.56,AB
2,68.83,22.22,50.09,46.61,105.99,-3.53,AB
3,69.3,24.65,44.31,44.64,101.87,11.21,AB
4,49.71,9.65,28.32,40.06,108.17,7.92,AB


data[0][7] = "Flow1"
data[0][8] = "Flow2"
data[0][10] = "Nitrogen1"
data[0][11] = "Nitogen2"
data[0][13] = "Fre1"
data[0][14] = "Fre2"

In [65]:
def model(X,T):
    a = np.ones(shape=(len(X),1))
    a = pd.DataFrame(a,columns=['bias'])
    Xa = pd.concat([a,X],axis=1)
    Xamatrix = Xa.as_matrix()
    Xpinv = np.linalg.pinv(Xamatrix)
    W = Xpinv.dot(T)
    return W,Xa

In [66]:
def AccuracyTrainData(Xa,W,T):
    trainOutput = Xa.dot(W)
    predtrainOutputbinary = np.ones(trainOutput.shape[0])
    for i in range(len(trainOutput)):
        if trainOutput[i] < 0:
            predtrainOutputbinary[i] = -1
    predtrainOutputbinary
    TP=0
    TN=0
    FP=0
    FN=0
    for i in range(len(predtrainOutputbinary)):
        if (T[i] == 1 and predtrainOutputbinary[i] == 1):
            TP = TP +1
        elif(T[i] == -1 and predtrainOutputbinary[i] == 1):
            FP = FP + 1
        elif (T[i] == -1 and predtrainOutputbinary[i] == -1):
            TN = TN +1
        else:
            FN = FN +1
    print("TP " , TP)
    print("TN " ,TN)
    print("FP ", FP)
    print("FN ", FN)
    accuracybinary = (TP + TN) / (TP + TN + FP + FN)
    print("Accuracy",accuracybinary)
    sensitivity = TP / (TP + FN)
    print("sensitivity",sensitivity)
    specificity = TN / ( TN + FP)
    print("specificity",specificity)
    PPV = TP / (TP + FP)
    print("PPV",PPV)


In [67]:
#calculate accuracy for whole data
wholeData = data.drop(['Class Label'],axis=1)
wholeDataTarget = data['Class Label'].apply(lambda x: 1 if x == 'AB' else -1)
W,wholeDataA = model(wholeData,wholeDataTarget)
AccuracyTrainData(wholeDataA,W,wholeDataTarget)

TP  196
TN  68
FP  32
FN  14
Accuracy 0.8516129032258064
sensitivity 0.9333333333333333
specificity 0.68
PPV 0.8596491228070176


In [68]:
X_train, X_test, y_train, y_test = train_test_split(wholeData, wholeDataTarget, test_size=0.33, random_state=42)

In [69]:
y_train = pd.DataFrame(y_train).reset_index().drop(['index'],axis=1)
y_test = pd.DataFrame(y_test).reset_index().drop(['index'],axis=1)
X_train = X_train.reset_index().drop(['index'],axis=1)
X_test = X_test.reset_index().drop(['index'],axis=1)

In [79]:
type(wholeDataTarget)

pandas.core.series.Series

In [73]:
###W for train data
Wtest,_ = model(X_train,y_train)

In [86]:
atest = np.ones(shape=(len(X_test),1))
atest = pd.DataFrame(atest,columns=['bias'])
Xatest = pd.concat([atest,X_test],axis=1)
AccuracyTrainData(Xatest,W,y_test['Class Label'])

TP  69
TN  21
FP  8
FN  5
Accuracy 0.8737864077669902
sensitivity 0.9324324324324325
specificity 0.7241379310344828
PPV 0.8961038961038961


In [87]:
###start multiclass classification 
datamul = pd.read_csv('vertebral_3C.csv')


In [89]:
Tmulcol = datamul['Class Label']
def changeInputs(x):
    if x == 'DH':
        return 0
    elif x == 'NO':
        return 1
    else:
        return 2
Tmulcol = Tmulcol.apply(lambda x : changeInputs(x))
Tmul = pd.get_dummies(Tmulcol)
for i in Tmul.columns:
    Tmul[i] = Tmul[i].apply(lambda x : -1 if x == 0 else x)

In [94]:
Wmul,Xamul= model(wholeData,Tmul)

In [105]:
def AccuracyMul(X,W,T):
    trainOutputMult = X.dot(W)
    trainOutputMult
    predtrainOutputmult = np.zeros(trainOutputMult.shape[0],dtype=int)
    # predtrainOutputmult[i] = pd.a(trainOutputMult[i:i+1,:])
    predtrainOutputmult= trainOutputMult.idxmax(axis=1)
    predtrainOutputmult.shape
    confMatrix = np.zeros(shape=(3,3),dtype=int)
    for i in range(len(predtrainOutputmult)):
        row = T[i]
        col = predtrainOutputmult[i]
        confMatrix[row,col] = confMatrix[row,col] + 1;
    print("Confusion matrix",confMatrix)
    ppv = np.zeros(shape= (3,1))
    #calculate PPV for each class
    for i in range(3):
        TPi = confMatrix[i][i]
        FPI = confMatrix[:,i:i+1].sum()
        ppv[i] = TPi/(FPI)
    print("PPV max " , ppv.max())
    print(" PPV min " , ppv.min() )
    print(" PPV max class " , np.argmax(ppv))
    print("PPv min class" , np.argmin(ppv) )


In [106]:
##ppv for whole data set
AccuracyMul(Xamul,Wmul,Tmulcol)

Confusion matrix [[ 33  22   5]
 [  8  81  11]
 [  2   8 140]]
PPV max  0.897435897436
 PPV min  0.72972972973
 PPV max class  2
PPv min class 1


In [131]:
#use cross validation 
X_train, X_test, y_train, y_test = train_test_split(wholeData,datamul['Class Label'] , test_size=0.33, random_state=42)

In [132]:
y_train = pd.DataFrame(y_train).reset_index().drop(['index'],axis=1)
y_test = pd.DataFrame(y_test).reset_index().drop(['index'],axis=1)
X_train = X_train.reset_index().drop(['index'],axis=1)
X_test = X_test.reset_index().drop(['index'],axis=1)

In [133]:
def changeInputs(x):
    if x == 'DH':
        return 0
    elif x == 'NO':
        return 1
    else:
        return 2
def reformY(y):
    y = y.apply(lambda x : changeInputs(x))
    y_mul= pd.get_dummies(y)
    for i in y_mul.columns:
        y_mul[i] = y_mul[i].apply(lambda x : -1 if x == 0 else x)
    return y_mul,y
y_train_mul,y_train = reformY(y_train['Class Label'])
y_test_mul,y_test = reformY(y_test['Class Label'])

In [134]:
Wmultrain,_= model(X_train,y_train_mul)

In [136]:
atestmul = np.ones(shape=(len(X_test),1))
atestmul = pd.DataFrame(atestmul,columns=['bias'])
Xatestmul = pd.concat([atestmul,X_test],axis=1)


In [137]:
##ppv for whole data set
AccuracyMul(Xatestmul,Wmultrain,y_test)

Confusion matrix [[ 5 12  3]
 [ 2 22  5]
 [ 0  4 50]]
PPV max  0.862068965517
 PPV min  0.578947368421
 PPV max class  2
PPv min class 1
