In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
from sklearn.model_selection import cross_val_score

In [2]:
data = pd.read_csv("vertebral_column_data/column_2C.dat", sep=" ",header = 0, index_col = None,)

In [3]:
data.head()

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,abnormal
0,63.03,22.55,39.61,40.48,98.67,-0.25,AB
1,39.06,10.06,25.02,29.0,114.41,4.56,AB
2,68.83,22.22,50.09,46.61,105.99,-3.53,AB
3,69.3,24.65,44.31,44.64,101.87,11.21,AB
4,49.71,9.65,28.32,40.06,108.17,7.92,AB


In [4]:
data.tail()

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,abnormal
305,47.9,13.62,36.0,34.29,117.45,-4.25,NO
306,53.94,20.72,29.22,33.22,114.37,-0.42,NO
307,61.45,22.69,46.17,38.75,125.67,-2.71,NO
308,45.25,8.69,41.58,36.56,118.55,0.21,NO
309,33.84,5.07,36.64,28.77,123.95,-0.2,NO


In [5]:
data['abnormal'].replace(['AB','NO'], [1,0], inplace=True)

In [6]:
data.head()

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,abnormal
0,63.03,22.55,39.61,40.48,98.67,-0.25,1
1,39.06,10.06,25.02,29.0,114.41,4.56,1
2,68.83,22.22,50.09,46.61,105.99,-3.53,1
3,69.3,24.65,44.31,44.64,101.87,11.21,1
4,49.71,9.65,28.32,40.06,108.17,7.92,1


In [7]:
data.describe()

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,abnormal
count,310.0,310.0,310.0,310.0,310.0,310.0,310.0
mean,60.496484,17.542903,51.93071,42.953871,117.920548,26.296742,0.677419
std,17.236109,10.00814,18.553766,13.422748,13.317629,37.558883,0.46822
min,26.15,-6.55,14.0,13.37,70.08,-11.06,0.0
25%,46.4325,10.6675,37.0,33.3475,110.71,1.6,0.0
50%,58.69,16.36,49.565,42.405,118.265,11.765,1.0
75%,72.88,22.12,63.0,52.6925,125.4675,41.285,1.0
max,129.83,49.43,125.74,121.43,163.07,418.54,1.0


In [42]:
data= shuffle(data)
X = data.iloc[:308,:6]
Y = data.iloc[:308,6]
tX = data.iloc[309,:6]
tY = data.iloc[309,6]
# build and fit model
reg = LogisticRegression(solver='lbfgs')
reg.fit(X,Y)

print("Coefficients: ",reg.coef_)
print("Intercept: ", reg.intercept_)

# compute predicted values from training set
Y_pred = reg.predict(X)

cm = confusion_matrix(Y, Y_pred)
print("Confusion matrix:\n",cm)

accuracy = (cm[0][0]+cm[1][1])/(cm[0][0]+cm[1][1]+cm[0][1]+cm[1][0])
print("Accuracy calculated from the training set = %.3f" % (accuracy))

print(classification_report(Y, Y_pred, target_names=['normal', 'abnormal']))

Coefficients:  [[-0.01174577  0.08670421 -0.01879453 -0.08456992 -0.10695632  0.16700819]]
Intercept:  [15.19410005]
Confusion matrix:
 [[ 77  22]
 [ 22 187]]
Accuracy calculated from the training set = 0.857
              precision    recall  f1-score   support

      normal       0.78      0.78      0.78        99
    abnormal       0.89      0.89      0.89       209

   micro avg       0.86      0.86      0.86       308
   macro avg       0.84      0.84      0.84       308
weighted avg       0.86      0.86      0.86       308



In [43]:
# cross-validate
# number of folds
k = 10
scores = cross_val_score(estimator=reg,
                        X=X,
                        y=Y,
                        scoring="accuracy",
                        cv=k)
print("Accuracies from %d individual folds:" % k)
print(scores)
print("Accuracy calculated using %d-fold cross validation = %.3f" % (k, scores.mean()))

Accuracies from 10 individual folds:
[0.77419355 0.96774194 0.87096774 0.87096774 0.77419355 0.83870968
 0.80645161 0.90322581 0.77419355 0.86206897]
Accuracy calculated using 10-fold cross validation = 0.844


In [44]:
# retrieve estimated probabilities (from training set)
reg.predict_proba(X)

array([[7.16942255e-03, 9.92830577e-01],
       [8.18766607e-01, 1.81233393e-01],
       [7.50613807e-01, 2.49386193e-01],
       [1.27790527e-03, 9.98722095e-01],
       [2.84885629e-03, 9.97151144e-01],
       [2.33036556e-03, 9.97669634e-01],
       [1.02612767e-01, 8.97387233e-01],
       [8.24415942e-03, 9.91755841e-01],
       [1.01093540e-01, 8.98906460e-01],
       [2.01197950e-01, 7.98802050e-01],
       [6.47402122e-01, 3.52597878e-01],
       [1.86790647e-01, 8.13209353e-01],
       [8.26892553e-05, 9.99917311e-01],
       [4.54131738e-05, 9.99954587e-01],
       [4.43202331e-01, 5.56797669e-01],
       [7.21502047e-01, 2.78497953e-01],
       [6.17664317e-01, 3.82335683e-01],
       [8.75762679e-01, 1.24237321e-01],
       [5.24594306e-01, 4.75405694e-01],
       [2.11810156e-08, 9.99999979e-01],
       [1.37838167e-01, 8.62161833e-01],
       [4.57496815e-01, 5.42503185e-01],
       [2.07273994e-01, 7.92726006e-01],
       [1.62083568e-02, 9.83791643e-01],
       [1.589977

In [45]:
def binaryPrediction(d):
    
    prediction = reg.predict(d)
    probability = reg.predict_proba(d)
    
    
    print('Abnormality :',probability[:,1],'%')
    print('Normality :',probability[:,0],'%')
    
    print('Predicted is: ')
    if prediction == 1:
        print('Abnormal')
        return prediction
    else:
        print('Normal')
        return prediction

In [46]:
print(tX,',',tY)
print(np.array(tX).reshape(1, -1))

pelvic_incidence            48.26
pelvic_tilt                 16.42
lumbar_lordosis_angle       36.33
sacral_slope                31.84
pelvic_radius               94.88
degree_spondylolisthesis    28.34
Name: 209, dtype: float64 , 1
[[48.26 16.42 36.33 31.84 94.88 28.34]]


In [56]:
print(binaryPrediction(np.array(tX).reshape(1, -1)),' actually is:',tY)

Abnormality : [0.99929768] %
Normality : [0.00070232] %
Predicted is: 
Abnormal
[1]  actually is: 1
