In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
#from sklearn.cross_validation import cross_val_score
from sklearn.model_selection import cross_val_score


In [29]:


# Human-readable names for the seven positional columns of the raw file,
# listed in file order (the last one is the class label).
column_names = [
    'pelvic incidence',
    'pelvic tilt',
    'lumbar lordosis angle',
    'sacral slope',
    'pelvic radius',
    'grade of spondylolisthesis',
    'abnormal',
]

# The file is space-separated and has no header row, so pandas numbers the
# columns 0..6; map each position to its descriptive name.
df = (
    pd.read_csv('column_2C.dat', sep=' ', header=None)
      .rename(columns=dict(enumerate(column_names)))
)

df.head(10)

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,abnormal
0,63.03,22.55,39.61,40.48,98.67,-0.25,AB
1,39.06,10.06,25.02,29.0,114.41,4.56,AB
2,68.83,22.22,50.09,46.61,105.99,-3.53,AB
3,69.3,24.65,44.31,44.64,101.87,11.21,AB
4,49.71,9.65,28.32,40.06,108.17,7.92,AB
5,40.25,13.92,25.12,26.33,130.33,2.23,AB
6,53.43,15.86,37.17,37.57,120.57,5.99,AB
7,45.37,10.76,29.04,34.61,117.27,-10.68,AB
8,43.79,13.53,42.69,30.26,125.0,13.29,AB
9,36.69,5.01,41.95,31.68,84.24,0.66,AB


In [30]:
# Encode the class label numerically: 'AB' (abnormal) -> 1, 'NO' (normal) -> 0.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the selected Series: an in-place call on a
# chained selection may operate on a temporary copy and leave df unchanged.
# astype(int) also fails loudly if any unexpected label is left over.
# Re-running this cell is safe: already-encoded 0/1 values are not touched.
df['abnormal'] = df['abnormal'].replace({'AB': 1, 'NO': 0}).astype(int)
df.head(10)

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,abnormal
0,63.03,22.55,39.61,40.48,98.67,-0.25,1
1,39.06,10.06,25.02,29.0,114.41,4.56,1
2,68.83,22.22,50.09,46.61,105.99,-3.53,1
3,69.3,24.65,44.31,44.64,101.87,11.21,1
4,49.71,9.65,28.32,40.06,108.17,7.92,1
5,40.25,13.92,25.12,26.33,130.33,2.23,1
6,53.43,15.86,37.17,37.57,120.57,5.99,1
7,45.37,10.76,29.04,34.61,117.27,-10.68,1
8,43.79,13.53,42.69,30.26,125.0,13.29,1
9,36.69,5.01,41.95,31.68,84.24,0.66,1


In [31]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric
# column, including the 0/1 'abnormal' label whose mean is the abnormal share.
df.describe()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,abnormal
count,310.0,310.0,310.0,310.0,310.0,310.0,310.0
mean,60.496484,17.542903,51.93071,42.953871,117.920548,26.296742,0.677419
std,17.236109,10.00814,18.553766,13.422748,13.317629,37.558883,0.46822
min,26.15,-6.55,14.0,13.37,70.08,-11.06,0.0
25%,46.4325,10.6675,37.0,33.3475,110.71,1.6,0.0
50%,58.69,16.36,49.565,42.405,118.265,11.765,1.0
75%,72.88,22.12,63.0,52.6925,125.4675,41.285,1.0
max,129.83,49.43,125.74,121.43,163.07,418.54,1.0


In [88]:
# Features are the first six columns; the seventh column is the 0/1 label.
X, Y = df.iloc[:, :6], df.iloc[:, 6]

# Fit a logistic-regression classifier on the complete data set.
reg = LogisticRegression(solver='lbfgs').fit(X, Y)

print("Coefficients: ", reg.coef_)
print("Intercept: ", reg.intercept_)

# Score the model on the very rows it was trained on (no hold-out here).
Y_pred = reg.predict(X)
cm = confusion_matrix(Y, Y_pred)
print("Confusion matrix:\n", cm)

# Correct predictions sit on the diagonal of the 2x2 confusion matrix.
accuracy = cm.trace() / cm.sum()
print("Accuracy calculated from the training set = %.3f" % accuracy)

print(classification_report(Y, Y_pred, target_names=['normal', 'abnormal']))


Coefficients:  [[-0.03205038  0.10757546 -0.01869301 -0.06459006 -0.10677264  0.16808262]]
Intercept:  [15.15571757]
Confusion matrix:
 [[ 78  22]
 [ 22 188]]
Accuracy calculated from the training set = 0.858
              precision    recall  f1-score   support

      normal       0.78      0.78      0.78       100
    abnormal       0.90      0.90      0.90       210

   micro avg       0.86      0.86      0.86       310
   macro avg       0.84      0.84      0.84       310
weighted avg       0.86      0.86      0.86       310



In [35]:
# Estimate out-of-sample accuracy with k-fold cross-validation.
k = 10  # number of folds
scores = cross_val_score(reg, X, Y, scoring="accuracy", cv=k)

# Show the per-fold accuracies first, then their average.
print("Accuracies from %d individual folds:" % k)
print(scores)
print("Accuracy calculated using %d-fold cross validation = %.3f" % (k, np.mean(scores)))

Accuracies from 10 individual folds:
[0.58064516 0.67741935 0.74193548 0.77419355 0.90322581 0.93548387
 0.93548387 0.96774194 0.83870968 0.93548387]
Accuracy calculated using 10-fold cross validation = 0.829


In [87]:
# Per-row class probabilities for the training features X; one column per
# class, in the order given by reg.classes_. Displays the whole array.
reg.predict_proba(X)

array([[1.63954257e-01, 8.36045743e-01],
       [2.32218449e-01, 7.67781551e-01],
       [6.26451321e-01, 3.73548679e-01],
       [5.30480103e-02, 9.46951990e-01],
       [2.20054264e-01, 7.79945736e-01],
       [5.86117646e-01, 4.13882354e-01],
       [4.59791438e-01, 5.40208562e-01],
       [9.03397089e-01, 9.66029109e-02],
       [2.07104920e-01, 7.92895080e-01],
       [5.70672504e-02, 9.42932750e-01],
       [7.12145432e-01, 2.87854568e-01],
       [1.03128214e-01, 8.96871786e-01],
       [1.51594671e-01, 8.48405329e-01],
       [9.82166360e-02, 9.01783364e-01],
       [1.97489847e-01, 8.02510153e-01],
       [4.96215397e-01, 5.03784603e-01],
       [6.50923702e-01, 3.49076298e-01],
       [7.50423774e-01, 2.49576226e-01],
       [4.40030259e-01, 5.59969741e-01],
       [4.97034691e-01, 5.02965309e-01],
       [6.51830836e-01, 3.48169164e-01],
       [5.68412286e-01, 4.31587714e-01],
       [2.81038559e-02, 9.71896144e-01],
       [7.30517849e-01, 2.69482151e-01],
       [1.038538

# Input for prediction

In [73]:
# Ask for the six measurements one at a time, in the same order as the
# feature columns; each answer is kept as the raw string typed by the user.
prompts = (
    'pelvic incidence: ',
    'pelvic tilt: ',
    'lumbar lordosis angle: ',
    'sacral slope: ',
    'pelvic radius: ',
    'grade of spondylolisthesis: ',
)

answers = []
for prompt in prompts:
    print(prompt)
    answers.append(input())

# Expose the answers under the single-letter names used by later cells.
a, b, c, d, e, f = answers


pelvic incidence: 
63.03
pelvic tilt: 
22.55
lumbar lordosis angle: 
39.61
sacral slope: 
40.48
pelvic radius: 
98.67
grade of spondylolisthesis: 
-0.25


In [80]:
# Build the single-row feature matrix from the six answers collected above
# (a..f are the raw strings returned by input()). Previously this cell read
# `diagnose` before any cell defined it, so it failed on a fresh kernel.
# float() raises ValueError if an answer is not a valid number.
# Shape is (1, 6): one patient, six features, as reg.predict expects 2-D input.
diagnose = np.array([[float(value) for value in (a, b, c, d, e, f)]],
                    dtype=np.float64)
print(diagnose)

[[63.03 22.55 39.61 40.48 98.67 -0.25]]


In [103]:
# Classify the entered patient and report how confident the model is.
prediction = reg.predict(diagnose)       # array with one class label (0 or 1)
chance = reg.predict_proba(diagnose)     # one row: [P(class 0), P(class 1)]
print(chance)
print(prediction)

# `prediction` holds a class label, not a probability, so compare the single
# row's label against the abnormal class (1) rather than thresholding the
# whole array. Index the probabilities as scalars so the message does not
# print a bracketed one-element array.
if prediction[0] == 1:
    print('Patient has abnormality with a ', chance[0, 1], ' probability')
else:
    print('Patient is normal with a ', chance[0, 0], ' probability')

[[0.16395426 0.83604574]]
[1]
Patient has abnormality with a  [0.83604574]  probability
