In [38]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, train_test_split

names = ['pelvic incidence', 'pelvic tilt', 'lumbar lordosis angle', 
         'sacral slope', 'pelvic radius', 'grade of spondylolisthesis','label']
# read data (replace with your own path)
df = pd.read_csv('column_2C.dat', 
                 sep=' ',
                 names=names,
                     header=None, 
                     index_col = None, 
                     na_values = '?')
print(df.shape)
df.head(10)

(310, 7)


Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,label
0,63.03,22.55,39.61,40.48,98.67,-0.25,AB
1,39.06,10.06,25.02,29.0,114.41,4.56,AB
2,68.83,22.22,50.09,46.61,105.99,-3.53,AB
3,69.3,24.65,44.31,44.64,101.87,11.21,AB
4,49.71,9.65,28.32,40.06,108.17,7.92,AB
5,40.25,13.92,25.12,26.33,130.33,2.23,AB
6,53.43,15.86,37.17,37.57,120.57,5.99,AB
7,45.37,10.76,29.04,34.61,117.27,-10.68,AB
8,43.79,13.53,42.69,30.26,125.0,13.29,AB
9,36.69,5.01,41.95,31.68,84.24,0.66,AB


In [28]:
df.describe()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis
count,310.0,310.0,310.0,310.0,310.0,310.0
mean,60.496484,17.542903,51.93071,42.953871,117.920548,26.296742
std,17.236109,10.00814,18.553766,13.422748,13.317629,37.558883
min,26.15,-6.55,14.0,13.37,70.08,-11.06
25%,46.4325,10.6675,37.0,33.3475,110.71,1.6
50%,58.69,16.36,49.565,42.405,118.265,11.765
75%,72.88,22.12,63.0,52.6925,125.4675,41.285
max,129.83,49.43,125.74,121.43,163.07,418.54


In [39]:
#dividing into input and labels
X = df.iloc[:,:6]
Y = df.iloc[:,6]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state=7)

In [40]:
# build and fit model
reg = LogisticRegression()
reg.fit(X_train,Y_train)

#get coeficients
coef = reg.coef_
print("Coefficients: ",coef)

# compute predicted values from training set
Y_pred = reg.predict(X_test)

#compute and print confusion matrix and accuracy
cm = confusion_matrix(Y_test, Y_pred)
print("Confusion matrix:\n",cm)
acc = accuracy_score(Y_test, Y_pred)
print("accuracy: ", acc)

Coefficients:  [[-0.02083919 -0.09282822  0.0297846   0.04813885  0.01257851 -0.1513414 ]]
Confusion matrix:
 [[57  2]
 [11 23]]
accuracy:  0.8602150537634409




In [45]:
# cross-validate
# number of folds
k = 10
scores = cross_val_score(estimator=reg,
                        X=X_test,
                        y=Y_test,
                        scoring="accuracy",
                        cv=k)
print(scores.shape)
print("Accuracy calculated using %d-fold cross validation = %.3f" % (k, scores.mean()))

(10,)
Accuracy calculated using 10-fold cross validation = 0.903


