# **Detecting Parkinson's Disease**

***By-Anand Sharma*** 

## **Importing libraries/modules**

In [31]:
#Importing libraries/modules

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

## **Read the data**

In [2]:
#Read the data

df=pd.read_csv('https://raw.githubusercontent.com/chaitanyabaranwal/ParkinsonAnalysis/master/parkinsons.csv')


## **Print the data**

In [10]:
#Printing the 1st 5 rows of the data
print(df.shape)
df.head()

(195, 24)


Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,MDVP:APQ,Shimmer:DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE,status
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,0.426,0.02182,0.0313,0.02971,0.06545,0.02211,21.033,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654,1
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,0.626,0.03134,0.04518,0.04368,0.09403,0.01929,19.085,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674,1
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,0.482,0.02757,0.03858,0.0359,0.0827,0.01309,20.651,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634,1
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,0.517,0.02924,0.04005,0.03772,0.08771,0.01353,20.644,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975,1
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.584,0.0349,0.04825,0.04465,0.1047,0.01767,19.649,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335,1


## **Extract features and labes from data set.**

In [24]:
#Extract features and labes from data set.

features=df.loc[:,df.columns!='status'].values[:,1:]
labels=df.loc[:,'status'].values[:]#or use just .values for a column

## **Displaying extracted features and labels.**

In [27]:
#Displaying extracted features and labels.

print(features.shape)
print(labels.shape)

display(features)
labels[:]

(195, 22)
(195,)


array([[119.992, 157.30200000000002, 74.997, ..., 0.266482,
        2.3014419999999998, 0.284654],
       [122.4, 148.65, 113.819, ..., 0.33559, 2.486855, 0.368674],
       [116.682, 131.111, 111.555, ..., 0.311173, 2.342259,
        0.33263400000000004],
       ...,
       [174.688, 240.005, 74.28699999999999, ..., 0.158453,
        2.6797720000000003, 0.13172799999999998],
       [198.764, 396.961, 74.904, ..., 0.207454, 2.138608, 0.123306],
       [214.28900000000002, 260.277, 77.973, ..., 0.190667, 2.555477,
        0.148569]], dtype=object)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## **Count of 0/1 in labels.**

In [30]:
#Count of 0/1 in labels
print('No. of 1s :{}'.format(labels[labels==1].shape[0]))
print('No. of 0s :{}'.format(labels[labels==0].shape[0]))

No. of 1s :147
No. of 0s :48


## **Scale features b/w -1 and 1.**

In [32]:
#Scale features b/w -1 and 1

scaler=MinMaxScaler((-1,1))
x=scaler.fit_transform(features)
y=labels

## **Split the dataset.**

In [36]:
#Split the dataset into train set and test set

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=7)

In [37]:
print(x_train.shape)
print(y_train.shape)

(156, 22)
(156,)


## **Train the model.**

In [38]:
#Train the model

model=XGBClassifier()
model.fit(x_train,y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

## **Calculate Model Accuracy.**

In [42]:
#Calculate Model Accuracy

y_pred=model.predict(x_test)
print('Model Accuracy : {} %'.format(accuracy_score(y_test,y_pred)*100))

Model Accuracy : 94.87179487179486 %
