In [9]:
import pandas as pd
import numpy as np
import sklearn 
from xgboost import XGBClassifier #algorithm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [10]:
# Load the dataset.
# `columns` names the 11 columns that are later selected as model inputs.
columns = [
    'MDVP:Jitter(%)',
    'MDVP:Jitter(Abs)',
    'MDVP:RAP',
    'MDVP:PPQ',
    'Jitter:DDP',
    'MDVP:Shimmer',
    'MDVP:Shimmer(dB)',
    'Shimmer:APQ3',
    'Shimmer:APQ5',
    'Shimmer:DDA',
    'HNR',
]
# Raw string literal: the Windows path is full of backslashes which, in a normal
# string, are parsed as (invalid) escape sequences and raise a warning on recent
# Python versions. The resulting path text is unchanged.
# NOTE(review): absolute, machine-specific location - adjust before running elsewhere.
DATA_PATH = r'D:\STUDIES\FINAL YEAR\FYP\IMPLEMENTATION\ParkinsonsDetection\Audio\Dataset\parkinsons.csv'
dataset = pd.read_csv(DATA_PATH)

In [11]:
# Pull the model inputs and the target out of the frame as NumPy arrays.
np.set_printoptions(suppress=True)  # print plain decimals rather than scientific notation
features = dataset[columns].to_numpy()
label = dataset['status'].to_numpy()
features

array([[ 0.00784,  0.00007,  0.0037 , ...,  0.0313 ,  0.06545, 21.033  ],
       [ 0.00968,  0.00008,  0.00465, ...,  0.04518,  0.09403, 19.085  ],
       [ 0.0105 ,  0.00009,  0.00544, ...,  0.03858,  0.0827 , 20.651  ],
       ...,
       [ 0.0136 ,  0.00008,  0.00624, ...,  0.01365,  0.03804, 17.883  ],
       [ 0.0074 ,  0.00004,  0.0037 , ...,  0.01321,  0.03794, 19.02   ],
       [ 0.00567,  0.00003,  0.00295, ...,  0.01161,  0.03078, 21.209  ]])

In [12]:
# Rescale every feature column into the range [-1, 1].
# NOTE(review): the scaler is fitted on the complete feature matrix, i.e. before the
# train/test split in the following cell, so its min/max also reflect the test rows.
scale = MinMaxScaler(feature_range=(-1, 1))
scale.fit(features)
x_data = scale.transform(features)
y_data = label

In [13]:
# Hold out 20% of the samples for testing and train on the rest.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=7,  # fixed seed so the same split is produced on every run
)

In [14]:
# Fit an XGBoost classifier, constructed without any explicit hyper-parameters,
# on the training split. The fitted estimator is the cell's displayed value.
model = XGBClassifier()
model.fit(X=x_train, y=y_train)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [22]:
# Predict labels for the held-out test features and print them.
test_prediction = model.predict(X=x_test)
print(test_prediction)

[1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1
 0 1]


In [23]:
# Accuracy on the test split, expressed as a percentage.
test_accuracy = 100 * accuracy_score(y_true=y_test, y_pred=test_prediction)
print(test_accuracy)

87.17948717948718


In [24]:
# Predict labels for the training features (the data the model was fitted on) and print them.
train_prediction = model.predict(X=x_train)
print(train_prediction)

[1 1 1 0 1 0 1 1 0 1 1 1 0 1 1 1 1 0 1 0 0 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 0 1 0 0 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 0
 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 0 1 0 1 0 1 0 0 1
 0 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1
 1 0 1 1 1 1 1 0]


In [25]:
# Accuracy on the training split, expressed as a percentage.
train_accuracy = 100 * accuracy_score(y_true=y_train, y_pred=train_prediction)
print(train_accuracy)

99.35897435897436


In [29]:
# Classify a single hand-entered sample with the fitted scaler and model.
# The tuple holds 11 values - presumably in the same order as `columns`; verify
# before changing either.
#
# Alternative sample kept for reference:
# np.set_printoptions(suppress=True)
# input_data = (0.027859873599643325,0.00014763142959421684,0.012507670402502244,0.013804175733725828,0.03752301120750673,0.11691055812372304,1.1567507650966085,0.0474105869762126,0.06901175514861325,0.14223176092863782,13.292052871834889)
input_data = (
    0.00289, 0.00001, 0.00166, 0.00168, 0.00498, 0.01098,
    0.097, 0.00563, 0.0068, 0.01689, 26.775,
)

# Tuple -> 1-D array -> single-row 2-D array (one sample, one column per value).
input_data_array = np.asarray(input_data)
reshape_input_data = input_data_array[np.newaxis, :]

# Apply the already-fitted min-max scaler; it is not re-fitted on this sample.
standardize_input_data = scale.transform(reshape_input_data)
print(standardize_input_data)

# Predict the label for the scaled sample.
prediction = model.predict(standardize_input_data)
print(prediction)

[[-0.92312579 -0.97628458 -0.90558767 -0.91854234 -0.90560283 -0.97370823
  -0.98027938 -0.95839753 -0.97014925 -0.95827449  0.49020564]]
[0]
