In [73]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

In [74]:
# Read the Parkinson's dataset (a CSV-formatted file named 'parkinsons.data'
# in the working directory) into a DataFrame.
parkinsons_data = pd.read_csv('parkinsons.data')

# Preview the first seven rows
parkinsons_data.head(n=7)

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335
5,phon_R01_S01_6,120.552,131.162,113.787,0.00968,8e-05,0.00463,0.0075,0.01388,0.04701,...,0.06985,0.01222,21.378,1,0.415564,0.825069,-4.242867,0.299111,2.18756,0.357775
6,phon_R01_S02_1,120.267,137.244,114.82,0.00333,3e-05,0.00155,0.00202,0.00466,0.01608,...,0.02337,0.00607,24.886,1,0.59604,0.764112,-5.634322,0.257682,1.854785,0.211756


In [75]:
# List the column labels of the loaded DataFrame
parkinsons_data.columns

Index(['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)',
       'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP',
       'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5',
       'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA',
       'spread1', 'spread2', 'D2', 'PPE'],
      dtype='object')

In [76]:
# Number of rows and columns, as a (rows, columns) tuple.
# A notebook cell only displays its *last* expression, so a bare
# `parkinsons_data.shape` placed before another statement is evaluated and
# silently discarded; print it explicitly so the value is actually shown.
print(parkinsons_data.shape)

# Per-column summary: non-null counts and dtypes (info() prints directly)
parkinsons_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              195 non-null    object 
 1   MDVP:Fo(Hz)       195 non-null    float64
 2   MDVP:Fhi(Hz)      195 non-null    float64
 3   MDVP:Flo(Hz)      195 non-null    float64
 4   MDVP:Jitter(%)    195 non-null    float64
 5   MDVP:Jitter(Abs)  195 non-null    float64
 6   MDVP:RAP          195 non-null    float64
 7   MDVP:PPQ          195 non-null    float64
 8   Jitter:DDP        195 non-null    float64
 9   MDVP:Shimmer      195 non-null    float64
 10  MDVP:Shimmer(dB)  195 non-null    float64
 11  Shimmer:APQ3      195 non-null    float64
 12  Shimmer:APQ5      195 non-null    float64
 13  MDVP:APQ          195 non-null    float64
 14  Shimmer:DDA       195 non-null    float64
 15  NHR               195 non-null    float64
 16  HNR               195 non-null    float64
 1

In [77]:
# Count the missing (NaN/None) entries in each column
parkinsons_data.isna().sum()

name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64

In [78]:
# Class balance of the target column: how many rows carry each 'status' label
parkinsons_data.loc[:, 'status'].value_counts()

1    147
0     48
Name: status, dtype: int64

In [79]:
# Feature matrix: every column except the recording identifier ('name')
# and the label ('status').
x = parkinsons_data.drop(columns=['name', 'status'])

# Target vector: the 'status' label column
y = parkinsons_data['status']

In [80]:
# Hold out 20% of the rows as a test set and train on the remaining 80%.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

# Shapes of the full feature matrix, the training part and the test part
print(x.shape, X_train.shape, X_test.shape)

(195, 22) (156, 22) (39, 22)


In [81]:
# Data standardization: bring all features onto a common scale.
# The scaler is fitted on the training split only and then applied,
# unchanged, to both the training and the test split.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Show rows 1 through 4 of the scaled training matrix
print(X_train[1:5])

[[-1.05512719 -0.83337041 -0.9284778  -0.12997764  0.14406765 -0.15236125
  -0.14436412 -0.15344933  0.44321834  0.51889992 -0.06490951  0.09074355
   0.86575291 -0.06521554 -0.15277503 -0.2558165   1.46329474 -0.46857462
   0.42875723  0.3981808  -0.61014073  0.39291782]
 [ 0.02996187 -0.29531068 -1.12211107 -0.55550578 -0.67219944 -0.47338676
  -0.54959672 -0.47446624  0.29594502  0.0409723   0.38677148  0.3503336
   0.12088079  0.38646758 -0.42840806  0.12878192  0.05032775 -0.13964197
  -0.51631148 -0.43937044 -0.62849605 -0.50948408]
 [-0.97953968 -0.77220159 -0.17874128  0.19632421  0.41615668 -0.15236125
   0.37230746 -0.15137155 -0.41931076 -0.37419715 -0.42962087 -0.33396801
  -0.32935302 -0.42930518 -0.46649635  0.2578447   0.31415019  1.83380438
   0.94121474  0.76057089 -0.93717842  1.13851269]
 [ 0.68311164 -0.0343547   1.09197003 -0.74632558 -0.94428847 -0.66662542
  -0.7150667  -0.66562193 -0.8365014  -0.80384926 -0.84763619 -0.77693819
  -0.72330761 -0.84763419 -0.56205

In [82]:
# Training the model: a support vector classifier with a linear kernel
model = svm.SVC(kernel="linear")

# Fit the classifier on the standardized training features and their labels.
# fit() is left as the last expression so the cell displays the estimator.
model.fit(X_train, Y_train)

SVC(kernel='linear')

In [83]:
# Accuracy on the training data: predict the labels of the samples the model
# was fitted on and compare them with the true training labels.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(
    y_true=Y_train,
    y_pred=X_train_prediction,
)

print('Accuracy score of training data : ', training_data_accuracy)

Accuracy score of training data :  0.8846153846153846


In [84]:
# Accuracy on the held-out test data: predict labels for the test samples
# and compare them with the true test labels.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=X_test_prediction,
)

print('Accuracy score of test data : ', test_data_accuracy)

Accuracy score of test data :  0.8717948717948718


In [85]:
# Predictive system: classify one voice recording.
#
# `input_data` holds the feature values of a single recording, in the same
# order as the columns of `x` (all dataset columns except 'name' and 'status').
#
# Alternative sample to try instead of the active one below:
#input_data = (244.99000,272.21000,239.17000,0.00451,0.00002,0.00279,0.00237,0.00837,0.01897,0.18100,0.01084,0.01121,0.01255,0.03253,0.01049,21.52800,0.522812,0.646818,-7.304500,0.171088,2.095237,0.096220)

input_data = (119.992,157.302,74.997,0.00784,0.00007,0.0037,0.00554,0.01109,0.04374,0.426,0.02182,0.0313,0.02971,0.06545,0.02211,21.033,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654)

# Convert the tuple to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row: one sample, all features
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Label the row with the training feature names. The scaler was fitted on a
# DataFrame, and scikit-learn (1.0 and later) emits a feature-name warning when
# such an estimator is later handed an unnamed array. Building the frame also
# fails early if the sample does not have exactly one value per feature.
input_frame = pd.DataFrame(input_data_reshaped, columns=x.columns)

# Standardize the sample with the scaler fitted on the training split
standard_data = scaler.transform(input_frame)

prediction = model.predict(standard_data)

# `prediction` is an array holding one label. Compare that element itself
# rather than relying on the truth value of an array comparison, which only
# works while the array has exactly one entry.
if prediction[0] == 0:
    print("The Person is healthy")
else:
    print("The Person has Parkinsons")

The Person has Parkinsons


In [86]:
# Scratch cell: prints a fixed greeting and does not use any analysis variable
print("Hello") 

Hello
