Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

Data Pre-processing

In [None]:
# Read the Parkinson's CSV (path is relative to the notebook's working directory)
parkinson_df = pd.read_csv('data/parkinsons.csv')

# Show the first five rows as the cell output
parkinson_df.head(n=5)

In [None]:
# Dataset dimensions as a (rows, columns) tuple
print(parkinson_df.shape)

# Number of missing entries in each column (isna is the same check as isnull)
parkinson_df.isna().sum()

In [None]:
# Summary statistics for the dataset's columns, shown as the cell output
# (with default arguments pandas summarises the numeric columns only).
parkinson_df.describe()

In [None]:
# Distribution of the target variable: number of rows for each distinct
# value in the 'status' column.
parkinson_df['status'].value_counts()

###### 0 --> Healthy  
###### 1 --> Positive for Parkinson's

In [None]:
# Features: every column except the 'name' identifier and the 'status' label.
# `columns=` already selects the axis, so no separate `axis` argument is needed.
x = parkinson_df.drop(columns=['name', 'status'])

# Target: the 'status' label column
y = parkinson_df['status']

Splitting data into training and test data

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): the split is not stratified — consider stratify=y if the
# 'status' classes turn out to be imbalanced.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Shapes of the full feature set, the training part and the test part, in that order
print(*(frame.shape for frame in (x, x_train, x_test)))

Data Standardization

In [None]:
# Create the (not yet fitted) standardizer used to scale the features.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training features only, so the test split does not
# influence the scaling statistics.
scaler.fit(x_train)

In [None]:
# Scale both splits with the scaler fitted on the training features.
# NOTE: this rebinds x_train / x_test to their scaled versions, so running
# this cell a second time would scale already-scaled data — re-run the
# train/test split cell first if this cell needs to be executed again.
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

Model training and Evaluation

###### Support Vector Machine Model

In [None]:
# Support vector classifier with a linear kernel (not yet fitted).
model = svm.SVC(kernel='linear')

In [None]:
#training the model
model.fit(x_train,y_train)

###### Model Evaluation

In [None]:
#accuracy score on training data
x_train_prediction = model.predict(x_train)
train_data_accuracy = accuracy_score(y_train, x_train_prediction)
print('the training data accuracy is : ', train_data_accuracy) 

In [None]:
#accuracy scrore on test data
x_test_prediction = model.predict(x_test)
test_data_accuracy= accuracy_score(y_test, x_test_prediction)
print('The test data accuracy is : ', test_data_accuracy)

Building a predictive system

In [None]:
# One record to classify: 22 feature values, which must be in the same column
# order as the features the scaler and model were fitted on.
input_data = (120.267,137.244,114.82,0.00333,0.00003,0.00155,0.00202,0.00466,0.01608,0.14,0.00779,0.00937,0.01351,0.02337,0.00607,24.886,0.59604,0.764112,-5.634322,0.257682,1.854785,0.211756)

# Convert to a NumPy array and reshape to a single-row 2-D batch, since the
# scaler and the model expect (n_samples, n_features) input.
input_as_np = np.asarray(input_data)
input_reshaped = input_as_np.reshape(1, -1)

# The scaler was fitted on a DataFrame, so recent scikit-learn versions store
# the column names and warn ("X does not have valid feature names") when
# given a bare array. Re-attach the names when the scaler recorded them;
# otherwise fall back to the plain array.
feature_names = getattr(scaler, 'feature_names_in_', None)
if feature_names is not None:
    input_features = pd.DataFrame(input_reshaped, columns=feature_names)
else:
    input_features = input_reshaped

# Standardize the record with the statistics learned from the training data
std_data = scaler.transform(input_features)

prediction = model.predict(std_data)
print(prediction)

# predict() returns one label per input row; read the single label explicitly
# instead of truth-testing the whole array.
if prediction[0] == 0:
    print('The patient is healthy')
else:
    print("The patient has Parkinson's disease")