In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import pickle

In [2]:
# Load the dataset from 'parkinsons.csv' (path is relative to the working directory).
parkinsons_data = pd.read_csv(filepath_or_buffer='parkinsons.csv')

In [3]:
# Target vector: the 'status' column.
Y = parkinsons_data['status']
# Feature matrix: every remaining column except 'name' and the target itself.
X = parkinsons_data.drop(columns=['name', 'status'])

In [4]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

In [6]:
# Linear-kernel SVM. probability=True enables predict_proba; scikit-learn fits
# those probability estimates using shuffled data, so random_state is pinned to
# keep the reported probabilities reproducible between runs.
model = svm.SVC(kernel='linear', probability=True, random_state=2)
model.fit(X_train, Y_train)

In [7]:
# Predict hard labels for the held-out rows and compare them with the true labels.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)
print('Accuracy score of test data:', test_data_accuracy)

Accuracy score of test data: 0.8461538461538461


In [9]:
def classify_disease_level(probability, low_threshold=0.4, high_threshold=0.7):
    """Map a predicted probability to a coarse risk label.

    Parameters
    ----------
    probability : float
        Predicted probability to classify.
    low_threshold : float, default 0.4
        Probabilities strictly below this value are labelled "Low Risk".
    high_threshold : float, default 0.7
        Probabilities at or above this value are labelled "High Risk";
        values in ``[low_threshold, high_threshold)`` are "Moderate Risk".

    Returns
    -------
    str
        One of "Low Risk", "Moderate Risk" or "High Risk".

    Raises
    ------
    ValueError
        If ``low_threshold`` is greater than ``high_threshold``.

    Notes
    -----
    A value that compares False against both thresholds (e.g. NaN) falls
    through to "High Risk", matching the behaviour of the fixed-threshold
    version of this function.
    """
    if low_threshold > high_threshold:
        raise ValueError("low_threshold must not exceed high_threshold")
    if probability < low_threshold:
        return "Low Risk"
    # Reaching here already implies probability >= low_threshold for ordinary
    # numbers, so only the upper bound needs checking.
    if probability < high_threshold:
        return "Moderate Risk"
    return "High Risk"

In [10]:
# Column 1 of predict_proba holds the probabilities for class '1'.
class_probabilities = model.predict_proba(X_test)
test_probabilities = class_probabilities[:, 1]
# Translate each probability into its risk label.
test_disease_levels = list(map(classify_disease_level, test_probabilities))

In [15]:
# One row per test sample: its predicted probability next to the derived risk label.
results_df = pd.DataFrame(
    data={
        "Predicted Probability": test_probabilities,
        "Disease Level": test_disease_levels,
    }
)

In [16]:
# Emit the heading and the table in a single call; sep="\n" puts the frame on
# the line after the heading, exactly as two consecutive print calls would.
print("\nDisease Levels for Test Set:", results_df, sep="\n")


Disease Levels for Test Set:
    Predicted Probability  Disease Level
0                0.952388      High Risk
1                0.942123      High Risk
2                0.991985      High Risk
3                0.814209      High Risk
4                0.260759       Low Risk
5                0.531904  Moderate Risk
6                0.365097       Low Risk
7                0.982119      High Risk
8                0.955650      High Risk
9                0.993466      High Risk
10               0.342878       Low Risk
11               0.345810       Low Risk
12               0.590622  Moderate Risk
13               0.996403      High Risk
14               0.438302  Moderate Risk
15               0.690493  Moderate Risk
16               0.583451  Moderate Risk
17               0.865026      High Risk
18               0.824724      High Risk
19               0.946360      High Risk
20               0.609693  Moderate Risk
21               0.613617  Moderate Risk
22               0.760178  

In [18]:
# Persist the trained classifier to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling raises.
# NOTE(review): the model is fitted on standardized features, but the fitted
# `scaler` is not saved here; anything loading this file needs the same
# scaling applied to its inputs. Confirm how consumers obtain it.
filename = 'parkinson_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)