# In [None]:
# Importing the Dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm, tree, ensemble, neighbors, naive_bayes
from sklearn.metrics import accuracy_score

# Data Collection & Analysis
#
# NOTE: the bare expressions in this section are only rendered when each one
# is the last statement of a notebook cell; wrap them in print() if this file
# is executed as a plain script.

# Load the data from a CSV file into a pandas DataFrame
parkinsons_data = pd.read_csv('/content/parkinsons.csv')

# Preview the first 5 rows of the dataframe
parkinsons_data.head()

# (rows, columns) of the dataframe
parkinsons_data.shape

# Per-column summary of the dataframe (info() prints its report itself)
parkinsons_data.info()

# Number of missing values in each column
parkinsons_data.isnull().sum()

# Descriptive statistics for the data
parkinsons_data.describe()

# How many rows fall under each value of the target column
parkinsons_data['status'].value_counts()

# Mean of every numeric column, grouped by the target column.
# numeric_only=True is required here: the frame still carries the 'name'
# column (dropped later as a non-feature, presumably a string identifier),
# and since pandas 2.0 mean() no longer silently skips non-numeric columns
# but raises a TypeError instead.
parkinsons_data.groupby('status').mean(numeric_only=True)

# Data Pre-Processing

# Feature matrix: every column except 'name' and 'status'
X = parkinsons_data.drop(columns=['name', 'status'])
# Target vector: the 'status' column
Y = parkinsons_data['status']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# repeatable across runs
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2
)

# Standardize the features. The scaler is fitted on the training rows only,
# and that same fitted transform is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Model Training
#
# Every classifier below is fitted on the same training split
# (X_train, Y_train).

# Support Vector Machine with a linear kernel
model_svm = svm.SVC(kernel='linear')
model_svm.fit(X_train, Y_train)

# Decision Tree Classifier
# The seed is pinned (same value as the data split and the random forest):
# scikit-learn permutes the candidate features at every split, so an
# unseeded tree can differ from run to run and make the reported
# accuracies non-reproducible.
model_tree = tree.DecisionTreeClassifier(random_state=2)
model_tree.fit(X_train, Y_train)

# Random Forest Classifier: 100 trees, seeded for reproducibility
model_rf = ensemble.RandomForestClassifier(n_estimators=100, random_state=2)
model_rf.fit(X_train, Y_train)

# K-Nearest Neighbors (K-NN) Classifier using the 5 nearest neighbours
model_knn = neighbors.KNeighborsClassifier(n_neighbors=5)
model_knn.fit(X_train, Y_train)

# Gaussian Naive Bayes Classifier
model_nb = naive_bayes.GaussianNB()
model_nb.fit(X_train, Y_train)

# Model Evaluation

# Accuracy Score
#
# For each fitted model, predict on both the training and the test split and
# score the predictions against the true labels.
# accuracy_score is documented as accuracy_score(y_true, y_pred), so the true
# labels are passed first. Accuracy itself is symmetric in its arguments, but
# keeping the documented order prevents silent errors if these calls are
# later copied for an asymmetric metric such as precision or recall.

# Support Vector Machine Model
svm_train_predictions = model_svm.predict(X_train)
svm_test_predictions = model_svm.predict(X_test)
svm_train_accuracy = accuracy_score(Y_train, svm_train_predictions)
svm_test_accuracy = accuracy_score(Y_test, svm_test_predictions)

# Decision Tree Classifier
tree_train_predictions = model_tree.predict(X_train)
tree_test_predictions = model_tree.predict(X_test)
tree_train_accuracy = accuracy_score(Y_train, tree_train_predictions)
tree_test_accuracy = accuracy_score(Y_test, tree_test_predictions)

# Random Forest Classifier
rf_train_predictions = model_rf.predict(X_train)
rf_test_predictions = model_rf.predict(X_test)
rf_train_accuracy = accuracy_score(Y_train, rf_train_predictions)
rf_test_accuracy = accuracy_score(Y_test, rf_test_predictions)

# K-Nearest Neighbors (K-NN) Classifier
knn_train_predictions = model_knn.predict(X_train)
knn_test_predictions = model_knn.predict(X_test)
knn_train_accuracy = accuracy_score(Y_train, knn_train_predictions)
knn_test_accuracy = accuracy_score(Y_test, knn_test_predictions)

# Naive Bayes Classifier
nb_train_predictions = model_nb.predict(X_train)
nb_test_predictions = model_nb.predict(X_test)
nb_train_accuracy = accuracy_score(Y_train, nb_train_predictions)
nb_test_accuracy = accuracy_score(Y_test, nb_test_predictions)

# Display Model Performance
#
# One (label, value) row per reported figure, printed in the order
# training-then-testing for each model.
for metric_label, metric_value in (
    ("Support Vector Machine Accuracy (Training):", svm_train_accuracy),
    ("Support Vector Machine Accuracy (Testing):", svm_test_accuracy),
    ("Decision Tree Accuracy (Training):", tree_train_accuracy),
    ("Decision Tree Accuracy (Testing):", tree_test_accuracy),
    ("Random Forest Accuracy (Training):", rf_train_accuracy),
    ("Random Forest Accuracy (Testing):", rf_test_accuracy),
    ("K-Nearest Neighbors Accuracy (Training):", knn_train_accuracy),
    ("K-Nearest Neighbors Accuracy (Testing):", knn_test_accuracy),
    ("Naive Bayes Accuracy (Training):", nb_train_accuracy),
    ("Naive Bayes Accuracy (Testing):", nb_test_accuracy),
):
    print(metric_label, metric_value)
