In [None]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the Parkinson's CSV into a pandas DataFrame
parkinsons_data = pd.read_csv(filepath_or_buffer='/content/parkinsons.csv')

In [None]:
# Show the first 5 rows of the dataframe
parkinsons_data.head(n=5)


In [None]:
# Dimensions of the dataframe as a (rows, columns) tuple
parkinsons_data.shape

In [None]:
# Concise summary of the dataframe: column names, non-null counts and dtypes
parkinsons_data.info()

In [None]:
# Count the missing values in each column
parkinsons_data.isna().sum()


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
parkinsons_data.describe()

In [None]:
# Distribution of the target variable: number of rows for each distinct 'status' value
parkinsons_data['status'].value_counts()

In [None]:
# Mean of every numeric column, grouped by the target variable.
# numeric_only=True is required because the frame still contains the 'name'
# column at this point (it is only dropped when X is built); on pandas >= 2.0
# averaging a non-numeric column raises TypeError instead of silently skipping it.
parkinsons_data.groupby('status').mean(numeric_only=True)


In [None]:
# Features: every column except 'name' and the 'status' label
X = parkinsons_data.drop(columns=['name', 'status'])
# Target: the 'status' column
Y = parkinsons_data['status']

In [None]:
# Preview the feature matrix with the notebook's rich display rather than
# print()-ing the frame as plain text; the full shape is reported further down.
X.head()


In [None]:
# Preview the first few target labels instead of print()-ing the whole Series
Y.head()


In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)


In [None]:
# Shapes of the full, training and test feature matrices, in that order
print(*(frame.shape for frame in (X, X_train, X_test)))


In [None]:
# Linear-kernel SVM preceded by standardization. SVMs are not scale invariant and
# the features here differ by several orders of magnitude (values around 1e-5 next
# to values around 2e2 in the sample used for prediction below), so unscaled
# fitting lets the large-valued columns dominate. Bundling the scaler and the
# classifier in one pipeline means fit(), predict() and the pickled model all
# apply the same scaling automatically.
model = make_pipeline(StandardScaler(), svm.SVC(kernel='linear'))


In [None]:
# Fit the SVM model on the training split
model.fit(X=X_train, y=Y_train)

In [None]:
# Predict on the training split and compare against the true training labels
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)

In [None]:
# Report the accuracy computed on the training split
print('Accuracy score of training data : ', training_data_accuracy)


In [None]:
# accuracy score on test data (the held-out split, not used for fitting)
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)

In [None]:
# Report the accuracy computed on the held-out test split
print('Accuracy score of test data : ', test_data_accuracy)


In [None]:
# One sample of feature values.
# NOTE(review): assumed to be listed in the same order as the training columns — confirm.
input_data = (197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569)

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array to a single row: (1, n_features)
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# The model was fitted on a DataFrame, so label the sample with the training
# column names before predicting. Passing the bare array makes scikit-learn warn
# that "X does not have valid feature names"; building the frame also fails
# loudly if the number of values does not match the number of features.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X_train.columns)

prediction = model.predict(input_data_frame)
print(prediction)


# Label 0 is reported as "no Parkinson's"; any other label as Parkinson's
if prediction[0] == 0:
  print("The Person does not have Parkinsons Disease")

else:
  print("The Person has Parkinsons")


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Load dataset (replace with actual dataset path or URL).
# The file's own header row is used. Previously the columns were renamed to the
# nine Pima-diabetes names ('Pregnancies' ... 'Outcome'), which do not describe
# this data and turned an arbitrary column into the prediction target.
url = "/content/Project 14 Parkinsons Disease Data.csv"
df = pd.read_csv(url)

# NOTE(review): assumes this CSV has the same schema as parkinsons.csv loaded
# earlier in the notebook (a 'name' column and a binary 'status' label) — confirm.
# A different schema fails here with a KeyError instead of training on garbage.
X = df.drop(columns=['name', 'status']).apply(pd.to_numeric, errors='coerce')  # Convert to numeric
y = pd.to_numeric(df['status'], errors='coerce').astype(int)  # Convert to integer

# Check class distribution
print(y.value_counts())

# Train-test split (80-20), stratified so both splits keep the class ratio.
# Stratification had been dropped to avoid a ValueError; with the real 'status'
# labels each class has enough members for it to work.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features (statistics are learned from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize models
# `use_label_encoder` is omitted: it is deprecated in current XGBoost releases.
models = {
    "K-Nearest Neighbour": KNeighborsClassifier(n_neighbors=5),
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42)
}

# Train and evaluate models.
# The loop variable is named `clf`, not `model`, so it does not overwrite the SVM
# `model` trained earlier, which is the object the pickle cell below saves.
results = {}
for name, clf in models.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # ROC-AUC needs a continuous score; hard 0/1 predictions collapse the curve
    # to a single operating point. Column 1 is the probability of class 1.
    y_score = clf.predict_proba(X_test)[:, 1]
    accuracy = accuracy_score(y_test, y_pred)
    auc_roc = roc_auc_score(y_test, y_score)
    results[name] = accuracy * 100
    print(f"{name} Accuracy: {accuracy * 100:.2f}%")
    print(f"{name} AUC-ROC: {auc_roc:.2f}")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

# Check for Overfitting using Cross-Validation.
# Each estimator is wrapped with its own scaler so every fold is standardized
# from its training part only, matching the hold-out evaluation above.
# cross_val_score fits clones, so the fitted estimators in `models` are untouched.
for name, clf in models.items():
    cv_scores = cross_val_score(make_pipeline(StandardScaler(), clf), X, y, cv=5)
    print(f"{name} Cross-validation Accuracy: {np.mean(cv_scores) * 100:.2f}%")

# Plot Model Performance
plt.figure(figsize=(8,5))
sns.barplot(x=list(results.keys()), y=list(results.values()))
plt.ylabel("Accuracy (%)")
plt.title("Model Comparison")
plt.show()


In [None]:
import pickle


In [None]:
filename = 'parkinsons_model.sav'
# Write through a context manager so the file is flushed and closed even if
# pickling fails; a bare open() inside the call leaves the handle unclosed.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# loading the saved model
# NOTE: unpickling can execute arbitrary code, so only load .sav files that were
# produced by this notebook or another trusted source.
with open('parkinsons_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Print each column label of the current X on its own line
for column in X.columns.tolist():
  print(column)
