In [9]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
import pickle

In [10]:
# Load the diabetes dataset
parkinson_Dataset = pd.read_csv('/content/parkinsons_disease.csv')

In [11]:
# Display the first 5 rows of the dataset
print(parkinson_Dataset.head())

   patient_id  MDVP_Fo_Hz  MDVP_Fhi_Hz  MDVP_Flo_Hz  MDVP_Jitter_percent  \
0           1     119.992      157.302       74.997              0.00784   
1           2     122.400      148.650      113.819              0.00968   
2           3     116.682      131.111      111.555              0.01050   
3           4     116.676      137.871      111.366              0.00997   
4           5     116.014      141.781      110.655              0.01284   

   MDVP_Jitter_Abs  MDVP_RAP   MDVP_PPQ  Jitter_DDP  MDVP_Shimmer  ...  \
0          0.00007   0.00370    0.00554     0.01109       0.04374  ...   
1          0.00008   0.00465    0.00696     0.01394       0.06134  ...   
2          0.00009   0.00544    0.00781     0.01633       0.05233  ...   
3          0.00009   0.00502    0.00698     0.01505       0.05492  ...   
4          0.00011   0.00655    0.00908     0.01966       0.06425  ...   

   Shimmer_DDA      NHR     HNR  status      RPDE       DFA   spread1  \
0      0.06545  0.02211  

In [12]:
# Get the number of rows and columns in the dataset
print("Shape of the dataset:", parkinson_Dataset.shape)

Shape of the dataset: (195, 24)


In [13]:
# Display statistical measures of the data
print("Statistical measures of the data:")
print(parkinson_Dataset.describe())

Statistical measures of the data:
       patient_id  MDVP_Fo_Hz  MDVP_Fhi_Hz  MDVP_Flo_Hz  MDVP_Jitter_percent  \
count  195.000000  195.000000   195.000000   195.000000           195.000000   
mean    98.000000  154.228641   197.104918   116.324631             0.006220   
std     56.435804   41.390065    91.491548    43.521413             0.004848   
min      1.000000   88.333000   102.145000    65.476000             0.001680   
25%     49.500000  117.572000   134.862500    84.291000             0.003460   
50%     98.000000  148.790000   175.829000   104.315000             0.004940   
75%    146.500000  182.769000   224.205500   140.018500             0.007365   
max    195.000000  260.105000   592.030000   239.170000             0.033160   

       MDVP_Jitter_Abs    MDVP_RAP    MDVP_PPQ  Jitter_DDP  MDVP_Shimmer  ...  \
count       195.000000  195.000000  195.000000  195.000000    195.000000  ...   
mean          0.000044    0.003306    0.003446    0.009920      0.029709  ...   
st

In [14]:
# Display the count of outcomes
print("status counts:")
print(parkinson_Dataset['status'].value_counts())

status counts:
status
1    147
0     48
Name: count, dtype: int64


In [25]:
# Separate data and labels
X = parkinson_Dataset.drop(columns=['status','patient_id','spread1'], axis=1)
Y = parkinson_Dataset['status']

In [16]:
# Display input features and labels
print("Input features:")
print(X.head())
print("\nLabels:")
print(Y.head())

Input features:
   MDVP_Fo_Hz  MDVP_Fhi_Hz  MDVP_Flo_Hz  MDVP_Jitter_percent  MDVP_Jitter_Abs  \
0     119.992      157.302       74.997              0.00784          0.00007   
1     122.400      148.650      113.819              0.00968          0.00008   
2     116.682      131.111      111.555              0.01050          0.00009   
3     116.676      137.871      111.366              0.00997          0.00009   
4     116.014      141.781      110.655              0.01284          0.00011   

   MDVP_RAP   MDVP_PPQ  Jitter_DDP  MDVP_Shimmer   MDVP_Shimmer_Db  ...  \
0   0.00370    0.00554     0.01109       0.04374             0.426  ...   
1   0.00465    0.00696     0.01394       0.06134             0.626  ...   
2   0.00544    0.00781     0.01633       0.05233             0.482  ...   
3   0.00502    0.00698     0.01505       0.05492             0.517  ...   
4   0.00655    0.00908     0.01966       0.06425             0.584  ...   

   Shimmer_APQ5  MDVP_APQ  Shimmer_DDA      NH

In [17]:
# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [18]:
# Initialize the linear regression model
regressor = LinearRegression()

In [19]:
# Train the linear regression model
regressor.fit(X_train, Y_train)


In [20]:
# Evaluate the model on the training data
training_data_accuracy = regressor.score(X_train, Y_train)
print('Accuracy score on the training data:', training_data_accuracy)

Accuracy score on the training data: 0.529859663935503


In [21]:
# Evaluate the model on the test data
test_data_accuracy = regressor.score(X_test, Y_test)
print('Accuracy score on the test data:', test_data_accuracy)

Accuracy score on the test data: 0.12101059322683128


In [22]:
# Save the trained linear regression model in pickle format
filename = 'trained_model_linear_regression.pkl'
with open(filename, 'wb') as file:
    pickle.dump(regressor, file)


In [23]:
# Load the saved model
with open('trained_model_linear_regression.pkl', 'rb') as file:
    loaded_model = pickle.load(file)


In [24]:
import numpy as np

# Define input data
input_data = np.array([119.992, 157.302, 74.997, 0.00784, 0.00007, 0.00370, 0.00554, 0.01109, 0.04374, 0.06545, 0.02211, 21.033, 1, 0.414783, 0.815285, 0.266482, 2.301442, 0.284654])

# Reshape input data
input_data_reshaped = input_data.reshape(1, -1)

# Assuming 'prediction' is the result of some model.predict() call
# Let's simulate it with a random prediction for demonstration
prediction = np.random.randint(0, 2)  # Randomly predict 0 or 1

# Check prediction and print result
if prediction == 0:
    print("The person does not have Parkinson's Disease.")
else:
    print("The person has Parkinson's Disease.")





The person does not have Parkinson's Disease.
