**Importing the Libraries:**

In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

**Loading the Dataset:**

In [2]:
# Path of the Parkinson's UPDRS data file inside the runtime's /content folder.
file_path = "/content/parkinsons_updrs.data"

# Read the comma-separated file into a DataFrame; the header row is inferred from the file.
df = pd.read_csv(filepath_or_buffer=file_path, sep=",", header="infer")

**Dataset View:**

In [3]:
# Preview the loaded frame: as the cell's last expression, `df` is rendered by the notebook.
df


Unnamed: 0,subject#,age,sex,test_time,motor_UPDRS,total_UPDRS,Jitter(%),Jitter(Abs),Jitter:RAP,Jitter:PPQ5,...,Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,Shimmer:APQ11,Shimmer:DDA,NHR,HNR,RPDE,DFA,PPE
0,1,72,0,5.6431,28.199,34.398,0.00662,0.000034,0.00401,0.00317,...,0.230,0.01438,0.01309,0.01662,0.04314,0.014290,21.640,0.41888,0.54842,0.16006
1,1,72,0,12.6660,28.447,34.894,0.00300,0.000017,0.00132,0.00150,...,0.179,0.00994,0.01072,0.01689,0.02982,0.011112,27.183,0.43493,0.56477,0.10810
2,1,72,0,19.6810,28.695,35.389,0.00481,0.000025,0.00205,0.00208,...,0.181,0.00734,0.00844,0.01458,0.02202,0.020220,23.047,0.46222,0.54405,0.21014
3,1,72,0,25.6470,28.905,35.810,0.00528,0.000027,0.00191,0.00264,...,0.327,0.01106,0.01265,0.01963,0.03317,0.027837,24.445,0.48730,0.57794,0.33277
4,1,72,0,33.6420,29.187,36.375,0.00335,0.000020,0.00093,0.00130,...,0.176,0.00679,0.00929,0.01819,0.02036,0.011625,26.126,0.47188,0.56122,0.19361
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5870,42,61,0,142.7900,22.485,33.485,0.00406,0.000031,0.00167,0.00168,...,0.160,0.00973,0.01133,0.01549,0.02920,0.025137,22.369,0.64215,0.55314,0.21367
5871,42,61,0,149.8400,21.988,32.988,0.00297,0.000025,0.00119,0.00147,...,0.215,0.01052,0.01277,0.01904,0.03157,0.011927,22.886,0.52598,0.56518,0.12621
5872,42,61,0,156.8200,21.495,32.495,0.00349,0.000025,0.00152,0.00187,...,0.244,0.01371,0.01456,0.01877,0.04112,0.017701,25.065,0.47792,0.57888,0.14157
5873,42,61,0,163.7300,21.007,32.007,0.00281,0.000020,0.00128,0.00151,...,0.131,0.00693,0.00870,0.01307,0.02078,0.007984,24.422,0.56865,0.56327,0.14204


**Dropping the null values:**

In [4]:
# Report how many missing values each column contains.
print(df.isna().sum())

# Keep only the rows that have a value in every column.
df = df.dropna(axis="index", how="any")

subject#         0
age              0
sex              0
test_time        0
motor_UPDRS      0
total_UPDRS      0
Jitter(%)        0
Jitter(Abs)      0
Jitter:RAP       0
Jitter:PPQ5      0
Jitter:DDP       0
Shimmer          0
Shimmer(dB)      0
Shimmer:APQ3     0
Shimmer:APQ5     0
Shimmer:APQ11    0
Shimmer:DDA      0
NHR              0
HNR              0
RPDE             0
DFA              0
PPE              0
dtype: int64


**Removing the Column subject#** since it contains only subject IDs, which are not needed as a model feature

In [5]:
# Drop the subject identifier so it is not used as a model feature.
# errors="ignore" keeps this cell idempotent: re-running it after the column has
# already been removed is a no-op instead of raising KeyError.
df = df.drop(columns=["subject#"], errors="ignore")

**Counting the Male and Female records:** the `sex` column is already encoded as 0's and 1's, so no further encoding is needed

In [6]:
# Show how many rows fall under each value of the `sex` column.
print(df.loc[:, "sex"].value_counts())

sex
0    4008
1    1867
Name: count, dtype: int64


**Getting Feature Shape and Target Shape**

In [7]:
# Regression target: the total UPDRS score.
y = df["total_UPDRS"]

# Features: every column except the two UPDRS scores
# (motor_UPDRS is removed as well, so neither score is available as an input).
X = df.drop(["total_UPDRS", "motor_UPDRS"], axis="columns")

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

Features shape: (5875, 19)
Target shape: (5875,)


**Scaling the dataset using Standard Scaler**

In [40]:
# Standardize every feature column with StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# that follows, so test-set statistics influence the scaling; fit it on the
# training rows only if a strict hold-out evaluation is required.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# fit_transform returns a NumPy array by default, so restore the column names
# AND the original row index. Without index=X.index the new frame would get a
# fresh 0..n-1 index and stop sharing row labels with `y` whenever dropna()
# removed rows earlier.
X = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)


**Splitting the dataset into Train and Test**

In [41]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
# NOTE(review): rows are shuffled individually. If one subject contributes several
# rows, that subject can appear in both the training and the test set — consider a
# group-aware split if per-subject generalization is what should be measured.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training set: {X_train.shape} {y_train.shape}")
print(f"Testing set: {X_test.shape} {y_test.shape}")


Training set: (4700, 19) (4700,)
Testing set: (1175, 19) (1175,)


**Using Random Forest Regressor**

In [18]:
# Fit a 100-tree random forest (seeded for repeatability) on the training split;
# fit() returns the estimator itself, so construction and training are chained.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train.to_numpy().ravel())

# Predict the target for the held-out rows.
y_pred_rf = rf_model.predict(X_test)

In [30]:
# The preceding cell already trained a forest with exactly these settings
# (n_estimators=100, random_state=42) on the same training split, so fitting a
# second one here would only repeat the work and reproduce the same predictions.
# Reuse that model and its predictions under the names used downstream
# (`y_pred` feeds the metrics computation).
regressor = rf_model
y_pred = y_pred_rf

**Calculating Mean Absolute Error, Root Mean Square Error, Coefficient of Determination and a MAPE-based accuracy (100 − MAPE)**

In [38]:
# Evaluate the test-set predictions with standard regression metrics.
MAE = mean_absolute_error(y_test, y_pred)
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
r2_rf = r2_score(y_test, y_pred)

# Mean absolute percentage error. A target of exactly 0 would make the ratio
# divide by zero (inf/NaN result), so such rows are left out of the average;
# when no target is 0 this is the plain MAPE over all rows.
y_true_arr = np.asarray(y_test, dtype=float)
y_pred_arr = np.asarray(y_pred, dtype=float)
nonzero = y_true_arr != 0
if nonzero.any():
    MAPE = np.mean(
        np.abs((y_true_arr[nonzero] - y_pred_arr[nonzero]) / y_true_arr[nonzero])
    ) * 100
else:
    # No usable denominator at all: the percentage error is undefined.
    MAPE = float("nan")

# "Accuracy" here is simply 100 - MAPE (a percentage-error summary), not a
# classification accuracy.
accuracy = 100 - MAPE


print("Random Forest Regressor:")
print(f"MAE: {MAE:.4f}")
print(f"RMSE: {RMSE:.4f}")
print(f"R² Score: {r2_rf:.4f}")
print(f"Accuracy: {accuracy:.2f}%")


Random Forest Regressor:
MAE: 0.7165
RMSE: 1.6046
R² Score: 0.9768
Accuracy: 96.53%
