Importing the Dependencies

In [1]:
import os

import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Data Collection & Analysis

In [2]:
# loading the data from csv file to a Pandas DataFrame
# The file location can be overridden with the ALZHEIMER_CSV environment variable so the
# notebook can run on another machine; when it is unset the original local path is used.
alzheimer_csv_path = os.environ.get('ALZHEIMER_CSV', 'C:/Users/chand/Downloads/alzheimer.csv')
alzheimer_data = pd.read_csv(alzheimer_csv_path)

In [3]:
# show the first 5 rows of the dataframe
alzheimer_data.head(n=5)

Unnamed: 0,name,Age,HighBP,HighChol,Diabetes,Smoker,HvyAlcoholConsump,MMSE,CDR,eTIV,...,GH,MH,MDVP,NHR,HNR,RPDE,DFA,D2,PPE,status
0,phon_R01_S01_1,87,1,1,0,1,0,27,0.0,1987,...,5,18,119.992,0.02211,21.033,0.414783,0.815285,2.301442,0.284654,1
1,phon_R01_S01_2,88,0,2,0,1,0,30,0.0,2004,...,3,0,122.4,0.01929,19.085,0.458359,0.819521,2.486855,0.368674,1
2,phon_R01_S01_3,75,1,3,0,0,0,23,0.5,1678,...,5,30,116.682,0.01309,20.651,0.429895,0.825288,2.342259,0.332634,1
3,phon_R01_S01_4,76,1,4,0,0,0,28,0.5,1738,...,2,0,116.676,0.01353,20.644,0.434969,0.819235,2.405554,0.368975,1
4,phon_R01_S01_5,80,1,5,0,0,0,22,0.5,1698,...,2,3,116.014,0.01767,19.649,0.417356,0.823484,2.33218,0.410335,1


In [4]:
# number of rows and columns in the dataframe, shown as a (rows, columns) tuple
alzheimer_data.shape

(195, 24)

In [5]:
# getting more information about the dataset:
# column names, non-null counts and dtypes (see the output below)
alzheimer_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   name               195 non-null    object 
 1   Age                195 non-null    int64  
 2   HighBP             195 non-null    int64  
 3   HighChol           195 non-null    int64  
 4   Diabetes           195 non-null    int64  
 5   Smoker             195 non-null    int64  
 6   HvyAlcoholConsump  195 non-null    int64  
 7   MMSE               195 non-null    int64  
 8   CDR                195 non-null    float64
 9   eTIV               195 non-null    int64  
 10  nWBV               195 non-null    float64
 11  ASF                195 non-null    float64
 12  MRD                195 non-null    int64  
 13  EDC                195 non-null    int64  
 14  GH                 195 non-null    int64  
 15  MH                 195 non-null    int64  
 16  MDVP               195 non

In [6]:
# count the missing values in each column
alzheimer_data.isna().sum()

name                 0
Age                  0
HighBP               0
HighChol             0
Diabetes             0
Smoker               0
HvyAlcoholConsump    0
MMSE                 0
CDR                  0
eTIV                 0
nWBV                 0
ASF                  0
MRD                  0
EDC                  0
GH                   0
MH                   0
MDVP                 0
NHR                  0
HNR                  0
RPDE                 0
DFA                  0
D2                   0
PPE                  0
status               0
dtype: int64

In [7]:
# getting some statistical measures about the data
# (count, mean, std, min, quartiles and max per column, as shown in the output)
alzheimer_data.describe()

Unnamed: 0,Age,HighBP,HighChol,Diabetes,Smoker,HvyAlcoholConsump,MMSE,CDR,eTIV,nWBV,...,GH,MH,MDVP,NHR,HNR,RPDE,DFA,D2,PPE,status
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,...,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,77.923077,0.589744,98.0,0.466667,0.584615,0.046154,26.974359,0.287179,1485.020513,0.726472,...,2.861538,4.225641,154.228641,0.024847,21.885974,0.498536,0.718099,2.381826,0.206552,0.753846
std,8.2624,0.493146,56.435804,0.838916,0.494057,0.210358,4.032645,0.372495,168.700215,0.037661,...,1.105892,8.507144,41.390065,0.040418,4.425764,0.103942,0.055336,0.382799,0.090119,0.431878
min,60.0,0.0,1.0,0.0,0.0,0.0,4.0,0.0,1143.0,0.646,...,1.0,0.0,88.333,0.00065,8.441,0.25657,0.574282,1.423287,0.044539,0.0
25%,71.0,0.0,49.5,0.0,0.0,0.0,26.0,0.0,1363.5,0.696,...,2.0,0.0,117.572,0.005925,19.198,0.421306,0.674758,2.099125,0.137451,1.0
50%,78.0,1.0,98.0,0.0,1.0,0.0,29.0,0.0,1463.0,0.723,...,3.0,0.0,148.79,0.01166,22.085,0.495954,0.722254,2.361532,0.194052,1.0
75%,84.0,1.0,146.5,0.0,1.0,0.0,30.0,0.5,1565.5,0.7515,...,4.0,3.0,182.769,0.02564,25.0755,0.587562,0.761881,2.636456,0.25298,1.0
max,98.0,1.0,195.0,2.0,1.0,1.0,30.0,2.0,2004.0,0.837,...,5.0,30.0,260.105,0.31482,33.047,0.685151,0.825288,3.671155,0.527367,1.0


In [8]:
# distribution of the target variable: number of rows per 'status' value
status_column = alzheimer_data['status']
status_column.value_counts()

1    147
0     48
Name: status, dtype: int64

1 --> Alzheimer's Positive

0 --> Healthy

In [9]:
# grouping the data based on the target variable and averaging every numeric feature.
# numeric_only=True skips the string column 'name': without it pandas 1.5 emits a
# FutureWarning and pandas 2.x raises a TypeError when it tries to average strings.
alzheimer_data.groupby('status').mean(numeric_only=True)

  alzheimer_data.groupby('status').mean()


Unnamed: 0_level_0,Age,HighBP,HighChol,Diabetes,Smoker,HvyAlcoholConsump,MMSE,CDR,eTIV,nWBV,...,EDC,GH,MH,MDVP,NHR,HNR,RPDE,DFA,D2,PPE
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,78.708333,0.708333,114.5,0.458333,0.645833,0.020833,26.625,0.333333,1496.979167,0.717833,...,14.104167,3.0,4.729167,181.937771,0.011483,24.67875,0.442552,0.695716,2.154491,0.123017
1,77.666667,0.55102,92.612245,0.469388,0.564626,0.054422,27.088435,0.272109,1481.115646,0.729293,...,14.380952,2.816327,4.061224,145.180762,0.029211,20.974048,0.516816,0.725408,2.456058,0.233828


Data Pre-Processing

Separating the features & Target

In [10]:
# features: every column except the identifier ('name') and the label ('status')
# NOTE(review): in the outputs above 'HighChol' runs 1, 2, 3, ... 195 (min 1, max 195,
# mean 98.0), which looks like a row counter rather than a measurement — confirm whether
# it should really be used as a feature.
X = alzheimer_data.drop(columns=['name', 'status'])

# target: the 'status' label column
Y = alzheimer_data['status']

In [11]:
# show the feature matrix ('name' and 'status' have been dropped)
print(X)

     Age  HighBP  HighChol  Diabetes  Smoker  HvyAlcoholConsump  MMSE  CDR  \
0     87       1         1         0       1                  0    27  0.0   
1     88       0         2         0       1                  0    30  0.0   
2     75       1         3         0       0                  0    23  0.5   
3     76       1         4         0       0                  0    28  0.5   
4     80       1         5         0       0                  0    22  0.5   
..   ...     ...       ...       ...     ...                ...   ...  ...   
190   75       1       191         0       1                  0    29  0.0   
191   76       0       192         0       1                  0    30  0.0   
192   75       1       193         0       1                  0    28  0.0   
193   78       1       194         1       0                  0    29  0.0   
194   83       1       195         2       0                  0    28  0.0   

     eTIV   nWBV  ...  EDC  GH  MH     MDVP      NHR     HNR   

In [12]:
# show the target labels taken from the 'status' column
print(Y)

0      1
1      1
2      1
3      1
4      1
      ..
190    0
191    0
192    0
193    0
194    0
Name: status, Length: 195, dtype: int64


Splitting the data into training data & test data

In [13]:
# hold out 20% of the rows for testing; random_state pins the shuffle so the
# same split is produced on every run
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)
# NOTE(review): the classes are imbalanced (147 vs 48 in the value_counts output);
# consider passing stratify=Y — not applied here because it would change the recorded results.

In [14]:
# shapes of the full feature matrix, the training split and the test split
print(*(frame.shape for frame in (X, X_train, X_test)))

(195, 22) (156, 22) (39, 22)


Data Standardization

In [15]:
# scaler used to standardize the features; it is fitted on the training split in the next cell
scaler = StandardScaler()

In [16]:
# fit the scaler on the training features only, so the test split does not influence it
scaler.fit(X_train)

In [17]:
# apply the fitted scaling to both splits (the test split reuses what was learned from training)
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its own
# would scale the already-scaled data a second time.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [18]:
# X_train now holds the array returned by scaler.transform
print(X_train)

[[ 0.04858391  0.82305489  0.43774931 ...  0.24786644 -0.55160318
   0.07769494]
 [-0.07365948 -1.21498579  1.11207251 ... -0.46857462 -0.61014073
   0.39291782]
 [ 0.90428764 -1.21498579 -0.09077428 ... -0.13964197 -0.62849605
  -0.50948408]
 ...
 [ 0.53755747 -1.21498579 -0.49172321 ...  0.80962561 -0.47404629
  -0.2159482 ]
 [-1.17384998  0.82305489 -1.53054543 ... -1.1475227  -0.47272835
   0.28181221]
 [ 1.3932612  -1.21498579  1.25787212 ...  0.1041932   1.23632066
  -0.05829386]]


Model Training

Support Vector Machine Model

In [19]:
# support vector classifier with a linear kernel
model = svm.SVC(kernel='linear')

In [20]:
# training the SVM model on the standardized training features and their labels
model.fit(X_train, Y_train)

Model Evaluation

Accuracy Score

In [21]:
# accuracy score on training data: predict the rows the model was fitted on
# and compare the predictions with the true training labels
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)

In [22]:
# report the fraction of training rows that were classified correctly
print('Accuracy score of training data : ', training_data_accuracy)

Accuracy score of training data :  0.9038461538461539


In [23]:
# accuracy score on test data: predict the held-out rows
# and compare the predictions with the true test labels
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)

In [24]:
# report the fraction of held-out test rows that were classified correctly
print('Accuracy score of test data : ', test_data_accuracy)

Accuracy score of test data :  0.8461538461538461


Building a Predictive System

In [25]:
# one sample to classify; the values must be in the same order as the columns of X
input_data = (81,1,31,2,1,0,27,0.5,1814,0.759,0.968,617,12,4,0,197.076,0.00339,26.775,0.422229,0.741367,1.743867,0.085569)

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array to a single row (1 sample, n features)
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# attach the training column names: the scaler was fitted on a DataFrame, so passing a
# bare array makes scikit-learn warn that X does not have valid feature names.
# This also fails loudly if the sample does not have one value per feature column.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

# standardize the data with the statistics learned from the training split
std_data = scaler.transform(input_data_frame)

prediction = model.predict(std_data)
print(prediction)


# labels follow the convention stated earlier in the notebook:
# 0 --> Healthy, 1 --> Alzheimer's Positive
if prediction[0] == 0:
  print("The Person does not have Alzheimer's Disease")

else:
  print("The Person has Alzheimer's Disease")


[0]
The Person does not have Parkinsons Disease


