In [None]:

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE



In [None]:
# Read the heart-failure clinical records CSV into a DataFrame.
# NOTE(review): absolute path that looks Colab-specific — adjust when running elsewhere.
csv_path = '/content/heart_failure_clinical_records_dataset (2).csv'
data = pd.read_csv(csv_path)
                    


In [None]:
# Show the loaded DataFrame (the cell's last expression is rendered as its output)
data

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.00,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.00,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.00,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.00,2.7,116,0,0,8,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,62.0,0,61,1,38,1,155000.00,1.1,143,1,1,270,0
295,55.0,0,1820,0,38,0,270000.00,1.2,139,0,0,271,0
296,45.0,0,2060,1,60,0,742000.00,0.8,138,0,0,278,0
297,45.0,0,2413,0,38,0,140000.00,1.4,140,1,1,280,0


In [None]:
# Dimensions of the dataset as (rows, columns)
data.shape

(299, 13)

In [None]:
# Summarise each column (non-null count and dtype) plus overall memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    int64  
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    int64  
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    int64  
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    int64  
 10  smoking                   299 non-null    int64  
 11  time                      299 non-null    int64  
 12  DEATH_EVENT               299 non-null    int64  
dtypes: float64(3), int64(10)
memory usage: 30.5 KB


In [None]:
# Count the missing entries in every column
data.isna().sum()

age                         0
anaemia                     0
creatinine_phosphokinase    0
diabetes                    0
ejection_fraction           0
high_blood_pressure         0
platelets                   0
serum_creatinine            0
serum_sodium                0
sex                         0
smoking                     0
time                        0
DEATH_EVENT                 0
dtype: int64

In [None]:
# Class balance of the target: number of rows for each DEATH_EVENT value
data['DEATH_EVENT'].value_counts()

0    203
1     96
Name: DEATH_EVENT, dtype: int64

In [None]:
# Separate the label column (Y) from the predictor columns (X)
target_column = 'DEATH_EVENT'
Y = data[target_column]
X = data.drop(columns=[target_column])

In [None]:
# Preview the feature matrix. A bare last expression gets the notebook's rich
# table rendering, whereas print() emits plain text with columns elided.
X.head()

      age  anaemia  creatinine_phosphokinase  ...  sex  smoking  time
0    75.0        0                       582  ...    1        0     4
1    55.0        0                      7861  ...    1        0     6
2    65.0        0                       146  ...    1        1     7
3    50.0        1                       111  ...    1        0     7
4    65.0        1                       160  ...    0        0     8
..    ...      ...                       ...  ...  ...      ...   ...
294  62.0        0                        61  ...    1        1   270
295  55.0        0                      1820  ...    0        0   271
296  45.0        0                      2060  ...    0        0   278
297  45.0        0                      2413  ...    1        1   280
298  50.0        0                       196  ...    1        1   285

[299 rows x 12 columns]


In [None]:
# Preview the target labels as the cell's output rather than through print()
Y.head()

0      1
1      1
2      1
3      1
4      1
      ..
294    0
295    0
296    0
297    0
298    0
Name: DEATH_EVENT, Length: 299, dtype: int64


In [None]:
# Hold out 30% of the rows as a test set.
# stratify=Y keeps the DEATH_EVENT class ratio the same in both partitions.
# random_state pins the shuffle so the split, and every metric computed from it,
# is reproducible across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, stratify=Y, random_state=42
)

In [None]:
# Oversample the minority class with SMOTE, on the training partition only,
# so the test set keeps its original class distribution.
# random_state makes the synthetic samples reproducible between runs.
# NOTE: this cell overwrites X_train / Y_train; re-run the split cell before
# re-running this one so SMOTE is applied to the original training rows.
sm = SMOTE(random_state=42)
X_train, Y_train = sm.fit_resample(X_train, Y_train)


In [None]:
# Fit a linear-kernel support vector classifier on the resampled training data.
# (The previously created, never-fitted `SVC()` instance was unused and is removed.)
# NOTE(review): the features are on very different scales (e.g. platelets vs.
# serum_creatinine); consider standardising them before the SVC — not changed here.
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, Y_train)



SVC(kernel='linear')

In [None]:
# Metrics on the training data.
# scikit-learn metric functions take (y_true, y_pred) in that order. Accuracy and
# F1 are symmetric in their arguments, but precision and recall are not: passing
# the predictions first silently swaps the two values.
X_train_prediction = svclassifier.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
training_data_precision = precision_score(Y_train, X_train_prediction)
training_data_recall = recall_score(Y_train, X_train_prediction)
training_data_f1 = f1_score(Y_train, X_train_prediction)

In [None]:
# Report each training-set metric on its own line
training_report = [
    ('Accuracy with the  Training data : ', training_data_accuracy),
    ('Precision with the Training data : ', training_data_precision),
    ('Recall with the Training data : ', training_data_recall),
    ('F1 with the Training data : ', training_data_f1),
]
for label, value in training_report:
    print(label, value)


Accuracy with the  Training data :  0.7535211267605634
Precision with the Training data :  0.8450704225352113
Recall with the Training data :  0.7142857142857143
F1 with the Training data :  0.7741935483870968


In [None]:
# Metrics on the held-out test data.
# scikit-learn metric functions take (y_true, y_pred) in that order. Accuracy and
# F1 are symmetric in their arguments, but precision and recall are not: passing
# the predictions first silently swaps the two values.
X_test_prediction = svclassifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
test_data_precision = precision_score(Y_test, X_test_prediction)
test_data_recall = recall_score(Y_test, X_test_prediction)
test_data_f1 = f1_score(Y_test, X_test_prediction)

In [None]:
# Display the test data metrics.
# The precision/recall/F1 labels previously said "Training data" although the
# values printed here come from the test set.
print('Accuracy with the  Testdata : ', test_data_accuracy)
print('Precision with the Test data : ', test_data_precision)
print('Recall with the Test data : ', test_data_recall)
print('F1 with the Test data : ', test_data_f1)

Accuracy with the  Testdata :  0.6777777777777778
Precision with the Training data :  0.7931034482758621
Recall with the Training data :  0.5
F1 with the Training data :  0.6133333333333334
