# Random Forest

### Importing packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, KFold

### Reading dataset

In [2]:
df = pd.read_csv("Dataset/heart_disease_train_Data.csv")

In [3]:
df.head()

Unnamed: 0,Name,Gender,Age,Chest pain,Shortness of breath,Fatigue,Systolic,Diastolic,Heart rate (bpm),Lung sounds,...,Cardiac CT,Obesity,Murmur,Chest x-ray,Previous illnesses,Pulmonary function tests,Spirometry,Diagnosis,Medications,Treatment
0,Jane Doe,Female,55,1,1,1,140,90,100,1,...,Shows a 50% blockage in the left anterior desc...,0,1,,,,,Coronary artery disease (CAD),"Aspirin, metoprolol, atorvastatin",
1,Mark Johnson,Male,57,1,1,1,150,80,110,1,...,Shows a 60% blockage in the right coronary artery,0,1,,,,,Coronary artery disease (CAD),"Aspirin, ramipril, atorvastatin",
2,Emily Davis,Female,60,1,1,1,130,85,95,1,...,Shows a 75% blockage in the left anterior desc...,0,1,,,,,Coronary artery disease (CAD),"Aspirin, lisinopril, rosuvastatin",
3,William Thompson,Male,62,1,1,1,145,90,100,1,...,Shows a 80% blockage in the left circumflex co...,0,1,,,,,Coronary artery disease (CAD),"Aspirin, metoprolol, atorvastatin",
4,Ashley Johnson,Female,58,1,1,1,135,80,105,1,...,Shows a 90% blockage in the right coronary artery,0,1,,,,,Coronary artery disease (CAD),"Aspirin, simvastatin, lisinopril",


### Label Encoding

In [4]:
# Integer-encode the text columns one at a time with an explicit value -> code mapping, starting with Gender.
# The codes are arbitrary labels (Male -> 0, Female -> 1), not an ordering.
# inplace=True mutates df directly, so from here on df holds encoded values rather than the raw CSV text.
df.replace({'Gender':{"Male":0,"Female":1}},inplace=True)
df.replace({'Echocardiogram':{"None":0,"Shows increased pressure in the right ventricle":1,"Shows increased pressure in the right ventricle and decreased function of the right ventricle":2,"Shows vegetation on the aortic valve":3,"Shows vegetation on the aortic valve and pulmonic valve":4,"Shows vegetation on the mitral valve":5,"Shows vegetation on the mitral valve and aortic valve":6,"Shows vegetation on the mitral valve and pulmonary valve":7,"Shows vegetation on the mitral valve and tricuspid valve":8,"Shows vegetation on the mitral valve, aortic valve and tricuspid valve":9,"Shows vegetation on the mitral valve, aortic valve, and pulmonary valve":10,"Shows vegetation on the mitral valve, aortic valve, tricuspid valve, and pulmonary valve":11,"Shows vegetation on the mitral valve, tricuspid valve, and pulmonary valve":12,"Shows vegetation on the pulmonary valve":13,"Shows vegetation on the tricuspid valve":14,"Shows vegetation on the tricuspid valve and pulmonary valve":15,"Shows increased pressure in the right ventricle and decreased function of the left ventricle":16,"Shows thickening of the pericardium":17}},inplace=True)
df.replace({'Blood culture':{"None":0,"Positive for Candida albicans":1,"Positive for Candida dubliniensis":2,"Positive for Candida glabrata":3,"Positive for Candida guilliermondii":4,"Positive for Candida krusei":5,"Positive for Candida lusitaniae":6,"Positive for Candida parapsilosis":7,"Positive for Candida rugosa":8,"Positive for Candida tropicalis":9,"Positive for Enterococcus faecalis":10,"Positive for Staphylococcus aureus":11,"Positive for Staphylococcus epidermidis":12,"Positive for Streptococcus anginosus":13,"Positive for Streptococcus bovis":14,"Positive for Streptococcus mitis":15,"Positive for Streptococcus oralis":16,"Positive for Streptococcus pneumonia":17,"Positive for Streptococcus pneumoniae":18,"Positive for Streptococcus salivarius":19,"Positive for Streptococcus sanguinis":20,"Positive for Streptococcus viridans":21}},inplace=True)
df.replace({'EKG':{"None":0,"Abnormal":1,"Normal":2,"Shows an elevated ST segment and PR interval consistent with pericarditis":3,"Shows atrial fibrillation":4,"Shows diffuse ST depression and PR elevation consistent with pericarditis":5,"Shows electrical alternans consistent with pericarditis":6,"Shows evidence of aortic stenosis":7,"Shows evidence of aortic valve regurgitation":8,"Shows evidence of arrhythmogenic right ventricular cardiomyopathy":9,"Shows evidence of atrial fibrillation and left ventricular hypertrophy":10,"Shows evidence of atrial fibrillation and valve regurgitation":11,"Shows evidence of dilated cardiomyopathy":12,"Shows evidence of endocarditis":13,"Shows evidence of hypertrophic cardiomyopathy":14,"Shows evidence of left bundle branch block":15,"Shows evidence of left bundle branch block and left ventricular hypertrophy":16,"Shows evidence of left ventricular dysfunction and valve regurgitation":17,"Shows evidence of left ventricular hypertrophy":18,"Shows evidence of left ventricular hypertrophy and atrial fibrillation":19,"Shows evidence of left ventricular hypertrophy and left atrial enlargement":20,"Shows evidence of left ventricular hypertrophy and ST-segment depression":21,"Shows evidence of left ventricular hypertrophy and ST-segment elevation":22,"Shows evidence of mitral regurgitation":23,"Shows evidence of mitral regurgitation and atrial fibrillation":24,"Shows evidence of mitral stenosis":25,"Shows evidence of mitral valve regurgitation":26,"Shows evidence of pericarditis":27,"Shows evidence of pericarditis and atrial fibrillation":28,"Shows evidence of pericarditis and left atrial enlargement":29,"Shows evidence of pericarditis and left bundle branch block":30,"Shows evidence of pericarditis and left ventricular hypertrophy":31,"Shows evidence of pericarditis and right bundle branch block":32,"Shows evidence of pericarditis and right ventricular hypertrophy":33,"Shows evidence of pericarditis and sinus tachycardia":34,"Shows 
evidence of pericarditis and valve regurgitation":35,"Shows evidence of pulmonary stenosis":36,"Shows evidence of restrictive cardiomyopathy":37,"Shows evidence of right bundle branch block":38,"Shows evidence of right bundle branch block and left ventricular hypertrophy":39,"Shows evidence of ST-segment depression in leads II, III, and aVF":40,"Shows evidence of Takotsubo cardiomyopathy":41,"Shows evidence of tricuspid stenosis":42,"Shows evidence of tricuspid valve regurgitation":43,"Shows evidence of T-wave inversion in leads V1-V4":44,"Shows left bundle branch block":45,"Shows left ventricular hypertrophy":46,"Shows low voltage QRS consistent with pericarditis":47,"Shows sinus tachycardia":48,"Shows sinus tachycardia and diffuse ST elevation consistent with pericarditis":49,"Shows ST-segment elevation":50,"Shows evidence of atrial enlargement":51,"Shows evidence of right ventricular hypertrophy":52,"Shows evidence of left ventricular hypertrophy and restricted diastolic filling":53,"Shows low voltage and electrical alternans":54,"Shows sinus tachycardia and low voltage":55,"Shows evidence of atrial enlargement and low voltage QRS complex":56,"Shows evidence of atrial fibrillation":57,"Shows evidence of left ventricular hypertrophy and right atrial dilation":58,"Shows evidence of pericardial thickening":59}},inplace=True)
df.replace({'Cardiac CT':{"None":0,"Shows a 100% dilation of the proximal aorta":1,"Shows a 110% dilation of the proximal aorta":2,"Shows a 120% dilation of the proximal aorta":3,"Shows a 130% dilation of the proximal aorta":4,"Shows a 25% dilation of the infrarenal aorta":5,"Shows a 30% dilation of the abdominal aorta":6,"Shows a 30% dilation of the infrarenal aorta":7,"Shows a 35% dilation of the infrarenal aorta":8,"Shows a 38% dilation of the infrarenal aorta":9,"Shows a 40% dilation of the abdominal aorta":10,"Shows a 40% dilation of the infrarenal aorta":11,"Shows a 40% dilation of the thoracic aorta":12,"Shows a 40% dilation of the thoracoabdominal aorta":13,"Shows a 42% dilation of the infrarenal aorta":14,"Shows a 45% dilation of the infrarenal aorta":15,"Shows a 45% dilation of the thoracic aorta":16,"Shows a 50% blockage in the left anterior descending coronary artery":17,"Shows a 50% dilation of the abdominal aorta":18,"Shows a 50% dilation of the proximal aorta":19,"Shows a 50% dilation of the thoracic aorta":20,"Shows a 50% dilation of the thoracoabdominal aorta":21,"Shows a 55% dilation of the proximal aorta":22,"Shows a 55% dilation of the thoracic aorta":23,"Shows a 60% blockage in the left main coronary artery":24,"Shows a 60% blockage in the right coronary artery":25,"Shows a 60% dilation of the abdominal aorta":26,"Shows a 60% dilation of the proximal aorta":27,"Shows a 60% dilation of the thoracic aorta":28,"Shows a 60% dilation of the thoracoabdominal aorta":29,"Shows a 65% dilation of the thoracic aorta":30,"Shows a 70% blockage in the left anterior descending coronary artery":31,"Shows a 70% blockage in the right coronary artery":32,"Shows a 70% dilation of the abdominal aorta":33,"Shows a 70% dilation of the proximal aorta":34,"Shows a 70% dilation of the thoracic aorta":35,'Shows a 70% dilation of the thoracoabdominal aorta':36,"Shows a 75% blockage in the left anterior descending coronary artery":37,"Shows a 75% dilation of the thoracic 
aorta":38,"Shows a 80% blockage in the left circumflex coronary artery":39,"Shows a 80% dilation of the proximal aorta":40,"Shows a 80% dilation of the thoracoabdominal aorta":41,"Shows a 80% dilation of the thoracoabdominal aorta":42,"Shows a 90% blockage in the right coronary artery":43,"Shows a 90% dilation of the proximal aorta":44,"Shows a 90% dilation of the thoracic aorta":45,"Shows a 90% dilation of the thoracoabdominal aorta":46,"Shows a mild dilatation in the left ventricle":47,"Shows a mild dysfunction of the left ventricle":48,"Shows a mild dysfunction of the right ventricle":49,"Shows a mild hypertrophy in the left ventricle":50,"Shows a mild regurgitation in the mitral valve":51,"Shows a mild restriction in the ventricular filling":52,"Shows a mild stenosis in the mitral valve":53,"Shows a mild stenosis in the pulmonary valve":54,"Shows a mild stenosis in the tricuspid valve":55,"Shows a moderate dilatation in the left ventricle":56,"Shows a moderate dysfunction of the left ventricle":57,"Shows a moderate dysfunction of the right ventricle":58,"Shows a moderate hypertrophy in the left ventricle":59,"Shows a moderate hypertrophy in the right ventricle":60,"Shows a moderate regurgitation in the mitral valve":61,"Shows a moderate restriction in the ventricular filling":62,"Shows a moderate stenosis in the aortic valve":63,"Shows a moderate stenosis in the mitral valve":64,"Shows a moderate stenosis in the pulmonary valve":65,"Shows a moderate stenosis in the tricuspid valve":66,"Shows a severe dilatation in the left ventricle":67,"Shows a severe dysfunction of the left ventricle":68,"Shows a severe dysfunction of the right ventricle":69,"Shows a severe hypertrophy in the left ventricle":70,"Shows a severe hypertrophy in the right ventricle":71,"Shows a severe regurgitation in the mitral valve":72,"Shows a severe restriction in the ventricular filling":73,"Shows a severe stenosis in the aortic valve":74,"Shows a severe stenosis in the mitral 
valve":75,"Shows a severe stenosis in the tricuspid valve":76,"Shows constrictive pericarditis":77,"Shows effusion in the pericardium":78,"Shows pericardial effusion and tamponade":79,"Shows pericarditis with myocarditis":80,"Shows thickening of the pericardium":81}},inplace=True)
df.replace({'Chest x-ray':{"None":0,"Shows hyperinflation and bronchial wall thickening":1,"Shows hyperinflation, bronchial wall thickening, and increased vascular markings":2,"Shows increased vascular markings and signs of chronic changes in the lungs":3,"Shows increased vascular markings and signs of lung restriction":4,"Shows increased vascular markings and small areas of consolidation in the lungs":5,"Shows increased vascular markings and small areas of fibrosis in the lungs":6,"Shows increased vascular markings in the lungs":7,"Shows increased vascular markings in the lungs, small areas of fibrosis, and signs of lung restriction":8,"Shows increased vascular markings, small areas of fibrosis and signs of lung restriction":9,"Shows increased vascular markings, small areas of fibrosis, and signs of chronic changes in the lungs":10,"Shows increased vascular markings, small areas of fibrosis, and signs of lung restriction":11,"Shows normal lung structures with no obvious abnormalities":12}},inplace=True)
df.replace({'Previous illnesses':{"None":0,"Aortic valve replacement surgery":1,"Mitral valve replacement surgery":2,"Recent chest surgery":3,"Recent viral infection":4,"Tricuspid valve replacement surgery":5}},inplace=True)
df.replace({'Pulmonary function tests':{"None":0,"Shows decreased lung capacity":1,"Shows decreased lung capacity and reduced diffusing capacity of the lung for carbon monoxide":2,"Shows decreased lung capacity, reduced diffusing capacity of the lung for carbon monoxide and decreased lung volumes":3,"Shows normal lung capacity":4}},inplace=True)
df.replace({'Spirometry':{"None":0,"Shows decreased lung function and increased airway resistance":1}},inplace=True)
df.replace({'Diagnosis':{'Coronary artery disease (CAD)':0, 'Mitral regurgitation':1, 'Mitral stenosis':2, 'Aortic stenosis':3, 'Tricuspid stenosis':4,'Pulmonary stenosis':5, 'Dilated cardiomyopathy':6, 'Hypertrophic cardiomyopathy':7, 'Restrictive cardiomyopathy':8,'Arrhythmogenic right ventricular cardiomyopathy':9, 'Takotsubo cardiomyopathy':10, 'Thoracic aortic aneurysm':11, 'Abdominal aortic aneurysm':12,'Proximal aortic aneurysm':13, 'Infrarenal aortic aneurysm':14, 'Thoracoabdominal aortic aneurysm':15, 'Acute pericarditis':16, 'Chronic pericarditis':17,'Constrictive pericarditis':18, 'Pericardial effusion with tamponade':19, 'Pericarditis with myocarditis':20, 'Acute native valve endocarditis':21,'Subacute native valve endocarditis':22, 'Infective endocarditis on prosthetic valve':23, 'Candidal endocarditis':24, 'Native valve endocarditis involving multiple valves':25,'Primary pulmonary hypertension':26, 'Secondary pulmonary hypertension due to COPD':27, 'Secondary pulmonary hypertension due to obesity and sleep apnea':28, 'Secondary pulmonary hypertension due to scleroderma':29,'Secondary pulmonary hypertension due to chronic thromboembolic disease':30}},inplace=True)
# Show the last 20 encoded rows to confirm the text columns now hold integer codes.
# Only the final expression of a cell is rendered, so this is the single preview kept here.
df.tail(20)

# Optional export of the encoded frame; the path is relative to the notebook rather than a
# machine-specific drive so the cell works on any checkout.
# df.to_csv("Dataset/preprocessed_heart_training.csv", index=False)

Unnamed: 0,Name,Gender,Age,Chest pain,Shortness of breath,Fatigue,Systolic,Diastolic,Heart rate (bpm),Lung sounds,...,Cardiac CT,Obesity,Murmur,Chest x-ray,Previous illnesses,Pulmonary function tests,Spirometry,Diagnosis,Medications,Treatment
314,Liam Williams,0,50,1,1,1,120,80,110,1,...,0,0,0,0,5,0,0,23,"Vancomycin, gentamicin",
315,Emma Taylor,1,40,1,1,1,130,80,110,1,...,0,0,0,0,2,0,0,23,"Daptomycin, rifampin",
316,John Miller,0,70,1,1,1,140,90,120,1,...,0,0,0,0,1,0,0,23,"Penicillin, gentamicin",
317,Amy Lee,1,45,1,1,1,130,80,110,1,...,0,0,0,0,2,0,0,23,"Vancomycin, gentamicin",
318,Mark Anderson,0,55,1,1,1,140,90,110,1,...,0,0,0,0,5,0,0,23,"Daptomycin, rifampin",
319,Sara Smith,1,60,1,1,1,130,80,120,1,...,0,0,0,0,1,0,0,23,"Oxacillin, vancomycin",
320,James Williams,0,65,1,1,1,130,80,110,1,...,0,0,0,0,2,0,0,23,"Vancomycin, rifampin",
321,David Thompson,0,60,1,1,1,140,90,120,1,...,0,0,0,0,1,0,0,23,"Vancomycin, gentamicin",
322,Kim Lee,1,55,1,1,1,130,80,100,1,...,0,0,0,0,2,0,0,23,"Nafcillin, rifampin",
323,Liam Thompson,0,50,1,1,1,130,80,110,1,...,0,0,0,0,1,0,0,23,"Amoxicillin, gentamicin",


In [5]:
df.tail()

Unnamed: 0,Name,Gender,Age,Chest pain,Shortness of breath,Fatigue,Systolic,Diastolic,Heart rate (bpm),Lung sounds,...,Cardiac CT,Obesity,Murmur,Chest x-ray,Previous illnesses,Pulmonary function tests,Spirometry,Diagnosis,Medications,Treatment
329,Emily Wilson,1,55,1,1,1,130,80,120,1,...,77,0,1,0,0,0,0,18,"Ibuprofen, colchicine, prednisone",
330,Jacob Smith,0,48,1,1,1,130,80,120,1,...,77,0,1,0,0,0,0,18,"Ibuprofen, colchicine, prednisone",
331,Jane Smith,1,65,1,1,1,140,90,110,1,...,77,0,1,0,0,0,0,18,"Ibuprofen, colchicine, prednisone",
332,David Johnson,0,68,1,1,1,140,90,110,0,...,77,0,1,0,0,0,0,18,"Ibuprofen, colchicine, prednisone",
333,Emily Smith,1,45,1,0,1,130,80,120,1,...,77,0,1,0,0,0,0,18,"Ibuprofen, colchicine, prednisone",


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 334 entries, 0 to 333
Data columns (total 49 columns):
 #   Column                                           Non-Null Count  Dtype 
---  ------                                           --------------  ----- 
 0   Name                                             334 non-null    object
 1   Gender                                           334 non-null    int64 
 2   Age                                              334 non-null    int64 
 3   Chest pain                                       334 non-null    int64 
 4   Shortness of breath                              334 non-null    int64 
 5   Fatigue                                          334 non-null    int64 
 6   Systolic                                         334 non-null    int64 
 7   Diastolic                                        334 non-null    int64 
 8   Heart rate (bpm)                                 334 non-null    int64 
 9   Lung sounds                                

### Checking for null values

In [7]:
df.isnull().sum()

Name                                               0
Gender                                             0
Age                                                0
Chest pain                                         0
Shortness of breath                                0
Fatigue                                            0
Systolic                                           0
Diastolic                                          0
Heart rate (bpm)                                   0
Lung sounds                                        0
Cholesterol level (mg/dL)                          0
LDL level (mg/dL)                                  0
HDL level (mg/dL)                                  0
Diabetes                                           0
Atrial fibrillation                                0
Mitral valve prolapse                              0
Rheumatic fever                                    0
Mitral stenosis                                    0
Aortic stenosis                               

### Counting the number of samples for each diagnosis

In [8]:
df['Diagnosis'].value_counts()

30    12
24    12
20    12
21    12
22    12
29    12
26    11
25    11
27    11
23    11
18    11
28    11
12    11
13    11
14    11
19    11
16    11
17    11
0     10
1     10
11    10
10    10
9     10
8     10
7     10
6     10
5     10
4     10
3     10
2     10
15    10
Name: Diagnosis, dtype: int64

### Splitting the input and target values

In [9]:
# Features: every column from 'Gender' through 'Spirometry' (label slices include both ends).
# This leaves out the patient name and the Diagnosis / Medications / Treatment columns, and
# keeps working even if the CSV gains or loses columns outside that range.
X = df.loc[:, 'Gender':'Spirometry']

# Target: the integer-encoded Diagnosis column (third from the end of the frame, not second-last).
y = df['Diagnosis']

In [10]:
X.head()

Unnamed: 0,Gender,Age,Chest pain,Shortness of breath,Fatigue,Systolic,Diastolic,Heart rate (bpm),Lung sounds,Cholesterol level (mg/dL),...,Echocardiogram,Blood culture,EKG,Cardiac CT,Obesity,Murmur,Chest x-ray,Previous illnesses,Pulmonary function tests,Spirometry
0,1,55,1,1,1,140,90,100,1,220,...,0,0,18,17,0,1,0,0,0,0
1,0,57,1,1,1,150,80,110,1,210,...,0,0,18,25,0,1,0,0,0,0
2,1,60,1,1,1,130,85,95,1,230,...,0,0,18,37,0,1,0,0,0,0
3,0,62,1,1,1,145,90,100,1,240,...,0,0,18,39,0,1,0,0,0,0
4,1,58,1,1,1,135,80,105,1,220,...,0,0,18,43,0,1,0,0,0,0


In [11]:
y.tail()

329    18
330    18
331    18
332    18
333    18
Name: Diagnosis, dtype: int64

In [12]:
X.shape

(334, 45)

In [13]:
y.shape

(334,)

### Splitting the data for train and test

In [14]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the split is repeatable.
# NOTE(review): the split is not stratified and each diagnosis has only 10-12 rows, so some
# classes may be thin or absent in the test set — consider passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size = 0.2)

### Use of Random Forest Classifier

In [15]:
# Random forest using Gini impurity; each tree is limited to depth 8 and a node must hold
# at least 10 samples before it may be split. random_state makes refits reproducible.
ranforcls = RandomForestClassifier(criterion = "gini", max_depth= 8, min_samples_split = 10, random_state = 42)

In [16]:
ranforcls.fit(X_train, y_train)

In [17]:
ranforcls.feature_importances_

array([0.00587955, 0.03132957, 0.0116079 , 0.00530851, 0.00032527,
       0.02102102, 0.02143521, 0.03404298, 0.00539105, 0.01622878,
       0.01476637, 0.01472195, 0.        , 0.        , 0.031602  ,
       0.0169065 , 0.02778589, 0.03057236, 0.02000721, 0.03279785,
       0.02222571, 0.00627007, 0.03059395, 0.03039231, 0.01900387,
       0.00299345, 0.03836827, 0.02227116, 0.00398764, 0.        ,
       0.00591563, 0.01693064, 0.01194825, 0.01780712, 0.00312942,
       0.04907321, 0.04734855, 0.09023077, 0.07560209, 0.00811783,
       0.02644931, 0.04411781, 0.0161245 , 0.04120152, 0.02816694])

In [18]:
X.columns

Index(['Gender', 'Age', 'Chest pain', 'Shortness of breath', 'Fatigue',
       'Systolic', 'Diastolic', 'Heart rate (bpm)', 'Lung sounds',
       'Cholesterol level (mg/dL)', 'LDL level (mg/dL)', 'HDL level (mg/dL)',
       'Diabetes', 'Atrial fibrillation', 'Mitral valve prolapse',
       'Rheumatic fever', 'Mitral stenosis', 'Aortic stenosis',
       'Tricuspid stenosis', 'Pulmonary stenosis', 'Dilated cardiomyopathy',
       'Hypertrophic cardiomyopathy', 'Restrictive cardiomyopathy',
       'Arrhythmogenic right ventricular cardiomyopathy',
       'Takotsubo cardiomyopathy', 'Drug use', 'Fever', 'Chills', 'Joint pain',
       'Alcoholism', 'Hypertension', 'Fainting', 'Dizziness', 'Smoking',
       'High cholesterol', 'Echocardiogram', 'Blood culture', 'EKG',
       'Cardiac CT', 'Obesity', 'Murmur', 'Chest x-ray', 'Previous illnesses',
       'Pulmonary function tests', 'Spirometry'],
      dtype='object')

In [19]:
y_pred = ranforcls.predict(X_test)

In [20]:
y_pred

array([ 2, 14,  7, 21,  5, 16, 30, 20, 25, 21,  7, 13, 23, 12, 28, 13, 14,
       12,  7, 29, 10, 13, 23, 21, 13, 15, 28, 18, 16, 23, 19, 17,  6,  8,
       30, 30, 20, 27,  4, 12,  3,  4, 20, 20, 12,  7, 30,  4, 10, 14,  9,
       25, 24,  1, 10, 30, 14,  5, 11, 20, 16, 23, 29, 13,  7, 14,  6],
      dtype=int64)

In [21]:
y_test

25      2
309    14
73      7
195    21
57      5
       ..
280    29
3       0
77      7
311    14
60      6
Name: Diagnosis, Length: 67, dtype: int64

### Checking the accuracy of our model

In [22]:
accuracy_score(y_test, y_pred)

0.7313432835820896

### Checking the accuracy of each fold of the model (6 folds)

In [23]:
# Six shuffled folds with a fixed seed so the fold assignment is repeatable.
kf = KFold(n_splits = 6, shuffle= True, random_state=42)

# Cross-validate on the training split only, so the held-out test rows never take part.
# The result is one score per fold.
cv_scores = cross_val_score(ranforcls, X_train, y_train, cv=kf)

In [24]:
cv_scores

array([0.71111111, 0.73333333, 0.77777778, 0.86363636, 0.77272727,
       0.84090909])

### Testing of model by feeding new inputs

In [25]:
# One hand-written patient record: 45 already-encoded values that must follow the order of X.columns.
# NOTE(review): the visible values look like the first row shown by X.head() — confirm that is intended.
input_sample=(1,55,1,1,1,140,90,100,1,220,150,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,18,17,0,1,0,0,0,0)
# Convert the tuple to a 1-D NumPy array so it can be reshaped into a single-row batch later.
input_np_array=np.asarray(input_sample)

In [26]:
X.columns

Index(['Gender', 'Age', 'Chest pain', 'Shortness of breath', 'Fatigue',
       'Systolic', 'Diastolic', 'Heart rate (bpm)', 'Lung sounds',
       'Cholesterol level (mg/dL)', 'LDL level (mg/dL)', 'HDL level (mg/dL)',
       'Diabetes', 'Atrial fibrillation', 'Mitral valve prolapse',
       'Rheumatic fever', 'Mitral stenosis', 'Aortic stenosis',
       'Tricuspid stenosis', 'Pulmonary stenosis', 'Dilated cardiomyopathy',
       'Hypertrophic cardiomyopathy', 'Restrictive cardiomyopathy',
       'Arrhythmogenic right ventricular cardiomyopathy',
       'Takotsubo cardiomyopathy', 'Drug use', 'Fever', 'Chills', 'Joint pain',
       'Alcoholism', 'Hypertension', 'Fainting', 'Dizziness', 'Smoking',
       'High cholesterol', 'Echocardiogram', 'Blood culture', 'EKG',
       'Cardiac CT', 'Obesity', 'Murmur', 'Chest x-ray', 'Previous illnesses',
       'Pulmonary function tests', 'Spirometry'],
      dtype='object')

In [27]:
input_reshape=input_np_array.reshape(1,-1)

In [28]:
input_reshape

array([[  1,  55,   1,   1,   1, 140,  90, 100,   1, 220, 150,  40,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   1,   0,   0,   1,   1,   0,   0,  18,  17,
          0,   1,   0,   0,   0,   0]])

In [29]:
# The forest was fitted on a DataFrame, so it recorded the training column names. Predicting
# on a bare NumPy array makes scikit-learn warn that X lacks valid feature names and leaves
# the column order implicit. Wrapping the single-row sample in a frame that reuses X's
# columns keeps names and order explicit without changing any value.
input_frame = pd.DataFrame(input_reshape, columns=X.columns)
prediction = ranforcls.predict(input_frame)
prediction



array([13], dtype=int64)

In [30]:
result = prediction.astype(int)
result

array([13])

In [31]:
# Lookup table whose column headers are the diagnosis names, laid out so that column i
# corresponds to integer diagnosis code i.
# The directory is spelled "Dataset/" to match the training-data load earlier in the notebook;
# a lowercase "dataset/" only resolves on case-insensitive filesystems.
df1=pd.read_csv("Dataset/Dignosis.csv")
df1

Unnamed: 0,Coronary artery disease (CAD),Mitral regurgitation,Mitral stenosis,Aortic stenosis,Tricuspid stenosis,Pulmonary stenosis,Dilated cardiomyopathy,Hypertrophic cardiomyopathy,Restrictive cardiomyopathy,Arrhythmogenic right ventricular cardiomyopathy,...,Acute native valve endocarditis,Subacute native valve endocarditis,Infective endocarditis on prosthetic valve,Candidal endocarditis,Native valve endocarditis involving multiple valves,Primary pulmonary hypertension,Secondary pulmonary hypertension due to COPD,Secondary pulmonary hypertension due to obesity and sleep apnea,Secondary pulmonary hypertension due to scleroderma,Secondary pulmonary hypertension due to chronic thromboembolic disease
0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30


In [32]:
# Translate the predicted integer code into the diagnosis name held in the matching column header.
predicted_code = prediction.astype(int)
diagnosis_name = df1.iloc[:, predicted_code].columns[0]
print("Person is suffering from", diagnosis_name)

Person is suffering from Proximal aortic aneurysm
