# Exploring the Data

In [2]:
#import Libraries
import pandas as pd
import numpy as np

In [None]:
#import dataset
blood_data = pd.read_excel('Book1.xlsx',sheet_name='Sheet1')
blood_data.head(10)

In [None]:
blood_data.info()

In [None]:
#convert categorical variables to lowercase to avoid mismatch of lower and upper case
data = blood_data
data['Class'] = [x.lower() for x in data['Class']]
data['Class'].value_counts()

In [5]:
#convert categorical variables to lowercase to avoid mismatch of lower and upper case
data['Gender'] = [x.lower() for x in data['Gender']]
data['Gender'].value_counts()

female    1016
male       362
child      122
Name: Gender, dtype: int64

In [6]:
#delete ID column 
data.drop('ID',axis=1,inplace=True)

# Preparing data for machine learning model

In [7]:
# split data to X and y to be inputes for Model
X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values
X

array([['female', 47.0, 12.4, 361.0, 8.7],
       ['male', 75.0, 11.2, 338.0, 9.8],
       ['female', 38.0, 11.6, 288.0, 5.6],
       ...,
       ['female', 68.0, 11.4, 233.0, 8.4],
       ['child', 13.0, 12.8, 345.0, 3.9],
       ['male', 47.0, 13.8, 218.0, 15.2]], dtype=object)

In [8]:
# Encoding the Independent Variable (X)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [9]:
# Encoding the Dependent Variable (y)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

In [10]:
#split the data into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2, random_state=42)

In [11]:
X_train

array([[1.0, 0.0, 0.0, ..., 10.8, 379.0, 12.3],
       [0.0, 1.0, 0.0, ..., 11.5, 257.0, 8.5],
       [0.0, 0.0, 1.0, ..., 15.3, 283.0, 2.3],
       ...,
       [0.0, 1.0, 0.0, ..., 12.3, 239.0, 6.0],
       [0.0, 1.0, 0.0, ..., 8.0, 209.0, 9.0],
       [0.0, 1.0, 0.0, ..., 10.0, 308.0, 10.4]], dtype=object)

In [12]:
# Feature Scaling as model senstive to data values we have to do feature scaling to improve results
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fit and Evaluate Machine Learning Models 

In [13]:
# Training the Random Forest Classification model on the Training set
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 200, criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)
from sklearn.metrics import  accuracy_score
y_pred = classifier.predict(X_test)
print("accuracy : %.2f"%accuracy_score(y_test, y_pred))

accuracy : 0.95


# Predict New Data

In [14]:
#get classes names and prepare it for prediction
classes_names = le.inverse_transform(classifier.classes_)
classes_indecies = classifier.classes_
classes_list = [x for x in zip(classes_indecies,classes_names,)]
classes_prediction = dict(classes_list)
classes_prediction

{0: 'anemia',
 1: 'leucocytosis',
 2: 'leucopenia',
 3: 'normal ',
 4: 'thrombocytopenia',
 5: 'thrombocytosis'}

In [15]:
# Gender is categorical variable so it must be converted to encoded numbers 
d = {"child" : "1.0,0.0,0.0",
"female" : "0.0,1.0,0.0",
"male" : "0.0,0.0,1.0"}

In [17]:
gender = input("Input Patient Gender : ")
#get lower case of input
gender = gender.lower()
#retrieve corresponding encoded value of input gender
gender = d[gender]
# convert values from string to float
gender = [float(i) for i in gender.split(',')]
#input remaining results
age = float(input("Insert Patient Age : "))
haemoglobin = float(input("Insert Patient Haemoglobin : "))
platelets = float(input("Insert Patient Platelets : "))
wbc = float(input("Insert Patient WBCs : "))
#transform values for model to make prediction
var = sc.transform([[gender[0],gender[1],gender[2],age,haemoglobin,platelets,wbc]])
#get prediction result 
prediction = classifier.predict(var)
print("Prediction : %s"%classes_prediction[prediction[0]])

Input Patient Gender : female
Insert Patient Age : 47
Insert Patient Haemoglobin : 12.4
Insert Patient Platelets : 361
Insert Patient WBCs : 8.7
Prediction : normal 


In [None]:
#Female	47.0	12.4	361.0	8.7	Normal