In [16]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

In [17]:
# Location of the maternal-health CSV. Set the MATERNAL_CSV environment
# variable to point somewhere else; the default keeps the original path so
# existing runs behave exactly as before.
DATA_PATH = os.environ.get('MATERNAL_CSV', '/home/nakabuye/Desktop/maternal.csv')
maternal = pd.read_csv(DATA_PATH)

# Preview the first 5 rows of the dataset
maternal.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk


In [18]:
# Class balance: number of rows for each category in the RiskLevel column
maternal['RiskLevel'].value_counts()

RiskLevel
low risk     406
mid risk     336
high risk    272
Name: count, dtype: int64

In [19]:
# Encode the RiskLevel strings as integer class ids.
# The fitted encoder is kept so the mapping can be looked up later.
label_encode = LabelEncoder()
labels = label_encode.fit_transform(maternal["RiskLevel"])

In [20]:
# Store the encoded labels in a new 'risk' column.
# Encoding:
#   0 - high risk
#   1 - low risk
#   2 - mid risk
maternal["risk"] = labels
maternal.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel,risk
0,25,130,80,15.0,98.0,86,high risk,0
1,35,140,90,13.0,98.0,70,high risk,0
2,29,90,70,8.0,100.0,80,high risk,0
3,30,140,85,7.0,98.0,70,high risk,0
4,35,120,60,6.1,98.0,76,low risk,1


In [21]:
# Rows per encoded class; the counts should match the RiskLevel counts.
maternal['risk'].value_counts()
# 0 - high risk
# 1 - low risk
# 2 - mid risk

risk
1    406
2    336
0    272
Name: count, dtype: int64

In [24]:
# Drop the original string column now that 'risk' holds the encoded labels.
# The frame is rebound under the same name, so re-running this cell used to
# raise KeyError (the column is already gone); errors='ignore' makes the cell
# safe to re-run. `columns=` alone selects the axis, so `axis=1` is not needed.
maternal = maternal.drop(columns='RiskLevel', errors='ignore')
maternal.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,risk
0,25,130,80,15.0,98.0,86,0
1,35,140,90,13.0,98.0,70,0
2,29,90,70,8.0,100.0,80,0
3,30,140,85,7.0,98.0,70,0
4,35,120,60,6.1,98.0,76,1


In [25]:
# Data analysis
# (number of rows, number of columns) of the frame after encoding
maternal.shape

(1014, 7)

In [26]:
# Statistical measures of the dataset: count, mean, std, min, quartiles and
# max for every column (including the encoded 'risk' column)
maternal.describe()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,risk
count,1014.0,1014.0,1014.0,1014.0,1014.0,1014.0,1014.0
mean,29.871795,113.198225,76.460552,8.725986,98.665089,74.301775,1.063116
std,13.474386,18.403913,13.885796,3.293532,1.371384,8.088702,0.772146
min,10.0,70.0,49.0,6.0,98.0,7.0,0.0
25%,19.0,100.0,65.0,6.9,98.0,70.0,0.0
50%,26.0,120.0,80.0,7.5,98.0,76.0,1.0
75%,39.0,120.0,90.0,8.0,98.0,80.0,2.0
max,70.0,160.0,100.0,19.0,103.0,90.0,2.0


In [27]:
# Mean of every feature within each encoded risk class
# (0 - high risk, 1 - low risk, 2 - mid risk)
maternal.groupby('risk').mean()

Unnamed: 0_level_0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate
risk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,36.216912,124.194853,85.073529,12.12261,98.899265,76.742647
1,26.869458,105.866995,72.534483,7.220271,98.368966,72.770936
2,28.363095,113.154762,74.232143,7.795744,98.833333,74.175595


In [28]:
# SEPARATING LABELS AND DATA
# Label: the encoded target column; Data: every remaining column (features).
Label = maternal["risk"]
Data = maternal.drop(columns="risk")

In [29]:
# Check that the previous step worked: 'risk' should no longer be a column
Data.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate
0,25,130,80,15.0,98.0,86
1,35,140,90,13.0,98.0,70
2,29,90,70,8.0,100.0,80
3,30,140,85,7.0,98.0,70
4,35,120,60,6.1,98.0,76


In [30]:
# First few encoded targets, for comparison with the feature rows above
Label.head()

0    0
1    0
2    0
3    0
4    1
Name: risk, dtype: int64

In [31]:
#DATA STANDARDISATION
# Fit a StandardScaler on the feature frame.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed further down, so statistics of the test rows
# leak into the preprocessing. Consider fitting on the training split only.
scaler = StandardScaler()
scaler.fit(Data)

In [32]:
# Apply the fitted scaling to every feature column
standardized_data = scaler.transform(Data)

In [33]:
# Inspect the scaled feature matrix
print(standardized_data)

[[-0.36173812  0.91339632  0.25502279  1.90589019 -0.4852155   1.44695615]
 [ 0.38077697  1.45702716  0.97553854  1.29833966 -0.4852155  -0.53208757]
 [-0.06473208 -1.26112705 -0.46549297 -0.22053665  0.97388449  0.70481475]
 ...
 [ 0.38077697 -1.53294248 -1.18600873  3.12099124 -0.4852155   1.44695615]
 [ 0.97478904  0.36976548  0.97553854  2.81721597 -0.4852155  -0.53208757]
 [ 0.15802244  0.36976548 -0.82575085 -0.82808717  1.70343448  0.21005383]]


In [36]:
# From this cell on, `Data` refers to the standardized feature array rather
# than the raw feature DataFrame; `Label` is still the encoded 'risk' column.
Label = maternal['risk']
Data = standardized_data

In [44]:
# TRAIN-TEST-SPLIT
# Hold out 20% of the rows for testing. `stratify=Label` keeps the class
# proportions the same in both splits; `random_state` fixes the shuffle so
# the split is reproducible.
Data_train, Data_test, Label_train, Label_test = train_test_split(
    Data,
    Label,
    test_size=0.2,
    stratify=Label,
    random_state=2,
)

In [45]:
# Shapes of the full, training and test feature matrices
print(Data.shape, Data_train.shape, Data_test.shape)

(1014, 6) (811, 6) (203, 6)


In [46]:
# TRAINING THE MODEL
# Support vector classifier with a linear kernel
classifier = svm.SVC(kernel="linear")

In [47]:
# Training the support vector machine on the training split only
classifier.fit(Data_train, Label_train)

In [48]:
#MODEL EVALUATION
# Accuracy score on training data

Data_train_prediction = classifier.predict(Data_train)
# accuracy_score takes (y_true, y_pred): ground truth first, predictions
# second. Accuracy itself is symmetric, but keeping the documented order
# avoids silent mistakes if this is later swapped for an asymmetric metric.
training_data_accuracy = accuracy_score(Label_train, Data_train_prediction)

In [49]:
# Fraction of training rows the classifier labels correctly
print('Accuracy score for training data:', training_data_accuracy)

Accuracy score for training data: 0.6596794081381011


In [50]:
# Accuracy score on testing data

Data_test_prediction = classifier.predict(Data_test)
# accuracy_score takes (y_true, y_pred): ground truth first, predictions
# second. Accuracy itself is symmetric, but keeping the documented order
# avoids silent mistakes if this is later swapped for an asymmetric metric.
testing_data_accuracy = accuracy_score(Label_test, Data_test_prediction)

In [51]:
# Fraction of held-out test rows the classifier labels correctly
print('Accuracy score for testing data:', testing_data_accuracy)

Accuracy score for testing data: 0.6699507389162561


In [63]:
# MAKING A PREDICTIVE SYSTEM
# One patient's measurements, in the same column order as the training
# features: Age, SystolicBP, DiastolicBP, BS, BodyTemp, HeartRate
input_data = (16,100,80,9.2,98,75)

# Message printed for each encoded class id (0 - high, 1 - low, 2 - mid)
RISK_MESSAGES = {
    0: 'High Risk Pregnancy',
    1: 'Low Risk pregnancy',
    2: 'Mid Risk pregnancy',
}

# changing input data to a numpy array
inputdataasnumpyarray = np.asarray(input_data)

# reshape to a single row: (1, n_features)
input_data_reshape = inputdataasnumpyarray.reshape(1, -1)

# The scaler was fitted on a DataFrame, so give it a DataFrame with the same
# column names; passing a bare array makes scikit-learn warn that the input
# "does not have valid feature names". If the scaler carries no feature names
# the frame simply gets default columns.
input_frame = pd.DataFrame(
    input_data_reshape,
    columns=getattr(scaler, 'feature_names_in_', None),
)

# standardise the input data with the statistics learned during training
std_data = scaler.transform(input_frame)

# MAKE PREDICTION
prediction = classifier.predict(std_data)
print(prediction)

# `prediction` is a one-element array; take the scalar class id explicitly
# instead of comparing the whole array inside an `if`.
predicted_class = int(prediction[0])
print(RISK_MESSAGES.get(predicted_class, 'Error Value'))

[1]
Low Risk pregnancy


