In [37]:
# Import dependencies
import pandas as pd
from pathlib import Path

In [38]:
# Read the cleaned fetal-health CSV into a DataFrame and show its first five rows
data = Path('../Data/clean_fetal_data.csv')
df = pd.read_csv(filepath_or_buffer=data)
df.head(n=5)

Unnamed: 0,baseline value,accelerations,fetal_movement,uterine_contractions,light_decelerations,severe_decelerations,prolongued_decelerations,abnormal_short_term_variability,mean_value_of_short_term_variability,percentage_of_time_with_abnormal_long_term_variability,...,histogram_min,histogram_max,histogram_number_of_peaks,histogram_number_of_zeroes,histogram_mode,histogram_mean,histogram_median,histogram_variance,histogram_tendency,fetal_health
0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0,0.5,43.0,...,62.0,126.0,2.0,0.0,120.0,137.0,121.0,73.0,1.0,2.0
1,132.0,0.006,0.0,0.006,0.003,0.0,0.0,17.0,2.1,0.0,...,68.0,198.0,6.0,1.0,141.0,136.0,140.0,12.0,0.0,1.0
2,133.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.1,0.0,...,68.0,198.0,5.0,1.0,141.0,135.0,138.0,13.0,0.0,1.0
3,134.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,11.0,0.0,137.0,134.0,137.0,13.0,1.0,1.0
4,132.0,0.007,0.0,0.008,0.0,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,9.0,0.0,137.0,136.0,138.0,11.0,1.0,1.0


In [39]:
# Separate the target column (fetal_health) from the remaining predictor columns
target_column = 'fetal_health'
X = df.drop(columns=[target_column])
y = df[target_column]

In [40]:
# Partition the data into training and testing subsets.
# Passing stratify=y asks for splits that keep the class proportions of y;
# random_state pins the shuffle so the split is repeatable.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(
    X,
    y,
    random_state=48,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts

In [41]:
# Standardize the features; the scaler is fitted on the training split only
# and then applied unchanged to both splits.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# fit() hands back the estimator, so X_scaler refers to the fitted scaler
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [42]:
# Build a 50-tree random forest with a fixed seed, then train it on the
# scaled training data
from sklearn.ensemble import RandomForestClassifier

forest_settings = {'n_estimators': 50, 'random_state': 1}
model = RandomForestClassifier(**forest_settings)
model.fit(X_train_scaled, y_train)

In [43]:
# Score the fitted model on the held-out (scaled) test split
model.score(X=X_test_scaled, y=y_test)

0.9492481203007519

In [44]:
# Make predictions for the scaled test set, then render each predicted label
# as a string with one decimal place for display
predictions = model.predict(X_test_scaled)
predictions_formatted = list(map('{:.1f}'.format, predictions))
predictions_formatted


['1.0',
 '3.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '3.0',
 '1.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '3.0',
 '3.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '2.0',
 '1.0',
 '1.0',
 '3.0',
 '3.0',
 '1.0',
 '1.0',
 '3.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '2.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '3.0',
 '1.0',
 '2.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '1.0',
 '2.0',
 '1.0',
 '1.0',
 '1.0',


In [45]:
# Build and print the classification report comparing the test labels
# with the model's predictions
from sklearn.metrics import classification_report
class_report = classification_report(y_true=y_test, y_pred=predictions)
print(class_report)

              precision    recall  f1-score   support

         1.0       0.96      0.99      0.97       414
         2.0       0.88      0.78      0.83        74
         3.0       0.97      0.89      0.93        44

    accuracy                           0.95       532
   macro avg       0.94      0.89      0.91       532
weighted avg       0.95      0.95      0.95       532



In [46]:
# Generate a confusion matrix (rows = actual class, columns = predicted class)
from sklearn.metrics import confusion_matrix

# Take the class labels from the fitted model instead of hard-coding them, and
# pass them to confusion_matrix explicitly. This guarantees one row/column per
# class the model knows, in the same order as the labels below, even if a class
# happens to be absent from y_test or predictions.
class_labels = model.classes_

cm = confusion_matrix(y_test, predictions, labels=class_labels)
cm_df = pd.DataFrame(
    cm,
    index=[f'Actual {label}' for label in class_labels],
    columns=[f'Predicted {label}' for label in class_labels],
)

cm_df

Unnamed: 0,Predicted 1.0,Predicted 2.0,Predicted 3.0
Actual 1.0,408,5,1
Actual 2.0,16,58,0
Actual 3.0,2,3,39


In [47]:
# Compute overall accuracy and macro-averaged precision on the test split,
# then report each rounded to two decimals
from sklearn.metrics import accuracy_score, precision_score

accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
precision = precision_score(y_true=y_test, y_pred=predictions, average='macro')

print(f"Accuracy score: {round(accuracy, 2)}")
print(f"Precision score: {round(precision, 2)}")

Accuracy score: 0.95
Precision score: 0.94
