In [1]:
import pandas as pd

fetal_health = pd.read_csv('fetal_health.csv')

fetal_health['histogram_tendency'] = fetal_health['histogram_tendency'].astype(str)
fetal_health = pd.get_dummies(fetal_health)
fetal_health.fetal_health = fetal_health.fetal_health.astype(int).astype(str) #make outcome categorical

In [2]:
from sklearn.model_selection import train_test_split

fetal_health, fetal_health_test = train_test_split(fetal_health, 
                                    test_size = .1) ## withold our test set


In [33]:
import numpy as np
from sklearn.utils import resample
from sklearn.metrics import f1_score

def accuracy_by_class(predicted, actual):
    out_vector = []
    out_vector.append(np.mean(predicted == actual))
    for i in range(1, 4):
        out_vector.append(np.mean(predicted[actual == str(i)] == str(i)))
    out_vector.append(np.mean(out_vector[1:4]))
    out_vector.append(f1_score(y_true = actual, y_pred = predicted, average = "macro"))
    return(out_vector)
    
    
    
def rebalance_classes(input_df):
    class_1 = input_df.loc[input_df.fetal_health == '1']
    class_2 = input_df.loc[input_df.fetal_health == '2']
    class_3 = input_df.loc[input_df.fetal_health == '3']
    
    class_2 = resample(class_2, n_samples = class_1.shape[0])
    class_3 = resample(class_3, n_samples = class_1.shape[0])
    
    output_df = pd.concat([class_1, class_2, class_3], ignore_index = True)
    return(output_df)






In [34]:
from sklearn.ensemble import RandomForestClassifier
fetal_health = rebalance_classes(fetal_health)
RandomForest_100 = RandomForestClassifier(n_estimators = 100)

rf_100_fit = RandomForest_100.fit(X = fetal_health.drop('fetal_health', axis = 1), y = fetal_health.fetal_health)
rf_100_preds = rf_100_fit.predict(X= fetal_health_test.drop('fetal_health', axis = 1))



In [40]:
## Process for KNN
from sklearn.preprocessing import StandardScaler

fetal_health_train = rebalance_classes(fetal_health)

scale = StandardScaler()

fetal_health_scaler = scale.fit(X = fetal_health.drop('fetal_health', axis = 1).to_numpy())

fetal_health_x = fetal_health_scaler.transform(X = fetal_health_train.drop('fetal_health', axis = 1).to_numpy())

fetal_health_test_x = fetal_health_scaler.transform(X = fetal_health_test.drop('fetal_health', axis = 1).to_numpy())



In [43]:
## fit knn model

from sklearn.neighbors import KNeighborsClassifier

knn_50 = KNeighborsClassifier(n_neighbors= 50)

knn_50_fit = knn_50.fit(X = fetal_health_x, y = fetal_health.fetal_health)

knn_50_preds = knn_50_fit.predict(X = fetal_health_test_x)

In [44]:
def print_metrics(final_accuracy):
    print("Overall accuracy: {}".format(final_accuracy[0]))
    for i in range(1,4):
        print("Accuracy for class {}: {}".format(i, final_accuracy[i]))
    
    print("Weighted Accuracy: {}".format(final_accuracy[4]))

    print("Macro F1 score: {}".format(final_accuracy[5]))

In [45]:
print_metrics(accuracy_by_class(knn_50_preds, fetal_health_test.fetal_health))

Overall accuracy: 0.8450704225352113
Accuracy for class 1: 0.838150289017341
Accuracy for class 2: 0.9523809523809523
Accuracy for class 3: 0.7894736842105263
Weighted Accuracy: 0.8600016418696065
Macro F1 score: 0.7703373015873015


In [46]:
print_metrics(accuracy_by_class(rf_100_preds, fetal_health_test.fetal_health))

Overall accuracy: 0.9577464788732394
Accuracy for class 1: 0.9826589595375722
Accuracy for class 2: 0.8571428571428571
Accuracy for class 3: 0.8421052631578947
Weighted Accuracy: 0.8939690266127748
Macro F1 score: 0.9095021702880564
