In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from towbintools.foundation.worm_features import compute_worm_length
from towbintools.foundation.worm_features import compute_worm_volume
from towbintools.foundation.worm_features import compute_worm_type_features
from tifffile import imread
import matplotlib.pyplot as plt
from skimage.measure import regionprops
from skimage.measure import shannon_entropy

In [None]:
# Input annotation table and the base name used for the saved model file.
labels_csv = "./annotate/labels.csv"
output_model_name = 'worm_type_classifier'

# Class names are kept as a NumPy array so that `classes == name` produces an
# element-wise boolean mask (used to build the one-hot labels).
classes = np.array(["worm", "egg", "error"])

# Load the annotations and hold out 25% of the rows for testing; the fixed
# random_state keeps the split identical across runs.
database = pd.read_csv(labels_csv)
train_dataset, test_dataset = train_test_split(
    database,
    test_size=0.25,
    random_state=42,
)

In [None]:
def get_features_and_labels(dataframe, classes):
    """Compute a feature vector and a one-hot label for every annotated image.

    Parameters
    ----------
    dataframe : iterable of rows
        Rows are indexed by position: ``row[0]`` is the image path handed to
        ``imread`` and ``row[1]`` is the class label compared with ``classes``.
    classes : numpy.ndarray
        Class names; ``classes == label`` must yield an element-wise mask.

    Returns
    -------
    features : numpy.ndarray
        One entry per row, as returned by ``compute_worm_type_features``.
    labels : numpy.ndarray
        One integer mask per row, with 1 where ``classes`` equals the row's
        label and 0 elsewhere.
    """
    feature_rows = []
    label_rows = []

    for row in dataframe:
        path = row[0]
        class_name = row[1]

        # Only a path tagged "20x" (and not "10x") gets the smaller pixel
        # size; every other path, tagged or not, uses 0.65.
        # NOTE(review): units are presumably micrometres per pixel -- confirm.
        is_20x = "10x" not in path and "20x" in path
        pixelsize = 0.325 if is_20x else 0.65

        image = imread(path)
        one_hot = (classes == class_name).astype(int)
        feature_rows.append(compute_worm_type_features(image, pixelsize))
        label_rows.append(one_hot)

    return np.array(feature_rows), np.array(label_rows)

# Training split: feature matrix, one-hot labels, and the matching DMatrix.
train_features, train_labels = get_features_and_labels(
    train_dataset.values, classes
)
dtrain = xgb.DMatrix(data=train_features, label=train_labels)

# Test split, built the same way.
test_features, test_labels = get_features_and_labels(
    test_dataset.values, classes
)
dtest = xgb.DMatrix(data=test_features, label=test_labels)

# Named (DMatrix, name) pairs.
# NOTE(review): presumably meant as the `evals` list for xgb.train; it is not
# used by the cells visible here.
evallist = [(dtrain, 'train'), (dtest, 'eval')]

In [None]:
param = {'max_depth': 8, 'eta': 0.03, 'objective': "multi:softmax", 'num_class':3}
param['nthread'] = 32
param['eval_metric'] = 'rmse'

clf = xgb.XGBClassifier(n_estimators=100, kwargs=param, booster='gbtree')

clf.fit(train_features, train_labels)
clf.save_model(f'{output_model_name}.json')

In [None]:
pred_on_test_proba = clf.predict_proba(test_features)
errors = abs(pred_on_test_proba - test_labels)
print('Mean Absolute Error:', round(np.mean(errors), 6))

pred_class = np.argmax(pred_on_test_proba, axis=1)
pred_on_test = np.zeros(pred_on_test_proba.shape )
pred_on_test[np.arange(pred_on_test.shape[0]), pred_class] = 1
mean_class_accuracy_test = clf.score(test_features, test_labels)
print('Mean Class Accuracy Train:', round(np.mean(mean_class_accuracy_test), 6))

In [None]:
# A prediction is correct when its one-hot row equals the ground-truth row in
# every position.
correct_predictions = np.array(
    [all(pred == ground_truth) for pred, ground_truth in zip(pred_on_test, test_labels)]
)
wrong = np.logical_not(correct_predictions)
wrong_idx = np.where(wrong)[0]

print(wrong_idx)

# Show every misclassified test image with its predicted and true class name.
for idx in wrong_idx:
    image_path = test_dataset.values[idx][0]
    plt.imshow(imread(image_path))

    predicted_name = classes[np.where(pred_on_test[idx] == 1)[0].squeeze()]
    true_name = classes[np.where(test_labels[idx] == 1)[0].squeeze()]
    plt.title("Predicted: {}, True: {}".format(predicted_name, true_name))
    plt.show()