Jupyter notebook to explore and evaluate the extracted features, such as volume, surface, or roughness.
Written by Dominik Waibel & Niklas Kiermeyer

In [None]:
#import dependencies
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import StratifiedKFold, ShuffleSplit, StratifiedShuffleSplit
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn import model_selection
from sklearn.metrics import precision_recall_fscore_support
import random
from sklearn.metrics import plot_confusion_matrix

from sklearn.model_selection import train_test_split
import seaborn as sns
import os
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

import copy
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestClassifier

from imblearn.over_sampling import SMOTE
from sklearn.decomposition import PCA

In [None]:
# Read the feature tables. The path order below matches the order of the
# names on the left-hand side of the unpacking assignment.
feature_csv_paths = (
    "./features/mask_features.csv",        # -> data_mask
    "./features/ShapeAE_features.csv",     # -> data_obj
    "./features//Ellipse_fit_features.csv",  # -> data_ellipse
    "./features/Cylinder_fit.csv",         # -> data_cylinder
    "./features/groundtruth_features.csv",  # -> data_gt
)
data_mask, data_obj, data_ellipse, data_cylinder, data_gt = (
    pd.read_csv(csv_path) for csv_path in feature_csv_paths
)

# Report (rows, columns) of every table as loaded, in the order
# mask, ShapeAE, ellipse, cylinder, ground truth.
for loaded_table in (data_mask, data_obj, data_ellipse, data_cylinder, data_gt):
    print(loaded_table.shape)

# Keep only the samples (identified by filename) that occur in ALL five
# feature tables, so that every later per-filename lookup finds a row in
# each table. The mask table stores its filenames in "filename_msk"; the
# other tables use "filename".
#
# The intersection is computed once over all five tables (mask included):
# a sample missing from any single table is dropped everywhere instead of
# surfacing later as a failed lookup.
common_files = (
    set(data_mask["filename_msk"])
    & set(data_obj["filename"])
    & set(data_ellipse["filename"])
    & set(data_cylinder["filename"])
    & set(data_gt["filename"])
)

# Boolean-mask filtering keeps the original row order (and any duplicate
# rows) of each table.
data_mask = data_mask[data_mask["filename_msk"].isin(common_files)]
data_obj = data_obj[data_obj["filename"].isin(common_files)]
data_ellipse = data_ellipse[data_ellipse["filename"].isin(common_files)]
data_cylinder = data_cylinder[data_cylinder["filename"].isin(common_files)]
data_gt = data_gt[data_gt["filename"].isin(common_files)]

# Shapes after filtering, same order as the report printed after loading.
print(data_mask.shape)
print(data_obj.shape)
print(data_ellipse.shape)
print(data_cylinder.shape)
print(data_gt.shape)


In [None]:
# Gather, for every row of the ShapeAE table, the volume reported by each
# method, the ground-truth volume and the mask "boundary_msk" value, all
# aligned on the filename order of data_obj.
filenames = data_obj["filename"].values


def _first_match(table, key_column, value_column, keys):
    """Return ``value_column`` of the first row matching each key, in ``keys`` order.

    The table is indexed by ``key_column`` once, so each key is resolved by
    an index lookup rather than a boolean scan over the whole table.
    Duplicate keys in ``table`` resolve to their first row.

    Raises:
        KeyError: if a key has no row in ``table`` (raised by ``.loc`` on
            current pandas).
    """
    lookup = table.drop_duplicates(subset=key_column).set_index(key_column)[value_column]
    return list(lookup.loc[list(keys)].values)


AE_vol = _first_match(data_obj, "filename", "volume", filenames)
ellipse_vol = _first_match(data_ellipse, "filename", "volume", filenames)
cylinder_vol = _first_match(data_cylinder, "filename", "volume", filenames)
gt_vol = _first_match(data_gt, "filename", "volume", filenames)
mask_area = _first_match(data_mask, "filename_msk", "boundary_msk", filenames)
fname = list(filenames)

# Relative absolute volume error per sample: |V_method - V_gt| / V_gt.
# Computed element-wise with NumPy and stored as plain lists, which is the
# container type the following cells index into and plot.
gt_vol_array = np.asarray(gt_vol)
AE_vol_error = list(np.abs((np.asarray(AE_vol) - gt_vol_array) / gt_vol_array))
ellipse_vol_error = list(np.abs((np.asarray(ellipse_vol) - gt_vol_array) / gt_vol_array))
cylinder_vol_error = list(np.abs((np.asarray(cylinder_vol) - gt_vol_array) / gt_vol_array))

# Sanity check: all three error lists should have one entry per sample.
print(len(AE_vol_error))
print(len(ellipse_vol_error))
print(len(cylinder_vol_error))

In [None]:
from scipy.stats import wilcoxon

# Mean and standard deviation of the relative volume error, in percent,
# printed in the order ShapeAE, cylinder fit, ellipse fit.
print(np.mean(AE_vol_error)*100, np.std(AE_vol_error)*100)
print(np.mean(cylinder_vol_error)*100,np.std(cylinder_vol_error)*100)
print(np.mean(ellipse_vol_error)*100,np.std(ellipse_vol_error)*100)

# Medians of the same errors, in percent, same order.
print("median")
print(np.median(AE_vol_error)*100)
print(np.median(cylinder_vol_error)*100)
print(np.median(ellipse_vol_error)*100)

# Paired Wilcoxon signed-rank tests: ShapeAE errors against each baseline.
print(wilcoxon(AE_vol_error,cylinder_vol_error))
print(wilcoxon(AE_vol_error,ellipse_vol_error))

# One row per method; the order must match the y tick labels set below.
errors_by_method = [AE_vol_error, cylinder_vol_error, ellipse_vol_error]

plt.figure(figsize=(8, 6))
# NOTE(review): showfliers / boxprops are matplotlib boxplot options;
# confirm that the installed seaborn's violinplot honours (or accepts) them.
ax = sns.violinplot(data=errors_by_method,
                    showfliers=False, color='lightgray',
                    boxprops={'facecolor': 'None'}, orient="h")
# Overlay the individual samples on top of the violins.
ax = sns.swarmplot(data=errors_by_method, color=".25", size=1.5, orient="h")

# NOTE(review): the plotted quantity is the relative volume error computed
# from the volume columns; confirm that this axis label is the intended one.
plt.xlabel('Wrongly labeled voxels', size=15)
plt.yticks([0, 1, 2], ["ShapeAE", "Cylinder", "Ellipse"], size=15)

plt.xlim(-0.15, 1.3)

# Switch the grid off explicitly. Passing the visibility positionally works
# across Matplotlib versions, whereas the keyword name for it has changed
# (it was `b`, now `visible`), and a value of None toggles rather than disables.
plt.grid(False)

plt.locator_params(axis='x', nbins=4)
plt.tight_layout()

plt.show()

In [None]:
# Print the filename of every sample whose relative ShapeAE volume error
# is above 1.5. fname and AE_vol_error are aligned index-by-index.
for sample_name, sample_error in zip(fname, AE_vol_error):
    if sample_error > 1.5:
        print(sample_name)

In [None]:
# Scatter each volume series against the mask "boundary_msk" value:
# red = ShapeAE, blue = ellipse fit, green = cylinder fit, black = ground truth.
plt.figure(figsize=(6, 6))
for volumes, colour in (
    (AE_vol, "r"),
    (ellipse_vol, "b"),
    (cylinder_vol, "g"),
    (gt_vol, "black"),
):
    plt.scatter(mask_area, volumes, s=4, c=colour, alpha=0.5)
plt.tight_layout()

# Switch the grid off explicitly. Passing the visibility positionally works
# across Matplotlib versions, whereas the keyword name for it has changed
# (it was `b`, now `visible`), and a value of None toggles rather than disables.
plt.grid(False)

# Limit both axes to roughly five tick intervals.
plt.locator_params(axis='y', nbins=5)
plt.locator_params(axis='x', nbins=5)
plt.show()

In [None]:
# Look up "organoid_z_center_of_mass" for every sample in fname, keeping
# the order of fname.
position_csv = pd.read_csv("./ShapeAE_dataset/mask_positions.csv")

# Index the table by filename once (first row wins for duplicated
# filenames) so each sample is resolved by an index lookup instead of a
# boolean scan over the whole table per sample.
z_by_filename = (
    position_csv.drop_duplicates(subset="filename")
    .set_index("filename")["organoid_z_center_of_mass"]
)

# On current pandas, .loc raises KeyError if a filename from fname has no
# row in the positions table.
z_position = list(z_by_filename.loc[list(fname)].values)

In [None]:
# Absolute distance of each sample's z centre of mass from a reference plane.
# NOTE(review): 22 is presumably the reference (e.g. central/focal) z-slice
# of the stack; confirm against the acquisition setup.
Z_REFERENCE = 22
z_position_shifted = np.abs(np.asarray(z_position) - Z_REFERENCE)

# Relative ShapeAE volume error (x) against that z distance (y).
plt.figure(figsize=(6, 6))
plt.scatter(AE_vol_error, z_position_shifted, s=4, c="black", alpha=0.5)
plt.tight_layout()

# Switch the grid off explicitly. Passing the visibility positionally works
# across Matplotlib versions, whereas the keyword name for it has changed
# (it was `b`, now `visible`), and a value of None toggles rather than disables.
plt.grid(False)

# Limit both axes to roughly five tick intervals.
plt.locator_params(axis='y', nbins=5)
plt.locator_params(axis='x', nbins=5)
plt.xlim(-0.05, 1.3)

plt.show()