In [None]:
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import os
import re
import pandas as pd
import matplotlib as mpl
from tifffile import imread

import yaml
       

In [None]:
# Directory that receives the figures written by this notebook.
output_folder = Path('output')
# mkdir(exist_ok=True) is idempotent and avoids the check-then-create race of
# testing is_dir() before calling os.makedirs().
output_folder.mkdir(parents=True, exist_ok=True)

In [None]:
def readDataset(datasetName, verbose=False):
    """Load every ``*.tif`` image and mask of a dataset, grouped by split.

    Files are looked up under
    ``<datasetName>/<train|valid|test>/<images|masks>/*.tif``.

    Parameters
    ----------
    datasetName : str or path-like
        Root folder of the dataset.
    verbose : bool, optional
        If true, print the glob pattern of the training images.

    Returns
    -------
    X, Y : dict
        ``X`` (from ``images``) and ``Y`` (from ``masks``) map each split name
        to the list of objects returned by ``imread``, in sorted file order.
    """
    splits = ['train', 'valid', 'test']
    kinds = ['images', 'masks']

    if verbose:
        print(os.path.join(datasetName, splits[0], kinds[0], '*.tif'))

    root = Path(datasetName)
    loaded = []
    # Same read order as a nested comprehension: all images first, then all masks.
    for kind in kinds:
        per_split = {}
        for split in splits:
            tif_paths = sorted((root / split / kind).glob('*.tif'))
            per_split[split] = [imread(tif_path) for tif_path in tif_paths]
        loaded.append(per_split)

    X, Y = loaded
    return X, Y

In [None]:
# Load the project configuration; later cells read model-name lists from it.
# NOTE(review): absolute, machine-specific path - the notebook only runs where
# this file exists; consider a configurable base directory.
with open('D:/Users/Eric/src/stardist_mpcdf/config.yaml', 'r') as f:
    # NOTE(review): FullLoader can construct more than plain data types; if the
    # config is plain YAML, yaml.safe_load would be the safer choice.
    config = yaml.load(f, Loader=yaml.FullLoader)

# NOTE(review): datasetname and outputname are not referenced by any visible cell.
datasetname = 'full_semimanual-raw'
outputname = 'test.png'

# Cellpose model names to analyse, taken from the config entry below.
final_models = config['cellpose_models_raw_full_low']
# Bare expression that is not the last line of its cell: it displays nothing.
final_models

def get_minor_models(modelname):
    """Return the checkpoint names of a model, from the final epoch downwards.

    ``modelname`` is expected to end in ``_ep<epochs>_dep<delta_epochs>``.
    The result contains one name per epoch in
    ``range(epochs, 1, -delta_epochs)``, obtained by substituting the epoch
    number in the ``_ep<epochs>`` tag.

    Parameters
    ----------
    modelname : str
        Name of the final model, e.g. ``'..._rep1_ep500_dep125'``.

    Returns
    -------
    list of str
        Checkpoint names, starting with ``modelname`` itself.

    Raises
    ------
    ValueError
        If the epoch or delta-epoch part is not an integer, or the delta is 0.
    """
    tmp = modelname.split('_ep')[-1].split('_dep')
    epochs = int(tmp[0])
    delta_epochs = int(tmp[-1])

    # Substitute the tag that was actually parsed instead of a hard-coded
    # '_ep500', so models trained for a different number of epochs work too.
    final_tag = f'_ep{tmp[0]}'

    return [modelname.replace(final_tag, f'_ep{ep}')
            for ep in range(epochs, 1, -delta_epochs)]

# Example of the naming scheme; the loop below overwrites this value.
modelname = 'cellpose_patches-semimanual-raw-64x128x128_True_25prc_rep1_ep500_dep125'

# Expand every final model into the list of its per-epoch checkpoints.
models = []
for modelname in final_models:
    models.extend(get_minor_models(modelname))

# One accuracy CSV per checkpoint, inside the checkpoint's data folder.
data_root = Path(r'Y:\Eric\2021_Iterative_Biofilm_Annotation\data')
accuracy_files = [data_root / m / 'accuracy_manual_raw_v3.csv' for m in models]
#accuracy_files = [f'data/{m}/accuracy_full_semimanual-raw.csv' for m in models]

In [None]:
# Keep only checkpoints whose accuracy CSV exists and continue with their folders.
accuracy_files = [Path(f).parent for f in accuracy_files if Path(f).is_file()]

# Raw string: '\d' and '\.' are regex escapes, not Python string escapes.
p = r'.*True_(?P<percentage>[\d\.]+)prc_rep(?P<replicate>\d+)_ep(?P<epoch>\d+)_dep.*'
pattern = re.compile(p)

# Collect all rows first and build the frame once: DataFrame.append was
# removed in pandas 2.0, and appending row by row copies the frame each time.
records = []
for f in accuracy_files:
    match = pattern.match(str(f))
    if match is None:
        raise ValueError(f'Cannot parse percentage/replicate/epoch from model folder: {f}')
    records.append({'path': str(f), 'type': 'cellpose', **match.groupdict()})

# dtype=object keeps every column as generic objects, exactly as the frame
# looked when it was grown from an empty DataFrame; the regex captures stay
# strings and are cast in later cells.
df = pd.DataFrame(
    records,
    columns=['path', 'type', 'percentage', 'replicate', 'epoch', 'cell_number', 'accuracy_manual', 'accuracy_semimanual'],
    dtype=object,
)

In [None]:
# Display the table built so far.
df

# Get stardist accuracies

In [None]:
# Stardist model names from the 'stardist_models_dependency' config entry
# (the trailing comment names an alternative key).
stardist_models = config['stardist_models_dependency'] # stardist_models_raw

In [None]:
# Candidate accuracy CSV of every stardist model ...
stardist_csvs = (
    Path(f'Y:/Eric/2021_Iterative_Biofilm_Annotation/data/{m}/accuracy_full_semimanual-raw.csv')
    for m in stardist_models
)
# ... reduced to the folders of those models whose CSV exists.
accuracy_files = [csv_path.parent for csv_path in stardist_csvs if csv_path.is_file()]

In [None]:
# Display the stardist model folders that have an accuracy CSV.
accuracy_files

In [None]:
# Raw string: '\d' and '\.' are regex escapes, not Python string escapes.
p = r'.*True_(?P<percentage>[\d\.]+)prc_rep(?P<replicate>\d+)'
pattern = re.compile(p)

# Stardist folder names carry no epoch tag; every row is recorded as epoch 500.
records = []
for f in accuracy_files:
    match = pattern.match(str(f))
    if match is None:
        raise ValueError(f'Cannot parse percentage/replicate from model folder: {f}')
    records.append({'path': str(f), 'type': 'stardist', 'epoch': 500, **match.groupdict()})

# DataFrame.append was removed in pandas 2.0; add all rows with one concat.
# NOTE: re-running this cell adds the stardist rows again.
if records:
    new_rows = pd.DataFrame(records, columns=df.columns, dtype=object)
    df = pd.concat([df, new_rows], ignore_index=True)

In [None]:
# Display the table after the stardist rows were added.
df

In [None]:
# Only the masks (second return value of readDataset) are used here.
Y = readDataset(r'Y:\Eric\2021_Iterative_Biofilm_Annotation\datasets\patches-semimanual-raw-64x128x128')[1]

# Discard the test and validation masks.
Y['test'] = []
Y['valid'] = []

# Within each split keep only the masks whose pixel sum is positive.
for s, masks in Y.items():
    Y[s] = [y for y in masks if np.sum(y) > 0]

In [None]:
# Distinct values per training mask, minus one.
# NOTE(review): presumably the "minus one" discounts a background label that
# is assumed to be present in every mask - confirm.
N_cells = [np.unique(y).size - 1 for y in Y['train']]

In [None]:
# For every model: count the cells in its training subset and read its
# accuracy at threshold 0.5 from the two accuracy CSVs.
for index, row in df.iterrows():
    # NOTE(review): `seed` is computed but the generator below is seeded with
    # the replicate for every model type. Left unchanged because switching to
    # `seed` would alter the stardist cell numbers - confirm the intent.
    seed = int(row.replicate) if row.type == 'cellpose' else 42
    rng = np.random.RandomState(int(row.replicate))
    ind = rng.permutation(len(Y['train']))
    # Number of training patches that corresponds to the model's percentage (at least one).
    n_val = max(1, int(round(float(row.percentage) / 100 * len(ind))))
    # df.at writes into the frame itself. The chained form
    # df.iloc[index][col] = ... assigns to a temporary row and can be lost.
    df.at[index, 'cell_number'] = np.sum([N_cells[i] for i in ind[:n_val]])

    for data_name, col in zip(['accuracy_manual_raw_v3.csv', 'accuracy_full_semimanual-raw.csv'], ['accuracy_manual', 'accuracy_semimanual']):
        csv_path = Path(row.path) / data_name
        if csv_path.is_file():
            # Row 0 of the file is compared against 0.5; the matching entry of
            # row 1 is stored as the accuracy.
            data = np.genfromtxt(csv_path, delimiter=' ')
            df.at[index, col] = data[1][np.where(data[0]==0.5)[0]][0]
        else:
            df.at[index, col] = np.nan

df

In [None]:
# Replicate identifiers present among the cellpose rows.
df[df.type == 'cellpose'].replicate.unique()

In [None]:
# Bare expression in the middle of a cell: evaluated but not displayed.
df.epoch.unique()

# Make replicate and epoch numeric for comparisons and colour mapping.
df.replicate = df.replicate.astype(int)
df.epoch = df.epoch.astype(float)

# One marker per replicate, looked up as marker_dict[replicate - 1].
marker_dict = ['^', 'o', 's', 'P', 'd']

# Colour encodes the epoch on a fixed 0-500 scale.
cmap = mpl.cm.cool
norm = mpl.colors.Normalize(vmin=0, vmax=500)

f, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 5))

is_cellpose = (df.type == 'cellpose')
for i in df[is_cellpose].replicate.unique():
    selection = (df.replicate == i) & is_cellpose
    point_style = dict(cmap=cmap, norm=norm, c=df.epoch[selection],
                       marker=marker_dict[i-1], label=f'Replicate {i}')
    s1 = ax1.scatter(df.cell_number[selection], df.accuracy_manual[selection], **point_style)
    s2 = ax2.scatter(df.cell_number[selection], df.accuracy_semimanual[selection], **point_style)

# Both panels share the same decoration; only the title differs.
for ax, title in ((ax1, 'manual_raw_v3'), (ax2, 'full_semimanual-raw')):
    ax.legend()
    ax.set_xlabel('Number of cells in trainingset')
    ax.set_ylabel('Accuracy (at IoU = 0.5)')
    ax.set_title(title)

# Give both panels the same y-range.
y_min = min(ax1.get_ylim()[0], ax2.get_ylim()[0])
y_max = max(ax1.get_ylim()[1], ax2.get_ylim()[1])
for ax in (ax1, ax2):
    ax.set_ylim(y_min, y_max)

cb1 = f.colorbar(s1, ax=ax1, cmap=cmap, norm=norm)
cb1.set_label('Epochs')

cb2 = f.colorbar(s2, ax=ax2, cmap=cmap, norm=norm)
cb2.set_label('Epochs')

plt.savefig(str(output_folder / 'accuracy_cellpose_data_dependence_all.png'))

In [None]:
# Bare expression in the middle of a cell: evaluated but not displayed.
df.epoch.unique()

# Make replicate and epoch numeric for comparisons and plotting.
df.replicate = df.replicate.astype(int)
df.epoch = df.epoch.astype(float)

# One marker per replicate, looked up as marker_dict[replicate - 1].
marker_dict = ['^', 'o', 's', 'P', 'd']

is_cellpose = (df.type == 'cellpose')

# Colour encodes the number of training cells. The limits are fixed over ALL
# cellpose rows up front: a shared Normalize() without limits is autoscaled
# by the first scatter call only, so later replicates would be mapped with
# the first replicate's range.
cmap = mpl.cm.cool
cell_counts = df.cell_number[is_cellpose].astype(float)
norm = mpl.colors.Normalize(vmin=cell_counts.min(), vmax=cell_counts.max())

f, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 5))

for i in df[is_cellpose].replicate.unique():

    selection = (df.replicate == i) & is_cellpose
    colors = df.cell_number[selection].astype(float)
    s1 = ax1.scatter(df.epoch[selection], df.accuracy_manual[selection], cmap=cmap, norm=norm, c=colors, marker=marker_dict[i-1], label=f'Replicate {i}')
    s2 = ax2.scatter(df.epoch[selection], df.accuracy_semimanual[selection], cmap=cmap, norm=norm, c=colors, marker=marker_dict[i-1], label=f'Replicate {i}')

# The x-axis shows the epoch and the colour shows the cell number, so the
# labels are set accordingly.
ax1.legend()
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Accuracy (at IoU = 0.5)')
ax1.set_title('manual_raw_v3')

ax2.legend()
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy (at IoU = 0.5)')
ax2.set_title('full_semimanual-raw')

# Common y-range of both panels; computed but deliberately not applied here.
y_min = np.min([ax1.get_ylim()[0], ax2.get_ylim()[0]])
y_max = np.max([ax1.get_ylim()[1], ax2.get_ylim()[1]])

#ax1.set_ylim(y_min, y_max)
#ax2.set_ylim(y_min, y_max)

cb1 = f.colorbar(s1, ax=ax1)
cb1.set_label('Number of cells in trainingset')

cb2 = f.colorbar(s2, ax=ax2)
cb2.set_label('Number of cells in trainingset')

# Own file name: 'accuracy_cellpose_data_dependence_all.png' is already
# written by the accuracy-vs-cell-number figure and would be overwritten.
plt.savefig(str(output_folder / 'accuracy_cellpose_epoch_dependence_all.png'))

In [None]:
# Bare expression in the middle of a cell: evaluated but not displayed.
df.epoch.unique()

# Make replicate and epoch numeric for the comparisons below.
df.replicate = df.replicate.astype(int)
df.epoch = df.epoch.astype(float)

# One marker per replicate, looked up as marker_dict[replicate - 1].
marker_dict = ['^', 'o', 's', 'P', 'd']

# Defined for parity with the scatter figures; not used by the line plots below.
cmap = mpl.cm.cool
norm = mpl.colors.Normalize(vmin=0, vmax=500)

f, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 5))

# One line per replicate, restricted to cellpose models at epoch 500.
for i in [1, 2, 3]:
    selection = (df.replicate == i) & (df.epoch == 500) & (df.type == 'cellpose')
    line_style = dict(marker=marker_dict[i-1], label=f'Replicate {i}')
    s1 = ax1.plot(df.cell_number[selection], df.accuracy_manual[selection], **line_style)
    s2 = ax2.plot(df.cell_number[selection], df.accuracy_semimanual[selection], **line_style)

# Both panels share the same decoration; only the title differs.
for ax, title in ((ax1, 'manual_raw_v3'), (ax2, 'full_semimanual-raw')):
    ax.legend()
    ax.set_xlabel('Number of cells in trainingset')
    ax.set_ylabel('Accuracy (at IoU = 0.5)')
    ax.set_title(title)

# Give both panels the same y-range.
y_min = min(ax1.get_ylim()[0], ax2.get_ylim()[0])
y_max = max(ax1.get_ylim()[1], ax2.get_ylim()[1])
for ax in (ax1, ax2):
    ax.set_ylim(y_min, y_max)

plt.savefig(str(output_folder / 'accuracy_cellpose_data_dependence.png'))

In [None]:
# Cast the accuracy and percentage columns to float so they can be compared
# numerically and aggregated in the following cells.
df = df.astype({'accuracy_manual': 'float', 'accuracy_semimanual':'float', 'percentage':'float'})

In [None]:
# Mean accuracies per (percentage, model type) at epoch 500.
# Several columns must be selected with a list; the bare tuple form
# ['a', 'b'] on a groupby is no longer accepted by current pandas.
df[df.epoch==500].groupby(['percentage', 'type'], as_index=False)[['accuracy_manual', 'accuracy_semimanual']].mean()

In [None]:
# Mean and standard deviation of the semimanual accuracy per
# (percentage, model type) at epoch 500. To summarise the manual annotation
# instead, aggregate 'accuracy_manual'.
# Named aggregation replaces the {'new_name': func} dict, which pandas >= 1.0
# rejects on a single column ("nested renamer is not supported").
df_ = (
    df[(df.epoch==500)]
    .groupby(['percentage', 'type'])['accuracy_semimanual']
    .agg(acc_std='std', acc_mean='mean')
    .reset_index()
)

In [None]:
# Per-group means of the numeric columns at epoch 500, grouped exactly like
# df_ so both frames have the same rows in the same order.
# The columns are named explicitly and cast to float: averaging every column
# through a lambda also hits the string column 'path', which current pandas
# reports as an error instead of silently dropping it.
numeric_cols = ['replicate', 'epoch', 'cell_number', 'accuracy_manual', 'accuracy_semimanual']
df_n = (
    df[(df.epoch==500)]
    .astype({col: float for col in numeric_cols})
    .groupby(['percentage', 'type'], as_index=False)[numeric_cols]
    .mean()
)

In [None]:
# Display the per-group means.
df_n

In [None]:
f, ax1 = plt.subplots(1)

# Stardist: mean accuracy with standard-deviation error bars.
selection = (df_n.type == 'stardist')
stats = df_[selection]
ax1.errorbar(df_n.cell_number[selection], stats['acc_mean'], yerr=stats['acc_std'], label='stardist')

# Cellpose, all rows: dashed and without legend entry. The data line is kept
# in `l` so its colour can be reused.
selection = (df_n.type == 'cellpose')
stats = df_[selection]
l = ax1.errorbar(df_n.cell_number[selection], stats['acc_mean'],
                 yerr=stats['acc_std'], ls='--')[0]

# Cellpose, percentage <= 25 and a defined mean: solid, same colour, labelled.
selection = (df_n.type == 'cellpose') & (df_n.percentage <= 25) & (np.logical_not(df_.acc_mean.isnull()))
stats = df_[selection]

print(stats['acc_mean'])

ax1.errorbar(df_n.cell_number[selection], stats['acc_mean'],
             yerr=stats['acc_std'], color=l.get_color(), ls='-',
             label='cellpose')

ax1.set_xlabel('Cell number')
ax1.set_ylabel('accuracy [a.u.]')
ax1.legend()
ax1.grid()

plt.savefig(str(output_folder / 'data_dependence_full.png'))
plt.savefig(str(output_folder / 'data_dependence_full.svg'))

In [None]:
f, ax1 = plt.subplots(1)

# Stardist, percentage <= 25: mean accuracy with standard-deviation error bars.
selection = (df_n.type == 'stardist') & (df_n.percentage <= 25)

ax1.errorbar(df_n[selection].cell_number, df_[selection]['acc_mean'], yerr=df_[selection]['acc_std'], label='stardist')

# Cellpose, percentage <= 25 and a defined mean.
selection = (df_n.type == 'cellpose') & (df_n.percentage <= 25) & (np.logical_not(df_.acc_mean.isnull()))

# 'C1' is the second colour of the active colour cycle - the colour the
# cellpose line receives in the full data-dependence figure. Naming it here
# removes the dependency on the variable `l` created by another cell.
ax1.errorbar(df_n[selection].cell_number, df_[selection]['acc_mean'], yerr=df_[selection]['acc_std'], color='C1', ls='-', label='cellpose')

ax1.set_xlabel('Cell number')
ax1.set_ylabel('accuracy [a.u.]')
ax1.legend()
ax1.grid()

plt.savefig(str(output_folder / 'data_dependence_limited.png'))
plt.savefig(str(output_folder / 'data_dependence_limited.svg'))

In [None]:
# For every model: count the cells in its training subset and read its
# accuracy at threshold 0.5 from the two accuracy CSVs.
for index, row in df.iterrows():
    # NOTE(review): `seed` is computed but the generator below is seeded with
    # the replicate for every model type. Left unchanged because switching to
    # `seed` would alter the stardist cell numbers - confirm the intent.
    seed = int(row.replicate) if row.type == 'cellpose' else 42
    rng = np.random.RandomState(int(row.replicate))
    ind = rng.permutation(len(Y['train']))
    # Number of training patches that corresponds to the model's percentage (at least one).
    n_val = max(1, int(round(float(row.percentage) / 100 * len(ind))))
    # df.at writes into the frame itself. The chained form
    # df.iloc[index][col] = ... assigns to a temporary row and can be lost.
    df.at[index, 'cell_number'] = np.sum([N_cells[i] for i in ind[:n_val]])

    for data_name, col in zip(['accuracy_manual_raw_v3.csv', 'accuracy_full_semimanual-raw.csv'], ['accuracy_manual', 'accuracy_semimanual']):
        csv_path = Path(row.path) / data_name
        if csv_path.is_file():
            # Row 0 of the file is compared against 0.5; the matching entry of
            # row 1 is stored as the accuracy.
            data = np.genfromtxt(csv_path, delimiter=' ')
            df.at[index, col] = data[1][np.where(data[0]==0.5)[0]][0]
        else:
            df.at[index, col] = np.nan

df

In [None]:


# read default cellpose iterative training
# (cellpose models trained on 100 % of the data, evaluated at epoch 500)
cellpose_vals = []
tau_vals = None
for _, row in df[(df.type=='cellpose') & (df.percentage == 100) & (df.epoch==500)].iterrows():
    acc_file = Path(row.path) / 'accuracy_full_semimanual-raw.csv'
    if not acc_file.is_file():
        # A model without this evaluation cannot contribute to the average.
        continue
    data = np.genfromtxt(str(acc_file), delimiter=' ')
    tau_vals = data[0]
    cellpose_vals.append(data[1])

# read horovod
# The folder separator must be '/': in a normal string literal '\a' is the
# BEL control character, so a pattern written with '\accuracy...' never matches.
# NOTE(review): 'data' is relative to the working directory, unlike the
# absolute data folder used for the other models - confirm.
acc_files_horovod = sorted(Path('data').glob('horovod*prc100*/accuracy_full_semimanual-raw.csv'))

horovod_vals = []
for acc_file in acc_files_horovod:
    data = np.genfromtxt(str(acc_file), delimiter=' ')
    tau_vals = data[0]
    horovod_vals.append(data[1])

# Fail with a clear message instead of a shape error inside the plot call.
if not cellpose_vals:
    raise FileNotFoundError('No accuracy_full_semimanual-raw.csv found for the cellpose 100 % models')
if not horovod_vals:
    raise FileNotFoundError('No accuracy_full_semimanual-raw.csv found under data/horovod*prc100*')

# Row 0 of the last file read provides the x-axis.
# NOTE(review): assumes all files share the same row 0 - confirm.

horovod_mean = np.mean(horovod_vals, axis=0)
cellpose_mean = np.mean(cellpose_vals, axis=0)

horovod_std =  np.std(horovod_vals, axis=0)
cellpose_std =  np.std(cellpose_vals, axis=0)

f, ax = plt.subplots(1)
h, = ax.plot(tau_vals, horovod_mean, label='horovod')
c, = ax.plot(tau_vals, cellpose_mean, label='cellpose')

# Shaded band: mean +/- one standard deviation across the loaded files.
ax.fill_between(tau_vals, horovod_mean - horovod_std, horovod_mean + horovod_std,
    color=h.get_color(), alpha=0.2)

ax.fill_between(tau_vals, cellpose_mean - cellpose_std, cellpose_mean + cellpose_std,
    color=c.get_color(), alpha=0.2)

# NOTE(review): labels follow the other figures, where the accuracy is read
# at the row-0 value 0.5 and labelled "at IoU = 0.5" - confirm.
ax.set_xlabel('IoU threshold')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid()

In [None]:
# Bare expression in the middle of a cell: evaluated but not displayed.
df.epoch.unique()

# Make replicate and epoch numeric for the comparisons below.
df.replicate = df.replicate.astype(int)
df.epoch = df.epoch.astype(float)

# One marker per replicate, looked up as marker_dict[replicate - 1].
marker_dict = ['^', 'o', 's', 'P', 'd']

# Defined for parity with the scatter figures; not used by the line plots below.
cmap = mpl.cm.cool
norm = mpl.colors.Normalize(vmin=0, vmax=500)

f, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 5))

# One line per replicate, restricted to cellpose models at epoch 500.
for i in [1, 2, 3]:

    selection = (df.replicate == i) & (df.epoch == 500) & (df.type == 'cellpose')
    s1 = ax1.plot(df.cell_number[selection], df.accuracy_manual[selection], marker=marker_dict[i-1], label=f'Replicate {i}')
    s2 = ax2.plot(df.cell_number[selection], df.accuracy_semimanual[selection] , marker=marker_dict[i-1], label=f'Replicate {i}')

ax1.legend()
ax1.set_xlabel('Number of cells in trainingset')
ax1.set_ylabel('Accuracy (at IoU = 0.5)')
ax1.set_title('manual_raw_v3')

ax2.legend()
ax2.set_xlabel('Number of cells in trainingset')
ax2.set_ylabel('Accuracy (at IoU = 0.5)')
ax2.set_title('full_semimanual-raw')

# Give both panels the same y-range.
y_min = np.min([ax1.get_ylim()[0], ax2.get_ylim()[0]])
y_max = np.max([ax1.get_ylim()[1], ax2.get_ylim()[1]])

ax1.set_ylim(y_min, y_max)
ax2.set_ylim(y_min, y_max)

# Nothing else in the notebook creates 'results', so create it here;
# savefig does not create missing directories.
results_folder = Path('results')
results_folder.mkdir(parents=True, exist_ok=True)
plt.savefig(str(results_folder / 'accuracy_cellpose_data_dependence.png'))