# Disector volumes
Read the newly counted disector volumes

In [1]:
#Load the data and set up notebook
import platform
import glob
import os
import pandas
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn

In [2]:
plt.rc('image', cmap='gray', interpolation='nearest')  # Display all images in b&w
plt.rcParams['figure.figsize'] = (16, 9)  # Size up figures a bit
plt.rcParams['savefig.transparent'] = True  # Save figures with transparent background

In [3]:
# We copied everything from nas_schittny and the terastation to 'fast SSD'.
# Load the data from there
if 'debian' in platform.dist():
    drive = '/media/habi/Fast_SSD/'
else:
    drive = '\\\\anatera4\\'
# Load the data from this folder
RootPath = drive + os.path.join('Acini')
print('We are loading all the data from %s' % RootPath)

We are loading all the data from /media/habi/Fast_SSD/Acini


In [4]:
# Get a list of all the STEPanizer export files from Eveline
# Based on https://stackoverflow.com/a/14798263
StepanizerFiles_Eveline = sorted(glob.glob(os.path.join(RootPath, '**/*201[1234567]*.xls'), recursive=True))

In [5]:
print('Eveline counted the alveoli in %s acini' % len(StepanizerFiles_Eveline))

Eveline counted the alveoli in 287 acini


In [6]:
Eveline = pandas.DataFrame({'Location': StepanizerFiles_Eveline})
Eveline['Filename'] = [os.path.basename(f) for f in StepanizerFiles_Eveline]
Eveline['Beamtime'] = [os.path.dirname(f).split('Acini')[1].split(os.sep)[1] for f in StepanizerFiles_Eveline]
Eveline['Sample'] = [os.path.basename(f).split('-acinus')[0][1:] for f in StepanizerFiles_Eveline]
Eveline['Animal'] = [os.path.basename(f).split('_R108C')[1].split('mrg-')[0][:3] for f in StepanizerFiles_Eveline]
Eveline['Day'] = [int(os.path.basename(f).split('_R108C')[1].split('mrg-')[0][:2]) for f in StepanizerFiles_Eveline]
Eveline['Acinus'] = [int(os.path.basename(f).split('acinus')[1].split('_')[0]) for f in StepanizerFiles_Eveline]

In [7]:
# Get a list of *all* the excel files I counted are from the STEPanizer
# Differing to the ones from Eveline, we only have '2018' in the file name...
StepanizerFiles_David = sorted(glob.glob(os.path.join(RootPath, '**/*2018*.xls'), recursive=True))

In [8]:
print('David assessed the disector volume in %s acini' % len(StepanizerFiles_David))

David assessed the disector volume in 142 acini


In [9]:
David = pandas.DataFrame({'Location': StepanizerFiles_David})
David['Filename'] = [os.path.basename(f) for f in StepanizerFiles_David]
David['Beamtime'] = [os.path.dirname(f).split('Acini')[1].split(os.sep)[1] for f in StepanizerFiles_David]
David['Sample'] = [os.path.basename(f).split('-acinus')[0][1:] for f in StepanizerFiles_David]
David['Animal'] = [os.path.basename(f).split('_R108C')[1].split('mrg-')[0][:3] for f in StepanizerFiles_David]
David['Day'] = [int(os.path.basename(f).split('_R108C')[1].split('mrg-')[0][:2]) for f in StepanizerFiles_David]
David['Acinus'] = [int(os.path.basename(f).split('acinus')[1].split('_')[0]) for f in StepanizerFiles_David]

In [10]:
David.groupby(by=['Day', 'Animal'])['Acinus'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
Day,Animal,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4,04A,21.0,47.666667,24.081805,11.0,26.0,47.0,74.0,83.0
4,04B,13.0,24.461538,11.573068,3.0,16.0,26.0,34.0,40.0
4,04C,25.0,29.16,19.787791,0.0,12.0,26.0,47.0,63.0
10,10A,15.0,27.866667,16.043987,9.0,15.0,22.0,43.5,53.0
10,10B,8.0,23.0,14.735768,4.0,12.25,22.5,33.75,43.0
10,10C,10.0,46.6,27.309746,1.0,27.5,53.5,64.0,86.0
21,21B,9.0,22.666667,16.881943,2.0,11.0,17.0,42.0,46.0
21,21D,10.0,40.8,31.981939,1.0,17.25,32.5,65.5,91.0
21,21E,4.0,41.75,17.745892,16.0,37.75,47.5,51.5,56.0
60,60B,11.0,16.181818,11.223352,0.0,7.0,20.0,24.5,30.0


In [11]:
# Merge 'Eveline' and 'David' so we know what is still to do
# Based on https://stackoverflow.com/a/33350050/323100
StillToDo = pandas.merge(Eveline, David,
                         on=['Animal', 'Acinus', 'Day', 'Beamtime', 'Sample'],
                         how='outer', suffixes=['_Eveline', '_David'],
                         indicator=True)
StillToDo = StillToDo[StillToDo._merge != 'both']
print('We still need to assess the disector volume in %s acini...' % len(StillToDo))

We still need to assess the disector volume in 146 acini...


In [12]:
# Merge 'Eveline' and 'David' so have the one we already did
# Based on https://stackoverflow.com/a/33350050/323100
Done = pandas.merge(Eveline, David,
                         on=['Animal', 'Acinus', 'Day', 'Beamtime', 'Sample'],
                         how='inner', suffixes=['_Eveline', '_David'],
                         indicator=True)
print('We have the data of %s acini...' % len(Done))

We have the data of 142 acini...


In [13]:
# Get last image file
StillToDo['LastFile'] = [os.path.basename(sorted(glob.glob(os.path.join(os.path.dirname(location),
                                                                         '*_??_b.jpg')))[-1]) for
                          location in StillToDo.Location_Eveline]

In [14]:
# See if we have more than 99 images...
StillToDo['LastImage'] = [[int(os.path.basename(i).split('_')[-2]) for i in glob.glob(os.path.join(os.path.dirname(location),
                                          '*.jpg'))] for
                          location in StillToDo.Location_Eveline]
StillToDo['LastImage'] = [max(li) for li in StillToDo['LastImage']]

In [15]:
# Print the 'Still to do' counts in random order.
# Use this order to assess the disector volume
print('From the %s acini still to count, here are some, randomly selected' % len(StillToDo))
StillToDo.sample(n=5)[['Beamtime', 'Sample', 'Acinus', 'LastImage', 'LastFile']]

From the 146 acini still to count, here are some, randomly selected


Unnamed: 0,Beamtime,Sample,Acinus,LastImage,LastFile
2,2009f,R108C60Dt-mrg,7,120,R108C60Dt-mrg-acinus07_99_b.jpg
78,2010a,R108C04Bt-mrg,25,37,R108C04Bt-mrg-acinus25_37_b.jpg
281,2010c_tape,R108C60B_B1_mrg,24,93,R108C60B_B1_mrg-acinus24_93_b.jpg
17,2009f,R108C60Et-mrg,13,87,R108C60Et-mrg-acinus13_87_b.jpg
106,2010a,R108C04Ct-mrg,15,39,R108C04Ct-mrg-acinus15_39_b.jpg
