# Disector volumes
Read the newly counted disector volumes

In [1]:
#Load the data and set up notebook
import platform
import glob
import os
import pandas
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn

In [2]:
# Notebook-wide matplotlib defaults, set in one go
plt.rcParams.update({
    'image.cmap': 'gray',  # Display all images in b&w
    'image.interpolation': 'nearest',  # Do not smooth the displayed images
    'figure.figsize': (16, 9),  # Size up figures a bit
    'savefig.transparent': True,  # Save figures with transparent background
})

In [3]:
# We copied everything from nas_schittny and the terastation to 'fast SSD'.
# Load the data from there.
# `platform.dist()` was deprecated in Python 3.5 and removed in Python 3.8, so
# we select the mount point of the SSD based on the operating system instead
# (this also covers Linux distributions other than Debian).
if platform.system() == 'Linux':
    drive = '/media/habi/Fast_SSD/'
else:
    drive = 'F:\\'
# Load the data from the 'Acini' folder on this drive
RootPath = os.path.join(drive, 'Acini')
print('We are loading all the data from %s' % RootPath)

We are loading all the data from /media/habi/Fast_SSD/Acini


In [4]:
# Collect all the STEPanizer export files from Eveline (anywhere below RootPath).
# Her files carry a year between 2011 and 2017 in their name.
# Recursive globbing based on https://stackoverflow.com/a/14798263
StepanizerFiles_Eveline = glob.glob(os.path.join(RootPath, '**/*201[1234567]*.xls'), recursive=True)
StepanizerFiles_Eveline.sort()

In [5]:
# Report how many export files we found for Eveline
print('Eveline counted the alveoli in %s acini' % (len(StepanizerFiles_Eveline),))

Eveline counted the alveoli in 287 acini


In [6]:
# Describe each of Eveline's export files; every detail is parsed from the file path
Eveline = pandas.DataFrame({'Location': StepanizerFiles_Eveline})
Eveline['Filename'] = [os.path.split(path)[1] for path in StepanizerFiles_Eveline]
# Beamtime: the path component right after the 'Acini' folder
Eveline['Beamtime'] = [os.path.split(path)[0].split('Acini')[1].split(os.sep)[1]
                       for path in StepanizerFiles_Eveline]
# Sample: the file name in front of '-acinus', without its very first character
Eveline['Sample'] = [os.path.split(path)[1].partition('-acinus')[0][1:]
                     for path in StepanizerFiles_Eveline]
# Animal: the first three characters after '_R108C' (and in front of 'mrg-')
Eveline['Animal'] = [os.path.split(path)[1].split('_R108C')[1].partition('mrg-')[0][:3]
                     for path in StepanizerFiles_Eveline]
# Day: the first two characters of that same part, as a number
Eveline['Day'] = [int(os.path.split(path)[1].split('_R108C')[1].partition('mrg-')[0][:2])
                  for path in StepanizerFiles_Eveline]
# Acinus: the number between 'acinus' and the next underscore
Eveline['Acinus'] = [int(os.path.split(path)[1].split('acinus')[1].partition('_')[0])
                     for path in StepanizerFiles_Eveline]

In [7]:
# Collect *all* the STEPanizer export files that I (David) counted.
# In contrast to the ones from Eveline, these only have '2018' in the file name...
StepanizerFiles_David = sorted(glob.iglob(os.path.join(RootPath, '**/*2018*.xls'), recursive=True))

In [8]:
# Report how many export files we found for David
print('David assessed the disector volume in %s acini' % (len(StepanizerFiles_David),))

David assessed the disector volume in 287 acini


In [9]:
# Describe each of David's export files; every detail is parsed from the file path
David = pandas.DataFrame({'Location': StepanizerFiles_David})
David['Filename'] = [os.path.split(path)[1] for path in StepanizerFiles_David]
# Beamtime: the path component right after the 'Acini' folder
David['Beamtime'] = [os.path.split(path)[0].split('Acini')[1].split(os.sep)[1]
                     for path in StepanizerFiles_David]
# Sample: the file name in front of '-acinus', without its very first character
David['Sample'] = [os.path.split(path)[1].partition('-acinus')[0][1:]
                   for path in StepanizerFiles_David]
# Animal: the first three characters after '_R108C' (and in front of 'mrg-')
David['Animal'] = [os.path.split(path)[1].split('_R108C')[1].partition('mrg-')[0][:3]
                   for path in StepanizerFiles_David]
# Day: the first two characters of that same part, as a number
David['Day'] = [int(os.path.split(path)[1].split('_R108C')[1].partition('mrg-')[0][:2])
                for path in StepanizerFiles_David]
# Acinus: the number between 'acinus' and the next underscore
David['Acinus'] = [int(os.path.split(path)[1].split('acinus')[1].partition('_')[0])
                   for path in StepanizerFiles_David]

In [10]:
# Summary statistics of the acinus numbers, per day and animal
David.groupby(['Day', 'Animal'])['Acinus'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
Day,Animal,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4,04A,50.0,54.24,22.463176,11.0,36.5,56.5,74.75,88.0
4,04B,23.0,26.913043,10.413772,3.0,18.5,29.0,35.5,41.0
4,04C,51.0,29.372549,18.909216,0.0,13.5,27.0,45.5,63.0
10,10A,27.0,26.703704,14.940433,2.0,13.5,25.0,39.0,53.0
10,10B,14.0,22.285714,12.862758,4.0,12.5,20.0,31.25,43.0
10,10C,17.0,40.647059,26.636303,1.0,20.0,38.0,61.0,86.0
21,21B,14.0,27.214286,17.742511,2.0,11.75,28.5,41.25,54.0
21,21D,17.0,46.764706,30.173518,1.0,21.0,49.0,68.0,98.0
21,21E,11.0,27.545455,18.468154,1.0,13.0,27.0,41.5,56.0
60,60B,24.0,15.5,10.121007,0.0,7.5,14.5,24.5,31.0


In [11]:
# Merge 'Eveline' and 'David' so we know what is still to do
# Based on https://stackoverflow.com/a/33350050/323100
StillToDo = pandas.merge(Eveline, David,
                         on=['Animal', 'Acinus', 'Day', 'Beamtime', 'Sample'],
                         how='outer', suffixes=['_Eveline', '_David'],
                         indicator=True)
# The '_merge' indicator is 'both' for acini found in both tables; keep all the
# others. Note that this keeps 'left_only' *and* 'right_only' rows, so rows
# which are only present in 'David' have no 'Location_Eveline'.
# We take an explicit copy, since we add columns to this selection further
# below; assigning to a filtered frame raises a SettingWithCopyWarning otherwise.
StillToDo = StillToDo.loc[StillToDo['_merge'] != 'both'].copy()
print('We still need to assess the disector volume in %s acini...' % len(StillToDo))

We still need to assess the disector volume in 0 acini...


In [12]:
# Merge 'Eveline' and 'David' so that we have the ones we already did,
# i.e. only the acini that are present in both tables
# Based on https://stackoverflow.com/a/33350050/323100
Done = Eveline.merge(David,
                     how='inner',
                     on=['Animal', 'Acinus', 'Day', 'Beamtime', 'Sample'],
                     suffixes=['_Eveline', '_David'],
                     indicator=True)
print('We have the data of %s acini...' % (len(Done),))

We have the data of 287 acini...


In [13]:
# Get last image file: the alphabetically last '*_??_b.jpg' found in the
# folder in which Eveline's export file is located
StillToDo['LastFile'] = [os.path.split(sorted(glob.glob(os.path.join(folder, '*_??_b.jpg')))[-1])[1]
                         for folder in map(os.path.dirname, StillToDo['Location_Eveline'])]

In [14]:
# See if we have more than 99 images...
# First collect the number (second-to-last '_'-separated part of the file name)
# of every .jpg found in the folder in which Eveline's export file is located
StillToDo['LastImage'] = [[int(os.path.split(image)[1].split('_')[-2])
                           for image in glob.glob(os.path.join(folder, '*.jpg'))]
                          for folder in map(os.path.dirname, StillToDo['Location_Eveline'])]
# Then keep only the largest number per folder
StillToDo['LastImage'] = list(map(max, StillToDo['LastImage']))

In [15]:
# Print the 'Still to do' counts in random order.
# Use this order to assess the disector volume
print('From the %s acini still to count, here are some, randomly selected' % len(StillToDo))
if StillToDo.empty:
    # Check explicitly instead of relying on `sample` raising a ValueError for
    # an empty table, which depends on the installed pandas/numpy versions.
    print('We are all done!')
else:
    # An expression inside a block is not shown automatically by Jupyter, so
    # we have to display the shuffled table explicitly.
    # `display` is made available by the IPython kernel running this notebook.
    display(StillToDo.sample(n=len(StillToDo))[['Beamtime', 'Sample', 'Acinus', 'LastImage', 'LastFile']])

From the 0 acini still to count, here are some, randomly selected
We are all done!
