# Handle and check the 'data' of all the scans we did
Wrestle with the data, check parameters and generate some helping files

In [1]:
import platform
import os
import glob
import pandas
from tqdm import notebook

In [2]:
# # Set dask temporary folder
# # Do this before creating a client: https://stackoverflow.com/a/62804525/323100
# import tempfile
# if 'Linux' in platform.system():
#     tmp = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
# elif 'Darwin' in platform.system():
#     tmp = tempfile.gettempdir()
# else:
#     if 'anaklin' in platform.node():
#         tmp = os.path.join('F:\\')
#     else:
#         tmp = os.path.join('D:\\')
# dask.config.set({'temporary_directory': os.path.join(tmp, 'tmp')})
# print('Dask temporarry files go to %s' % dask.config.get('temporary_directory'))

In [3]:
# # Start cluster and client now, after setting tempdir
# cluster = LocalCluster(n_workers=8)
# client = Client(cluster)

In [4]:
# print('You can seee what DASK is doing at "http://localhost:%s/status"' % client.scheduler_info()['services']['dashboard'])

In [5]:
# # Ignore warnings in the notebook
# import warnings
# warnings.filterwarnings("ignore")

In [6]:
# # Set up figure defaults
# plt.rc('image', cmap='gray', interpolation='nearest')  # Display all images in b&w and with 'nearest' interpolation
# plt.rcParams['figure.figsize'] = (16, 9)  # Size up figures a bit
# plt.rcParams['figure.dpi'] = 200

In [7]:
# # Setup scale bar defaults
# plt.rcParams['scalebar.location'] = 'lower right'
# plt.rcParams['scalebar.frameon'] = False
# plt.rcParams['scalebar.color'] = 'white'

In [8]:
# # Display all plots identically
# lines = 3
# # And then do something like
# # plt.subplot(lines, numpy.ceil(len(Data) / float(lines)), c + 1)

In [9]:
# The data location depends on where we read from and on the operating system.
# Set Archive to True to load the data directly from the research-storage
# archive drive; with False the local 'Data' folder next to the notebook is used.
Archive = False
if not Archive:
    BasePath = os.path.join(os.getcwd(), 'Data')
elif 'Linux' in platform.system():
    BasePath = os.path.join(os.sep, 'home', 'habi', 'research-storage-uct', 'Archiv_Tape')
elif 'Windows' in platform.system():
    BasePath = os.path.join('R:\\Archiv_Tape')
# All scans of this project live in one folder below the base path
Root = os.path.join(BasePath, 'Liver-Semela')
print('We are loading all the data from %s' % Root)

We are loading all the data from /home/habi/P/Documents/Semela-Liver/Data/Liver-Semela


In [10]:
def get_pixelsize(logfile):
    """Get the pixel size from the scan log file.

    Every line containing 'Image Pixel' but not 'Scaled' is parsed as a
    float taken from the text after the first '='. If several lines match,
    the last one wins. Returns None if no line matches.
    """
    with open(logfile) as handle:
        sizes = [float(entry.split('=')[1])
                 for entry in handle
                 if 'Scaled' not in entry and 'Image Pixel' in entry]
    return sizes[-1] if sizes else None

In [11]:
def get_operator(logfile):
    """Get the operator who scanned the samples.

    Returns the stripped text after the first '=' of the last line that
    contains 'User Name', or None if the log file has no such line.
    """
    with open(logfile) as handle:
        names = [entry.split('=')[1].strip()
                 for entry in handle
                 if 'User Name' in entry]
    return names[-1] if names else None

In [12]:
def get_experiment(i):
    '''Categorize a sample name into 'Notch' or 'Control'.

    'notch' is checked before 'ctrl'; names containing neither give None.
    '''
    for marker, label in (('notch', 'Notch'), ('ctrl', 'Control')):
        if marker in i:
            return label
    return None

In [13]:
def get_vein(i):
    '''Categorize a sample name into 'Portal' or 'Cava'.

    'portal' is checked before 'cava'; names containing neither give None.
    '''
    return 'Portal' if 'portal' in i else ('Cava' if 'cava' in i else None)

In [14]:
def get_git_hash():
    '''
    Return the short git hash of HEAD for the repository in the current
    working directory (its '.git' folder is passed via --git-dir).
    Only stdout of the git call is captured; it is stripped and decoded as UTF-8.
    Based on http://stackoverflow.com/a/949391/323100 and
    http://stackoverflow.com/a/18283905/323100
    '''
    from subprocess import Popen, PIPE
    import os
    command = ['git',
               '--git-dir', os.path.join(os.getcwd(), '.git'),
               'rev-parse', '--short', '--verify', 'HEAD']
    stdout, _ = Popen(command, stdout=PIPE).communicate()
    return stdout.strip().decode("utf-8")

In [15]:
# # Make directory for output
# OutPutDir = os.path.join(os.getcwd(), 'Output', get_git_hash())
# print('We are saving all the output to %s' % OutPutDir)
# os.makedirs(OutPutDir, exist_ok=True)

Mario Novkovic told us that 
> We have used the ds17 livers in the paper, specifically ctrl4 and notch1_2 in the first batch (training dataset), while the second batch consisted of 3 datasets from each mouse type: ctrl1, ctrl2, ctrl5 and notch1_1, notch1_3, notch1_4.

So let's only use *those* folders for the remainder of the notebook.
We copied all the relevant data from the archive to the `Data`-subfolder here with
````bash
rsync --verbose --recursive --times --update --omit-dir-times --include="*/" --include="*.?og" --include="*.c?v" --include="*.?oi" --include="*.?at" --include="*_spr*.bmp" --include="*.txt" --include="*.md" --include="*.mp" --include="*.sb" --include="*.info" --include="*.?nc" --include="*.bkp" --exclude="*" ~/research-storage-uct/Archiv_Tape/Liver-Semela/ /home/habi/P/Documents/Semela-Liver/Data/

````
(which is our standard `rsync` blurb for putting stuff *to* the archive, but without the `*.tif` files, so we get back all the relevant things :).

We then delete all non-`ds17*`-folders with
````bash
cd /home/habi/P/Documents/Semela-Liver/Data/Liver-Semela
find . ! -path './ds*' -type d -exec rm -r {} +
````
which is based on https://unix.stackexchange.com/a/153863/199252 and works even though it traverses a lot of empty directories.

Now that we have *all* the necessary data (and some more), let's get to work!

In [16]:
# Make us a dataframe for saving all that we need
# (it starts out empty; the cells below add one column at a time)
Data = pandas.DataFrame()

In [17]:
# Get *all* log files below Root (searched recursively), sorted by path
Data['LogFile'] = sorted(glob.glob(os.path.join(Root, '**', '*.log'), recursive=True))

In [18]:
# Get all folders and generate sample, scan and experiment name
Data['Folder'] = [os.path.dirname(logfile) for logfile in Data['LogFile']]
# Split every folder (relative to Root) into its path components once.
# Component 1 is the sample folder, 2 the scan and 3 the subfolder.
_relative = [folder[len(Root):].split(os.path.sep) for folder in Data['Folder']]
Data['SampleName'] = [parts[1].replace('ds17_', '') for parts in _relative]
# The bare sample is the sample name without rescan/vein suffixes
Data['Sample'] = [
    name.replace('_rescan', '').replace('_portal', '').replace('_cava', '')
    for name in Data['SampleName']
]
Data['Scan'] = [parts[2] for parts in _relative]
Data['Subfolder'] = [parts[3] for parts in _relative]
Data['Experiment'] = [get_experiment(sample) for sample in Data['Sample']]
Data['Vein'] = [get_vein(name) for name in Data['SampleName']]

In [19]:
# Check what we did there...
# Row 33 (positional index) is presumably just an arbitrary example row.
# First show it as a Series, then print every value in full below the ruler,
# since the Series view truncates long paths.
print(Data.iloc[33])
print(80*'-')
for i in Data.iloc[33]:
    print(i)

LogFile       /home/habi/P/Documents/Semela-Liver/Data/Liver...
Folder        /home/habi/P/Documents/Semela-Liver/Data/Liver...
SampleName                                         ctrl3_portal
Sample                                                    ctrl3
Scan                                                   overview
Subfolder                                                  proj
Experiment                                              Control
Vein                                                     Portal
Name: 33, dtype: object
--------------------------------------------------------------------------------
/home/habi/P/Documents/Semela-Liver/Data/Liver-Semela/ds17_ctrl3_portal/overview/proj/ds17_ctrl3_portal~00.log
/home/habi/P/Documents/Semela-Liver/Data/Liver-Semela/ds17_ctrl3_portal/overview/proj
ctrl3_portal
ctrl3
overview
proj
Control
Portal


In [20]:
# Read the voxelsize from each logfile
# (get_pixelsize returns None if the log file has no matching line)
Data['Voxelsize'] = [get_pixelsize(log) for log in Data['LogFile']]

In [21]:
# One log file is empty, so we drop it
# If we cannot read a voxel size out of a log file, it's safe to drop its row
Data.drop(Data[Data['Voxelsize'].isna()].index, inplace=True)
# We asked Mario et al. if they have a good copy of it, but haven't received it so far

In [22]:
# Read the operator (user name) from each log file and list the distinct ones
Data['Operator'] = [get_operator(log) for log in Data['LogFile']]
print('The scans were performed by the users %s' % Data.Operator.unique())

The scans were performed by the users ['haberthu']


In [23]:
# What sample names do we have in all the log files?
# (one distinct sample per line)
for i in Data.Sample.unique():
    print(i)

ctrl1
ctrl2
ctrl3
ctrl4
ctrl5
ctrl6
notch1_1
notch1_2
notch1_3
notch1_4


In [24]:
# These are the folders that were used according to Mario's email from 21/03/2022, 16:43
# (first ctrl4 and notch1_2 from batch one, then the six samples of batch two)
whichones = ['ctrl4', 'notch1_2', 'ctrl1', 'ctrl2', 'ctrl5', 'notch1_1', 'notch1_3', 'notch1_4']

In [25]:
# What Mario et al. looked at, sorted alphabetically (display only)
sorted(whichones)

['ctrl1',
 'ctrl2',
 'ctrl4',
 'ctrl5',
 'notch1_1',
 'notch1_2',
 'notch1_3',
 'notch1_4']

In [26]:
# Which ones are 'surplus'?
# I.e. samples we have log files for, but which are not in the list of used ones
set(Data.Sample.unique())-set(whichones)

{'ctrl3', 'ctrl6'}

In [27]:
# Mario wrote us in an email on 06/04/2022, 18:54 that they "mostly used the high resolution rescan" with "a final voxel resolution of 5 x 5 x 5 um".
# Specifically scans

# Batch 1
# ds17_ctrl4_cava
# ds17_notch1_2

# Batch 2
# ds17_ctrl1_portal_rescan
# ds17_ctrl2_portal_rescan
# ds17_ctrl5_cava_rescan
# ds17_notch1_1_rescan
# ds17_notch1_3_rescan
# ds17_notch1_4_rescan 

In [28]:
# Get rid of all non-5um scans
# Since we have several slightly different 5um voxel sizes, we round them
# to the nearest integer before comparing and dropping
Data.drop(Data[Data['Voxelsize'].round() != 5.0].index, inplace=True)

In [29]:
# Drop all rows whose subfolder is 'rec', which we don't need for what we do here
Data.drop(Data[Data['Subfolder'] == 'rec'].index, inplace=True)

In [30]:
# Drop all non-'highresolution' folders, since these are surplus
# (only rows whose 'Scan' is exactly 'highresolution' are kept)
Data.drop(Data[Data['Scan'] != 'highresolution'].index, inplace=True)

In [31]:
# Drop all logfiles from subscans, e.g. the ones that end with ~00.log, ~01.log, ~02.log, etc.
# NOTE: this matches '~0' anywhere in the full path, not only at the end of the file name
Data.drop(Data[Data['LogFile'].str.contains('~0')].index, inplace=True)

In [32]:
# Show the remaining log files, relative to Root
for i in Data.LogFile:
    print(i[len(Root):])

/ds17_ctrl1_portal/highresolution/proj/ds17_ctrl1_portal.log
/ds17_ctrl1_portal_rescan/highresolution/proj/ds17_ctrl1_portal_rescan.log
/ds17_ctrl2_portal_rescan/highresolution/proj/ds17_ctrl2_portal_rescan.log
/ds17_ctrl4_cava/highresolution/proj/ds17_ctrl4_cava.log
/ds17_ctrl5_cava_rescan/highresolution/proj/ds17_ctrl5_cava_rescan.log
/ds17_notch1_1/highresolution/proj/ds17_notch1_1.log
/ds17_notch1_1_rescan/highresolution/proj/ds17_notch1_1_rescan.log
/ds17_notch1_3_rescan/highresolution/proj/ds17_notch1_3_rescan.log
/ds17_notch1_4/highresolution/proj/ds17_notch1_4.log
/ds17_notch1_4_rescan/highresolution/proj/ds17_notch1_4_rescan.log


In [33]:
# Let's see what is left
# For every requested sample, list the remaining scans.
# NOTE: str.contains does a substring match on 'Sample', not an exact comparison
for wanted in sorted(whichones):
    print(15 * '-', 'For sample %s we have the data below' % wanted, 15 * '-', )
    print(Data[Data['Sample'].str.contains(wanted)][['Sample', 'Vein', 'SampleName', 'Scan', 'Voxelsize']])
    print(80*'-')

--------------- For sample ctrl1 we have the data below ---------------
   Sample    Vein           SampleName            Scan  Voxelsize
0   ctrl1  Portal         ctrl1_portal  highresolution   5.000040
16  ctrl1  Portal  ctrl1_portal_rescan  highresolution   5.000018
--------------------------------------------------------------------------------
--------------- For sample ctrl2 we have the data below ---------------
   Sample    Vein           SampleName            Scan  Voxelsize
28  ctrl2  Portal  ctrl2_portal_rescan  highresolution   5.000018
--------------------------------------------------------------------------------
--------------- For sample ctrl4 we have the data below ---------------
   Sample  Vein  SampleName            Scan  Voxelsize
36  ctrl4  Cava  ctrl4_cava  highresolution    5.00004
--------------------------------------------------------------------------------
--------------- For sample ctrl5 we have the data below ---------------
   Sample  Vein         Sampl

In [34]:
# TODO: Drop original if there's a '_rescan' folder
# for sample in Data.Sample.unique():
#     if len(Data[Data.Sample == sample]) > 1:
#         print(Data[Data.Sample == sample][['SampleName', 'Sample']])        
#         print(Data[Data.Sample == sample][Data.SampleName == Data.Sample].index)

In [35]:
# Which sample names (including vein and rescan suffixes) are left? (display only)
Data.SampleName.unique()

array(['ctrl1_portal', 'ctrl1_portal_rescan', 'ctrl2_portal_rescan',
       'ctrl4_cava', 'ctrl5_cava_rescan', 'notch1_1', 'notch1_1_rescan',
       'notch1_3_rescan', 'notch1_4', 'notch1_4_rescan'], dtype=object)

In [36]:
import re
def scanner(logfile, verbose=False):
    """Build a description of the scanner from a log file.

    The model is the text following 'SkyScan'/'Skyscan' on the last line
    containing 'Scanner'; the hardware version is the text after '=' on the
    last line containing 'Hardware'. With verbose=True every matching line
    is printed. Returns 'SkyScan <model> (Version <hardware>)' or, if no
    hardware version was found, 'SkyScan <model>'.
    """
    hardware = []
    with open(logfile) as handle:
        for entry in handle:
            if 'Scanner' in entry:
                if verbose:
                    print(entry)
                # The vendor name is spelled SkyScan or Skyscan, hence the regex
                model = re.split('Sky.can', entry)[1].strip()
            if 'Hardware' in entry:
                if verbose:
                    print(entry)
                hardware = entry.split('=')[1].strip()
    if not hardware:
        return 'SkyScan ' + model
    return 'SkyScan %s (Version %s)' % (model, hardware)

In [37]:
def controlsoftware(logfile, verbose=False):
    """Get the version string of the control software from a log file.

    Uses the stripped text after the first '=' of the last line containing
    'Software Ver'. With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Software Ver' not in entry:
                continue
            if verbose:
                print(entry)
            release = entry.split('=')[1].strip()
    return release

In [38]:
# Get parameters which we'll need for the paragraph
# (scanner description and control software version, one value per log file)
Data['Scanner'] = [scanner(log) for log in Data['LogFile']]
Data['ControlSoftware'] = [controlsoftware(log) for log in Data['LogFile']]

In [39]:
def source(logfile, verbose=False):
    """Get the X-ray source type from a log file.

    Uses the stripped text after the first '=' of the last line containing
    'Source Ty'. Names containing 'HAMAMA' are prettified, e.g.
    'HAMAMATSU_L11871_20' becomes 'Hamamatsu L11871_20'.
    With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Source Ty' not in entry:
                continue
            if verbose:
                print(entry)
            tube = entry.split('=')[1].strip()
            if 'HAMAMA' in tube:
                # Cut at '_L', capitalize each piece (HAMAMATSU -> Hamamatsu)
                # and glue the pieces together again with ' L'
                pieces = tube.split('_L')
                tube = ' L'.join(map(str.capitalize, pieces))
    return tube

In [40]:
def camera(logfile, verbose=False):
    """Get the camera name from a log file.

    Uses the stripped text after the first '=' of the last line containing
    'Camera T' or 'Camera='. A trailing ' camera' is removed from the name.
    With verbose=True every matching line is printed.
    Returns None if the log file contains no matching line.
    """
    suffix = ' camera'
    cam = None
    with open(logfile, 'r') as f:
        for line in f:
            if 'Camera T' in line or 'Camera=' in line:
                if verbose:
                    print(line)
                cam = line.split('=')[1].strip()
                # Remove the literal suffix only. str.strip(' camera') would
                # strip any of the characters ' ', 'c', 'a', 'm', 'e', 'r'
                # from both ends and mangle names like 'Ximea' into 'Xime'.
                if cam.endswith(suffix):
                    cam = cam[:-len(suffix)].strip()
    return cam

In [41]:
# Read X-ray source and camera type from each log file
Data['Source'] = [source(log) for log in Data['LogFile']]
Data['Camera'] = [camera(log) for log in Data['LogFile']]

In [42]:
def voltage(logfile, verbose=False):
    """Get the source voltage from a log file.

    Parses the text after the first '=' of the last line containing
    'Voltage' as a float. With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Voltage' not in entry:
                continue
            if verbose:
                print(entry)
            volts = float(entry.split('=')[1])
    return volts

In [43]:
def current(logfile, verbose=False):
    """Get the source current from a log file.

    Parses the text after the first '=' of the last line containing
    'Source Current' as a float. With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Source Current' not in entry:
                continue
            if verbose:
                print(entry)
            amps = float(entry.split('=')[1])
    return amps

In [44]:
def whichfilter(logfile, verbose=False):
    """Get the filter that was used from a log file.

    Uses the stripped text after the first '=' of the last line containing
    'Filter=', with double spaces collapsed to single ones. The value
    'No Filter' is reported as False. With verbose=True every matching
    line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Filter=' not in entry:
                continue
            if verbose:
                print(entry)
            material = entry.split('=')[1].strip().replace('  ', ' ')
            fltr = False if material == 'No Filter' else material
    return fltr

In [45]:
# Read source voltage, source current and filter from each log file
# (whichfilter returns False for scans without a filter)
Data['Voltage'] = [voltage(log) for log in Data['LogFile']]
Data['Current'] = [current(log) for log in Data['LogFile']]
Data['Filter'] = [whichfilter(log) for log in Data['LogFile']]

In [46]:
def camerasize(logfile, verbose=False):
    """Get the camera size from a log file as a (columns, rows) tuple.

    Both numbers are parsed as int from the text after the first '=' of the
    last line containing 'Columns' and 'Rows', respectively.
    With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Columns' in entry:
                if verbose:
                    print(entry)
                width = int(entry.split('=')[1])
            if 'Rows' in entry:
                if verbose:
                    print(entry)
                height = int(entry.split('=')[1])
    return (width, height)

In [47]:
def numproj(logfile, verbose=False):
    """Get the number of projections from a log file.

    Parses the text after the first '=' of the last line containing
    'f Files' as an int. With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'f Files' not in entry:
                continue
            if verbose:
                print(entry)
            count = int(entry.split('=')[1])
    return count

In [48]:
def rotationstep(logfile, verbose=False):
    """Get the rotation step from a log file.

    Parses the text after the first '=' of the last line containing
    'Rotation Step' as a float. With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Rotation Step' not in entry:
                continue
            if verbose:
                print(entry)
            step = float(entry.split('=')[1])
    return step

In [49]:
# Read camera size (a (columns, rows) tuple), number of projections
# and rotation step from each log file
Data['CameraSize'] = [camerasize(log) for log in Data['LogFile']]
Data['NumberOfProjections'] = [numproj(log) for log in Data['LogFile']]
Data['RotationStep'] = [rotationstep(log) for log in Data['LogFile']]

In [50]:
def stacks(logfile, verbose=False):
    """Get the number of connected scans (stacks) from a log file.

    Parses the text after the first '=' of the last line containing
    'of connected scans' as an int. If only one stack was scanned, the log
    file has no such line and 0 is returned.
    With verbose=True every matching line is printed.
    """
    count = 0
    with open(logfile) as handle:
        for entry in handle:
            if 'of connected scans' not in entry:
                continue
            if verbose:
                print(entry)
            count = int(entry.split('=')[1])
    return count

In [51]:
def overlapscan(logfile, verbose=False):
    """Check if a scan was recorded with more than one horizontal offset position.

    Looks at the first line containing 'Horizontal Offset Positions' and
    returns True if the int after the first '=' is larger than 1, False
    otherwise. Returns None if the log file has no such line.
    With verbose=True the matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Horizontal Offset Positions' not in entry:
                continue
            if verbose:
                print(entry)
            return int(entry.split('=')[1]) > 1

In [52]:
def threesixtyscan(logfile, verbose=False):
    """Check if a scan was recorded over a full 360° rotation.

    Looks at the first line containing '0 Rotation' (presumably the
    '... 360 Rotation=YES/NO' entry of the log file) and returns True if the
    stripped text after the first '=' is exactly 'YES', False otherwise.
    Returns False if the log file has no such line; previously this case
    fell through and returned None although a False default was set up.
    With verbose=True the matching line is printed.
    """
    with open(logfile, 'r') as f:
        for line in f:
            if '0 Rotation' in line:
                if verbose:
                    print(line)
                return line.split('=')[1].strip() == 'YES'
    # No matching line in the log file: not a 360° scan
    return False

In [53]:
# Read the number of stacks, and whether the scan was a wide (laterally
# stitched) and/or a 360° scan, from each log file
Data['Stacks'] = [stacks(log) for log in Data['LogFile']]
Data['Wide'] = [overlapscan(log) for log in Data.LogFile]
Data['ThreeSixty'] = [threesixtyscan(log) for log in Data['LogFile']]

In [54]:
def exposure(logfile, verbose=False):
    """Get the exposure setting from a log file.

    Parses the text after the first '=' of the last line containing
    'Exposure' as an int. With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Exposure' not in entry:
                continue
            if verbose:
                print(entry)
            value = int(entry.split('=')[1])
    return value

In [55]:
def averaging(logfile, verbose=False):
    """Get the frame averaging setting from a log file.

    Looks at the text after the first '=' of the last line containing
    'Avera'. If it contains 'ON', the int between the parentheses is
    returned (e.g. 3 for 'ON (3)'), otherwise False.
    With verbose=True every matching line is printed.
    """
    with open(logfile) as handle:
        for entry in handle:
            if 'Avera' not in entry:
                continue
            if verbose:
                print(entry)
            setting = entry.split('=')[1]
            if 'ON' not in setting:
                avg = False
                continue
            # Take what is between the brackets, see
            # https://stackoverflow.com/a/4894156/323100
            start = setting.find("(") + 1
            stop = setting.find(")")
            avg = int(setting[start:stop])
    return avg

In [56]:
# Read exposure and frame averaging from each log file
# (averaging returns False if averaging was not switched on)
Data['Exposure'] = [exposure(log) for log in Data['LogFile']]
Data['Averaging'] = [averaging(log) for log in Data['LogFile']]

In [57]:
def ringremoval(logfile, verbose=False):
    """Get the ring artifact correction value from a log file.

    Parses the stripped text after the first '=' of the last line containing
    'Ring' as an int. Returns None if the log file has no such line.
    With verbose=True every matching line is printed.
    """
    strength = None
    with open(logfile) as handle:
        for entry in handle:
            if 'Ring' not in entry:
                continue
            if verbose:
                print(entry)
            strength = int(entry.split('=')[1].strip())
    return strength

In [58]:
def beamhardening(logfile, verbose=False):
    """Get the beam hardening correction value from a log file.

    Parses the stripped text after the first '=' of the last line containing
    'ardeni' as an int. Returns None if the log file has no such line.
    With verbose=True every matching line is printed.
    """
    correction = None
    with open(logfile) as handle:
        for entry in handle:
            if 'ardeni' not in entry:
                continue
            if verbose:
                print(entry)
            correction = int(entry.split('=')[1].strip())
    return correction

In [59]:
def get_reconstruction_grayvalue(logfile):
    """How did we map the brightness of the reconstructions?

    Parses the text after the first '=' of the last line containing
    'Maximum for' as a float. Returns None if the log file has no such line.
    """
    with open(logfile) as handle:
        maxima = [float(entry.split('=')[1])
                  for entry in handle
                  if 'Maximum for' in entry]
    return maxima[-1] if maxima else None

In [60]:
# Read the reconstruction parameters from each log file
# (each of these is None if the log file has no matching line)
Data['RingRemoval'] = [ringremoval(log) for log in Data['LogFile']]
Data['Beamhardening'] = [beamhardening(log) for log in Data['LogFile']]
Data['GrayValueMax'] = [get_reconstruction_grayvalue(log) for log in Data['LogFile']]

In [61]:
import datetime
def duration(logfile, verbose=False):
    '''Returns scantime in *seconds*

    Uses the stripped text after the first '=' of the last line containing
    'Scan duration'. With verbose=True every matching line is printed.
    '''
    with open(logfile) as handle:
        for entry in handle:
            if 'Scan duration' not in entry:
                continue
            if verbose:
                print(entry)
            stamp = entry.split('=')[1].strip()
    # The log files use two notations: '00:24:26' and '0h:52m:53s'
    pattern = '%Hh:%Mm:%Ss' if 'h' in stamp else '%H:%M:%S'
    parsed = datetime.datetime.strptime(stamp, pattern)
    # strptime fills in 1900-01-01 as date, so subtracting it leaves the duration
    midnight = datetime.datetime(1900, 1, 1)
    return (parsed - midnight).total_seconds()

In [62]:
# Scan time of a single stack in seconds, as written in each log file
Data['Scan time'] = [duration(log) for log in Data['LogFile']]
# Total scan time = time per stack * number of stacks.
# stacks() returns 0 if the log file lists no connected scans (i.e. for a
# single stack), so we count at least one stack. Otherwise the total scan
# time of such scans would wrongly be zero.
Data['Scan time total'] = [st * max(stk, 1) for st, stk in zip(Data['Scan time'], Data['Stacks'])]

In [63]:
def nreconversion(logfile, verbose=False):
    """Get reconstruction program and its version from a log file.

    The program is the stripped text after the first '=' of the last line
    containing 'Reconstruction Program', the version the stripped text after
    'sion:' of the last line containing 'Program Version'.
    With verbose=True every matching line is printed.
    Returns a (program, version) tuple; missing entries are None.
    """
    program = None
    version = None
    with open(logfile) as handle:
        for entry in handle:
            if 'Reconstruction Program' in entry:
                if verbose:
                    print(entry)
                program = entry.split('=')[1].strip()
            if 'Program Version' in entry:
                if verbose:
                    print(entry)
                version = entry.split('sion:')[1].strip()
    return (program, version)

In [64]:
# Only get the version string, not which software
# (nreconversion returns a (program, version) tuple, we keep element 1)
Data['NRecon'] = [nreconversion(log)[1] for log in Data['LogFile']]

In [65]:
# Check voxel sizes (*rounded* to `roundto` after-comma values)
# If different, spit out which values
roundto = 4
# Round once and reuse, instead of recomputing the unique values several times
unique_voxelsizes = sorted(Data['Voxelsize'].round(roundto).unique())
if len(unique_voxelsizes) > 1:
    print('We scanned all datasets with %s different voxel sizes' % len(unique_voxelsizes))
    for vs in unique_voxelsizes:
        print('-', vs, 'um for ', end='')
        for c, row in Data.iterrows():
            if float(vs) == round(row['Voxelsize'], roundto):
                print(os.path.join(row['Sample'], row['Scan']), end=', ')
        print('')
elif len(unique_voxelsizes) == 1:
    # Pick the single value explicitly. Calling float() on a whole array is
    # deprecated in NumPy, even when the array has only one element.
    print('We scanned all datasets with equal voxel size, namely %s um.' % float(unique_voxelsizes[0]))

We scanned all datasets with equal voxel size, namely 5.0 um.


In [66]:
## Get an overview over the total scan time
## Nice output based on https://stackoverflow.com/a/8907407/323100
#total_seconds = int(Data['Scan time total'].sum().total_seconds())
#hours, remainder = divmod(total_seconds,60*60)
#minutes, seconds = divmod(remainder,60)
#print('In total, we scanned for %s hours and %s minutes)' % (hours, minutes))
#for machine in Data['Scanner'].unique():
#    total_seconds = int(Data[Data['Scanner'] == machine]['Scan time total'].sum().total_seconds())
#    hours, remainder = divmod(total_seconds,60*60)
#    minutes, seconds = divmod(remainder,60)
#    print('\t - Of these, we scanned %s hours and %s minutes on the %s,'
#          'for %s scans' % (hours,
#                            minutes,
#                            machine,
#                            len(Data[Data['Scanner'] == machine])))

In [67]:
# Save everything we collected to an Excel file in the current working directory
Data.to_excel('Details.xlsx')

In [68]:
# Save a second copy of the Excel file next to the data in Root
Data.to_excel(os.path.join(Root,'Details.xlsx'))

In [69]:
# Mean scan parameters per sample
# numeric_only=True is needed because Data also holds text and tuple columns
# (e.g. LogFile, Filter, CameraSize), on which recent pandas versions raise a
# TypeError instead of silently dropping them
Data.groupby('Sample').mean(numeric_only=True)

Unnamed: 0_level_0,Voxelsize,Voltage,Current,NumberOfProjections,RotationStep,Stacks,Wide,ThreeSixty,Exposure,Averaging,RingRemoval,Beamhardening,GrayValueMax,Scan time,Scan time total
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ctrl1,5.000029,100.0,100.0,1895.0,0.1,2.0,False,False,4895.5,3.0,13.5,0.0,0.105714,29849.5,59699.0
ctrl2,5.000018,100.0,100.0,1895.0,0.1,3.0,False,False,4400.0,3.0,13.0,0.0,0.033987,25022.0,75066.0
ctrl4,5.00004,100.0,100.0,1895.0,0.1,2.0,False,False,5391.0,3.0,20.0,0.0,0.15,34677.0,69354.0
ctrl5,5.000018,100.0,100.0,1895.0,0.1,2.0,False,False,4400.0,3.0,13.0,0.0,0.115389,25022.0,50044.0
notch1_1,5.000029,100.0,100.0,1895.0,0.1,2.0,False,False,4895.5,3.0,16.5,0.0,0.274951,29819.5,59639.0
notch1_3,5.000018,100.0,100.0,1895.0,0.1,2.0,False,False,4400.0,3.0,13.0,0.0,0.098418,25022.0,50044.0
notch1_4,5.000029,100.0,100.0,1895.0,0.1,2.0,False,False,4895.5,3.0,56.5,0.0,0.151584,29736.5,59473.0


In [70]:
# Mean scan parameters per sample name (i.e. per individual scan)
# numeric_only=True is needed because Data also holds text and tuple columns
# (e.g. LogFile, Filter, CameraSize), on which recent pandas versions raise a
# TypeError instead of silently dropping them
Data.groupby('SampleName').mean(numeric_only=True)

Unnamed: 0_level_0,Voxelsize,Voltage,Current,NumberOfProjections,RotationStep,Stacks,Wide,ThreeSixty,Exposure,Averaging,RingRemoval,Beamhardening,GrayValueMax,Scan time,Scan time total
SampleName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ctrl1_portal,5.00004,100.0,100.0,1895,0.1,2,False,False,5391,3,20,0,0.15,34677.0,69354.0
ctrl1_portal_rescan,5.000018,100.0,100.0,1895,0.1,2,False,False,4400,3,7,0,0.061429,25022.0,50044.0
ctrl2_portal_rescan,5.000018,100.0,100.0,1895,0.1,3,False,False,4400,3,13,0,0.033987,25022.0,75066.0
ctrl4_cava,5.00004,100.0,100.0,1895,0.1,2,False,False,5391,3,20,0,0.15,34677.0,69354.0
ctrl5_cava_rescan,5.000018,100.0,100.0,1895,0.1,2,False,False,4400,3,13,0,0.115389,25022.0,50044.0
notch1_1,5.00004,100.0,100.0,1895,0.1,2,False,False,5391,3,20,0,0.500102,34617.0,69234.0
notch1_1_rescan,5.000018,100.0,100.0,1895,0.1,2,False,False,4400,3,13,0,0.049799,25022.0,50044.0
notch1_3_rescan,5.000018,100.0,100.0,1895,0.1,2,False,False,4400,3,13,0,0.098418,25022.0,50044.0
notch1_4,5.00004,100.0,100.0,1895,0.1,2,False,False,5391,3,100,0,0.231324,34451.0,68902.0
notch1_4_rescan,5.000018,100.0,100.0,1895,0.1,2,False,False,4400,3,13,0,0.071843,25022.0,50044.0


In [71]:
# Mean scan parameters per experiment group ('Control' and 'Notch')
# numeric_only=True is needed because Data also holds text and tuple columns
# (e.g. LogFile, Filter, CameraSize), on which recent pandas versions raise a
# TypeError instead of silently dropping them
Data.groupby('Experiment').mean(numeric_only=True)

Unnamed: 0_level_0,Voxelsize,Voltage,Current,NumberOfProjections,RotationStep,Stacks,Wide,ThreeSixty,Exposure,Averaging,RingRemoval,Beamhardening,GrayValueMax,Scan time,Scan time total
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Control,5.000027,100.0,100.0,1895.0,0.1,2.2,False,False,4796.4,3.0,14.6,0.0,0.102161,28884.0,62772.4
Notch,5.000027,100.0,100.0,1895.0,0.1,2.0,False,False,4796.4,3.0,31.8,0.0,0.190297,28826.8,57653.6


----

My microct blurb from http://simp.ly/publish/NBhZhH

In [72]:
# State on how many log files (rows left in Data) the text below is based
print('Based on the %s log files in %s' % (len(Data), Root))

Based on the 10 log files in /home/habi/P/Documents/Semela-Liver/Data/Liver-Semela


In [73]:
# The samples that were requested (display only) ...
sorted(whichones)

['ctrl1',
 'ctrl2',
 'ctrl4',
 'ctrl5',
 'notch1_1',
 'notch1_2',
 'notch1_3',
 'notch1_4']

In [74]:
# ... compared with the samples we actually have left in Data (display only)
sorted(Data.Sample.unique())

['ctrl1', 'ctrl2', 'ctrl4', 'ctrl5', 'notch1_1', 'notch1_3', 'notch1_4']

TODO: The logfile of `notch1_2` is missing/empty, hence `len(Data.Sample.unique())` is 7 instead of 8!

TODO: Also, the control software was updated between the scans and _rescans, use only one version number string

In [75]:
# First sentence of the methods blurb: number of samples, scanner and control software
# Several distinct scanners or software versions are joined with 'OR'/'or',
# so that they stand out and can be edited by hand
print('After $PREPARATION, the',
      len(Data.Sample.unique()),
      'samples were imaged on a Bruker',
      " OR ".join(str(value) for value in Data.Scanner.unique()),
      'high-resolution microtomography machine (Control software version',
      " or ".join(str(value) for value in Data.ControlSoftware.unique()) + 
      ', Bruker microCT, Kontich, Belgium).')

After $PREPARATION, the 7 samples were imaged on a Bruker SkyScan 1272 high-resolution microtomography machine (Control software version 1.1.9 or 1.1.19, Bruker microCT, Kontich, Belgium).


In [76]:
# Second sentence: X-ray source and camera
# (several distinct values are joined with 'OR')
print('The machine is equipped with a',
      " OR ".join(str(value) for value in Data.Source.unique()),
      'X-ray source and a',
      " OR ".join(str(value) for value in Data.Camera.unique()),
      'camera which converts the x-rays into visible light and records the projection images.')

The machine is equipped with a Hamamatsu L11871_20 X-ray source and a XIMEA xiRAY16 camera which converts the x-rays into visible light and records the projection images.


In [77]:
# Third sentence: tube voltage, tube current and filter
print('The X-ray source was set to a tube voltage of',
      " OR ".join(str(value) for value in Data.Voltage.unique()),
      'kV and a tube current of',
      " OR ".join(str(value) for value in Data.Current.unique()),
      'µA, the x-ray spectrum was', end=' ')
# whichfilter() gives False for unfiltered scans, so keep only the real
# filter names. Testing the truth value of the unique() array itself raises
# a ValueError as soon as it holds more than one distinct value.
used_filters = [str(value) for value in Data.Filter.unique() if value]
if used_filters:
    print('filtered by', " OR ".join(used_filters), end=' ')
else:
    print('not filtered', end=' ')
print('prior to incidence onto the sample.')

The X-ray source was set to a tube voltage of 100.0 kV and a tube current of 100.0 µA, the x-ray spectrum was filtered by Cu 0.11mm prior to incidence onto the sample.


In [78]:
# Did we do any 360° scans? (display only)
Data.ThreeSixty.unique()

array([False])

In [79]:
# Fourth sentence: stacks, projections, camera size, rotation step and range
print('For each sample, we recorded a set of', end=' ')
# Only mention stacks if at least one scan consists of connected scans
# (stacks() gives 0 for single-stack scans). This used to test Data.Filter,
# which has nothing to do with the stacks.
if any(Data.Stacks):
    print(" or ".join(str(value) for value in Data.Stacks.unique()),
          'stacked scans overlapping the sample height, each stack was recorded with', end=' ')
print(" or ".join(str(value) for value in Data.NumberOfProjections.unique()), 'projections of', end=' ')
# CameraSize holds (columns, rows) tuples: first print all widths, then all heights
for cs in Data['CameraSize'].unique():
    print(cs[0], end=' ')
print('x', end=' ')
for cs in Data['CameraSize'].unique():
    print(cs[1], end=' ')
print('pixels', end=' ')
# any() works for one or several distinct values (and treats None as False),
# while the truth value of a unique() array with several values is ambiguous
if any(Data.Wide):
    print('(' + " or ".join(str(value) for value in Data.Wide.unique()), 'projections stitched laterally)', end=' ')
print('at every',
       str(" or ".join(str(value) for value in Data.RotationStep.unique())) + '° over a ', end='')
# Name every rotation range that occurs, in case 180° and 360° scans are mixed
rotations = sorted({'360°' if full else '180°' for full in Data.ThreeSixty})
print(" or ".join(rotations), end=' ')
print('sample rotation.')

For each sample, we recorded a set of 2 or 3 stacked scans overlapping the sample height, each stack was recorded with 1895 projections of 4904 x 3280 pixels at every 0.1° over a 180° sample rotation.


In [80]:
# Fifth sentence: mean exposure (rounded to an integer) and frame averaging
print('On average, every single projection was exposed for %s' % round(Data.Exposure.mean()),
      'ms,',
      " or ".join(str(value) for value in Data.Averaging.unique()),
      'projections were again averaged to greatly reduce image noise.')

On average, every single projection was exposed for 4796 ms, 3 projections were again averaged to greatly reduce image noise.


In [81]:
#print('This resulted in a scan time of approximately ', end='')
#if duration(log)/3600 > 1:
#    # Scan took hours
#    print(timeformat(datetime.timedelta(seconds=duration(log)),
#                     '{hours} hours and {minutes} minutes'), end=' ')
#else:
#    print(timeformat(datetime.timedelta(seconds=duration(log)),
#                     '{minutes} minutes'), end=' ')
#if not stacks(log) == 1:
#    print('per stack and about',
#          timeformat(stacks(log) * datetime.timedelta(seconds=duration(log)),
#                     '{hours} hours and {minutes} minutes'), end=' ')
#print('per sample', end='')
#if stacks(log) == 1:
#    print('.')
#else:
#    print(' (with', stacks(log), 'stacks).')

In [82]:
# Mean scan time per stack, in seconds (display only)
Data['Scan time'].mean()

28855.4

In [83]:
#print('In total, we scanned', Data.Stacks.sum(), 'stacks.')
#print('Each stack took approximately',
#      Data['Scan time'].mean() // 60,
#      'minutes (' + str(datetime.timedelta(seconds=Data['Scan time'].mean())) + ')')
#print('In total, we thus scanned for about', 
#      timeformat(Data.Stacks.sum() *
#                 datetime.timedelta(seconds=Data.Duration.mean()),
#                 '{days} days, {hours} hours and {minutes} minutes.'))
#hourlyrate = 125
#print('At the MIC rate of %s CHF/h, this would have cost %s CHF' % (
#    hourlyrate,
#    int(round(Data.Stacks.sum() * Data.Duration.mean() / 60 / 60 * hourlyrate))))

In [84]:
#print('In total, we scanned %s samples at %s stacks on average' % (len(Data.Scan.unique()), Data.Stacks.mean()))
#print('Each stack took approximately',
#      Data.Duration.mean() // 60,
#      'minutes (' + str(datetime.timedelta(seconds=Data.Duration.mean())) + ')')
#print('In total, we thus scanned for about', 
#      timeformat(len(Data.Scan.unique()) * Data.Stacks.mean() *
#                 datetime.timedelta(seconds=Data.Duration.mean()),
#                 '{days} days, {hours} hours and {minutes} minutes.'))
#print('At the MIC rate, this would have cost',
#      int(round(len(Data.Scan.unique()) * Data.Stacks.mean() * Data.Duration.mean() / 60 / 60 * 75)),
#      'CHF.')

In [85]:
# Which NRecon versions were used for the reconstructions? (display only)
Data.NRecon.unique()

array(['1.7.0.4', '1.7.4.2', '1.7.3.0'], dtype=object)

In [86]:
# Reconstruction sentence. The versions are joined with 'or' like in the
# sentences above, instead of printing the raw array with brackets and quotes.
# The sentence is left open (end=' ') for the commented-out continuation below.
print('The projection images were then subsequently reconstructed into a 3D stack',
      'of images with NRecon (Version',
      " or ".join(str(value) for value in Data.NRecon.dropna().unique()) +
      ', Bruker microCT, Kontich, Belgium)', end=' ')
#if ringremoval(log):
#    print('using a ring artifact correction of',
#          ringremoval(log), end='')
#if beamhardening(log):
#    print(' and a beam hardening correction of',
#          beamhardening(log),
#          '%.')
#else:
#    print('.')
   

The projection images were then subsequently reconstructed into a 3D stack of images with NRecon (Version ['1.7.0.4' '1.7.4.2' '1.7.3.0'] , Bruker microCT, Kontich Belgium) 

In [87]:
# Last sentence: the mean voxel size, rounded to an integer
print('The whole process resulted in datasets with an isometric voxel size of',
      round(Data.Voxelsize.mean()),
      'µm.') 

The whole process resulted in datasets with an isometric voxel size of 5 µm.


In [88]:
# Overview of the reconstruction parameters per scan (display only)
Data[['Sample', 'Scan', 'Subfolder', 'Beamhardening', 'RingRemoval']]

Unnamed: 0,Sample,Scan,Subfolder,Beamhardening,RingRemoval
0,ctrl1,highresolution,proj,0,20
16,ctrl1,highresolution,proj,0,7
28,ctrl2,highresolution,proj,0,13
36,ctrl4,highresolution,proj,0,20
61,ctrl5,highresolution,proj,0,13
69,notch1_1,highresolution,proj,0,20
87,notch1_1,highresolution,proj,0,13
103,notch1_3,highresolution,proj,0,13
107,notch1_4,highresolution,proj,0,100
131,notch1_4,highresolution,proj,0,13
