This notebook generates the paragraph describing the microCT scanning from the log files of the scans.

In [None]:
import platform
import os
import pandas
import glob
import numpy

In [None]:
from parsing_functions import *

In [None]:
# Different locations if running either on Linux or Windows
if 'Linux' in platform.system():
    BasePath = os.path.join(os.path.sep, 'home', 'habi', 'P')
elif 'Windows' in platform.system():
    BasePath = os.path.join('P:', os.sep)
else:
    # Fail early with a clear message instead of a NameError on `BasePath` further down
    raise RuntimeError('Unsupported platform "%s", please set BasePath manually' % platform.system())
# Use *this* folder for the bone microvasculature manuscript
Root = os.path.join(BasePath, 'Documents', 'Publications', 'Ruslan Bone', 'manubot', 'content', 'data')
print('We are loading all the data from the folder %s' % Root)

In [None]:
# Make us a dataframe for saving all that we need
# It starts out empty and is filled column by column in the cells below
Data = pandas.DataFrame()

In [None]:
# Collect *all* log files found anywhere below `Root`
# Using os.walk is way faster than using recursive glob.glob, see DataWrangling.ipynb for details
# The found log files are deliberately left unsorted, which makes it quicker, too
logfiles = []
for root, dirs, files in os.walk(Root):
    for name in files:
        if name.endswith('.log'):
            logfiles.append(os.path.join(root, name))
Data['LogFile'] = logfiles

In [None]:
# Report how many log files were found below `Root`
print('We found a total of %s log files in %s' % (len(Data), Root))

In [None]:
# Exclude all substack log files
# They end in "~01.log", "~02.log", etc
# A simple regex searching for "~ digit digit" helps us to drop those
regex = r"~\d\d"
# The vectorized string method performs the regex search itself, so this cell neither needs
# the `re` module (which this notebook does not import) nor drops rows while iterating over them
is_substack = Data['LogFile'].str.contains(regex, regex=True, na=False).astype(bool)
# Keep only the main log files and reset the dataframe index in the same step
Data = Data[~is_substack].reset_index(drop=True)

In [None]:
# Report how many log files are left after the substack log files were excluded
print('Excluding subscan log files, we now have %s log files' % len(Data))

Parse the data from all the log files

In [None]:
# Parse scanner and control software version from each log file
Data['Scanner'] = list(map(scanner, Data['LogFile']))
# The control software version is stored as a string
Data['ControlSoftware'] = list(map(str, map(controlsoftware, Data['LogFile'])))

In [None]:
# Parse the voxel size from each log file, once as-is and once rounded
Data['Voxelsize'] = list(map(pixelsize, Data['LogFile']))
Data['Voxelsize_rounded'] = [pixelsize(logfile, rounded=True) for logfile in Data['LogFile']]

In [None]:
# Parse source, camera, exposure time and averaging from each log file
# Each (column, parser) pair fills one column of the dataframe
for column, parser in (('Source', source),
                       ('Camera', camera),
                       ('Exposure', exposuretime),
                       ('Averaging', averaging)):
    Data[column] = [parser(logfile) for logfile in Data['LogFile']]

In [None]:
# Parse voltage, current and filter from each log file
# Each (column, parser) pair fills one column of the dataframe
for column, parser in (('Voltage', voltage),
                       ('Current', current),
                       ('Filter', whichfilter)):
    Data[column] = [parser(logfile) for logfile in Data['LogFile']]

In [None]:
# Parse the scan geometry from each log file
# Scan duration and date are parsed in their own cell further below; doing it here, too,
# only made us read every log file twice for identical values
for column, parser in (('Stacks', stacks),
                       ('NumProj', numproj),
                       ('ProjSize', projection_size),
                       ('ThreeSixty', threesixtyscan),
                       ('RotationStep', rotationstep),
                       ('Wide', overlapscan)):
    Data[column] = [parser(logfile) for logfile in Data['LogFile']]

In [None]:
# Parse the reconstruction settings from each log file
# `nreconversion` returns several items, we keep the one at index 1
Data['NRecon'] = [nreconversion(logfile)[1] for logfile in Data['LogFile']]
for column, parser in (('RingRemoval', ringremoval),
                       ('Beamhardening', beamhardening),
                       ('DefectPixelMasking', defectpixelmasking),
                       ('GrayValue', reconstruction_grayvalue)):
    Data[column] = [parser(logfile) for logfile in Data['LogFile']]

In [None]:
# Parse reconstruction size and region of interest from each log file
Data['RecSize'] = list(map(reconstruction_size, Data['LogFile']))
Data['ROI'] = [region_of_interest(logfile, verbose=False) for logfile in Data['LogFile']]

In [None]:
# Parse scan duration and scan date from each log file
Data['Duration'] = list(map(duration, Data['LogFile']))
Data['Date'] = list(map(scandate, Data['LogFile']))

----

Now that we loaded the data, we customize the standard log file parser notebook for the microvasculature manuscript.

In [None]:
# Extract the name of the folder each log file is stored in
Data['Folder'] = list(map(os.path.dirname, Data['LogFile']))

In [None]:
# Generate sample name to then match to figure number
# We bluntly split the path below `Root` at the `os.path.sep` and use the first item of this separated list
prefix_length = len(Root) + 1  # Also skips the path separator directly after `Root`
Data['Sample'] = [foldername[prefix_length:].split(os.path.sep)[0]
                  for foldername in Data['Folder']]

In [None]:
# Display the dataframe with everything we parsed so far
Data

In [None]:
def match_to_figure(samplename):
    """
    Map a sample name to the number of the manuscript figure it belongs to.

    The markers are tested in the order given below,
    the first one that occurs in `samplename` decides the figure number.

    Parameters
    ----------
    samplename : str
        Name of the sample.

    Returns
    -------
    int
        Number of the figure the sample belongs to.

    Raises
    ------
    ValueError
        If `samplename` contains none of the known markers.
        Previously this case failed with an opaque TypeError from `int(None)`.
    """
    markers = (('Israel', 1),
               ('11O', 2),  # Some are named 11OKT, some 11Okt
               ('c1m5', 3),
               ('0_99', 4),
               ('Vreni', 5),
               ('_sample4', 6),
               ('Mouse_1EAR', 7))
    for marker, figurenumber in markers:
        if marker in samplename:
            return figurenumber
    raise ValueError('Sample "%s" cannot be matched to any figure' % samplename)

In [None]:
# Look up the figure number for each sample name
Data['Figure'] = list(map(match_to_figure, Data['Sample']))

In [None]:
# Sort dataframe by figure number (and by sample name within a figure),
# then reset the dataframe index so that it follows the new order
Data = Data.sort_values(by=['Figure', 'Sample']).reset_index(drop=True)

In [None]:
# Cull irrelevant beginning of path from logfile,
# i.e. `Root` and the path separator directly after it
Data['LogFile'] = Data['LogFile'].str[len(Root) + 1:]

In [None]:
# Cull irrelevant beginning of path from folder
# We strip the `Root` prefix instead of splitting at the text 'data', since splitting
# truncates every folder that has 'data' somewhere in its own name, too.
# Folders that do not start with `Root` (e.g. when this cell is run twice) are left untouched.
Data['Folder'] = [folder[len(Root):] if folder.startswith(Root) else folder
                  for folder in Data['Folder']]

Add data on animals to the table, since one reviewer specifically asked us to

> insert a descriptive table of each animal model studied, reporting strain, age, number, sex,
> model of the tomograph used to study it, pixel size, Energy, current, etc.

(the tomography data was already present).

In [None]:
# Set the most generally used values for all scans
# The cells below overwrite them for the animals that differ
for column, default in (('Animal', 'Mouse'),
                        ('Strain', 'CB17SCID'),
                        ('Age', '10 weeks'),
                        ('Sex', 'm'),
                        ('N', 5)):
    Data[column] = default

In [None]:
# The mice from Israel are different
# Select their scans once, then overwrite the default values
from_israel = Data['LogFile'].str.contains('Israel')
Data.loc[from_israel, 'Strain'] = 'BALB/cOlaHsd and C57BL/6J crossing'
Data.loc[from_israel, 'Age'] = '21 months'
Data.loc[from_israel, 'N'] = 1

In [None]:
# The mice from the dental experiment are different
# Select their scans once, then overwrite the default values
from_dental = Data['LogFile'].str.contains('_kiefer')
Data.loc[from_dental, 'Strain'] = 'C57BL/6'
Data.loc[from_dental, 'Age'] = '12 weeks'
Data.loc[from_dental, 'N'] = 3

In [None]:
# Vreni is a Göttingen minipig
# Select her scans once, then overwrite the default values
is_vreni = Data['LogFile'].str.contains('Vreni')
Data.loc[is_vreni, 'Animal'] = 'Minipig'
Data.loc[is_vreni, 'Strain'] = 'Göttingen'
# Approximation, we haven't heard back from Petr
Data.loc[is_vreni, 'Age'] = '30 months'
Data.loc[is_vreni, 'Sex'] = 'f'
Data.loc[is_vreni, 'N'] = 1

In [None]:
# The Straumann minipigs are also Göttingen minipigs
# Select their scans once, then overwrite the default values
is_straumann = Data['LogFile'].str.contains('sample4')
Data.loc[is_straumann, 'Animal'] = 'Minipig'
Data.loc[is_straumann, 'Strain'] = 'Göttingen'
# We were told that their age was 28-32 months
Data.loc[is_straumann, 'Age'] = '30 months'
Data.loc[is_straumann, 'Sex'] = 'f'
Data.loc[is_straumann, 'N'] = 5

In [None]:
# The dual energy mouse is different
# Select its scans once, then overwrite the default values
is_dual_energy = Data['LogFile'].str.contains('1EAR')
Data.loc[is_dual_energy, 'Strain'] = 'C57BL/6'
Data.loc[is_dual_energy, 'Age'] = '60 weeks'
Data.loc[is_dual_energy, 'Sex'] = 'f'
Data.loc[is_dual_energy, 'N'] = 1

In [None]:
# Display the animal-related columns to check the values set above
Data[['Figure', 'Animal', 'Strain', 'Age', 'Sex', 'N', 'Sample', 'Scanner']]

In [None]:
# Display five randomly selected rows of the dataframe
Data.sample(n=5)

In [None]:
# Save out a subset of the columns of the dataframe as a CSV file for adding to the supplementary materials
# The keys are the dataframe columns to save (in this order), the values are the renamed column names
csv_header = {
    'Figure': 'Figure',
    'Animal': 'Animal',
    'Strain': 'Strain',
    'Age': 'Age',
    'Sex': 'Sex',
    'N': 'Number of animals [N]',
    'Sample': 'Sample name',
    'Scanner': 'Scanner',
    'ControlSoftware': 'Control software version',
    'Voxelsize_rounded': 'Voxelsize [μm]',
    'Source': 'X-ray source',
    'Voltage': 'Source voltage [kV]',
    'Current': 'Source current [μA]',
    'Filter': 'Filter',
    'Camera': 'Detector',
    'ProjSize': 'Projection size [px]',
    'NumProj': 'Number of Projections',
    'ThreeSixty': '360°-scan',
    'RotationStep': 'Rotation step [°]',
    'Averaging': 'Frame averaging',
    'Exposure': 'Exposure time [ms]',
    'Stacks': 'Stacked scans',
    'Wide': 'Offset positions',
    'Duration': 'Scan duration [s]',
    'NRecon': 'NRecon version',
    'RingRemoval': 'Ring removal correction',
    'Beamhardening': 'Beam hardening correction',
    'LogFile': 'Log file',
}
Data[list(csv_header.keys())].to_csv(os.path.join(Root, 'SampleAndScanData.csv'),
                                     index=False,
                                     header=list(csv_header.values()))
# This CSV file is nicely shown online on GitHub at https://github.com/microct-ana-unibe-ch/microvasculature-manuscript/blob/main/content/data/SampleAndScanData.csv

----
Now that we loaded all the relevant data from the log files, we can produce some text.
Copy-paste this text into the manuscript and edit accordingly.

In [None]:
# Print relevant data for each figure
# The values of the scans are printed as arrays of unique values (or joined with "OR"),
# so the text needs to be edited by hand after copy-pasting it into the manuscript
for figure in Data.Figure.unique():
    # Select the scans of this figure once instead of once for every printed value
    scans = Data[Data.Figure == figure]
    print(40*'v', figure, 40*'v')
    print('The %s scans for Figure %s' % (len(scans), figure), end=' ')
    print('were performed on a %s:' % scans.Scanner.unique(), end=' ')
    print('with control software version %s.' % scans.ControlSoftware.unique())
    print('The scans are:')
    for folder in scans.Folder:
        print('- %s' % folder)
    print('The X-ray source was set to a tube voltage of',
          " OR ".join(str(value) for value in scans.Voltage.unique()),
          'kV and a tube current of',
          " OR ".join(str(value) for value in scans.Current.unique()),
          'µA', end='')
    # Only the first unique filter value decides if the filter sentence is printed
    if scans.Filter.unique()[0]:
        print(', the x-ray spectrum was filtered by',
              " OR ".join(str(value) for value in scans.Filter.unique()),
              'prior to incidence onto the sample.')
    else:
        print('.')
    print('For each scan, we acquired %s projections.' % scans.NumProj.unique(), end=' ')
    print('Projection images were recorded over a sample rotation of', end=' ')
    # A figure can combine 180° and 360° scans.
    # Testing the truth value of the array of unique values raises a ValueError in that case,
    # so we translate each unique value and join the (deduplicated) results with "OR", like above.
    rotations = list(dict.fromkeys('360°' if full_rotation else '180°'
                                   for full_rotation in scans.ThreeSixty.unique()))
    print(" OR ".join(rotations), end=', ')
    print('with one projection acquired at each %s°' % scans.RotationStep.unique(), end=', ')
    print('with %s projections averaged for noise reduction.' % scans.Averaging.unique())
    if len(scans.Wide.unique()) > 1:
        print('%s projections were stitched to cover the full extent of the sample' % scans.Wide.unique())
    elif scans.Wide.unique():
        # Exactly one unique value here, so its truth value is unambiguous
        print(scans.Wide.unique())
    print('Each projection image with a size of %s pixels' % scans.ProjSize.unique(), end= ' ')
    if len(scans.Exposure.unique()) > 1:
        # NOTE(review): this averages the *distinct* exposure times, not the exposure times of all scans
        print('was exposed for %s ms (on average).' % (round(numpy.mean(scans.Exposure.unique()))))
    else:
        print('was exposed for %s ms.' % scans.Exposure.unique())
    print('This resulted in datasets with an isotropic voxel size of %s μm.' % scans.Voxelsize.unique())
    print(40*'^', figure, 40*'^')