# Generate the figures for the manuscript

In [None]:
import platform
import os
import glob
import pandas
import dask
from dask.distributed import Client, LocalCluster
import dask_image.imread
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.gridspec import GridSpec
import seaborn
import numpy
from tqdm.auto import tqdm, trange
import imageio

In [None]:
# Import our own parsing functions which we've added as submodule
from BrukerSkyScanLogfileRuminator.parsing_functions import *

In [None]:
# Point dask at a temporary folder with enough space.
# This has to happen before a client is created: https://stackoverflow.com/a/62804525/323100
import tempfile

system_name = platform.system()
if 'Linux' in system_name:
    tmp = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
elif 'Darwin' in system_name:
    tmp = tempfile.gettempdir()
elif 'anaklin' in platform.node():
    # Every other system is treated like the Windows machines; this one uses F:
    tmp = os.path.join('F:\\')
else:
    tmp = os.path.join('D:\\')
dask.config.set({'temporary_directory': os.path.join(tmp, 'tmp')})
print('Dask temporary files go to %s' % dask.config.get('temporary_directory'))

In [None]:
# Start cluster and client now, after setting tempdir
try:
    cluster = LocalCluster()
except PermissionError:
    # Presumably raised when the dask temporary directory (set in an earlier cell) is not writable
    print('Mount the Fast_SSD, otherwise we cannot use it for saving the temporary files!')
    print('Then rerun this cell.')
    # Stop here: without a cluster the next line would only fail with a misleading NameError
    raise
client = Client(cluster)

In [None]:
# Tell the user where the dask dashboard is served; the port comes from the running scheduler
print('You can see what DASK is doing at "http://localhost:%s/status"' % client.scheduler_info()['services']['dashboard'])

In [None]:
# # Ignore warnings in the notebook
# import warnings
# warnings.filterwarnings("ignore")

In [None]:
# Figure defaults: gray-scale images without interpolation, larger and high-resolution figures
plt.rcParams.update({'image.cmap': 'gray',
                     'image.interpolation': 'nearest',
                     'figure.figsize': (16, 9),
                     'figure.dpi': 300})

In [None]:
# Scale bar defaults: white, frameless bar in the lower right corner
plt.rcParams.update({'scalebar.location': 'lower right',
                     'scalebar.frameon': False,
                     'scalebar.color': 'white'})

In [None]:
# Scale plot elements for a manuscript figure.
# Valid contexts are 'paper', 'notebook', 'talk' and 'poster'.
seaborn.set_context(context='paper')

In [None]:
# The data lives in different locations depending on the operating system.
# Set FastSSD to True to read from the 'Fast_SSD' / 'F:' location instead of the
# research storage, which speeds things up significantly.
FastSSD = True
if 'Linux' in platform.system():
    if FastSSD:
        BasePath = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
    else:
        BasePath = os.path.join(os.sep, 'home', 'habi', 'research-storage-djonov')
elif 'Darwin' in platform.system():
    # First mount smb://resstore.unibe.ch/ana_rs_djonov/data in the Finder
    FastSSD = False
    BasePath = os.path.join('/Volumes/data/')
elif 'Windows' in platform.system():
    if FastSSD:
        BasePath = os.path.join('F:\\')
    else:
        # The research storage is mapped to V: on all Windows machines
        BasePath = os.path.join('V:\\')
else:
    # Fail with a clear message instead of a NameError on BasePath further down
    raise RuntimeError('Unsupported platform %r, we do not know where the data is stored' % platform.system())
Root = os.path.join(BasePath, 'Aaldijk')
print('We are loading all the data from %s' % Root)

In [None]:
def get_git_hash():
    """
    Return the abbreviated hash of the current git commit as a string.

    Asks `git rev-parse --short --verify HEAD` about the `.git` directory
    inside the current working directory and returns its stripped, UTF-8
    decoded standard output.
    Based on http://stackoverflow.com/a/949391/323100 and
    http://stackoverflow.com/a/18283905/323100
    """
    import subprocess
    command = ['git',
               '--git-dir', os.path.join(os.getcwd(), '.git'),
               'rev-parse', '--short', '--verify', 'HEAD']
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    # communicate() returns (stdout, stderr); only stdout is captured here
    stdout_bytes = process.communicate()[0]
    return stdout_bytes.strip().decode("utf-8")

In [None]:
# Make directory for output
# The folder is named after the current git hash, so the output of every
# committed state of the notebook ends up in its own subfolder of 'Output'.
OutPutDir = os.path.join(os.getcwd(), 'Output', get_git_hash())
print('We are saving all the output to %s' % OutPutDir)
# exist_ok=True makes rerunning this cell harmless
os.makedirs(OutPutDir, exist_ok=True)

In [None]:
# Make us a dataframe for saving all that we need
# It starts empty; the cells below add one column after the other.
Data = pandas.DataFrame()

In [None]:
# Collect *all* log files below Root by walking the directory tree.
# The result is unsorted, but walking is faster than globbing.
print('Searching for all log files in %s' % Root)
logfiles = []
for folder, subfolders, filenames in os.walk(Root):
    for filename in filenames:
        if filename.endswith('.log'):
            logfiles.append(os.path.join(folder, filename))
Data['LogFile'] = logfiles

In [None]:
# Keep only the log files of sample 'Foetus02', drop everything else.
# A boolean mask avoids dropping rows while iterating over the dataframe,
# and drop=True keeps the old index from leaking into the data as an extra column.
is_foetus02 = numpy.array(['Foetus02' in logfile for logfile in Data['LogFile']], dtype=bool)
Data = Data.loc[is_foetus02].reset_index(drop=True)

In [None]:
# The folder of each scan is the directory that contains its log file
Data['Folder'] = Data['LogFile'].map(os.path.dirname)

In [None]:
# Keep only the log files we want and need. A row survives if
#   - its folder is a 'rec' folder,
#   - it is not a partial reconstruction ('SubScan'), which might be there from synchronization, and
#   - it is not a temporary logfile ('rectmp.log').
keep = [('rec' in folder) and ('SubScan' not in folder) and ('rectmp.log' not in logfile)
        for folder, logfile in zip(Data['Folder'], Data['LogFile'])]
# Renumber the rows, as if we had only loaded the 'rec' files in the first place
Data = Data.loc[keep].reset_index(drop=True)

In [None]:
# Generate some meaningful columns from the log file path relative to Root:
# the first path component is the sample, everything between the sample and
# the file name (joined with '_') describes the scan.
relative_parts = [logfile[len(Root)+1:].split(os.sep) for logfile in Data['LogFile']]
Data['Sample'] = [parts[0] for parts in relative_parts]
Data['SampleName'] = [sample.split('_')[0] for sample in Data['Sample']]
Data['Scan'] = ['_'.join(parts[1:-1]) for parts in relative_parts]

In [None]:
# List the reconstructed slices (sorted by file name) of every folder and count them
reconstruction_files = []
for folder in Data['Folder']:
    reconstruction_files.append(sorted(glob.glob(os.path.join(folder, '*rec0*.png'))))
Data['Reconstructions'] = reconstruction_files
Data['Number of reconstructions'] = [len(files) for files in reconstruction_files]

In [None]:
# Drop the samples without any reconstruction. These have either not been
# reconstructed yet, or we deleted their reconstructions with
# `find . -name "*rec*.png" -type f -mtime +333 -delete`
# Based on https://stackoverflow.com/a/13851602
has_reconstructions = Data['Number of reconstructions'].gt(0)
Data = Data.loc[has_reconstructions].reset_index(drop=True)
print('We have %s folders with reconstructions' % (len(Data)))

In [None]:
# Read the scanning parameters from the log files, to double-check them.
# Each entry maps a column name to the parsing function that fills it.
scan_parsers = [('Scanner', scanner),
                ('Voltage', voltage),
                ('Current', current),
                ('Voxelsize', lambda log: pixelsize(log, rounded=True)),
                ('CameraWindow', projection_size),
                ('Exposuretime', exposure),
                ('Averaging', averaging),
                ('Stacks', stacks),
                ('RotationStep', rotationstep),
                ('Scan date', scandate),
                ('Scan time', duration)]
for column, parser in scan_parsers:
    Data[column] = [parser(log) for log in Data['LogFile']]

In [None]:
# Order the scans chronologically and renumber the rows accordingly
Data = Data.sort_values(by='Scan date', ignore_index=True)

In [None]:
# Read the reconstruction parameters from the log files, to double-check them.
# Each entry maps a column name to the parsing function that fills it.
reconstruction_parsers = [('Grayvalue', reconstruction_grayvalue),
                          ('RingartefactCorrection', ringremoval),
                          ('BeamHardeningCorrection', beamhardening),
                          ('DefectPixelMasking', defectpixelmasking),
                          ('ROI', region_of_interest),
                          ('Rot', crosssection_rotation)]
for column, parser in reconstruction_parsers:
    Data[column] = [parser(log) for log in Data['LogFile']]

In [None]:
# Calculate time 'spent' since start
# Look up the date of the first scan once, instead of once per row
first_scan = Data['Scan date'].min()
Data['Time passed'] = [scan_date - first_scan for scan_date in Data['Scan date']]
# Also extract days, rounded ('D' is the documented frequency alias for calendar days)
Data['Days passed'] = [time_passed.round('D') for time_passed in Data['Time passed']]

In [None]:
# Load all reconstructions into ephemeral (lazily evaluated) dask arrays, one stack per scan
# NOTE(review): this loads '*rec*.png', while the file list above was built from '*rec0*.png' - confirm that both match the same files
Reconstructions = [dask_image.imread.imread(os.path.join(folder, '*rec*.png'))
                   for folder in tqdm(Data['Folder'],
                                      desc='Load reconstructions',
                                      total=len(Data))]

In [None]:
# How big are the datasets?
# Store the shape of each loaded stack; the middle-slice cell below halves these values to find the central slices
Data['Size'] = [rec.shape for rec in Reconstructions]

In [None]:
# Mean gray value of each reconstruction.
# For speed reasons we only look at every `subsample`-th voxel along each axis.
subsample = 5
stride = slice(None, None, subsample)
Data['MeanBrightness'] = [rec[stride, stride, stride].mean().compute() for rec in Reconstructions]

In [None]:
# Overview of the scans and their reconstruction parameters (every column listed only once)
Data[['Sample', 'Scan', 'Size', 'ROI', 'Days passed', 'Grayvalue', 'RingartefactCorrection', 'BeamHardeningCorrection', 'DefectPixelMasking', 'Rot']]

In [None]:
# The three cardinal directions.
# The order matters: the position in this list is used as the array axis for the MIPs below.
directions = ['Axial', 'Coronal', 'Sagittal']

In [None]:
# Middle slice of every stack in each direction.
# A slice that was already saved to disk is read back, otherwise it is
# calculated from the stack, stored in the dataframe and saved to disk.
for direction in directions:
    Data['Mid_' + direction] = [None] * len(Reconstructions)
for c, row in tqdm(Data.iterrows(), desc='Middle images', total=len(Data), leave=False):
    stack = Reconstructions[c]
    shape = Data['Size'][c]
    parent_folder = os.path.dirname(row['Folder'])
    progress_label = '%s/%s' % (row['Sample'], row['Scan'])
    for direction in tqdm(directions,
                          desc=progress_label,
                          leave=False,
                          total=len(directions)):
        column = 'Mid_' + direction
        outfilepath = os.path.join(parent_folder,
                                   '%s.%s.Middle.%s.png' % (row['Sample'],
                                                            row['Scan'],
                                                            direction))
        if os.path.exists(outfilepath):
            # Cached from an earlier run
            Data.at[c, column] = dask_image.imread.imread(outfilepath).squeeze()
            continue
        # Cut through the middle of the axis that belongs to the requested view
        if 'Axial' in direction:
            middle = stack[shape[0] // 2]
        elif 'Coronal' in direction:
            middle = stack[:, shape[1] // 2, :]
        elif 'Sagittal' in direction:
            middle = stack[:, :, shape[2] // 2]
        Data.at[c, column] = middle.compute().squeeze()
        # Save the calculated view to disk for the next run
        imageio.imwrite(outfilepath, (Data.at[c, column]))

In [None]:
# Maximum intensity projection (MIP) of every stack in each direction.
# A MIP that was already saved to disk is read back, otherwise it is
# calculated from the stack, stored in the dataframe and saved to disk.
for direction in directions:
    Data['MIP_' + direction] = [None] * len(Reconstructions)
for c, row in tqdm(Data.iterrows(), desc='MIPs', total=len(Data), leave=False):
    parent_folder = os.path.dirname(row['Folder'])
    progress_label = '%s/%s' % (row['Sample'], row['Scan'])
    # The position of the direction in the list is the axis we project along
    for axis, direction in tqdm(enumerate(directions),
                                desc=progress_label,
                                leave=False,
                                total=len(directions)):
        column = 'MIP_' + direction
        outfilepath = os.path.join(parent_folder,
                                   '%s.%s.MIP.%s.png' % (row['Sample'],
                                                         row['Scan'],
                                                         direction))
        if not os.path.exists(outfilepath):
            # Generate the MIP and save it out as 8 bit image
            Data.at[c, column] = Reconstructions[c].max(axis=axis).compute().squeeze()
            imageio.imwrite(outfilepath, Data.at[c, column].astype('uint8'))
        else:
            # Cached from an earlier run
            Data.at[c, column] = dask_image.imread.imread(outfilepath).squeeze()

Define some helper functions for the figures.

In [None]:
# Adapted from AcinarSize_Johannes/MicroscopyFigure.ipynb
def label_image(image, labeltext, x=None, y=None, color='white', boxcolor=None):
    '''
    Write a text label onto the current matplotlib axes.

    Parameters
    ----------
    image : array-like
        The displayed image. Only its shape is used, to place the label
        when no coordinates are given.
    labeltext : str
        The text to write.
    x, y : number, optional
        Position of the center of the label, in data coordinates.
        Defaults to 3.09 % of the image width from the left edge and
        3.09 % of the image height above the bottom edge.
        A coordinate of 0 is respected and not replaced by the default.
    color : color, optional
        Color of the text.
    boxcolor : color, optional
        If given, the label gets a semi-transparent box in this color.

    Returns
    -------
    None
    '''
    rows, columns = numpy.shape(image)[0], numpy.shape(image)[1]
    # Only fall back to the defaults if no coordinate was given at all;
    # a plain truthiness test would also discard a legitimate coordinate of 0.
    if x is None:
        x = columns * 0.0309
    if y is None:
        y = rows - (rows * 0.0309)
    t = plt.gca().text(x,
                       y,
                       labeltext,
                       color=color,
                       fontsize=12,
                       verticalalignment='center',
                       horizontalalignment='center')
    if boxcolor is not None:
        t.set_bbox(dict(facecolor=boxcolor,
                        edgecolor=boxcolor,
                        alpha=0.618))
    return

In [None]:
def markregion(image, coordinates, width, height, showimage=True):
    """
    Mark a rectangular region on the current matplotlib axes.

    The center of the region (`coordinates[0]`, `coordinates[1]`) is marked
    with a cross, and a dashed white rectangle of `width` x `height` is drawn
    around it. With `showimage=True` the `image` is displayed first.
    Returns an empty tuple.
    """
    from matplotlib.patches import Rectangle
    center_x = coordinates[0]
    center_y = coordinates[1]
    if showimage:
        plt.imshow(image)
    # Cross at the center of the region
    plt.scatter(center_x, center_y, color=seaborn.color_palette()[0], marker='x')
    # Rectangle() expects the lower left corner, not the center
    corner = (center_x - width / 2, center_y - height / 2)
    outline = Rectangle(corner, width, height,
                        edgecolor='white',
                        linestyle='--',
                        facecolor='none')
    plt.gca().add_patch(outline)
    return ()

In [None]:
def extract(stack, coordinates, width=None, height=None, verbose=False):
    """
    Get a (small) rectangular region from one slice of a stack.

    Parameters
    ----------
    stack : array-like
        Image stack, indexed as stack[slice][row, column].
    coordinates : sequence of three int
        (x, y, z): column and row of the center of the region, and the
        index of the slice to extract it from.
    width, height : number
        Size of the region in pixels. Both are required; fractional values
        are allowed, half the size is truncated to whole pixels.
    verbose : bool, optional
        If True, show the slice with the region marked next to the extract.

    Returns
    -------
    The extracted region. A region that reaches beyond the slice is clipped
    at the border of the slice.

    Raises
    ------
    TypeError
        If `width` or `height` is not given.
    """
    if width is None or height is None:
        raise TypeError('extract() needs both the `width` and the `height` of the region')
    x, y, z = coordinates[0], coordinates[1], coordinates[2]
    half_width = int(width / 2)
    half_height = int(height / 2)
    # Clamp at 0: a negative index would silently wrap around to the far side
    # of the image and give an empty or wrong region. Indices beyond the far
    # border are clipped by the slicing itself.
    top, bottom = max(y - half_height, 0), max(y + half_height, 0)
    left, right = max(x - half_width, 0), max(x + half_width, 0)
    region = stack[z][top:bottom, left:right]
    if verbose:
        plt.subplot(121)
        # Show the slice with the region marked
        markregion(stack[z], coordinates, width, height, showimage=True)
        plt.title('Slice %s of input stack\n'
                  'Centered at x=%s and y=%s' % (z, x, y))
        plt.subplot(122)
        plt.imshow(region)
        plt.title('Extract\n'
                  '%s x %s px' % (region.shape[0], region.shape[1]))
        plt.show()
    return region

In [None]:
# Try the extraction on the first scan: a 1200 x 750 px region around x=800, y=900 in slice 1634.
# verbose=True also shows the slice with the marked region next to the extract.
extract(Reconstructions[0],
        (800, 900, 1634),
        width=1200, height=750,
        verbose=True)

----
# Figure 1

We want to show a figure with the gray value curve over the time frame in which we stained and scanned the sample.
In addition, the figure shows representative slices of a dataset at the start, middle and end of that duration,
as well as detailed views of the marked regions in these slices.

In [None]:
# Automatically find the scan whose day is closest to the mean of all 'Days passed'.
# idxmin() directly returns the index label of the smallest deviation (the
# first one in case of a tie), so we do not have to sort all deviations.
middleone = (Data['Days passed'] - Data['Days passed'].mean()).abs().idxmin()

In [None]:
# How many scans are left in the dataframe?
len(Data)

In [None]:
# Put the coordinates (x, y, slice) we want to show into the dataframe.
# The column is initialised with None, like the 'Mid_' and 'MIP_' columns above,
# so that it is a plain object column that can hold a list per cell.
Data['Coordinates'] = [None] * len(Data)
# First scan, scan closest to the middle of the duration, and last scan
Data.at[Data.index[0], 'Coordinates'] = [800, 900, 1634]
Data.at[middleone, 'Coordinates'] = [800, 900, 1634]
Data.at[Data.index[-1], 'Coordinates'] = [1000, 1500, 2614]

In [None]:
# Show the voxel size and the chosen coordinates of every scan
Data[['Sample', 'Scan', 'Voxelsize', 'Coordinates']]

In [None]:
# Ratio of the two voxel sizes (11 um / 20 um); the resulting 0.55 is used below to scale the region of the last scan
11.0/20.0

In [None]:
# Extract the regions of interest from the first, the middle and the last scan
width = 1000
height = 750
# Voxel size difference (11/20 um): the region of the last scan is enlarged by this ratio
voxel_ratio = .55
detail_start = extract(Reconstructions[0],
                       Data['Coordinates'][0],
                       width=width,
                       height=height,
                       verbose=True)
detail_mid = extract(Reconstructions[middleone],
                     Data['Coordinates'][middleone],
                     width=width,
                     height=height,
                     verbose=True)
detail_end = extract(Reconstructions[-1],
                     Data['Coordinates'].iloc[-1],
                     width=width / voxel_ratio,
                     height=height / voxel_ratio,
                     verbose=True)

Fig. 1: A) Detail of a native scan of the human foetal pelvis. The ossification centres of the iliac bone (bottom) and two ossification centres of the sacrum (top left and right) can be seen. The soft tissue is not distinguishable. B) The same detail after 158 days of Lugol immersion. Besides the bony structures, the cartilage, muscles, blood vessels and connective tissue can now be distinguished.

In [None]:
# Mean brightness of every scan, plotted against its scan date
seaborn.scatterplot(data=Data,
                    x='Scan date',
                    y='MeanBrightness')
# Write a shortened sample name below every point
# Label text: https://matplotlib.org/stable/tutorials/text/annotations.html
# NOTE(review): only 'Foetus01' is abbreviated, but the data was filtered for 'Foetus02' above - confirm
for index, scan in Data.iterrows():
    short_name = scan.Sample.replace('Foetus01', 'F1')
    for suffix in ('_Lugol', '_05pct', '_10pct', '_15pct'):
        short_name = short_name.replace(suffix, '')
    plt.gca().annotate(short_name,
                       xy=(scan['Scan date'], scan.MeanBrightness),
                       xycoords='data',
                       xytext=(-3, -75),
                       textcoords='offset points',
                       ha='left',
                       rotation=-60)
plt.ylim(ymin=0)
plt.title('Average gray value of the %s-times subsampled reconstructions' % subsample)
plt.show()

In [None]:
# Collate figure 1
# Top row: gray value curve. Middle row (B-D): slices of the first, middle and
# last scan with the marked regions. Bottom row (E-G): details of these regions.
from string import ascii_uppercase

plt.subplot(311)
# Plot the mean brightness of the scans with their dates
seaborn.lineplot(data=Data,
                 x='Scan date',
                 y='MeanBrightness')
# Write a shortened sample name below every point
# Label text: https://matplotlib.org/stable/tutorials/text/annotations.html
for index, scan in Data.iterrows():
    short_name = scan.Sample.replace('Foetus01', 'F1')
    for suffix in ('_Lugol', '_05pct', '_10pct', '_15pct'):
        short_name = short_name.replace(suffix, '')
    plt.gca().annotate(short_name,
                       xy=(scan['Scan date'], scan.MeanBrightness),
                       xycoords='data',
                       xytext=(-3, -75),
                       textcoords='offset points',
                       ha='left',
                       rotation=-60)
# Mark the three scans shown below with the letters of their panels (B/E, C/F, D/G)
for position, index in enumerate([0, middleone, len(Data)-1]):
    panel_letters = '%s/%s' % (ascii_uppercase[position + 1], ascii_uppercase[position + 4])
    plt.gca().annotate(panel_letters,
                       xy=(Data['Scan date'][index], Data['MeanBrightness'][index]),
                       xycoords='data',
                       color='red')
plt.ylim(ymin=0)
plt.title('Average gray value of the %s-times subsampled reconstructions' % subsample)

# For each shown scan: stack, coordinates, voxel size, size of the marked region, extracted detail.
# The region of the last scan is scaled with the voxel size difference (11/20 um).
shown_scans = [(Reconstructions[0],
                Data['Coordinates'][0],
                Data['Voxelsize'][0],
                width, height,
                detail_start),
               (Reconstructions[middleone],
                Data['Coordinates'][middleone],
                Data['Voxelsize'][middleone],
                width, height,
                detail_mid),
               (Reconstructions[-1],
                Data['Coordinates'].iloc[-1],
                Data['Voxelsize'].iloc[-1],
                width/.55, height/.55,
                detail_end)]

# Middle row: the slices with the marked regions
for position, shown in enumerate(shown_scans):
    shown_stack, shown_coordinates, shown_voxelsize, region_width, region_height = shown[:5]
    plt.subplot(3, 3, 4 + position)
    overview = shown_stack[shown_coordinates[2]]
    markregion(overview, shown_coordinates, region_width, region_height)
    plt.gca().add_artist(ScaleBar(shown_voxelsize, 'um'))
    label_image(overview, 'BCD'[position])
    plt.axis('off')

# Bottom row: the details
for position, shown in enumerate(shown_scans):
    shown_voxelsize, shown_detail = shown[2], shown[5]
    plt.subplot(3, 3, 7 + position)
    plt.imshow(shown_detail)
    plt.gca().add_artist(ScaleBar(shown_voxelsize, 'um'))
    label_image(shown_detail, 'EFG'[position])
    plt.axis('off')

plt.savefig(os.path.join(OutPutDir, 'Fig01.png'),
            bbox_inches='tight')
plt.show()

----
# Figure 2

Fig. 2: Details of different anatomical structures in the human pelvis, seen after 158 days of Lugol immersion. A) Greater vestibular glands on both sides of the vulvar vestibule. B) Uterus (bottom of the image) and several cuts through the left fallopian tube, parts of the ovary are also visible. C) Muscle layers of the distal rectum with circular and longitudinally oriented muscle fibre bundles. 

Koordinaten (in 11um stack 152d)

A) x: 2249, y: 1075, z:819 --> war gedreht, jetzt 1936,2218,819

B) x:2223 y: 1308 z:2527 --> war gedreht, jetzt 2230,2093,2527

C) x: 1939, y: 1063, z: 819 --> war gedreht, jetzt 2061,1878,819

In [None]:
# Collect figure 2
# Top row (A-C): slices of the last scan with the marked regions.
# Bottom row (D-F): details of these regions.
# Coordinates (x, y, slice) of the three anatomical regions
regions = [(1936, 2218, 819),
           (2230, 2093, 2527),
           (2061, 1878, 819)]
region_size = 500  # px, the regions are square
final_stack = Reconstructions[-1]
final_voxelsize = Data['Voxelsize'].iloc[-1]

for position, region in enumerate(regions):
    plt.subplot(2, 3, position + 1)
    # Mark and label the very slice that is shown
    overview = final_stack[region[2]]
    markregion(overview, region, region_size, region_size)
    # Every axes needs its own scale bar artist
    plt.gca().add_artist(ScaleBar(final_voxelsize, 'um'))
    label_image(overview, 'ABC'[position])
    plt.axis('off')

for position, region in enumerate(regions):
    plt.subplot(2, 3, position + 4)
    # Extract each detail only once, and use it for both showing and labelling
    detail = extract(final_stack, region, region_size, region_size)
    plt.imshow(detail)
    plt.gca().add_artist(ScaleBar(final_voxelsize, 'um'))
    label_image(detail, 'DEF'[position])
    plt.axis('off')

plt.tight_layout(pad=0.1)
plt.savefig(os.path.join(OutPutDir, 'Fig02.png'),
            bbox_inches='tight')
plt.show()

In [None]:
# Remind the user where the figures ended up
print('Saved all figures to %s' % OutPutDir)