# Datawrangling the pelvic floor scans of the mouse samples
Let's see what we did there...

In [1]:
import platform
import os
import glob
import pandas
import dask
from dask.distributed import Client, LocalCluster
import dask_image.imread
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.gridspec import GridSpec
import seaborn
import numpy
from tqdm.auto import tqdm, trange
import imageio

In [2]:
# Import our own parsing functions which we've added as submodule
from BrukerSkyScanLogfileRuminator.parsing_functions import *

In [3]:
# Set dask temporary folder
# Do this before creating a client: https://stackoverflow.com/a/62804525/323100
import tempfile
if 'Linux' in platform.system():
    tmp = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
elif 'Darwin' in platform.system():
    tmp = tempfile.gettempdir()
else:
    if 'anaklin' in platform.node():
        tmp = os.path.join('F:\\')
    else:
        tmp = os.path.join('D:\\')
dask.config.set({'temporary_directory': os.path.join(tmp, 'tmp')})
print('Dask temporary files go to %s' % dask.config.get('temporary_directory'))

Dask temporary files go to /media/habi/Fast_SSD/tmp


In [4]:
# Start cluster and client now, after setting tempdir
cluster = LocalCluster()
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 40531 instead


In [5]:
print('You can seee what DASK is doing at "http://localhost:%s/status"' % client.scheduler_info()['services']['dashboard'])

You can seee what DASK is doing at "http://localhost:40531/status"


In [6]:
# # Ignore warnings in the notebook
# import warnings
# warnings.filterwarnings("ignore")

In [7]:
# Set up figure defaults
plt.rc('image', cmap='gray', interpolation='nearest')  # Display all images in b&w and with 'nearest' interpolation
plt.rcParams['figure.figsize'] = (16, 9)  # Size up figures a bit
plt.rcParams['figure.dpi'] = 300

In [8]:
# Setup scale bar defaults
plt.rcParams['scalebar.location'] = 'lower right'
plt.rcParams['scalebar.frameon'] = False
plt.rcParams['scalebar.color'] = 'white'

In [9]:
# Display all plots identically
lines = 3
# And then do something like
# plt.subplot(lines, int(numpy.ceil(len(Data) / float(lines))), c + 1)

In [10]:
# Different locations if running either on Linux or Windows
FastSSD = False
# to speed things up significantly
if 'Linux' in platform.system():
    if FastSSD:
        BasePath = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
    else:
        BasePath = os.path.join(os.sep, 'home', 'habi', 'research-storage-djonov')
elif 'Darwin' in platform.system():
    # First mount smb://resstore.unibe.ch/ana_rs_djonov/data in the Finder
    FastSSD = False
    BasePath = os.path.join('/Volumes/data/')
elif 'Windows' in platform.system():
    if FastSSD:
        BasePath = os.path.join('F:\\')
    else:
        if 'anaklin' in platform.node():
            BasePath = os.path.join('V:\\')
        else:
            BasePath = os.path.join('V:\\')
Root = os.path.join(BasePath, 'Aaldijk')
print('We are loading all the data from %s' % Root)

We are loading all the data from /home/habi/research-storage-djonov/Aaldijk


In [11]:
def get_git_hash():
    '''
    Get the current git hash from the repository.
    Based on http://stackoverflow.com/a/949391/323100 and
    http://stackoverflow.com/a/18283905/323100
    '''
    from subprocess import Popen, PIPE
    import os
    gitprocess = Popen(['git',
                        '--git-dir',
                        os.path.join(os.getcwd(), '.git'),
                        'rev-parse',
                        '--short',
                        '--verify',
                        'HEAD'],
                       stdout=PIPE)
    (output, _) = gitprocess.communicate()
    return output.strip().decode("utf-8")

In [12]:
# # Make directory for output
# OutPutDir = os.path.join(os.getcwd(), 'Output', get_git_hash())
# print('We are saving all the output to %s' % OutPutDir)
# os.makedirs(OutPutDir, exist_ok=True)

In [13]:
# Make us a dataframe for saving all that we need
Data = pandas.DataFrame()

In [14]:
# Get *all* log files, unsorted but faster than with glob
print('Searching for all log files in %s' % Root)
Data['LogFile'] = [os.path.join(root, name)
                   for root, dirs, files in os.walk(Root)
                   for name in files
                   if name.endswith((".log"))]

Searching for all log files in /home/habi/research-storage-djonov/Aaldijk


In [15]:
# Drop all non-mouse scans
for c, row in Data.iterrows():
    if 'Mouse' not in row.LogFile:
        Data.drop([c], inplace=True)
Data.reset_index(inplace=True)

In [16]:
# Get all folders
Data['Folder'] = [os.path.dirname(f) for f in Data['LogFile']]

In [17]:
#Check for samples which are not yet reconstructed
for c, row in Data.iterrows():
    # Iterate over every 'proj' folder
    if 'proj' in row.Folder:
        if not 'TScopy' in row.Folder and not 'PR' in row.Folder:
            # If there's nothing with 'rec*' on the same level, then tell us        
            if not glob.glob(row.Folder.replace('proj', '*rec*')):
                print('- %s is missing matching reconstructions' % row.LogFile[len(Root)+1:])

In [18]:
Data['XYAlignment'] = [glob.glob(os.path.join(f, '*.csv')) for f in Data['Folder']]

In [19]:
# Check for samples which are missing the .csv-files for the XY-alignment
for c, row in Data.iterrows():
    # Iterate over every 'proj' folder
    if 'proj' in row.Folder:
        if not len(row.XYAlignment):
            if not any(x in row.LogFile for x in ['rectmp.log']):
                # 'rectmp.log' because we only exclude it afterwards :)
                print('- %s has *not* been X/Y aligned' % row.LogFile[len(Root)+1:])

In [20]:
# Get rid of all logfiles that we don't want
for c, row in Data.iterrows():
    if 'rec' not in row.Folder:  # drop all non-rec folders
        Data.drop([c], inplace=True)
    elif 'SubScan' in row.Folder:  # drop all partial reconstructions which might be there from synchronization
        Data.drop([c], inplace=True)        
    elif 'rectmp.log' in row.LogFile:  # drop all temporary logfiles
        Data.drop([c], inplace=True)
# Reset dataframe to something that we would get if we only would have loaded the 'rec' files
Data = Data.reset_index(drop=True)

In [21]:
# Generate us some meaningful colums
Data['Sample'] = [l[len(Root)+1:].split(os.sep)[0] for l in Data['LogFile']]
Data['SampleName'] = [sn.split('_')[0] for sn in Data['Sample']]
Data['Scan'] = ['_'.join(l[len(Root)+1:].split(os.sep)[1:-1]) for l in Data['LogFile']]

In [22]:
Data.SampleName.unique()

array(['Mouse01', 'Mouse02'], dtype=object)

In [23]:
# if not len(Data.SampleName.unique()) == 1:
#     print('Something went wrong with the extration of the common sample name')

In [24]:
# Get the file names of the reconstructions
Data['Reconstructions'] = [sorted(glob.glob(os.path.join(f, '*rec0*.png'))) for f in Data['Folder']]
Data['Number of reconstructions'] = [len(r) for r in Data.Reconstructions]

In [25]:
# Drop samples which have either not been reconstructed yet or of which we deleted the reconstructions with
# `find . -name "*rec*.png" -type f -mtime +333 -delete`
# Based on https://stackoverflow.com/a/13851602
# for c,row in Data.iterrows():
#     if not row['Number of reconstructions']:
#         print('%s contains no PNG files, we might be currently reconstructing it' % row.Folder)
Data = Data[Data['Number of reconstructions'] > 0]
Data.reset_index(drop=True, inplace=True)
print('We have %s folders with reconstructions' % (len(Data)))

We have 21 folders with reconstructions


In [27]:
# Get scanning parameters to doublecheck from logfiles
Data['Scanner'] = [scanner(log) for log in Data['LogFile']]
Data['Voltage'] = [voltage(log) for log in Data['LogFile']]
Data['Current'] = [current(log) for log in Data['LogFile']]
Data['Voxelsize'] = [pixelsize(log, rounded=True) for log in Data['LogFile']]
Data['CameraWindow'] = [projection_size(log) for log in Data['LogFile']]
Data['Exposuretime'] = [exposure(log) for log in Data['LogFile']]
Data['Averaging'] = [averaging(log) for log in Data['LogFile']]
Data['Stacks'] = [stacks(log) for log in Data['LogFile']]
Data['RotationStep'] = [rotationstep(log) for log in Data['LogFile']]
Data['Scan date'] = [scandate(log) for log in Data['LogFile']]
Data['Scan time'] = [duration(log) for log in Data['LogFile']]

In [28]:
Data.Voltage.unique()

array([60.])

In [29]:
Data.Current.unique()

array([140.])

In [30]:
Data.Averaging.unique()

array([False])

In [31]:
Data.Voxelsize.unique()

array([15.])

In [32]:
Data['CameraWindow'].unique()

array([(3072, 1944), (2457, 1944)], dtype=object)

In [33]:
# Get reconstruction parameters to doublecheck from logfiles
Data['Grayvalue'] = [reconstruction_grayvalue(log) for log in Data['LogFile']]
Data['RingartefactCorrection'] = [ringremoval(log) for log in Data['LogFile']]
Data['BeamHardeningCorrection'] = [beamhardening(log) for log in Data['LogFile']]
Data['DefectPixelMasking'] = [defectpixelmasking(log) for log in Data['LogFile']]
Data['ROI'] = [region_of_interest(log) for log in Data['LogFile']]

In [53]:
Data['ROI']

0     (2543, 616, 430, 2357)
1     (2633, 489, 573, 2717)
2     (2642, 698, 500, 2444)
3     (2517, 573, 640, 2584)
4     (2368, 424, 715, 2659)
5     (2369, 425, 652, 2596)
6     (2420, 476, 530, 2474)
7     (2414, 470, 656, 2600)
8     (2395, 451, 564, 2508)
9     (2424, 480, 645, 2589)
10    (2040, 178, 358, 2220)
11    (2334, 390, 595, 2539)
12    (2272, 328, 525, 2469)
13    (2529, 585, 526, 2470)
14    (2385, 441, 622, 2566)
15    (2368, 424, 589, 2533)
16    (2326, 382, 418, 2362)
17    (2485, 541, 588, 2532)
18    (2537, 593, 590, 2534)
19    (2490, 546, 627, 2571)
20    (2388, 661, 620, 2347)
Name: ROI, dtype: object

In [34]:
Data['Grayvalue'].unique()

array([0.1])

In [35]:
Data['RingartefactCorrection'].unique()

array([1])

In [36]:
# Sort our dataframe by scan date
Data.sort_values(by='Scan date', inplace=True, ignore_index=True)

In [37]:
for c,row in Data.iterrows():
    if row.Grayvalue != 0.1:
        print(row.Sample, row.Grayvalue)

In [38]:
for c,row in Data.iterrows():
    if not row.RingartefactCorrection:
        print(row.Sample, row.RingartefactCorrection)

In [39]:
Data[['Sample', 'Scan', 'Grayvalue', 'RingartefactCorrection', 'BeamHardeningCorrection', 'DefectPixelMasking']]

Unnamed: 0,Sample,Scan,Grayvalue,RingartefactCorrection,BeamHardeningCorrection,DefectPixelMasking
0,Mouse01_Nativ_20220905,rec,0.1,1,0,0
1,Mouse02_Nativ_20220905,rec,0.1,1,0,0
2,Mouse01_Lugol_15pct_20220914,rec,0.1,1,0,0
3,Mouse01_Lugol_15pct_20220921,rec,0.1,1,0,0
4,Mouse01_Lugol_15pct_20220928,rec,0.1,1,0,0
5,Mouse01_Lugol_15pct_20221017,rec,0.1,1,0,0
6,Mouse01_Lugol_15pct_20221027,rec,0.1,1,0,0
7,Mouse01_Lugol_15pct_20221107,rec,0.1,1,0,0
8,Mouse01_Lugol_15pct_20221117,rec,0.1,1,0,0
9,Mouse01_Lugol_15pct_20221128,rec,0.1,1,0,0


In [40]:
# Calculate time 'spent' since start
Data['Time passed'] = [sd - Data['Scan date'].min() for sd in Data['Scan date']]
# Also extract days, rounded
Data['Days passed'] = [t.round('d') for t in Data['Time passed']]

In [41]:
Data[['Sample', 'Scan date', 'Time passed', 'Days passed']]

Unnamed: 0,Sample,Scan date,Time passed,Days passed
0,Mouse01_Nativ_20220905,2022-09-05 12:05:31,0 days 00:00:00,0 days
1,Mouse02_Nativ_20220905,2022-09-05 14:43:22,0 days 02:37:51,0 days
2,Mouse01_Lugol_15pct_20220914,2022-09-14 11:48:21,8 days 23:42:50,9 days
3,Mouse01_Lugol_15pct_20220921,2022-09-21 11:45:53,15 days 23:40:22,16 days
4,Mouse01_Lugol_15pct_20220928,2022-09-28 12:46:39,23 days 00:41:08,23 days
5,Mouse01_Lugol_15pct_20221017,2022-10-17 13:08:30,42 days 01:02:59,42 days
6,Mouse01_Lugol_15pct_20221027,2022-10-27 14:58:04,52 days 02:52:33,52 days
7,Mouse01_Lugol_15pct_20221107,2022-11-07 14:10:21,63 days 02:04:50,63 days
8,Mouse01_Lugol_15pct_20221117,2022-11-17 11:34:45,72 days 23:29:14,73 days
9,Mouse01_Lugol_15pct_20221128,2022-11-28 12:20:20,84 days 00:14:49,84 days


Some consistency checks

In [42]:
# Check ringremoval parameters
for machine in Data['Scanner'].unique():
    print('For the %s we have '
          'ringartefact-correction values of %s' % (machine,
                                                    Data[Data.Scanner==machine]['RingartefactCorrection'].unique()))

For the SkyScan 2214 we have ringartefact-correction values of [1]


In [43]:
# Check beamhardening parameters
for scanner in Data.Scanner.unique():
    print('For the %s we have '
          'beamhardening correction values of %s' % (scanner,
                                                     Data[Data.Scanner==scanner]['BeamHardeningCorrection'].unique()))

For the SkyScan 2214 we have beamhardening correction values of [0]


In [44]:
# Check defect pixel masking parameters
for scanner in Data.Scanner.unique():
    print('For the %s we have '
          'defect pixel masking values of %s' % (scanner,
                                                 Data[Data.Scanner==scanner]['DefectPixelMasking'].unique()))

For the SkyScan 2214 we have defect pixel masking values of [0]


In [45]:
# Check defect pixel masking parameters
for scanner in Data.Scanner.unique():
    print('For the %s we have '
          'reconstruction gray values of %s' % (scanner,
                                                Data[Data.Scanner==scanner]['Grayvalue'].unique()))

For the SkyScan 2214 we have reconstruction gray values of [0.1]


Check and display scan times

In [46]:
Data['Scan time total'] = [ st * stk  for st, stk in zip(Data['Scan time'], Data['Stacks'])]

In [47]:
# # https://www.geeksforgeeks.org/iterating-over-rows-and-columns-in-pandas-dataframe/
# columns = list(Data)
# columns.remove('Folder') 
# columns.remove('Sample')
# columns.remove('LogFile')
# columns.remove('Reconstructions')
# columns.remove('Number of reconstructions')
# columns.remove('Grayvalue')
# columns.remove('Scan time')
# columns.remove('Scan time total')
# columns.remove('Scan date')
# print(columns)
# for col in columns:
#     print(col)
#     print(Data[col].unique())
#     print(80*'-')    

In [48]:
# Check voxel sizes (*rounded* to two after-comma values)
# If different, spit out which values
roundto = 2
if len(Data['Voxelsize'].round(roundto).unique()) > 1:
    print('We scanned all datasets with %s different voxel sizes' % len(Data['Voxelsize'].round(roundto).unique()))
    for vs in sorted(Data['Voxelsize'].round(roundto).unique()):
        print('-', vs, 'um for ', end='')
        for c, row in Data.iterrows():
            if float(vs) == round(row['Voxelsize'], roundto):
                print(os.path.join(row['Sample'], row['Scan']), end=', ')
        print('')
else:
    print('We scanned all datasets with equal voxel size, namely %s um.' % float(Data['Voxelsize'].round(roundto).unique()))

We scanned all datasets with equal voxel size, namely 15.0 um.


In [54]:
Data[['Sample', 'Scan',
      'BeamHardeningCorrection', 'DefectPixelMasking',
      'RingartefactCorrection', 'Grayvalue',]]

Unnamed: 0,Sample,Scan,BeamHardeningCorrection,DefectPixelMasking,RingartefactCorrection,Grayvalue
0,Mouse01_Nativ_20220905,rec,0,0,1,0.1
1,Mouse02_Nativ_20220905,rec,0,0,1,0.1
2,Mouse01_Lugol_15pct_20220914,rec,0,0,1,0.1
3,Mouse01_Lugol_15pct_20220921,rec,0,0,1,0.1
4,Mouse01_Lugol_15pct_20220928,rec,0,0,1,0.1
5,Mouse01_Lugol_15pct_20221017,rec,0,0,1,0.1
6,Mouse01_Lugol_15pct_20221027,rec,0,0,1,0.1
7,Mouse01_Lugol_15pct_20221107,rec,0,0,1,0.1
8,Mouse01_Lugol_15pct_20221117,rec,0,0,1,0.1
9,Mouse01_Lugol_15pct_20221128,rec,0,0,1,0.1


In [55]:
# if len(Data['Grayvalue'].unique()) > 1:
#     print('We reconstructed the datasets with different maximum gray values, namely')
#     for gv in Data['Grayvalue'].unique():
#         print(gv, 'for Samples ', end='')
#         for c, row in Data.iterrows():
#             if float(gv) == row['Grayvalue']:
#                 print(os.path.join(row['Sample'], row['Scan']), end=', ')
#         print('')
# else:
#     print('We reconstructed all datasets with equal maximum gray value, namely %s.' % Data['Grayvalue'].unique()[0])

In [56]:
# Data[['Sample', 'Scan',
#       'Voxelsize', 'Scanner',
#       'Scan date', 'CameraWindow', 'RotationStep', 'Averaging',
#       'Scan time', 'Stacks', 'Scan time total']]

In [57]:
# Get an overview over the total scan time
# Nice output based on https://stackoverflow.com/a/8907407/323100
total_seconds = int(Data['Scan time total'].sum())
hours, remainder = divmod(total_seconds,60*60)
minutes, seconds = divmod(remainder,60)
print('In total, we scanned for %s hours and %s minutes)' % (hours, minutes))
for machine in Data['Scanner'].unique():
    total_seconds = int(Data[Data['Scanner'] == machine]['Scan time total'].sum())
    hours, remainder = divmod(total_seconds,60*60)
    minutes, seconds = divmod(remainder,60)
    print('\t - Of these, we scanned %s hours and %s minutes on the %s,'
          ' for %s scans' % (hours,
                             minutes,
                             machine,
                             len(Data[Data['Scanner'] == machine])))

In total, we scanned for 43 hours and 14 minutes)
	 - Of these, we scanned 43 hours and 14 minutes on the SkyScan 2214, for 21 scans


In [58]:
Data[['Sample', 'Scan',
      'Voxelsize', 'Scanner',
      'Scan date', 'CameraWindow', 'RotationStep', 'Averaging', 'Scan time', 'Stacks' ]].to_excel('Mouse.Details.xlsx')

In [59]:
Data[['Sample', 'Scan',
      'Voxelsize', 'Scanner',
      'Scan date', 'CameraWindow',
      'RotationStep', 'Averaging', 'Scan time', 'Stacks' ]].to_excel(os.path.join(Root,'Mouse.Details.xlsx'))

In [60]:
Data.tail()

Unnamed: 0,index,LogFile,Folder,XYAlignment,Sample,SampleName,Scan,Reconstructions,Number of reconstructions,Scanner,...,Scan date,Scan time,Grayvalue,RingartefactCorrection,BeamHardeningCorrection,DefectPixelMasking,Time passed,Days passed,Scan time total,ROI
16,12,/home/habi/research-storage-djonov/Aaldijk/Mou...,/home/habi/research-storage-djonov/Aaldijk/Mou...,[],Mouse01_Lugol_15pct_20230214,Mouse01,rec,[/home/habi/research-storage-djonov/Aaldijk/Mo...,1941,SkyScan 2214,...,2023-02-14 11:57:24,6784.0,0.1,1,0,0,161 days 23:51:53,162 days,6784.0,"(2326, 382, 418, 2362)"
17,30,/home/habi/research-storage-djonov/Aaldijk/Mou...,/home/habi/research-storage-djonov/Aaldijk/Mou...,[],Mouse01_Lugol_15pct_20230227,Mouse01,rec,[/home/habi/research-storage-djonov/Aaldijk/Mo...,1941,SkyScan 2214,...,2023-02-27 15:01:38,7990.0,0.1,1,0,0,175 days 02:56:07,175 days,7990.0,"(2485, 541, 588, 2532)"
18,71,/home/habi/research-storage-djonov/Aaldijk/Mou...,/home/habi/research-storage-djonov/Aaldijk/Mou...,[],Mouse01_Lugol_15pct_20230317,Mouse01,rec,[/home/habi/research-storage-djonov/Aaldijk/Mo...,1941,SkyScan 2214,...,2023-03-17 11:20:03,7082.0,0.1,1,0,0,192 days 23:14:32,193 days,7082.0,"(2537, 593, 590, 2534)"
19,22,/home/habi/research-storage-djonov/Aaldijk/Mou...,/home/habi/research-storage-djonov/Aaldijk/Mou...,[],Mouse01_Lugol_15pct_20230330,Mouse01,rec,[/home/habi/research-storage-djonov/Aaldijk/Mo...,1941,SkyScan 2214,...,2023-03-30 11:28:02,7226.0,0.1,1,0,0,205 days 23:22:31,206 days,7226.0,"(2490, 546, 627, 2571)"
20,88,/home/habi/research-storage-djonov/Aaldijk/Mou...,/home/habi/research-storage-djonov/Aaldijk/Mou...,[],Mouse01_Lugol_15pct_20230411,Mouse01,rec,[/home/habi/research-storage-djonov/Aaldijk/Mo...,1941,SkyScan 2214,...,2023-04-11 12:25:39,6818.0,0.1,1,0,0,218 days 00:20:08,218 days,6818.0,"(2388, 661, 620, 2347)"


In [61]:
# Data['PreviewImagePath'] = [sorted(glob.glob(os.path.join(f, '*_spr.bmp')))[0] for f in Data['Folder']]
# Data['PreviewImage'] = [dask_image.imread.imread(pip)
#                         if pip
#                         else numpy.random.random((100, 100)) for pip in Data['PreviewImagePath']]

In [62]:
# Make an approximately square overview image
lines = 3

In [63]:
# for c, row in Data.iterrows():
#     plt.subplot(lines, int(numpy.ceil(len(Data) / float(lines))), c + 1)
#     plt.imshow(row.PreviewImage.squeeze())
#     plt.title('%s at day %s' % (row['SampleName'], row['Days passed'].days))
#     plt.gca().add_artist(ScaleBar(row['Voxelsize'],
#                                   'um',
#                                   color='black',
#                                   frameon=True))
#     plt.axis('off')
# plt.tight_layout()
# plt.savefig(os.path.join(Root, 'ScanOverviews.png'),
#             bbox_inches='tight')
# plt.show()

In [64]:
# Load all reconstructions into ephemereal DASK arrays
Reconstructions = [None] * len(Data)
for c, row in tqdm(Data.iterrows(),
                   desc='Load reconstructions',
                   total=len(Data)):
    Reconstructions[c] = dask_image.imread.imread(os.path.join(row['Folder'],
                                                               '*rec*.png'))

Load reconstructions:   0%|          | 0/21 [00:00<?, ?it/s]

In [65]:
# Check if something went wrong
# for file in Data['OutputNameRec']:
#     print(file)
#     dask.array.from_zarr(file)

In [66]:
# How big are the datasets?
Data['Size'] = [rec.shape for rec in Reconstructions]

In [69]:
Data[['Sample', 'Scan', 'Size', 'ROI']]

Unnamed: 0,Sample,Scan,Size,ROI
0,Mouse01_Nativ_20220905,rec,"(1941, 1924, 1924)","(2543, 616, 430, 2357)"
1,Mouse02_Nativ_20220905,rec,"(1941, 3072, 3072)","(2633, 489, 573, 2717)"
2,Mouse01_Lugol_15pct_20220914,rec,"(1941, 1944, 1944)","(2642, 698, 500, 2444)"
3,Mouse01_Lugol_15pct_20220921,rec,"(1941, 1944, 1944)","(2517, 573, 640, 2584)"
4,Mouse01_Lugol_15pct_20220928,rec,"(1941, 1944, 1944)","(2368, 424, 715, 2659)"
5,Mouse01_Lugol_15pct_20221017,rec,"(1941, 1944, 1944)","(2369, 425, 652, 2596)"
6,Mouse01_Lugol_15pct_20221027,rec,"(1941, 1944, 1944)","(2420, 476, 530, 2474)"
7,Mouse01_Lugol_15pct_20221107,rec,"(1941, 1944, 1944)","(2414, 470, 656, 2600)"
8,Mouse01_Lugol_15pct_20221117,rec,"(1941, 1944, 1944)","(2395, 451, 564, 2508)"
9,Mouse01_Lugol_15pct_20221128,rec,"(1941, 1944, 1944)","(2424, 480, 645, 2589)"


In [70]:
HOMOGENIZE THE ROI

SyntaxError: invalid syntax (<ipython-input-70-1e5d5b052d76>, line 1)

In [None]:
# The three cardinal directions
directions = ['Axial',
              'Coronal',
              'Sagittal']

In [None]:
# Read or calculate the middle slices, put them into the dataframe and save them to disk
for d, direction in enumerate(directions):
    Data['Mid_' + direction] = [None] * len(Reconstructions)
for c, row in tqdm(Data.iterrows(), desc='Middle images', total=len(Data), leave=False):
    for d, direction in tqdm(enumerate(directions),
                             desc='%s/%s' % (row['Sample'], row['Scan']),
                             leave=False,
                             total=len(directions)):
        outfilepath = os.path.join(os.path.dirname(row['Folder']),
                                   '%s.%s.Middle.%s.png' % (row['Sample'],
                                                            row['Scan'],
                                                            direction))
        if os.path.exists(outfilepath):
            Data.at[c, 'Mid_' + direction] = dask_image.imread.imread(outfilepath).squeeze()
        else:
            # Generate requested axial view
            if 'Axial' in direction:
                Data.at[c, 'Mid_' + direction] = Reconstructions[c][Data['Size'][c][0] // 2].compute().squeeze()
            if 'Sagittal' in direction:
                Data.at[c, 'Mid_' + direction] = Reconstructions[c][:, Data['Size'][c][1] // 2, :].compute().squeeze()
            if 'Coronal' in direction:
                Data.at[c, 'Mid_' + direction] = Reconstructions[c][:, :, Data['Size'][c][2] // 2].compute().squeeze()
            # Save the calculated 'direction' view to disk
            imageio.imwrite(outfilepath, (Data.at[c, 'Mid_' + direction]))

In [None]:
# Show middle slices
for c, row in tqdm(Data.iterrows(),
                   desc='Saving middle images overview',
                   total=len(Data),
                   leave=False):
    outfilepath = os.path.join(os.path.dirname(row['Folder']),
                               '%s.%s.MiddleSlices.png' % (row['Sample'], row['Scan']))
    if not os.path.exists(outfilepath):    
        for d, direction in tqdm(enumerate(directions),
                                 desc='%s/%s' % (row['Sample'], row['Scan']),
                                 leave=False,
                                 total=len(directions)):
            plt.subplot(1, 3, d + 1)
            plt.imshow(row['Mid_' + direction].squeeze())
            if d == 0:
                plt.axhline(row.Size[1] // 2, c=seaborn.color_palette()[0])
                plt.axvline(row.Size[2] // 2, c=seaborn.color_palette()[1])
                plt.gca().add_artist(ScaleBar(row['Voxelsize'],
                                              'um',
                                              color=seaborn.color_palette()[2]))
            elif d == 1:
                plt.axhline(row.Size[0] // 2, c=seaborn.color_palette()[2])
                plt.axvline(row.Size[d] // 2, c=seaborn.color_palette()[1])
                plt.gca().add_artist(ScaleBar(row['Voxelsize'],
                                              'um',
                                              color=seaborn.color_palette()[0]))
            else:
                plt.axhline(row.Size[0] // 2, c=seaborn.color_palette()[2])
                plt.axvline(row.Size[d] // 2, c=seaborn.color_palette()[0])
                plt.gca().add_artist(ScaleBar(row['Voxelsize'],
                                              'um',
                                              color=seaborn.color_palette()[1]))
            plt.title('%s at day %s\n%s' % (row['SampleName'],
                                            row['Days passed'].days,
                                            direction + ' Middle slice'))
            plt.axis('off')
            plt.savefig(outfilepath,
                        transparent=True,
                        bbox_inches='tight')
        plt.show()

In [None]:
# Read or calculate the directional MIPs, put them into the dataframe and save them to disk
for d, direction in enumerate(directions):
    Data['MIP_' + direction] = [None] * len(Reconstructions)
for c, row in tqdm(Data.iterrows(), desc='MIPs', total=len(Data), leave=False):
    for d, direction in tqdm(enumerate(directions),
                             desc='%s/%s' % (row['Sample'], row['Scan']),
                             leave=False,
                             total=len(directions)):
        outfilepath = os.path.join(os.path.dirname(row['Folder']),
                                   '%s.%s.MIP.%s.png' % (row['Sample'],
                                                      row['Scan'],
                                                      direction))
        if os.path.exists(outfilepath):
            Data.at[c, 'MIP_' + direction] = dask_image.imread.imread(outfilepath).squeeze()
        else:
            # Generate MIP
            Data.at[c, 'MIP_' + direction] = Reconstructions[c].max(axis=d).compute().squeeze()
            # Save it out
            imageio.imwrite(outfilepath, Data.at[c, 'MIP_' + direction].astype('uint8'))

In [None]:
# Show MIP slices
for c, row in tqdm(Data.iterrows(),
                   desc='Saving MIP images overview',
                   total=len(Data),
                   leave=False):
    outfilepath = os.path.join(os.path.dirname(row['Folder']),
                               '%s.%s.MIPs.png' % (row['Sample'], row['Scan']))
    if not os.path.exists(outfilepath):    
        for d, direction in tqdm(enumerate(directions),
                                          desc='%s/%s' % (row['Sample'], row['Scan']),
                                          leave=False,
                                          total=len(directions)):
            plt.subplot(1, 3, d + 1)
            plt.imshow(row['MIP_' + direction].squeeze())
            plt.gca().add_artist(ScaleBar(row['Voxelsize'],
                                          'um'))
            plt.title('%s at day %s\n%s' % (row['SampleName'],
                                            row['Days passed'].days,
                                            direction + ' MIP'))
            plt.axis('off')
        plt.savefig(outfilepath,
                    transparent=True,
                    bbox_inches='tight')
        plt.show()

In [None]:
for direction in directions:
    # Show each directional middle slice, concatenated
    # Concatenate figures with GridSpec
    # https://matplotlib.org/stable/gallery/subplots_axes_and_figures/gridspec_multicolumn.html#sphx-glr-gallery-subplots-axes-and-figures-gridspec-multicolumn-py
    # Set up figure
    fig = plt.figure(figsize=(len(Data)*3,3), constrained_layout=False)
    gs = GridSpec(1,len(Data), figure=fig, wspace=0, hspace=0)
    # plot the selected images
    for c,row in Data.iterrows():
        fig.add_subplot(gs[0, c])
        plt.imshow(row['Mid_%s' % direction].squeeze())
        plt.gca().add_artist(ScaleBar(row['Voxelsize'],'um'))
        plt.title('%s at day %s\n%s' % (row['SampleName'], row['Days passed'].days,
                                        direction + ' Middle slice'))
        plt.axis('off')
    plt.savefig(os.path.join(Root, 'Mouse.Mid_%s.png' % direction),
                transparent=False,
                bbox_inches='tight')
    plt.show()

In [None]:
for direction in directions:
    # Show each directional MIP, concatenated
    fig = plt.figure(figsize=(len(Data)*3,3), constrained_layout=False)
    gs = GridSpec(1,len(Data), figure=fig, wspace=0, hspace=0)
    # plot the selected images
    for c,row in Data.iterrows():
        fig.add_subplot(gs[0, c])
        plt.imshow(row['MIP_%s' % direction].squeeze())
        plt.gca().add_artist(ScaleBar(row['Voxelsize'],'um'))
        plt.title('%s at day %s\n%s' % (row['SampleName'],
                                        row['Days passed'].days,
                                        direction + ' MIP'))
        plt.axis('off')
    plt.savefig(os.path.join(Root, 'Mouse.MIP_%s.png' % direction),
                transparent=False,
                bbox_inches='tight')
    plt.show()

In [None]:
# Show one slice of each of the scans, concatenated
iteration = 300
for whichslice in range(iteration, Data['Number of reconstructions'][0] - iteration, iteration):
    # Concatenate figures with GridSpec
    # https://matplotlib.org/stable/gallery/subplots_axes_and_figures/gridspec_multicolumn.html#sphx-glr-gallery-subplots-axes-and-figures-gridspec-multicolumn-py
    from matplotlib.gridspec import GridSpec
    # Set up figure
    fig = plt.figure(figsize=(len(Data)*3,3), constrained_layout=False)
    gs = GridSpec(1,len(Data), figure=fig, wspace=0, hspace=0)
    # plot the selected images
    for c,row in Data.iterrows():
        fig.add_subplot(gs[0, c])
        plt.imshow(Reconstructions[c][whichslice])
        plt.gca().add_artist(ScaleBar(row['Voxelsize'],'um'))
        plt.title('%s at day %s\nReconstruction %s' % (row['SampleName'],
                                                       row['Days passed'].days,
                                                       whichslice))
        plt.axis('off')
    plt.savefig(os.path.join(Root, 'Mouse.Slice%04d.png' % whichslice),
                transparent=False,
                bbox_inches='tight')
    plt.show()

In [None]:
mipseries = 

In [None]:
# Write out GIF of progression
for direction in directions:
    # Concatenate directional MIPs into a GIF
    # Since we `dask`ed the images, we first need to compute them...
    mipseries = [mip.compute() for mip in Data['MIP_%s' % direction]]
    imageio.mimwrite('Animation.MIP.%s.gif' % direction, mipseries, format= '.gif', fps = 1)
    # Concatenate directional middle images into a GIF
    # Since we `dask`ed the images, we first need to compute them...
    midseries = [mid.compute() for mid in Data['Mid_%s' % direction]]
    imageio.mimwrite('Animation.Middle.%s.gif' % direction, midseries, format= '.gif', fps = 1)

In [None]:
# Calculate the histograms of one of the MIPs (or reconstructions, depending on which line we comment)
# Caveat: dask.da.histogram returns histogram AND bins, making each histogram a 'nested' list of [h, b]
Data['Histogram'] = [dask.array.histogram(dask.array.array(mip.squeeze()),
                                          bins=2**8,
                                          range=[0, 2**8]) for mip in Data['MIP_Coronal']]  
# Data['Histogram'] = [dask.array.histogram(rec,
#                                           bins=2**8,
#                                           range=[0, 2**8]) for rec in Reconstructions]
# Actually compute the data and put only h into the dataframe, since we use it quite often below.
# Discard the bins
Data['Histogram'] = [h.compute() for h,b in Data['Histogram']] 

In [None]:
for c, row in sorted(Data.iterrows()):
    plt.semilogy(row.Histogram, label='%s/%s' % (row.Sample, row.Scan))
plt.xlim([0, 255])
plt.legend()
plt.show()

In [None]:
def overeexposecheck(item, threshold=222, howmanypercent=0.1, whichone='Coronal', verbose=False):
    '''Function to check if a certain amount of voxels are brighter than a certain value'''
    if (Data['MIP_%s' % whichone][item]>threshold).sum() > (Data['MIP_%s' % whichone][item].size * howmanypercent / 100):
        if verbose:
            plt.imshow(Data['MIP_%s' % whichone][item].squeeze())
            plt.imshow(numpy.ma.masked_equal(Data['MIP_%s' % whichone][item].squeeze()>threshold, False),
                       cmap='viridis_r',
                       alpha=.618)
            plt.title('%s/%s\n%s px of %s Mpixels (>%s%%) are brighter '
                      'than %s' % (Data['Sample'][item],
                                   Data['Scan'][item],
                                   (Data['MIP_%s' % whichone][item]>threshold).sum().compute(),
                                   round(1e-6 * Data['MIP_%s' % whichone][item].size,2),
                                   howmanypercent,
                                   threshold))
            plt.axis('off')
            plt.gca().add_artist(ScaleBar(Data['Voxelsize'][item],
                                          'um'))
            plt.show()
        return(True)
    else:
        return(False)    

In [None]:
# Check if 'too much' of the MIP is overexposed
# TODO: How much is 'too much'?
Data['OverExposed'] = [overeexposecheck(c,
                                        whichone='Coronal',
                                        verbose=True) for c, row in Data.iterrows()]

In [None]:
print('At the moment, we have previewed %s *Mouse* scans of %s samples in %s' % (len(Data),
                                                                                 len(Data.Sample.unique()),
                                                                                 Root))