# Handle the 'data' of the fishes
Wrestle with our data and Mikkis XLS sheet

In [1]:
import platform
import os
import glob
import pandas
import imageio
import numpy
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar
import seaborn
import dask
import dask_image.imread
from dask.distributed import Client, LocalCluster
from numcodecs import Blosc
import skimage
from tqdm import notebook

In [2]:
# Set dask temporary folder
# Do this before creating a client: https://stackoverflow.com/a/62804525/323100
import tempfile
if 'Linux' in platform.system():
    tmp = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
elif 'Darwin' in platform.system():
    tmp = tempfile.gettempdir()
else:
    if 'anaklin' in platform.node():
        tmp = os.path.join('F:\\')
    else:
        tmp = os.path.join('D:\\')
dask.config.set({'temporary_directory': os.path.join(tmp, 'tmp')})
print('Dask temporarry files go to %s' % dask.config.get('temporary_directory'))

Dask temporarry files go to D:\tmp


In [3]:
# Start cluster and client now, after setting tempdir
cluster = LocalCluster(n_workers=8)
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 63712 instead


In [4]:
print('You can seee what DASK is doing at "http://localhost:%s/status"' % client.scheduler_info()['services']['dashboard'])

You can seee what DASK is doing at "http://localhost:63712/status"


In [5]:
# # Ignore warnings in the notebook
# import warnings
# warnings.filterwarnings("ignore")

In [6]:
# Set up figure defaults
plt.rc('image', cmap='gray', interpolation='nearest')  # Display all images in b&w and with 'nearest' interpolation
plt.rcParams['figure.figsize'] = (16, 9)  # Size up figures a bit
plt.rcParams['figure.dpi'] = 200

In [7]:
# Setup scale bar defaults
plt.rcParams['scalebar.location'] = 'lower right'
plt.rcParams['scalebar.frameon'] = False
plt.rcParams['scalebar.color'] = 'white'

In [8]:
# Display all plots identically
lines = 3
# And then do something like
# plt.subplot(lines, numpy.ceil(len(Data) / float(lines)), c + 1)

In [9]:
# Different locations if running either on Linux or Windows
FastSSD = False
# to speed things up significantly
if 'Linux' in platform.system():
    if FastSSD:
        BasePath = os.path.join(os.sep, 'media', 'habi', 'Fast_SSD')
    else:
        BasePath = os.path.join(os.sep, 'home', 'habi', '1272')
elif 'Darwin' in platform.system():
    FastSSD = False
    BasePath = os.path.join('/Users/habi/Dev/EAWAG/Data')
elif 'Windows' in platform.system():
    if FastSSD:
        BasePath = os.path.join('F:\\')
    else:
        if 'anaklin' in platform.node():
            BasePath = os.path.join('S:\\')
        else:
            BasePath = os.path.join('D:\\Results')
Root = os.path.join(BasePath, 'EAWAG')
print('We are loading all the data from %s' % Root)

We are loading all the data from D:\Results\EAWAG


In [10]:
def get_pixelsize(logfile):
    """Get the pixel size from the scan log file"""
    pixelsize=None    
    with open(logfile, 'r') as f:
        for line in f:
            if 'Image Pixel' in line and 'Scaled' not in line:
                pixelsize = float(line.split('=')[1])
    return(pixelsize)

In [11]:
def get_projectionsize(logfile):
    """How big did we set the camera?"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Number Of Rows' in line:
                y = int(line.split('=')[1])
            if 'Number Of Columns' in line:
                x = int(line.split('=')[1])                
    return(x*y)

In [12]:
def get_filter(logfile):
    """Get the filter we used whole scanning from the scan log file"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Filter=' in line:
                whichfilter = line.split('=')[1].strip()
    return(whichfilter)

In [13]:
def get_exposuretime(logfile):
    """Get the exposure time size from the scan log file"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Exposure' in line:
                exposuretime = int(line.split('=')[1])
    return(exposuretime)

In [14]:
def get_ringartefact(logfile):
    """Get the ring artefact correction from the  scan log file"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Ring Artifact' in line:
                ringartefactcorrection = int(line.split('=')[1])
    return(ringartefactcorrection)

In [15]:
def get_reconstruction_grayvalue(logfile):
    grayvalue = None
    """How did we map the brightness of the reconstructions?"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Maximum for' in line:
                grayvalue = float(line.split('=')[1])
    return(grayvalue)

In [16]:
def get_beamhardening(logfile):
    """Get the beamhardening correction from the  scan log file"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Hardening' in line:
                beamhardeningcorrection = int(line.split('=')[1])
    return(beamhardeningcorrection)

In [17]:
def get_rotationstep(logfile):
    """Get the rotation step from the scan log file"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Rotation Step' in line:
                rotstep = float(line.split('=')[1])
    return(rotstep)

In [18]:
def get_frameaveraging(logfile):
    """Get the frame averaging from the scan log file"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Averaging' in line:
                avg = line.split('=')[1]
    return(avg)

In [19]:
def get_machine(logfile):
    """Get the machine we used to scan"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Scanner' in line:
                machine = line.split('=')[1].strip()
    return(machine)

In [20]:
def get_scantime(logfile):
    """How long did we scan?"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Scan duration' in line:
                time = line.split('=')[1].strip()
    return(pandas.to_timedelta(time))

In [21]:
def get_stacks(logfile):
    """How many stacks/connected scans did we make?"""
    stacks = 1
    with open(logfile, 'r') as f:
        for line in f:
            if 'conn' in line:
                stacks = int(line.split('=')[1])
    return(stacks)

In [22]:
def get_scandate(logfile, verbose=False):
    """When did we scan the fish?"""
    with open(logfile, 'r') as f:
        for line in f:
            if 'Study Date and Time' in line:
                if verbose:
                    print('Found "date" line: %s' % line.strip())
                datestring = line.split('=')[1].strip().replace('  ', ' ')
                if verbose:
                    print('The date string is: %s' % datestring)
                date = pandas.to_datetime(datestring , format='%d %b %Y %Hh:%Mm:%Ss')
                if verbose:
                    print('Parsed to: %s' % date)
                (date)
    return(date.isoformat())

In [124]:
def get_git_hash():
    '''
    Get the current git hash from the repository.
    Based on http://stackoverflow.com/a/949391/323100 and
    http://stackoverflow.com/a/18283905/323100
    '''
    from subprocess import Popen, PIPE
    import os
    gitprocess = Popen(['git',
                        '--git-dir',
                        os.path.join(os.getcwd(), '.git'),
                        'rev-parse',
                        '--short',
                        '--verify',
                        'HEAD'],
                       stdout=PIPE)
    (output, _) = gitprocess.communicate()
    return output.strip().decode("utf-8")

In [125]:
# # Make directory for output
# OutPutDir = os.path.join(os.getcwd(), 'Output', get_git_hash())
# print('We are saving all the output to %s' % OutPutDir)
# os.makedirs(OutPutDir, exist_ok=True)

In [126]:
# Make us a dataframe for saving all that we need
Data = pandas.DataFrame()

In [127]:
# Get *all* log files
# Sort them by time, not name
Data['LogFile'] = [f for f in sorted(glob.glob(os.path.join(Root, '**', '*.log'), recursive=True), key=os.path.getmtime)]

In [128]:
# Get all folders
Data['Folder'] = [os.path.dirname(f) for f in Data['LogFile']]

In [129]:
# Check for samples which are not yet reconstructed
for c, row in Data.iterrows():
    # Iterate over every 'proj' folder
    if 'proj' in row.Folder:
        if not 'TScopy' in row.Folder and not 'PR' in row.Folder:
            # If there's nothing with 'rec*' on the same level, then tell us        
            if not glob.glob(row.Folder.replace('proj', 'rec')):
                print(glob.glob(row.Folder.replace('proj', 'rec')))
                print('- %s is missing matching reconstructions' % row.LogFile[len(Root)+1:])

[]
- 105005_104015\proj\105005_104015~00.log is missing matching reconstructions
[]
- 105005_104015\proj\105005_104015~01.log is missing matching reconstructions
[]
- 105005_104015\proj\105005_104015~02.log is missing matching reconstructions
[]
- 105005_104015\proj\105005_104015~03.log is missing matching reconstructions
[]
- 105005_104015\proj\105005_104015~04.log is missing matching reconstructions
[]
- 105005_104015\proj\105005_104015.log is missing matching reconstructions
[]
- 104671_156645\proj\104671_156645~00.log is missing matching reconstructions
[]
- 104671_156645\proj\104671_156645~01.log is missing matching reconstructions
[]
- 104671_156645\proj\104671_156645~02.log is missing matching reconstructions
[]
- 104671_156645\proj\104671_156645.log is missing matching reconstructions


In [130]:
# Get rid of all non-rec logfiles
for c, row in Data.iterrows():
    if 'rec' not in row.Folder:
        Data.drop([c], inplace=True)
    elif 'rectmp.log' in row.LogFile:
        Data.drop([c], inplace=True)
# Reset dataframe to something that we would get if we only would have loaded the 'rec' files
Data = Data.reset_index(drop=True)

In [131]:
# Generate us some meaningful colums
Data['Fish'] = [l[len(Root)+1:].split(os.sep)[0] for l in Data['LogFile']]
Data['Scan'] = ['_'.join(l[len(Root)+1:].split(os.sep)[1:-1]) for l in Data['LogFile']]

In [132]:
# Get the file names of the reconstructions
Data['Reconstructions'] = [sorted(glob.glob(os.path.join(f, '*rec0*.png'))) for f in Data['Folder']]
Data['Number of reconstructions'] = [len(r) for r in Data.Reconstructions]

In [133]:
# Drop samples which have not been reconstructed yet
# Based on https://stackoverflow.com/a/13851602
# for c,row in Data.iterrows():
#     if not row['Number of reconstructions']:
#         print('%s contains no PNG files, we might be currently reconstructing it' % row.Folder)
Data = Data[Data['Number of reconstructions'] > 0]
Data.reset_index(drop=True, inplace=True)
print('We have %s folders with reconstructions' % (len(Data)))

We have 108 folders with reconstructions


In [134]:
# Get parameters to doublecheck from logfiles
Data['Voxelsize'] = [get_pixelsize(log) for log in Data['LogFile']]
Data['Filter'] = [get_filter(log) for log in Data['LogFile']]
Data['Exposuretime'] = [get_exposuretime(log) for log in Data['LogFile']]
Data['Scanner'] = [get_machine(log) for log in Data['LogFile']]
Data['Averaging'] = [get_frameaveraging(log) for log in Data['LogFile']]
Data['ProjectionSize'] = [get_projectionsize(log) for log in Data['LogFile']]
Data['RotationStep'] = [get_rotationstep(log) for log in Data['LogFile']]
Data['CameraWindow'] = [round((ps ** 0.5)/100)*100  for ps in Data['ProjectionSize']]
Data['Grayvalue'] = [get_reconstruction_grayvalue(log) for log in Data['LogFile']]
Data['RingartefactCorrection'] = [get_ringartefact(log) for log in Data['LogFile']]
Data['BeamHardeningCorrection'] = [get_beamhardening(log) for log in Data['LogFile']]
Data['Scan date'] = [get_scandate(log) for log in Data['LogFile']]
Data['Scan time'] = [get_scantime(log) for log in Data['LogFile']]
Data['Stacks'] = [get_stacks(log) for log in Data['LogFile']]

In [135]:
Data['Scan time total'] = [ st * stk  for st, stk in zip(Data['Scan time'], Data['Stacks'])]

In [136]:
# Generate a text file for each rec-folder, in which we can note what's going on with the fish
# Generate filename
for c,row in Data.iterrows():
    Data.at[c, 'commentsfile'] = os.path.join(os.path.dirname(row.Folder),
                                              row.Fish + '.' + row.Scan + '.md')
# Create actual file on disk
for c,row in Data.iterrows():
    # Only do this if the file does not already exist
    if not os.path.exists(row.commentsfile):
        with open(row.commentsfile, 'w', encoding='utf-8') as f:
            f.write('# Fish %s, Scan %s\n\n' % (row.Fish, row.Scan))
            f.write('This fish was scanned on %s on the %s, with a voxel size of %s μm.\n\n'
                    % (row['Scan date'], row.Scanner, numpy.round(row.Voxelsize, 2)))
            f.write('## Comments')

In [137]:
# # https://www.geeksforgeeks.org/iterating-over-rows-and-columns-in-pandas-dataframe/
# columns = list(Data)
# columns.remove('Folder') 
# columns.remove('Fish')
# columns.remove('LogFile')
# columns.remove('Reconstructions')
# columns.remove('Number of reconstructions')
# columns.remove('Grayvalue')
# columns.remove('Scan time')
# columns.remove('Scan time total')
# columns.remove('Scan date')
# print(columns)
# for col in columns:
#     print(col)
#     print(Data[col].unique())
#     print(80*'-')    

In [138]:
# # Check voxel sizes (*rounded* to two after-comma values)
# # If different, spit out which values
# roundto = 2
# if len(Data['Voxelsize'].round(roundto).unique()) > 1:
#     print('We scanned all datasets with %s different voxel sizes' % len(Data['Voxelsize'].round(roundto).unique()))
#     for vs in sorted(Data['Voxelsize'].round(roundto).unique()):
#         print('-', vs, 'um for ', end='')
#         for c, row in Data.iterrows():
#             if float(vs) == round(row['Voxelsize'], roundto):
#                 print(os.path.join(row['Fish'], row['Scan']), end=', ')
#         print('')
# else:
#     print('We scanned all datasets with equal voxel size, namely %s um.' % float(Data['Voxelsize'].round(roundto).unique()))

In [139]:
# if len(Data['Grayvalue'].unique()) > 1:
#     print('We reconstructed the datasets with different maximum gray values, namely')
#     for gv in Data['Grayvalue'].unique():
#         print(gv, 'for Samples ', end='')
#         for c, row in Data.iterrows():
#             if float(gv) == row['Grayvalue']:
#                 print(os.path.join(row['Fish'], row['Scan']), end=', ')
#         print('')
# else:
#     print('We reconstructed all datasets with equal maximum gray value, namely %s.' % Data['Grayvalue'].unique()[0])

In [140]:
# Data[['Fish', 'Scan',
#       'Voxelsize', 'Scanner',
#       'Scan date', 'CameraWindow', 'RotationStep', 'Averaging',
#       'Scan time', 'Stacks', 'Scan time total']]

In [141]:
# Get an overview over the total scan time
# Nice output based on https://stackoverflow.com/a/8907407/323100
total_seconds = int(Data['Scan time total'].sum().total_seconds())
hours, remainder = divmod(total_seconds,60*60)
minutes, seconds = divmod(remainder,60)
print('In total, we scanned for %s hours and %s minutes)' % (hours, minutes))
for machine in Data['Scanner'].unique():
    total_seconds = int(Data[Data['Scanner'] == machine]['Scan time total'].sum().total_seconds())
    hours, remainder = divmod(total_seconds,60*60)
    minutes, seconds = divmod(remainder,60)
    print('\t - Of these, we scanned %s hours and %s minutes on the %s,'
          'for %s scans' % (hours,
                            minutes,
                            machine,
                            len(Data[Data['Scanner'] == machine])))

In total, we scanned for 360 hours and 12 minutes)
	 - Of these, we scanned 266 hours and 41 minutes on the SkyScan2214,for 101 scans
	 - Of these, we scanned 93 hours and 31 minutes on the SkyScan1272,for 7 scans


In [142]:
Data[['Fish', 'Scan',
      'Voxelsize', 'Scanner',
      'Scan date', 'CameraWindow', 'RotationStep', 'Averaging', 'Scan time', 'Stacks' ]].to_excel('Details.xlsx')

In [143]:
Data[['Fish', 'Scan',
      'Voxelsize', 'Scanner',
      'Scan date', 'CameraWindow',
      'RotationStep', 'Averaging', 'Scan time', 'Stacks' ]].to_excel(os.path.join(Root,'Details.xlsx'))

In [144]:
# Read Mikkis datafile
MikkisFile = sorted(glob.glob(os.path.join(Root, 'X_ArchiveFiles', '*CTscanFishList.xlsx')))[0]
# Read excel file and use the first column as index
print('Reading in %s' % MikkisFile)
DataMikki = pandas.read_excel(MikkisFile)

Reading in D:\Results\EAWAG\17.11.2021_CTscanFishList.xlsx


In [145]:
DataMikki.head()

Unnamed: 0,Fishec,FieldID,OtherID,ReplacementID,Length(cm),TemporaryJar,Genus,Species,Ecology,Scan date,...,UpperOralJaw,LowerOralJaw,PharyngealJawScan,UpperPharyngealJaw,LowerPharyngealJaw,ScanComments,QualityChecked,Unnamed: 19,SpecimenReturned,Comments
0,103635,,,,< 7,< 7cm,"""Astatotilapia""",nubila swamp blue,insectivore,2021-02-08T14:24:12,...,,,yes,,,"2-3 inner rows of tricuspid teeth, low meandep...",,,,
1,103658,,,,8,Mark4,"""Astatotilapia""",nubila swamp red,insectivore,2021-04-23T10:36:23,...,load and enter info,,yes,load and enter info,,2 inner rows of tricuspid teeth,,,,
2,11500,,,,14,Mark4,"""Astatotilapia""",nubila rocks,insectivore,,...,load and enter info,,yes,load and enter info,,1-2 inner rows of tricuspid teeth,,,,
3,84713,63,,need replacement?,,,"""Astatotilapia""",velifer,insectivore,,...,,,,,,,,,,
4,81022,KC-31,,,,LE,"""Harpagochromis""",squamipinnis,piscivore,,...,load and enter info,,yes,load and enter info,,"inner rows uni, bi? and tricuspid teeth",,,,


In [430]:
# Find the fish we look at and display all the info we know about it
# Set a substring you're looking for to the variable below
# In which jar can we find it?
fish = 'MA3'

In [431]:
# Do we have something from this fish on disk?
ondisk = glob.glob(os.path.join(Root, '*%s*' % fish))
if len(ondisk):
    for found in ondisk:
        print('*%s*: Found on disk in %s' % (fish, found))
        foundondisk = 1
else:
    print('*%s*: Nothing found in %s' % (fish, Root))

*MA3*: Found on disk in D:\Results\EAWAG\MA31
*MA3*: Found on disk in D:\Results\EAWAG\MA38


In [432]:
# Did we scan it already?
found = 0
for c, row in Data.iterrows():
    if fish in row.Fish:
        print('*%s*: Sample %s/%s was scanned on %s' % (fish, row['Fish'], row['Scan'], row['Scan date']))
        found = 1
if not found:
    if foundondisk:
        print('*%s*: We have a folder (%s) for this sample, but nothing in the dataframe, so it probably is all good' % (fish, ondisk[0]))
        print('Check the folder to be shure')
    else:
        print('*%s*: Nothing about this sample is found in our dataframe' % fish)

*MA3*: Sample MA31/rec_rescan was scanned on 2021-08-20T14:28:47
*MA3*: Sample MA38/rec was scanned on 2021-09-06T11:35:24
*MA3*: Sample MA31/rec_rescan_rereconstruct_OJ was scanned on 2021-08-20T14:28:47
*MA3*: Sample MA31/head_rec was scanned on 2021-10-19T11:15:30


In [433]:
# Can we find it in FullHeadList.txt?
def findinFullHeadList(sample):
    ''' Look for the sample in the FullHeadList.txt file'''
    fullheadlist = glob.glob(os.path.join(Root, 'FullHeadList.*'))[0]    
    found = 0
    with open(fullheadlist, 'r') as f:
        for line in f:
            if str(sample) in line:
                print(line.strip())
                found = 1
    if not found:
        return('*%s*: Nothing found in %s' % (sample, fullheadlist))
    else:
        return(None)
findinFullHeadList(fish)

'*MA3*: Nothing found in D:\\Results\\EAWAG\\FullHeadList.txt'

In [434]:
# In which jar should it be/go?
foundfishes = 0
for d, row in DataMikki.iterrows():
    if (str(fish).lower() in str(row.Fishec).lower()) or \
    (str(fish).lower() in str(row.FieldID).lower()) or \
    (str(fish).lower() in str(row.OtherID).lower()) or \
    (str(fish).lower() in str(row.ReplacementID).lower()):
        foundfishes = (row.Fishec, row.FieldID, row.OtherID, row.ReplacementID)
        # remove nan from the list of hits
        foundfishes = [str(x).lower() for x in foundfishes if pandas.isnull(x) == False]
        print('*%s*: A fish called ' % fish, end='')        
        if len(foundfishes) > 1:
            for found in foundfishes:
                print(found.upper(), end='/')
        else:
            print(foundfishes[0].upper(), end='')
        print(' should be found in jar "length=%s cm" (%s))' % (row['Length(cm)'],
                                                                row['TemporaryJar']))
if not foundfishes:
    print('*%s*: Nothing found in %s' % (fish, MikkisFile))

*MA3*: A fish called 14175/MA38/80351/ should be found in jar "length=10.5 cm" (Mark1))
*MA3*: A fish called 80344/MA31/ should be found in jar "length=11 cm" (Mark6))


In [62]:
Data[Data['Fish']=='IG94']['Scan date']

103    2021-11-29T20:27:08
Name: Scan date, dtype: object

In [50]:
# Find fishes to be rescanned, e.g. 'grep' through the comments files.
for c, row in Data.iterrows():
    with open(row.commentsfile, 'r', encoding='utf-8') as f:
        for line in f:
            if 'resc' in line and 'rec_rescan' not in line:
                print(row.Fish, 'needs to be rescanned')
                print('\t- We comment: %s' % line)
                for d, rowmikki in DataMikki.iterrows():
                    if (str(row.Fish).lower() in str(rowmikki.Fishec).lower()) or \
                        (str(row.Fish).lower() in str(rowmikki.FieldID).lower()) or \
                        (str(row.Fish).lower() in str(rowmikki.OtherID).lower()) or \
                        (str(row.Fish).lower() in str(rowmikki.ReplacementID).lower()):
                        foundfishes = (rowmikki.Fishec, rowmikki.FieldID, rowmikki.OtherID, rowmikki.ReplacementID)
                        # remove nan from the list of hits
                        foundfishes = [str(x).lower() for x in foundfishes if pandas.isnull(x) == False]
                        print('\t- ', end='')
                        if len(foundfishes) > 1:
                            for found in foundfishes[:-1]:
                                print(found.upper(), end='/')
                        print('%s should be found in jar "length=%s cm" (%s))' % (foundfishes[-1].upper(),
                                                                                  rowmikki['Length(cm)'],
                                                                                  rowmikki['TemporaryJar']))
                print(80*'-')

11965 needs to be rescanned
	- We comment: Oral jaw has artifacts in segmentation, can we try re reconstruction on the rescan - 21.09.2021, ML

	- 11965 should be found in jar "length=13.5 cm" (Mark2))
--------------------------------------------------------------------------------
MA31 needs to be rescanned
	- We comment: The rescan shows artifacts on oral jaw teeth which stick them together - QualityCheck png. 
	- 80344/MA31 should be found in jar "length=11 cm" (Mark6))
--------------------------------------------------------------------------------
14269 needs to be rescanned
	- We comment: Re-reconstructed, to alleviate artefacts, but since 'very tip (of PJ) is not complete' in scan, we might need to rescan it.

	- 14269 should be found in jar "length=6.5 cm" (nan))
--------------------------------------------------------------------------------
14269 needs to be rescanned
	- We comment: 13.10.2021: I misread PJ as OJ, so re-reconstruction did not help :( We will have to rescan th

In [49]:
# Find fishes to be rescanned, e.g. 'grep' through the comments files.
for c,row in Data.iterrows():
    with open(row.commentsfile, 'r', encoding='utf-8') as f:
        for line in f:
            if 'recons' in line and 'rec_rerecon' not in line:
                print('Fish', row.Fish, 'needs to be re-reconstructed, because in its commentfile we write "%s"' % line)

Fish 11965 needs to be re-reconstructed, because in its commentfile we write "Oral jaw has artifacts in segmentation, can we try re reconstruction on the rescan - 21.09.2021, ML
"
Fish IG156 needs to be re-reconstructed, because in its commentfile we write "26.10.2021 - re reconstruction OJ artifacts - quality check segmentation file uploaded"
Fish 14269 needs to be re-reconstructed, because in its commentfile we write "Re-reconstructed, to alleviate artefacts, but since 'very tip (of PJ) is not complete' in scan, we might need to rescan it.
"
Fish 14269 needs to be re-reconstructed, because in its commentfile we write "13.10.2021: I misread PJ as OJ, so re-reconstruction did not help :( We will have to rescan this fish."
Fish TS03 needs to be re-reconstructed, because in its commentfile we write "Really blurred oral jaw - reconstruct or best case rescan at lower voxel size if possible
"
Fish ZU12 needs to be re-reconstructed, because in its commentfile we write "12.10.2021 - OJ bright

In [50]:
# Find fishes which need to be discussed in detail
for c,row in Data.iterrows():
    with open(row.commentsfile, 'r', encoding='utf-8') as f:
        for line in f:
            if 'Mikki' in line or 'David' in line or 'discus' in line:
                print('Fish', row.Fish, 'needs to be looked at in detail, because in its commentfile we write "%s"' % line)

Fish 13420 needs to be looked at in detail, because in its commentfile we write "DH 2.11.21: This was a naming SNAFU from David's side during scanning. I renamed the files, but forgot to update the relevant part in the log file. There's an encrypted/immutable copy of the log file (*.enc), which I restored, you can see this as `12807~00_recovered.log` in which the originally wrong prefix is shown on line 15. The 'process' is shown in the Screenshots also in this folder. I renamed the files now correctly to a prefix of 13420.
"
Fish 312 needs to be looked at in detail, because in its commentfile we write "We figured this out on 19.10.2021, the fish has a huge '312' label and a much smaller, barely readable label, which Mikki will update in the encompassing Excel sheet. 21.10.2021 DH"
Fish 11807 needs to be looked at in detail, because in its commentfile we write "- Mikki and David need to discuss this in detail."
Fish 10619 needs to be looked at in detail, because in its commentfile we w

In [51]:
d = 1
for c, row in Data.iterrows():
    if 'head' in row.Scan:
        print('%0.2d/60: %s has a head-scan' % (d, row.Fish))
        d+=1

01/60: 12319 has a head-scan
02/60: 12319 has a head-scan
03/60: 109188 has a head-scan
04/60: IG80 has a head-scan
05/60: 10448 has a head-scan
06/60: 11601 has a head-scan
07/60: IG104 has a head-scan
08/60: AN33 has a head-scan
09/60: 109320 has a head-scan
10/60: 10605 has a head-scan
11/60: 10619 has a head-scan
12/60: 10619 has a head-scan
13/60: 10619 has a head-scan
14/60: 103718 has a head-scan
15/60: 104671 has a head-scan
16/60: 104671 has a head-scan
17/60: 103704 has a head-scan
18/60: 11639 has a head-scan
19/60: 11807 has a head-scan
20/60: 10794 has a head-scan
21/60: 11729 has a head-scan
22/60: 11322 has a head-scan
23/60: 13393 has a head-scan
24/60: 11500 has a head-scan
25/60: 11946 has a head-scan
26/60: 11344 has a head-scan
27/60: 13492 has a head-scan
28/60: 11116 has a head-scan
29/60: BH58 has a head-scan
30/60: 109209 has a head-scan
31/60: 106816 has a head-scan
32/60: IG156 has a head-scan
33/60: IG142 has a head-scan
34/60: MA31 has a head-scan
35/60: IG1