In [1]:
import os
import sys
import glob
import re
from pathlib import Path
from difflib import SequenceMatcher
import shutil
import pandas as pd
import numpy as np

import subprocess

In [2]:
# From ...PyAnalysis/Utilities/Utilities.py
#--------------------------------------------------------------------
# PROBABLY CREATE UTILITIES_FILES OR SIMILAR AND PUT FOLLOWING THERE
#--------------------------------------------------------------------
#NOTE: when using glob
#  if recursive is true, the pattern "**" will match any files and zero or 
#    more directories, subdirectories, and symbolic links to directories.
#    If the pattern is followed by os.sep or os.altsep then files will not match
#
#  if recursive is false, it seems that "**" is treated as "*"
#    More precisely, without recursive=True, ** would be interpreted in the normal way, 
#      with each * meaning to match any number of characters EXCLUDING \, 
#      and ** would therefore just be equivalent to *
#    In other words glob(folder+'\\**\\*.ext',recursive=False) would search for any *.ext files 
#      that are exactly one level of subdirectory down from the starting folder, but not in the 
#      starting folder itself or in deeper subdirectories.
#
#  In the past, I have frequently used *\**\ (e.g. base_dir\*\**\file_name_or_whatever*.csv)
#    However, it seems this will give the same result when recursive is true or false
#      when recursive = true:  *\**\ is used, and search is recursive
#      when recursive = false: *\*\ is used, which is still essentially recursive!
#
#    Therefore, I should instead be using **\ (e.g. base_dir\**\file_name_or_whatever*.csv)
#    This will give more control when using recursive = true or false
#      when recursive = true:  **\ is used, and search is recursive
#      when recursive = false: *\ is used, so only the immediate sub-directories of base_dir are searched
#--------------------------------------------------------------------

def get_immediate_sub_dirs(base_dir):
    """Return the full paths of the directories located directly inside base_dir.

    Each returned path is base_dir joined with the entry name.  Entries are
    visited in the order produced by os.listdir; nested directories are not
    descended into and plain files are skipped.
    """
    sub_dir_paths = []
    for entry_name in os.listdir(base_dir):
        candidate = os.path.join(base_dir, entry_name)
        if os.path.isdir(candidate):
            sub_dir_paths.append(candidate)
    return sub_dir_paths

def get_immediate_sub_dir_names(base_dir):
    """Return the names (not the full paths) of the directories directly inside base_dir.

    Entries are visited in the order produced by os.listdir; plain files are skipped.
    """
    def _is_sub_dir(entry_name):
        # An entry only counts when base_dir/entry_name is a directory
        return os.path.isdir(os.path.join(base_dir, entry_name))

    return list(filter(_is_sub_dir, os.listdir(base_dir)))
    
#--------------------------------------------------------------------
def glob_find_files_with_pattern(base, pattern, recursive=True):
    """Return the list of paths glob finds for `pattern` joined onto `base`.

    `recursive` is handed straight to glob.glob, so it only changes the
    result when the pattern contains "**".
    """
    return glob.glob(os.path.join(base, pattern), recursive=recursive)


def find_all_paths(base_dir, glob_pattern, regex_pattern=None, regex_ignore_case=False, recursive=True):
    """Glob for paths under base_dir, optionally narrowing them with a regex.

    glob only supports very simple patterns, so an optional regular
    expression can be applied afterwards.  The regex is searched for (not
    fully matched) within each complete path string returned by glob.

    Parameters
    ----------
    base_dir : directory the glob pattern is joined onto
    glob_pattern : pattern given to glob
    regex_pattern : when not None, only paths in which this pattern is found are kept
    regex_ignore_case : apply the regex with re.IGNORECASE
    recursive : forwarded to glob

    Returns
    -------
    list of matching paths
    """
    # NOTE: Windows doesn't seem to care about character case when globbing (i.e. case insensitive)
    candidates = glob_find_files_with_pattern(base_dir, pattern=glob_pattern, recursive=recursive)
    if regex_pattern is None:
        return candidates
    search_flags = re.IGNORECASE if regex_ignore_case else 0
    return [path for path in candidates if re.search(regex_pattern, path, flags=search_flags)]
#--------------------------------------------------------------------

def is_dir_empty(dir_path):
    """Return True when dir_path contains no entries at all, otherwise False.

    Raises AssertionError when dir_path does not exist or is not a directory.
    """
    assert os.path.exists(dir_path) and os.path.isdir(dir_path)
    # An empty listing is falsy, so its negation is exactly the answer
    return not os.listdir(dir_path)

#--------------------------------------------------------------------
def delete_dir(dir_path):
    """Remove dir_path together with everything it contains.

    os.rmdir only handles empty directories; shutil.rmtree is used here so
    that populated directories are removed as well.
    """
    shutil.rmtree(path=dir_path)
    
def clear_all_dir_contents(dir_path):
    """Delete everything inside dir_path while keeping dir_path itself.

    NOTE: DELETES FILES AND SUB-DIRECTORIES!

    The entries are removed one by one rather than deleting and re-creating
    dir_path.  That way the directory itself is never removed, so its
    permissions are untouched and there is no window in which it does not
    exist (re-creating a directory immediately after removing it is prone
    to failing on Windows / network shares).

    Raises
    ------
    FileNotFoundError : dir_path does not exist
    NotADirectoryError : dir_path is not a directory
    """
    # Snapshot the entries first so the directory is not modified while it is being scanned
    with os.scandir(dir_path) as scan:
        entries = list(scan)
    for entry in entries:
        if entry.is_dir(follow_symlinks=False):
            shutil.rmtree(entry.path)
        else:
            # Regular files and symbolic links (the link itself, never its target)
            os.unlink(entry.path)

#--------------------------------------------------------------------

In [None]:
def move_file(file_path, destDir, copyFile=False):
    """Move (or copy) a single file into destDir, keeping its file name.

    Parameters
    ----------
    file_path : path of the file to move/copy
    destDir : destination directory; created (including any missing parent
        directories) when it does not exist yet
    copyFile : when True the file is copied and the original is left in
        place; when False (default) the file is moved

    An existing file of the same name in destDir is overwritten.

    Raises
    ------
    OSError (or a subclass such as FileNotFoundError) when the source is
    missing or the destination cannot be written.
    """
    import errno  # local import so this cell does not depend on the notebook's import cell

    # makedirs rather than mkdir: the destination may be several levels deep
    os.makedirs(destDir, exist_ok=True)
    dest_path = os.path.join(destDir, os.path.basename(file_path))
    if copyFile:
        shutil.copyfile(file_path, dest_path)
        return
    try:
        os.replace(file_path, dest_path)
    except OSError as exc:
        # os.replace cannot cross drives / file systems (e.g. local disk -> network share).
        # Anything other than that specific failure is a genuine error.
        if exc.errno != errno.EXDEV:
            raise
        shutil.copy2(file_path, dest_path)
        os.remove(file_path)
        
def get_all_converted_images(image_dir_orig, images_orig):
    """Locate the converted .raw file belonging to each original image.

    For an original image with stem <stem>, the converted file is expected
    to be named <stem>_cols_<n>_rows_<n>_slices_<n>.raw and is looked for in
    image_dir_orig.

    Parameters
    ----------
    image_dir_orig : directory searched for the converted .raw files
    images_orig : paths of the original images

    Returns
    -------
    list of converted-image paths, in the same order as images_orig

    Raises
    ------
    AssertionError when an original image does not have exactly one
    converted counterpart.
    """
    images_cvrt = []
    for img_orig in images_orig:
        img_stem_orig = Path(img_orig).stem

        # The stem is made part of the regex so that a file whose stem merely starts with
        # this stem (which the glob "<stem>*" would also return) is not mistaken for a match.
        # The dot is escaped and the pattern anchored so only a true ".raw" ending qualifies.
        regex_pattern = re.escape(img_stem_orig) + r'_cols_\d*_rows_\d*_slices_\d*\.raw$'
        # glob.escape keeps characters such as [ ] ? * in a file name from acting as wildcards
        glob_pattern = '{}*.raw'.format(glob.escape(img_stem_orig))
        matches = find_all_paths(image_dir_orig, glob_pattern,
                                 regex_pattern=regex_pattern, regex_ignore_case=False, recursive=True)
        assert len(matches) == 1, (
            f'Expected exactly 1 converted image for {img_orig}, found {len(matches)}: {matches}')
        images_cvrt.append(matches[0])
    # Compare against the argument itself, not a notebook-level global
    assert len(images_orig) == len(images_cvrt)
    return images_cvrt

def move_all_converted_images(image_dir_orig, images_orig, image_dir_cvrt, copyFiles=False):
    """Move (or copy) the converted counterpart of every original image into image_dir_cvrt.

    The converted files are first located with get_all_converted_images and
    then handed one at a time to move_file; copyFiles is passed through as
    move_file's copy switch.
    """
    for converted_path in get_all_converted_images(image_dir_orig, images_orig):
        move_file(converted_path, image_dir_cvrt, copyFiles)

In [None]:
# I have reproduced the text in Usage.txt (in current directory) below:


# Bag2raw utility extracts raw volume data from an RTT bag image set

# 2018-11-20 - prepared distribution folder and tested on Windows 8.1 Pro

# The utility takes the name of a bag file as a command line argument
# There are no other command line options
# The volume file .vol must be in the same folder as the bag file


# Usage example:
# 1. use Bag2Raw to convert the RTT volume to a raw volume
#    a. start a Windows Command Prompt
#    b. browse to folder with Bag2Raw, for ex. release folder or copy in C:\RTT\Bag2Raw
#    c. Bag2Raw C:/bags/0090022209.bag
#    d. output in same folder as bag/vol file: 0090022209_cols_448_rows_236_slices_832.raw

# The .raw file can then be imported in an analysis/visualization tool such as ImageJ, Slicer, ParaView

In [None]:
# The above description in Usage.txt is nice if you're only converting a handful of images.
# However, when you want to convert an entire directory this is not terribly useful.
# One could create a .bat file containing a list of all the files to be converted.
# Creating this by hand would also be time consuming, but the contents of such a file would look like:
# Example ConvertCmd.bat:
#   Bag2Raw "//milky-way/projects/BACOArchive/Waiting_For_Review/VIQ_WIP/VIQ_Tray/RTT_20210714/Original_Images/2021-02-16/0090024587.bag"
#   Bag2Raw "//milky-way/projects/BACOArchive/Waiting_For_Review/VIQ_WIP/VIQ_Tray/RTT_20210714/Original_Images/2021-02-16/0090024588.bag"
#   ...
#   Bag2Raw "//milky-way/projects/BACOArchive/Waiting_For_Review/VIQ_WIP/VIQ_Tray/RTT_20210714/Original_Images/2021-02-16/2222222227.bag"
#
# The script below essentially automates the process for creating and running such a .bat file, given a directory
# of images (image_dir_orig below).  It goes one step further in that it will move the processed images to another
# directory of the user's choice (image_dir_cvrt below).  The default behavior of the Bag2Raw function is to place
# the processed images in the same directory as the original, so this additional step of moving the processed images
# is very useful.
#
#  BE CAREFUL:  Once the ConvertCmd.bat file is created, simply double clicking it in your file explorer will cause
#               it to be run!  This will convert all the images, but will leave them in the original directory,
#               as the added functionality of moving the processed files only lives here!

In [None]:
# ConvertCmd.bat is written to (and run from) the notebook's working directory
bag2RawDir = os.getcwd()

#------------------------------
# Switches controlling which steps are carried out
write_convert_cmd_file = True
run_convert_cmd_file = False
move_all_converted = False
# The .bat file cannot be run unless it is also written
write_convert_cmd_file = write_convert_cmd_file or run_convert_cmd_file

#------------------------------
# image_dir_orig: directory holding the original images
# image_dir_cvrt: directory the converted images are moved to
# Alternative locations used previously:
#image_dir_orig = r'C:\Users\BUXTONJ\Documents\Analysis\LocalDataStorage\VIQ_Tray\Batch_20210714\2021-02-16\Original_Images' 
#image_dir_cvrt = r'C:\Users\BUXTONJ\Documents\Analysis\LocalDataStorage\VIQ_Tray\Batch_20210714\2021-02-16\Processed_Images2'

#image_dir_orig = r'D:\RTT\2021-04-20' 
#image_dir_cvrt = r'D:\RTT\2021-04-20\Processed_Images'

image_dir_orig = r'\\milky-way\projects\BACOArchive\Waiting_For_Review\VIQ_WIP\VIQ_Tray\RTT_20210714\Original_Images\2021-02-16'
image_dir_cvrt = r'\\milky-way\projects\BACOArchive\AtlanticCity\Users\BuxtonJ\DataStorage\VIQ_Tray\PracticeConvertForConnor\Processed_Images\Unsorted'
# The destination is only created when images are actually going to be moved
if move_all_converted:
    if not os.path.exists(image_dir_cvrt):
        os.makedirs(image_dir_cvrt)

#------------------------------
# Collect every .bag file sitting directly in image_dir_orig
pattern = r'*.bag'
images_orig = find_all_paths(
    image_dir_orig,
    glob_pattern=pattern,
    regex_pattern=None,
    regex_ignore_case=False,
    recursive=True,
)
n_images_orig = len(images_orig)
print(f'n_images_orig = {n_images_orig}')
#print(*images_orig, sep='\n')
#print(*images_orig, sep='\n')

In [None]:
# Defined outside the branches below so the "run" step never depends on the "write" step having executed
cmdFilePath = os.path.join(bag2RawDir, 'ConvertCmd.bat')
#------------------------------
# Write one "Bag2Raw <image>" line per original image
if write_convert_cmd_file:
    # The with-block guarantees the file is closed (and flushed) even if writing fails part way
    with open(cmdFilePath, 'w') as cmdFile:
        for img in images_orig:
            img_fwd_slashes = img.replace('\\', '/') #for whatever reason, this has to be done so I can run on BACO
            cmdFile.write(f'Bag2Raw "{img_fwd_slashes}"\n') #same with inclusion of ""
#------------------------------
# Run the batch file; this blocks until every conversion has finished
if run_convert_cmd_file:
    convert_return_code = subprocess.call([cmdFilePath])
    if convert_return_code != 0:
        # Surface a failed run instead of silently discarding the exit status
        print(f'WARNING: {cmdFilePath} exited with return code {convert_return_code}')
#------------------------------
# Move the converted images out of the original directory
if move_all_converted:
    move_all_converted_images(image_dir_orig, images_orig, image_dir_cvrt, copyFiles=False)
#------------------------------
# Methods to remove ConvertCmd.bat so not accidentally clicked?

In [None]:
# # Below is more specialized, so be careful when running
# # Maybe comment out move_file and just print when files will be moved first

In [None]:
# The code below is specific to the tray data delivered with dates 2021-02-16 and 2021-04-20
# Currently located here: 
#  \\milky-way\projects\BACOArchive\Waiting_For_Review\VIQ_WIP\VIQ_Tray\RTT_20210714\Original_Images\2021-02-16\
#  \\milky-way\projects\BACOArchive\Waiting_For_Review\VIQ_WIP\VIQ_Tray\RTT_20210714\Original_Images\2021-04-20\
#
# These images came with a description spreadsheet, for each image giving:
# Phantom, Scan, Tray, File ID, Date, and some other alarm/no alarm column plus a comment column (mostly unused)
# The description spreadsheet is currently located here:
#   \\milky-way\projects\BACOArchive\Waiting_For_Review\VIQ_WIP\VIQ_Tray\RTT_20210714\20210420_RVT_RTT_Batelle_Tray_Phantom_FileIDs.xlsx
#
# For post processing, we want to separate the images according to both the phantom and tray.
# As you will see in the spreadsheet, this would be time consuming and difficult to do by hand.
# The code below was developed to sort the processed images by Tray and Phantom number, and move them
# to appropriate subdirectories.

In [None]:
def find_image_file_name_from_file_id(file_id, image_file_names):
    """Return the converted-image file name whose leading number ends with file_id.

    Example
    -------
    file_id from info_df: 24589
    matching entry of image_file_names: '0090024589_cols_832_rows_240_slices_1584.raw'

    Parameters
    ----------
    file_id : identifier to look for; compared via str(file_id)
    image_file_names : iterable of file names to search.  Names that do not
        follow the <digits>_cols_<n>_rows_<n>_slices_<n>.raw layout (for
        example other files living in the same directory) are ignored.

    Returns
    -------
    The matching file name, or np.nan when no name matches.

    Raises
    ------
    AssertionError when more than one file name matches.
    """
    # Only the first group (the digits in front of "_cols_") is captured and compared
    regex_pattern = r'(\d*)_cols_\d*_rows_\d*_slices_\d*\.raw'
    file_id_str = str(file_id)
    found = []
    for name in image_file_names:
        match = re.search(regex_pattern, name, flags=re.IGNORECASE)
        # Skip names that are not converted images instead of failing on them
        if match is not None and match.group(1).endswith(file_id_str):
            found.append(name)
    assert len(found) < 2, f'File ID {file_id} matches more than one file: {found}'
    return found[0] if found else np.nan

In [None]:
# # Sort and move processed

In [None]:
# Master switch for the final cell: no file is moved unless this is True
sort_and_move = False

#-----------------------------------------
# Description spreadsheet, the sheet read from it, and the tray names
# its 'Tray' column is checked against
info_path = r'D:\20210420_RVT_RTT_Batelle_Tray_Phantom_FileIDs.xlsx'
sheet_name = 'RTT'
expected_trays = ['Beumer', 'OBT', 'SBT']
#-----------------------------------------
# placement_base_dir: root under which the <Tray>/Phantom_<n> destination directories live
placement_base_dir = r'\\milky-way\projects\BACOArchive\AtlanticCity\Users\BuxtonJ\DataStorage\VIQ_Tray\PracticeConvertForConnor\Processed_Images'
# images_dir: where the converted images currently sit
# In the typical case, images_dir below should match image_dir_cvrt above
images_dir = r'\\milky-way\projects\BACOArchive\AtlanticCity\Users\BuxtonJ\DataStorage\VIQ_Tray\PracticeConvertForConnor\Processed_Images\Unsorted'
# Names (not full paths) of everything currently in images_dir
image_file_names = os.listdir(images_dir)

In [None]:
# Read the description sheet and discard the two unnamed columns in a single chained expression
info_df = (
    pd.read_excel(info_path, sheet_name=sheet_name)
    .drop(columns=['Unnamed: 5', 'Unnamed: 6'])
)

In [None]:
print(f'Shape before drop NaN rows = {info_df.shape}')
# Rows containing NaN are removed before the integer casts below.
# (Original note: for 20210420_RVT_RTT_Batelle_Tray_Phantom_FileIDs.xlsx there was also an empty
#  column that needed to be removed, otherwise there were issues - the exact issues were not recorded.)
#
# 'file_name' is created at the bottom of this cell and is NaN for every File ID without a
# matching image.  It is left out of the NaN check so that re-running this cell does not
# silently throw those rows away.
row_has_nan = info_df.drop(columns=['file_name'], errors='ignore').isna().any(axis=1)
print(f'Number of rows with NaNs = {int(row_has_nan.sum())}')
# Build a new frame rather than mutating in place
info_df = info_df.loc[~row_has_nan].copy()
print(f'Shape after drop NaN rows = {info_df.shape}')
#------------------------------------------------------
info_df['Phantom'] = info_df['Phantom'].astype(int)
info_df['File ID'] = info_df['File ID'].astype(int)
# Every expected tray must be present, and no unexpected ones
assert(set(info_df['Tray'].unique())==set(expected_trays))
#------------------------------------------------------
# Attach the converted-image file name to each File ID (NaN when none is found)
info_df['file_name'] = info_df['File ID'].apply(lambda x: find_image_file_name_from_file_id(x, image_file_names))
no_file_name_found_df = info_df[info_df['file_name'].isna()]
print(f'number of File IDs without found file_name = {no_file_name_found_df.shape[0]}')


In [None]:
# Move every image that was matched to a File ID into <placement_base_dir>/<Tray>/Phantom_<n>
if sort_and_move:
    for _, info_row in info_df.iterrows():
        image_name = info_row['file_name']
        if not pd.isnull(image_name):
            full_path_orig = os.path.join(images_dir, image_name)
            source_found = os.path.exists(full_path_orig)
            if not source_found:
                # Show which file is missing before the assertion below stops the run
                print(full_path_orig)
            assert source_found
            phantom_dir_name = f"Phantom_{info_row['Phantom']}"
            move_dir = os.path.join(placement_base_dir, info_row['Tray'], phantom_dir_name)
            # The destination must already exist; it is deliberately not created here
            assert os.path.exists(move_dir)
            move_file(full_path_orig, move_dir, copyFile=False)