# Notebook to move the final .dcm files selected for processing to the project folder

In [3]:
import functools
import shutil
import pickle
import pandas as pd
# NOTE: the relative order of the wildcard imports and the explicit imports
# around them is kept as-is, because a wildcard import may shadow names.
from l3finder.ingest import *
import os
import multiprocessing
from multiprocessing import get_context
from multiprocessing import set_start_method
from tqdm.notebook import tqdm
from investigate import *

# Custom functions
def save_object(obj, filename):
    """Serialize ``obj`` to ``filename`` with pickle.

    The file is opened in binary write mode, so an existing file at that
    path is replaced. The highest pickle protocol available is used.
    """
    with open(filename, mode='wb') as fh:
        pickle.dump(obj, fh, protocol=pickle.HIGHEST_PROTOCOL)

def load_object(filename):
    """Read ``filename`` in binary mode and return the unpickled object.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at files from a trusted source.
    """
    with open(filename, mode='rb') as fh:
        obj = pickle.load(fh)
    return obj

In [4]:
# Run configuration: which dump to process and where things live.
dump = 1                 # value matched against df_final['dump'] below
target = '/tf/output'    # passed to the file mover as its `target`
data = '/tf/data'        # not referenced in the cells visible here
output = '/tf/pickles'   # directory the pickles are loaded from / saved to
cwd = os.getcwd()        # working directory at the time this cell runs

### Load the final DataFrame and subjects, then keep only the dump being processed

In [5]:
# Load the pickled frame and the pickled subject list (in that order) from
# the pickle directory.
df_final, subjects = (
    load_object(os.path.join(output, pickle_name))
    for pickle_name in ('df_final.pkl', 'subjects_final.pkl')
)

In [6]:
# Keep only the rows that belong to the dump being processed.
df_final = df_final[df_final['dump']==dump]

# Build the ID lookup once. Testing membership against a set is constant
# time per subject, instead of re-extracting and linearly scanning the ID
# column for every subject.
dump_ids = set(df_final['ID'])
subjects = [s for s in subjects if s.id_ in dump_ids]

# Sanity check: the two lengths are expected to match.
print('Len of df_final for this dump:  ', len(df_final))
print('Len of subjects for this dump:  ', len(subjects))

Len of df_final for this dump:   1990
Len of subjects for this dump:   1990


# File Mover

In [8]:
# Bind the arguments that are the same for every subject, so the worker pool
# only has to pass the subject itself to `move_subject`.
# `functools` is imported explicitly in the notebook's import cell; it must
# not be left to a wildcard import to bring it into scope.
file_mover = functools.partial(
    move_subject,
    df_final=df_final,
    target=target,
)

In [9]:
# Restrict the frame to rows whose ID belongs to one of the retained
# subjects, then report both lengths for comparison.
df_final = df_final.loc[df_final['ID'].isin([subj.id_ for subj in subjects])]
print('Len of df: ', len(df_final))
print('Len of sub: ', len(subjects))

Len of df:  1990
Len of sub:  1990


In [None]:
if __name__=='__main__':
    # Move every retained subject using a pool of worker processes and record
    # the per-subject result in the frame.
    print("Moving valid subjects")

    def pool_mover(pool, func, candidates):
        """Apply ``func`` to each candidate on ``pool`` with a progress bar.

        Uses the ordered ``imap`` so that ``result[i]`` belongs to
        ``candidates[i]``. ``imap_unordered`` yields results in completion
        order, which would break that correspondence.
        """
        return list(tqdm(pool.imap(func, candidates), total=len(candidates)))

    n_cores = 10
    print('Filtering series using ', n_cores, ' cores:')

    with get_context("spawn").Pool(processes=n_cores) as p:
        result_list = pool_mover(p, file_mover, subjects)
        p.close()
        p.join()

    # Attach each result to its row via the subject ID rather than by list
    # position: the frame's row order is not guaranteed to match the order
    # of `subjects`.
    processed_by_id = {s.id_: res for s, res in zip(subjects, result_list)}
    # `assign` returns a new frame, so no column is written onto a filtered
    # slice of another frame (avoids pandas' SettingWithCopyWarning).
    df_final = df_final.assign(
        Processed=[processed_by_id[subject_id] for subject_id in df_final['ID']]
    )

Moving valid subjects
Filtering series using  10  cores:


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1990.0), HTML(value='')))

In [None]:
# Preview the first ten rows of the processed frame.
display(df_final.iloc[:10])

In [None]:
# Persist the processed frame. The dump number comes from `dump`, so that
# processing another dump does not overwrite this dump's file; for dump 1
# the file name is unchanged ('df_final_dump1_processed.pkl').
save_object(df_final, os.path.join(output, f'df_final_dump{dump}_processed.pkl'))