# Paligo RenameTool
The Paligo RenameTool was created to automate the download-and-rename process for technical documentation published with Paligo (the CCMS that Perspectum uses). \
Development of the Paligo RenameTool started on 2022-07-26, and the most recent development work ended on 2022-09-20.
The Imaging Applications team's manual naming convention is outlined below. The MRA reference number should be added to the end of the file name:
|Project Type| Naming Convention | Example | Translation|
|---|---|---|---|
|For CRO project|[company_name] [project_name] Patient Acquisition Manual for XX Scanners|Lilly I8F-MC-GPIF Patient Acquisition Manual for GE Scanners|Lilly I8F-MC-GPIF Patient Acquisition Manual for GE Scanners (Portuguese)|
|For Clinical Collaboration|[project_name] Patient Acquisition Manual for XX Scanners|NIMBLE Patient Acquisition Manual for GE Scanners|NIMBLE Patient Acquisition Manual for GE Scanners (Japanese)|

Each of the tools below performs the following general steps:
- search the download folder for files whose names start with a 4 to 5 digit number and end with .zip
- unzip the archive and read the PDF
- get the file name from the PDF metadata title or from the file name inside the zip (zipObj)
- replace the language suffix with the full language name as per the naming convention (if applicable)
- get the MRA reference number from the front-page text
- add the MRA reference number to the file name
- save the renamed PDF in the output directory selected by the user

In [2]:
# Import required Python libraries
import os
import re
import shutil
import tempfile
import webbrowser
from datetime import datetime
from zipfile import ZipFile

import PyPDF2
from PyPDF2 import PdfReader

In [25]:
# Show the date on which this notebook was last run (formatted as YYYY/MM/DD)
today = format(datetime.today(), '%Y/%m/%d')
print('Last Update:', today)

Last Update: 2022/09/28


In [5]:
## Default folders
# Both the folder that is searched for Paligo exports (download_PATH) and the
# folder that receives the renamed files (save_PATH) default to the current
# user's Downloads folder, so the notebook is not tied to one user account.
download_PATH = os.path.join(os.path.expanduser('~'), 'Downloads')
save_PATH = download_PATH
os.getcwd() # display the current working directory (this cell does not change it)

'/Users/yi-chunwang/Downloads'

## Build a function to choose where to save the output file

In [3]:
# Let user choose download folder because manual output from Paligo are default saved on Download folder
def choose_download_PATH():
    """Ask the user for the Downloads folder and store it in the global download_PATH.

    The question is repeated until the answer is an existing directory, so that a
    mistyped path is caught here instead of failing later when the rename cells
    change into that folder.

    Returns:
        The chosen folder path (also stored in the global ``download_PATH``).
    """
    global download_PATH
    # strip() drops stray spaces from a pasted path; expanduser() allows "~/Downloads"
    download_PATH = os.path.expanduser(input('Define your Download folder path').strip())
    while not os.path.isdir(download_PATH):
        print('invalid path')
        download_PATH = os.path.expanduser(input('Define your Download folder path').strip())
    return download_PATH

# Let user choose a folder to save renamed files. It can be the same as the Download folder
def output_folder():
    """Ask the user for the output folder and store it in the global save_PATH.

    The question is repeated until the answer is an existing directory. A path
    that points to a regular file is rejected, because the renamed PDFs are
    written into this folder.

    Returns:
        The chosen folder path (also stored in the global ``save_PATH``).
    """
    global save_PATH
    # strip() drops stray spaces from a pasted path; expanduser() allows "~/Documents"
    save_PATH = os.path.expanduser(input('Choose a folder/directory to save the output (as path)').strip())
    while not os.path.isdir(save_PATH):
        print('invalid path')
        save_PATH = os.path.expanduser(input('Choose a folder/directory to save the output(as path)').strip())
    return save_PATH


In [4]:
# Ask the user for the Downloads folder; the answer is stored in the global download_PATH
choose_download_PATH()

'/Users/yi-chunwang/Downloads'

In [5]:
# Ask the user where to save the renamed files; the answer is stored in the global save_PATH
output_folder()

'/Users/yi-chunwang/Downloads'

## RenameTool for English Manuals

In [6]:
# RenameTool for English manuals.
# For every Paligo export in the Downloads folder (a .zip whose name starts
# with a digit) each PDF inside it is saved in save_PATH as
# "<PDF title> - <MRA reference>.pdf".
os.chdir(download_PATH) # change the current directory to Downloads folder
downloads = os.listdir()

for download in downloads:
    if not (download[0].isdigit() and download.endswith('.zip')):
        continue # not a Paligo export
    zipfile = os.path.abspath(download) # get the absolute path of the zip file

    # Unpack into a throw-away folder: nothing but the renamed PDF ends up in
    # save_PATH, whichever output folder the user picked.
    with tempfile.TemporaryDirectory() as temp_dir, ZipFile(zipfile) as zipObj:
        list_of_file_names = zipObj.namelist()
        for fileName in list_of_file_names:
            if not fileName.endswith('.pdf'):
                continue
            # fileName looks like
            # 34495-ALT-801-106_Patient_Acquisition_Manual-pdf/out/ALT-801-106_Patient_Acquisition_Manual-en.pdf
            target_name = zipObj.extract(fileName, temp_dir) # extract() returns the path it created

            reader = PdfReader(target_name) # read the pdf
            meta = reader.metadata # read the meta data
            # get the proper file name from the title in meta data; use the name
            # of the PDF inside the zip when the PDF carries no title
            file_name = (meta.title if meta else None) or os.path.splitext(fileName.rsplit('/', 1)[-1])[0]

            page = reader.pages[0] # get the front page
            front_page_text = page.extract_text() or '' # get the front page text and store as a variable
            reference_start = front_page_text.find('MRA') # find the MRA reference number
            if reference_start == -1:
                # find() returned -1: slicing from there would pick up unrelated text
                print('No MRA reference found on the front page of', fileName)
                new_name = file_name + '.pdf'
            else:
                MRAref = front_page_text[reference_start:reference_start+7] # store the MRA reference number as a variable
                new_name = file_name + ' - ' + MRAref + '.pdf' # compose new file name

            # move the PDF out of the temporary folder under its new name
            shutil.move(target_name, os.path.join(save_PATH, new_name))

## RenameTool for Translated Manuals

In [26]:
# RenameTool for translated manuals.
# For every Paligo export in the Downloads folder (a .zip whose name starts
# with a digit) each PDF inside it is saved in save_PATH as
# "<file name with vendor and full language name> - <MRA reference>.pdf".
os.chdir(download_PATH) # change the current directory to Downloads folder
downloads = os.listdir()

# build a language code and full language name mapping
lang_dict = {
    '-af': ' (Afrikaans)',
    '-ar': ' (Arabic)',
    '-be': ' (Belarusian)',
    '-bg': ' (Bulgarian)',
    '-bs': ' (Bosnian)',
    '-ca': ' (Catalan)',
    '-cs': ' (Czech)',
    '-da': ' (Danish)',
    '-de': ' (German)',
    '-el': ' (Greek)',
    '-en': ' (English)',
    '-eo': ' (Esperanto)',
    '-es': ' (Spanish)',
    '-et': ' (Estonian)',
    '-eu': ' (Basque)',
    '-fa': ' (Persian)',
    '-fi': ' (Finnish)',
    '-fr': ' (French)',
    '-he': ' (Hebrew)',
    '-hi': ' (Hindi)',
    '-hr': ' (Croatian)',
    '-hu': ' (Hungarian)',
    '-is': ' (Icelandic)',
    '-it': ' (Italian)',
    '-ja': ' (Japanese)',
    '-ji': ' (Yiddish)',
    '-ko': ' (Korean)',
    '-lt': ' (Lithuanian)',
    '-lv': ' (Latvian)',
    '-mg': ' (Malagasy)',
    '-mk': ' (Macedonian)',
    '-nb': ' (Norwegian Bokmål)',
    '-nl': ' (Dutch)',
    '-nn': ' (Norwegian Nynorsk)',
    '-no': ' (Norwegian)',
    '-pl': ' (Polish)',
    '-pt': ' (Portuguese)',
    '-ro': ' (Romanian)',
    '-ru': ' (Russian)',
    '-sk': ' (Slovak)',
    '-sl': ' (Slovenian)',
    '-sq': ' (Albanian)',
    '-sr': ' (Serbian)',
    '-sv': ' (Swedish)',
    '-th': ' (Thai)',
    '-tr': ' (Turkish)',
    '-uk': ' (Ukrainian)',
    '-vi': ' (Vietnamese)',
    '-za': ' (Zhuang)',
    '-zh': ' (Chinese)',
    '-zh-chs': ' (Chinese-simplified)',
    '-zh-cht': ' (Chinese-traditional)',
}

# Longest codes first, so that '-zh-chs' is not mistaken for '-zh'
lang_codes = sorted(lang_dict, key=len, reverse=True)

for download in downloads:
    if not (download[0].isdigit() and download.endswith('.zip')):
        continue # not a Paligo export
    zipfile = os.path.abspath(download) # get the absolute path of the zip file

    # Unpack into a throw-away folder: nothing but the renamed PDF ends up in
    # save_PATH, whichever output folder the user picked.
    with tempfile.TemporaryDirectory() as temp_dir, ZipFile(zipfile) as zipObj:
        list_of_file_names = zipObj.namelist() # get a list of file names of entire folder tree (from top zip folder to inner most files)
        for fileName in list_of_file_names:
            if not fileName.endswith('.pdf'):
                continue
            target_name = zipObj.extract(fileName, temp_dir) # extract() returns the path it created

            # extract the file name itself (no folders, no extension) and replace underscore with space
            file_name_strip = os.path.splitext(fileName.rsplit('/', 1)[-1])[0].replace('_', ' ')

            # Replace language code with full language. Only the end of the
            # name is looked at, so a hyphen inside the project name is never
            # taken for a language code.
            for key in lang_codes:
                if file_name_strip.lower().endswith(key):
                    file_name_strip = file_name_strip[:-len(key)] + lang_dict[key]
                    break

            # Add vendor name into file name. The vendor is read from the title
            # in the meta data; whole-word matching keeps 'GE' from matching
            # inside another word.
            reader = PdfReader(target_name)
            meta = reader.metadata
            title = ((meta.title if meta else None) or '').replace('_', ' ')
            if not title:
                vendor = None
                print('No title in the meta data, vendor not added for', fileName)
            elif re.search(r'\bSiemens\b', title):
                vendor = 'Siemens'
            elif re.search(r'\bGE\b', title):
                vendor = 'GE'
            else:
                # NOTE(review): every title without Siemens or GE is treated as Philips - confirm
                vendor = 'Philips'
            split_name = re.split(r'\bfor\b', file_name_strip, maxsplit=1)
            if vendor and len(split_name) == 2 and not split_name[1].lstrip().startswith(vendor):
                insert_vendor = split_name[0] + 'for ' + vendor + split_name[1]
            else:
                # no "for" to attach the vendor to, or the vendor is already there
                insert_vendor = file_name_strip
            #print(insert_vendor)

            # Add MRA ref
            page = reader.pages[0] # get the front page
            front_page_text = page.extract_text() or '' # get the front page text and store as a variable
            reference_start = front_page_text.find('MRA') # find the MRA reference number
            if reference_start == -1:
                # find() returned -1: slicing from there would pick up unrelated text
                print('No MRA reference found on the front page of', fileName)
                new_name = insert_vendor + '.pdf'
            else:
                MRAref = front_page_text[reference_start:reference_start+7] # store the MRA reference number as a variable
                new_name = insert_vendor + ' - ' + MRAref + '.pdf' # compose new file name

            # move the PDF out of the temporary folder under its new name
            shutil.move(target_name, os.path.join(save_PATH, new_name))

## Rename Tool for non-Manual English Documents 

In [33]:
# RenameTool for non-manual English documents.
# For every Paligo export in the Downloads folder (a .zip whose name starts
# with a digit) each PDF inside it is saved in save_PATH as
# "<PDF title, with vendor where applicable> - <MRA reference>.pdf".
os.chdir(download_PATH) # change the current directory to Downloads folder
downloads = os.listdir()

for download in downloads:
    if not (download[0].isdigit() and download.endswith('.zip')):
        continue # not a Paligo export
    zipfile = os.path.abspath(download) # get the absolute path of the zip file

    # Unpack into a throw-away folder: nothing but the renamed PDF ends up in
    # save_PATH, whichever output folder the user picked.
    with tempfile.TemporaryDirectory() as temp_dir, ZipFile(zipfile) as zipObj:
        list_of_file_names = zipObj.namelist()
        for fileName in list_of_file_names:
            if not fileName.endswith('.pdf'):
                continue
            target_name = zipObj.extract(fileName, temp_dir) # extract() returns the path it created

            reader = PdfReader(target_name) # read the pdf
            meta = reader.metadata # read the meta data
            title = (meta.title if meta else None) or ''
            # get the proper file name from the title in meta data (or, without a
            # title, from the PDF name inside the zip) and replace the underscore with space
            file_name_strip = (title or os.path.splitext(fileName.rsplit('/', 1)[-1])[0]).replace('_', ' ')

            # Add vendor name into file name, but only when the name has a
            # "for ..." part to attach it to. Whole-word matching keeps 'GE'
            # from matching inside another word.
            vendor_text = title.replace('_', ' ')
            if not title:
                vendor = None
            elif re.search(r'\bSiemens\b', vendor_text):
                vendor = 'Siemens'
            elif re.search(r'\bGE\b', vendor_text):
                vendor = 'GE'
            else:
                # NOTE(review): every title without Siemens or GE is treated as Philips - confirm
                vendor = 'Philips'
            split_name = re.split(r'\bfor\b', file_name_strip, maxsplit=1) # see if the title has "for scanners"
            if vendor and len(split_name) == 2 and not split_name[1].lstrip().startswith(vendor):
                insert_vendor = split_name[0] + 'for ' + vendor + split_name[1]
            else:
                # no "for" in the title, or the vendor already follows it
                insert_vendor = file_name_strip

            # Add MRA reference number
            page = reader.pages[0] # get the front page
            front_page_text = page.extract_text() or '' # get the front page text and store as a variable
            reference_start = front_page_text.find('MRA') # find the MRA reference number
            if reference_start == -1:
                # find() returned -1: slicing from there would pick up unrelated text
                print('No MRA reference found on the front page of', fileName)
                new_name = insert_vendor + '.pdf'
            else:
                MRAref = front_page_text[reference_start:reference_start+7] # store the MRA reference number as a variable
                new_name = insert_vendor + ' - ' + MRAref + '.pdf' # compose new file name

            # move the PDF out of the temporary folder under its new name
            shutil.move(target_name, os.path.join(save_PATH, new_name))

## Open the LegaleSign and MRA Register pages in the browser

In [28]:
# Open the LegaleSign PDF upload page in the web browser
webbrowser.open('https://app1.legalesign.com/perspectum-diagnostics3/pdf/upload/')

True

In [29]:
# Open the MRA Register list (SharePoint) in the web browser
webbrowser.open('https://perspectumdiagnostics.sharepoint.com/Lists/MRA%20Register/AllItems.aspx')


True

## PlayGround

In [None]:
# No need to unpack separately: ZipFile already takes care of the unzip step
# and also provides the names of the files inside the archive.
# Kept as a reference for shutil.unpack_archive; `zipfile` is the path of the
# last zip handled by one of the RenameTool cells.

shutil.unpack_archive(zipfile, '/Users/yi-chunwang/Downloads/', 'zip') #use function to unzip file
print('unzip done')

In [None]:

# Walking the unzipped folder is no longer needed: target_name is now taken
# from the list of file names that ZipFile provides.
# The walk goes bottom-up and leaves the path of the last PDF it meets in target_name.

unzipped_folder = '/Users/yi-chunwang/Downloads/out/34495-ALT-801-106_Patient_Acquisition_Manual-pdf'
for root, dirs, files in os.walk(unzipped_folder, topdown=False):
    for name in files:
        if not name.endswith('.pdf'):
            continue
        target_name = os.path.join(root, name)
