In [29]:
"""A script to auto-backup data.

Author: Chunliang Mu
"""

import os
import shutil
import filecmp
from datetime import datetime
import time
import gzip



def _get_bkp_filename(dst_path: str, compress: str = False) -> str:
    """Get backup file path from src file path.

    dst_path: str
        path to a file. Must not end with '/'. (Does not check that)
    
    """
    dst_mtime = os.path.getmtime(dst_path)
    dst_mtimestamp = datetime.utcfromtimestamp(min(dst_mtime, time.time())).strftime("%Y%m%d%H%M%S")
    dst_path_new = f'{dst_path}.bkp{dst_mtimestamp}._backup_'
    if compress == 'gzip':
        dst_path_new = f'{dst_path_new}.gz'
    return dst_path_new



def _save_bkp_file(
    src_path: str, dst_path: str,
    action: str = 'copy',
    dry_run: bool = False,
    compress: str = False,
    iverbose: int = 4,
):
    """Save source file to the destination file.    
    """
    if compress == 'gzip':
        if action in ['copy', 'Copy', 'cp', 'move', 'Move', 'mv']:
            if iverbose >= 3:
                print(f"*   Note:\tgzip-ing '{src_path}' to '{dst_path}'")
            if not dry_run:
                with open(src_path, 'rb') as src_file:
                    with gzip.open(dst_path, 'wb') as dst_file:
                        dst_file.writelines(src_file)
            #if action in ['move', 'Move', 'mv']:
            #    if iverbose >= 3:
            #        print(f"*   Note:\tRemoving '{src_path}'")
            #    if not dry_run:
            #        os.remove(src_path)
    else:
        if action in ['copy', 'Copy', 'cp']:
            if iverbose >= 3:
                print(f"*   Note:\tCopying '{src_path}' to '{dst_path}'")
            if not dry_run:
                shutil.copy2(src_path, dst_path, follow_symlinks=False)
        elif action in ['move', 'Move', 'mv']:
            if iverbose >= 3:
                print(f"*   Note:\tMoving '{src_path}' to '{dst_path}'")
            if not dry_run:
                shutil.copy2(src_path, dst_path)
        else:
            raise NotImplementedError
    return



def dir_backup(
    src_path: str,
    dst_path: str,
    filecmp_shallow: bool = True,
    bkp_old_dst_files: {bool, str} = 'gzip',
    bkp_old_dst_files_excl_list: list = ['.git'],
    ignore_list: list = ['__pycache__', '.ipynb_checkpoints'],
    dry_run  : bool = False,
    top_level: bool = True,
    iverbose :  int = 4,
):
    """Recursively backup data from src to dst.

    WARNING: SYMBOLIC LINKS WON'T BE FOLLOWED.
    
    
    Parameters
    ----------
    src_path: str
        Path to the source files. Could point to one file or one directory.

    dst_path: str
        Path to the backup destination where files will be stored. Could point to one file or one directory.

    filecmp_shallow: bool
        If True, will not compare src files and dst files (if exist) byte by byte;

    dry_run: bool
        Print what will be done (if iverbose >= 3) instead of actually doing.

    bkp_old_dst_files: bool
        Whether or not to backup existing destination files if it is older.
        If == 'gzip', will compress the file while saving.

    bkp_old_dst_files_excl_list: list
        Do not keep older version backups for files/folders within this list.

    ignore_list: list
        Do not backup files/folders within this list at all.
        Only check this if src_path points to a folder.

    iverbose: int
        Wehther errors, warnings, notes, and debug info should be printed on screen. 

    Returns
    -------
    no_file_checked, no_file_changed
    no_src_peeked: int
        No of source files checked by this func
    no_src_backed: int
        No of source files backed up (i.e. copied) by this func
    """

    # init
    no_src_peeked = 0
    no_src_backed = 0
    # normalize path
    src_path = os.path.normpath(src_path)
    dst_path = os.path.normpath(dst_path)
    

    # safety check: if file exists
    #     lexist() because we want to backup symbolic links as well
    if not os.path.lexists(src_path):
        if iverbose:
            print(f"*** Error: dir_backup(...):\n" + \
                  f"\tFile '{src_path}' does not exist.")
        return no_src_peeked, no_src_backed
    
    no_src_peeked += 1
    if os.path.isfile(src_path) or os.path.islink(src_path):
        if os.path.islink(src_path):
            # warn
            if iverbose >= 2:
                print(f"**  Warning: dir_backup(...):\n" + \
                      f"\tWill not backup content in the folder pointed by symbolic link '{src_path}'.")
                
        try:
            with open(src_path, 'rb'):
                pass
        except PermissionError:
            if iverbose:
                print(f"*** Error: dir_backup(...):\n" + \
                      f"\tPermission Error on file '{dst_path}'.")
        else:
            # compare and decide if it's the same file
            do_copy = True
            if os.path.lexists(dst_path) and not os.path.isdir(dst_path):
                if filecmp.cmp(src_path, dst_path):
                    # same file content...
                    do_copy = False
                    if os.path.samefile(src_path, dst_path):
                        # and is the same exact file! (we don't want that since we want multiple physical copy)
                        #     check if backup file already existed
                        dst_path_new = _get_bkp_filename(dst_path, bkp_old_dst_files)
                        if os.path.lexists(dst_path_new) and filecmp.cmp(dst_path, dst_path_new):
                            pass
                        elif bkp_old_dst_files:
                            _save_bkp_file(dst_path, dst_path_new, 'copy', dry_run, bkp_old_dst_files, iverbose)
                elif bkp_old_dst_files:
                    dst_path_new = _get_bkp_filename(dst_path, bkp_old_dst_files)
                    _save_bkp_file(dst_path, dst_path_new, 'move', dry_run, bkp_old_dst_files, iverbose)

            # now copy
            if do_copy:
                try:
                    _save_bkp_file(src_path, dst_path, 'copy', dry_run, False, iverbose)
                    no_src_backed += 1
                except FileNotFoundError:
                    if iverbose:
                        print(f"*** Error: dir_backup(...):\n" + \
                      f"\tCannot copy to '{dst_path}'.")
                except shutil.SameFileError:
                    dst_path_new = _get_bkp_filename(src_path, bkp_old_dst_files)
                    #     check if backup file already existed
                    if os.path.lexists(dst_path_new) and filecmp.cmp(src_path, dst_path_new):
                        pass
                    else:
                        _save_bkp_file(src_path, dst_path_new, 'copy', dry_run, bkp_old_dst_files, iverbose)
                        no_src_backed += 1
                

    elif os.path.isdir(src_path):
        # create dst dir if non-existent
        if not os.path.exists(dst_path):
            if iverbose >= 3:
                print(f"*   Note:\tCreating Directory '{dst_path}'")
            if not dry_run:
                os.makedirs(dst_path)
        
        for filename in os.listdir(src_path):
            if filename not in ignore_list:
                src_path_new = f'{src_path}{os.path.sep}{filename}'
                dst_path_new = f'{dst_path}{os.path.sep}{filename}'
                if (iverbose >= 3 and top_level) or (iverbose >= 4 and os.path.isdir(src_path_new)):
                    print(f"\nWorking on sub-folder {src_path_new}...")
                    if top_level:
                        print(f"({no_src_peeked} files looked, {no_src_backed} files backed up so far.\n)")
                    else:
                        print()
                new_src_peeked, new_src_backed = dir_backup(
                    src_path_new,
                    dst_path_new,
                    filecmp_shallow   = filecmp_shallow,
                    bkp_old_dst_files = bkp_old_dst_files if filename not in bkp_old_dst_files_excl_list else False, 
                    bkp_old_dst_files_excl_list=bkp_old_dst_files_excl_list,
                    ignore_list=ignore_list,
                    top_level = False,
                    dry_run=dry_run, iverbose=iverbose,
                    )
                no_src_peeked += new_src_peeked
                no_src_backed += new_src_backed
    return no_src_peeked, no_src_backed



# Debug

In [30]:
# Debug

src_path = "/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs"
dst_path = "/mnt/d/Backup/Documents/Edu_6_PhD_SharePoint/_Backup_"
dry_run=False
print(f"\n\n\n\tBeginning backup ({dry_run=}).\n\n\n")
dir_backup(src_path, dst_path, dry_run=dry_run)
print(f"\n\n\n\tDone.\n\n\n")




	Beginning backup (dry_run=False).




Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/.849C9593-D756-4E56-8D6E-42412F2A707B...
(1 files looked, 0 files backed up so far.
)
*** Error: dir_backup(...):
	Permission Error on file '/mnt/d/Backup/Documents/Edu_6_PhD_SharePoint/_Backup_/.849C9593-D756-4E56-8D6E-42412F2A707B'.

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Administration.bkp20231023.rar...
(2 files looked, 0 files backed up so far.
)

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/PaperDataBackup...
(3 files looked, 0 files backed up so far.
)

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/PaperDataBackup/Bermudez-2023-1...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/PaperDataBackup/Bermudez-2023-1/clmu...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie 


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/lib/clmuphantomlib/.git/objects/b1...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/lib/clmuphantomlib/.git/objects/b8...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/lib/clmuphantomlib/.git/objects/bb...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/lib/clmuphantomlib/.git/objects/c6...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/lib/clmuphantomlib/.git/objects/c9...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/lib/clmuphantomlib/.git/objects/e0...


Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Proj

*   Note:	Copying '/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/refs/heads/master' to '/mnt/d/Backup/Documents/Edu_6_PhD_SharePoint/_Backup_/Projects/20230201/src/tools/.git/refs/heads/master'

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/refs/tags...

*   Note:	gzip-ing '/mnt/d/Backup/Documents/Edu_6_PhD_SharePoint/_Backup_/Projects/20230201/src/tools/auto_backup.ipynb' to '/mnt/d/Backup/Documents/Edu_6_PhD_SharePoint/_Backup_/Projects/20230201/src/tools/auto_backup.ipynb.bkp20231029142556._backup_.gz'
*   Note:	Copying '/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/auto_backup.ipynb' to '/mnt/d/Backup/Documents/Edu_6_PhD_SharePoint/_Backup_/Projects/20230201/src/tools/auto_backup.ipynb'

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/star...


Wor

In [31]:
src_path = "/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools"
dst_path = "/mnt/d/da/temp/"
dry_run=True
print(f"\n\n\n\tBeginning backup ({dry_run=}).\n\n\n")
dir_backup(src_path, dst_path, dry_run=dry_run)




	Beginning backup (dry_run=True).




Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git...
(1 files looked, 0 files backed up so far.
)
*   Note:	Creating Directory '/mnt/d/da/temp/.git'

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/branches...

*   Note:	Creating Directory '/mnt/d/da/temp/.git/branches'
*   Note:	Copying '/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/COMMIT_EDITMSG' to '/mnt/d/da/temp/.git/COMMIT_EDITMSG'
*   Note:	Copying '/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/config' to '/mnt/d/da/temp/.git/config'
*   Note:	Copying '/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/description' to '/mnt/d/da/temp/.git/description'
*   Note:	Copying '/mnt/d/Sync_OneDrive

*   Note:	Creating Directory '/mnt/d/da/temp/.git/objects/pack'

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/refs...

*   Note:	Creating Directory '/mnt/d/da/temp/.git/refs'

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/refs/heads...

*   Note:	Creating Directory '/mnt/d/da/temp/.git/refs/heads'
*   Note:	Copying '/mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/refs/heads/master' to '/mnt/d/da/temp/.git/refs/heads/master'

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.git/refs/tags...

*   Note:	Creating Directory '/mnt/d/da/temp/.git/refs/tags'

Working on sub-folder /mnt/d/Sync_OneDrive/Macquarie University/Project RT in CEE - Docs/Projects/20230201/src/tools/.gitignore...
(50 files looked, 29 files backed up 

(53, 32)