# Auto_Backup

## Imports

In [1]:
import argparse
import gzip
import os
import shutil
import sys
import threading

In [2]:
def parse_input():
    """ Input Parsing for Command Line Interface """
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--target', nargs=1, required=True,
                        help='Target Backup folder')
    parser.add_argument('-s', '--source', nargs='+', required=True,
                        help='Source Files to be added')
    parser.add_argument('-c', '--compress', nargs=1, type=int,
                        help='Gzip threshold in bytes, Default 1024KB', default=[1024000])
    # Default Threshold is 1024KB
    

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit()

    return parser.parse_args()


In [3]:
def size_if_newer(source, target):
    """ If newer it returns size, otherwise it returns False """

    src_stat = os.stat(source)
    try:
        target_ts = os.stat(target).st_mtime
    except FileNotFoundError:
        try:
            target_ts = os.stat(target + '.gz').st_mtime
        except FileNotFoundError:
            target_ts = 0

    # The time difference of one second
    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False

In [4]:
def threaded_sync_file(source, target, compress):
    """ Threading for Synchronized Files """
    size = size_if_newer(source, target)

    if size:
        thread = threading.Thread(target=transfer_file,
                                  args=(source, target, size > compress))
        thread.start()
        return thread

In [5]:
def sync_file(source, target, compress):
    size = size_if_newer(source, target)

    if size:
        transfer_file(source, target, size > compress)

In [6]:
def transfer_file(source, target, compress):
    """ Either copy or compress and copies the file """

    try:
        if compress:
            # Writing in Binary mode
            with gzip.open(target + '.gz', 'wb') as target_fid:
                with open(source, 'rb') as source_fid:
                    target_fid.writelines(source_fid)
            print('Compress {}'.format(source))
        else:
            shutil.copy2(source, target)
            print('Copy {}'.format(source))
    except FileNotFoundError:
        os.makedirs(os.path.dirname(target))
        transfer_file(source, target, compress)

In [7]:
def sync_root(root, arg):
    target = arg.target[0]
    compress = arg.compress[0]
    threads = []

    for path, _, files in os.walk(root):
        for source in files:
            source = path + '/' + source
            threads.append(threaded_sync_file(source,
                                              target + source, compress))
    #            sync_file(source, target + source, compress)
    for thread in threads:
        thread.join()

In [8]:
if __name__ == '__main__':
    arg = parse_input()
    print('------------------------- Start copy -------------------------')
    print('______________________________________________________________')
    for root in arg.source:
        sync_root(root, arg)
    print('______________________________________________________________')
    print('------------------------- Done Done! -------------------------')

usage: ipykernel_launcher.py [-h] -t TARGET -s SOURCE [SOURCE ...]
                             [-c COMPRESS]
ipykernel_launcher.py: error: the following arguments are required: -t/--target, -s/--source


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


### Important

Kindly Ignore the above errors, the traceback is cause am not sure how to run a Jupyter Notebook Instance from CLI.
For Reference I have attached a the output from a native python terminal.

In [9]:
!python Auto_backup.py -t ./MIREX_Backup -s ./MIREX_Dataset -c 100000

------------------------- Start copy -------------------------
______________________________________________________________
Copy ./MIREX_Dataset/categories.txt
Copy ./MIREX_Dataset/split-by-categories-lyrics.bat
Copy ./MIREX_Dataset/split-by-categories-midi.bat
Copy ./MIREX_Dataset/clusters.txt
Copy ./MIREX_Dataset\Audio/Place  file in folder.bat
Compress ./MIREX_Dataset/dataset info.csv
Compress ./MIREX_Dataset\Audio\Cluster 1\Agressive/743.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Agressive/742.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Boisterous/001.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Agressive/741.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Agressive/750.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Agressive/744.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Boisterous/008.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Agressive/748.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Boisterous/003.mp3
Compress ./MIREX_Dataset\Audio\Cluster 1\Agressive/757.mp3
Compress ./MIR

Exception in thread Thread-117:
Traceback (most recent call last):
  File "Auto_backup.py", line 71, in transfer_file
    with gzip.open(target + '.gz', 'wb') as target_fid:
  File "C:\Users\asus\AppData\Local\Programs\Python\Python38\lib\gzip.py", line 58, in open
    binary_file = GzipFile(filename, gz_mode, compresslevel)
  File "C:\Users\asus\AppData\Local\Programs\Python\Python38\lib\gzip.py", line 173, in __init__
    fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
FileNotFoundError: [Errno 2] No such file or directory: './MIREX_Backup./MIREX_Dataset\\Audio\\Cluster 2\\Confident/032.mp3.gz'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\asus\AppData\Local\Programs\Python\Python38\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "C:\Users\asus\AppData\Local\Programs\Python\Python38\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File

Compress ./MIREX_Dataset\Audio\Cluster 3\Brooding/434.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/476.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Brooding/422.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/446.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/455.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/451.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/477.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Brooding/441.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/449.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/453.mp3
Compress ./MIREX_Dataset\Audio\Cluster 4\Poignant/488.mp3
Compress ./MIREX_Dataset\Audio\Cluster 4\Bittersweet/377.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/472.mp3
Compress ./MIREX_Dataset\Audio\Cluster 3\Literate/462.mp3
Compress ./MIREX_Dataset\Audio\Cluster 4\Bittersweet/375.mp3
Compress ./MIREX_Dataset\Audio\Cluster 4\Bittersweet/373.mp3
Compress ./MIREX_Dataset\Audio\Cluster 4\Bittersweet/398.mp3
Co

Copy ./MIREX_Dataset\Lyrics/240.txt
Copy ./MIREX_Dataset\Lyrics/239.txt
Copy ./MIREX_Dataset\Lyrics/241.txt
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/314.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Amiable-good natured/180.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/312.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/317.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Amiable-good natured/175.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/310.mp3
Compress ./MIREX_Dataset\Audio\Cluster 7\Witty/701.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/307.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/329.mp3
Copy ./MIREX_Dataset\Lyrics/244.txt
Compress ./MIREX_Dataset\Audio\Cluster 8\Amiable-good natured/190.mp3
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/321.mp3
Copy ./MIREX_Dataset\Lyrics/242.txt
Copy ./MIREX_Dataset\Lyrics/245.txt
Copy ./MIREX_Dataset\Lyrics/243.txt
Compress ./MIREX_Dataset\Audio\Cluster 8\Sweet/325.mp3
Copy ./MIREX_Dataset\Lyrics/246.txt
Copy ./

### Size Comparision

Before Compressing

In [10]:
total_size = 0
start_path = './MIREX_Dataset'  # To get size of current directory
for path, dirs, files in os.walk(start_path):
    for f in files:
        fp = os.path.join(path, f)
        total_size += os.path.getsize(fp)
print("Directory size: " + str(total_size))

Directory size: 334731974


After Compressing

In [None]:
total_size = 0
start_path = './MIREX_Backup'  # To get size of current directory
for path, dirs, files in os.walk(start_path):
    for f in files:
        fp = os.path.join(path, f)
        total_size += os.path.getsize(fp)
print("Directory size: " + str(total_size))

File Compression successfully achieved.

Directory size: 325372974


File Compression successfully achieved.