In [None]:
# default_exp backup

# backup
 > Back up a folder tree file by file and recover files from the recorded metadata.
 

In [None]:
#! python3
# backupToZip.py
# Backs up a folder tree file by file into a destination folder,
# recording per-file metadata in a timestamped pickle file.
import os
import sys
import datetime
import time
import inspect
import warnings
import hashlib
import zlib
import zipfile
import pickle
import shutil
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path, PurePath
from collections import OrderedDict
__version__ = "0.1.1"

In [None]:
# Force warnings.warn() to omit the source code line in the message
#formatwarning_orig = warnings.formatwarning
#warnings.formatwarning = lambda message, category, filename, lineno, line=None: \
#    formatwarning_orig(message, category, filename, lineno, line='')

def warning_on_one_line(message, category, filename, lineno, file=None, line=None):
    """Format a warning compactly, without echoing the offending source line.

    Takes the arguments of ``warnings.formatwarning`` plus an extra ``file``
    argument; ``file`` and ``line`` are accepted but ignored.

    Returns:
        A string of the form ``' <filename>:<lineno>: <Category>:<message>'``
        (note: no trailing newline).
    """
    location = "{0}:{1}".format(str(filename), str(lineno))
    description = "{0}:{1}".format(category.__name__, str(message))
    return " " + location + ": " + description


# Install the compact formatter for every warning shown from here on.
warnings.formatwarning = warning_on_one_line

In [None]:
# Report the Python version and the version of every module bound in this
# namespace, so a saved notebook records the environment it ran in.
verbosity = 0
# Snapshot first: list() keeps the iteration safe while new names get bound.
mlist = [item for item in list(locals().items()) if inspect.ismodule(item[1])]
if verbosity > 0:
    print(mlist)
vi = sys.version_info
print("version {0}.{1}.{2} of Python".format(vi.major, vi.minor, vi.micro))
for name, mod in mlist:
    if name.startswith("__"):
        continue
    mname = name
    if hasattr(mod, "__version__"):
        # For packages, report the package directory name (e.g. "numpy")
        # next to the alias it is bound to (e.g. "np").
        if hasattr(mod, "__path__"):
            mname = os.path.split(mod.__path__[0])[1]
        # Fixed argument order: the version, then the module, then its alias.
        print("version {2} of {0} as {1} ".format(mname, name, mod.__version__))
    elif "site-packages" in (getattr(mod, "__file__", None) or ""):
        # __file__ can be missing or None (built-in / namespace modules).
        print("No __version__ for {0} as {1}".format(mname, name))
print(datetime.datetime.now())
# Drop the loop variables so they do not linger in the notebook namespace.
# pop() instead of del: the names are unbound when mlist was empty.
globals().pop("mod", None)
globals().pop("name", None)

In [None]:
def whoami():
    """Return the name of the function that called ``whoami``."""
    # Frame 0 is whoami itself; frame 1 is its caller.
    caller_frame = sys._getframe(1)
    caller_code = caller_frame.f_code
    return caller_code.co_name
  
def sha_256(fpath, size=4096):
    """Return the hexadecimal SHA-256 digest of the file at ``fpath``.

    The file is opened in binary mode and fed to the hash in reads of
    ``size`` bytes, so the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(fpath, mode='rb') as stream:
        while True:
            block = stream.read(size)
            if block == b'':
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()

In [None]:
def create_new_zip(infilepath, zipfilepath,
                   compression=zipfile.ZIP_DEFLATED,
                   compresslevel=zlib.Z_DEFAULT_COMPRESSION,
                   verbosity=0):
    """Create (or overwrite) a zip archive holding the single file ``infilepath``.

    Args:
        infilepath: path of the file to add to the archive.
        zipfilepath: path of the archive to write (opened with mode ``'w'``).
        compression: ``zipfile`` compression method constant.
        compresslevel: compression level handed to ``zipfile.ZipFile``.
        verbosity: progress messages are printed when greater than 1.

    Raises:
        Whatever ``zipfile.ZipFile`` or ``ZipFile.write`` raise, e.g. ``OSError``
        when ``infilepath`` cannot be read.
    """
    if verbosity > 1:
        # The archive is created *from* the input file (the two were swapped).
        print("creating zipfile {0} from {1} <{2}>".format(zipfilepath, infilepath,
                                                          datetime.datetime.now()))
    # The context manager closes the archive even when write() fails.
    with zipfile.ZipFile(zipfilepath, mode='w', compression=compression,
                         compresslevel=compresslevel) as zf:
        try:
            if verbosity > 1:
                print("adding {0}".format(infilepath))
            zf.write(infilepath)
        finally:
            if verbosity > 1:
                print('Done, closing <{0}>'.format(datetime.datetime.now()))

In [None]:
def path2string(fpath, sep="_", verbosity=0):
    """Flatten a path into one string of its components, each followed by ``sep``.

    Components are peeled off the end with ``os.path.split`` until an empty
    tail is produced, so a path ending in a separator yields ``""``.

    Returns:
        The components in original order, e.g. ``"a_b_c_"`` for ``a/b/c``.
    """
    components = []
    remaining = fpath
    while True:
        remaining, tail = os.path.split(remaining)
        if not tail:
            break
        components.append(tail)
    # Components were collected last-to-first; restore the original order.
    pathstring = "".join(part + sep for part in reversed(components))
    if verbosity > 0:
        print("pathstring= {0}".format(pathstring))
    return pathstring
  
  
def check_outdir(outdir, create=True, verbosity=0):
    """Return ``outdir`` if it is (or can be made) a directory, else ``None``.

    A warning is issued whenever ``outdir`` is not already a directory. With
    ``create`` true the directory is then created (including parents);
    otherwise ``None`` is returned.

    Raises:
        RuntimeError: if the directory still does not exist after creation.
    """
    if not os.path.isdir(outdir):
        warnings.warn("{0} not a dir".format(outdir))
        if create:
            if verbosity > 0:
                print("trying to create {0}".format(outdir))
            os.makedirs(outdir)
            if not os.path.isdir(outdir):
                raise RuntimeError("Cannot make dir= '{0}'".format(outdir))
        else:
            return None
    return outdir
    
def make_metafilepath(outdir, basename="generic",
                      sep = "_", ext="",
                      verbosity=0):
    """Return a path inside ``outdir`` for a new, not-yet-existing metafile.

    The file name is ``<basename><sep><YYYY-MM-DD__HH_MM_SS><ext>``. If that
    path already exists, ``<sep><n>`` (n = 1, 2, ...) is inserted before the
    extension until an unused path is found. Nothing is created on disk.

    Args:
        outdir: folder the metafile will live in.
        basename: leading part of the file name.
        sep: separator between the name parts.
        ext: extension, including the leading dot if one is wanted.
        verbosity: the chosen file name is printed when greater than 0.
    """
    nowstr = datetime.datetime.now().strftime("%Y-%m-%d__%H_%M_%S")
    stem = basename + sep + nowstr
    outfilename = stem + ext
    # Check the path inside outdir, not the bare file name (which would be
    # resolved against the current working directory).
    outfilepath = os.path.join(outdir, outfilename)
    number = 1
    while os.path.exists(outfilepath):
        outfilename = stem + sep + str(number) + ext
        outfilepath = os.path.join(outdir, outfilename)
        number += 1
    if verbosity > 0:
        print("Creating '{0}'".format(outfilename))
    return outfilepath
  
def make_tempfilepath(folder, base, sep="_", ext="", verbosity=0):
    """Return a path inside ``folder`` for a new, not-yet-existing temp file.

    The file name is ``<base><sep><YYYY-MM-DD__HH_MM_SS><ext>``. On a name
    collision ``<sep><n>`` (n = 1, 2, ...) is inserted before the extension,
    instead of spinning until the clock reaches the next second. Nothing is
    created on disk. ``verbosity`` is accepted but not used.
    """
    nowstr = datetime.datetime.now().strftime("%Y-%m-%d__%H_%M_%S")
    stem = base + sep + nowstr
    filepath = os.path.join(folder, stem + ext)
    number = 1
    while os.path.exists(filepath):
        filepath = os.path.join(folder, stem + sep + str(number) + ext)
        number += 1
    return filepath
  

In [None]:
def import_backup_metafile(folder, filename, verbosity=0):
    """Load every pickled record stored back to back in a backup metafile.

    Args:
        folder: folder containing the metafile.
        filename: name of the metafile inside ``folder``.
        verbosity: accepted but not used.

    Returns:
        A list with one entry per pickled object, in file order; ``[]`` for
        an empty file.

    Raises:
        ValueError: if the file does not exist.
        Exception: any error raised by ``pickle.load`` other than ``EOFError``
            is reported and re-raised with its original type.
    """
    filepath = os.path.join(folder, filename)
    if not os.path.isfile(filepath):
        raise ValueError("Cannot find file {0} in folder {1}".format(filename, folder))
    data = []
    # SECURITY: pickle.load can execute arbitrary code; only read metafiles
    # that come from a trusted source.
    with open(filepath, "rb") as fp:
        while True:
            try:
                data.append(pickle.load(fp))
            except EOFError:
                # Expected: no further records in the file.
                break
            except Exception as e:
                print("extype= {0}, exval= {1}".format(type(e), e))
                # Bare raise keeps the original exception type and traceback.
                raise
    return data

In [None]:
def check_folder_filename(folder, filename):
    """Report whether ``filename`` in ``folder`` is a non-empty metafile.

    Returns:
        ``True`` if the metafile holds at least one record; ``False`` (after a
        warning) if it holds none.

    Raises:
        ValueError: if the file does not exist.
    """
    if not os.path.isfile(os.path.join(folder, filename)):
        raise ValueError("Cannot find file {0} in folder {1}".format(filename, folder))
    records = import_backup_metafile(folder=folder, filename=filename)
    has_records = len(records) != 0
    if not has_records:
        warnings.warn("Empty metafile {0} in {1}".format(filename, folder))
    return has_records


def get_meta(folder, filename):
    """Load a metafile and verify that it starts with a ``meta_info`` record.

    Returns:
        The list of records on success; ``False`` if ``check_folder_filename``
        rejects the file; ``None`` (after a warning) if the file is empty or
        its first record is not of ``rec_type`` ``"meta_info"``.
    """
    if check_folder_filename(folder, filename):
        records = import_backup_metafile(folder=folder, filename=filename)
    else:
        return False

    if len(records) == 0:
        warnings.warn("Empty metafile {0} in {1}".format(filename, folder))
        return None

    header = records[0]
    if header['rec_type'] == "meta_info":
        return records

    msg = "file= {0}, folder= {1}\n first elem is not meta {2}".format(filename, folder, header)
    warnings.warn(msg)
    return None

def get_meta_fields(folder, filename):
    """List the field names available in a metafile.

    Returns:
        A dict with key ``"meta_info"`` (keys of the first record) and, when
        further records exist, ``"file_info"`` (keys of the second record).
        ``False`` if ``check_folder_filename`` rejects the file, ``None`` if
        ``get_meta`` yields nothing usable.
    """
    if not check_folder_filename(folder, filename):
        return False

    records = get_meta(folder, filename)
    if not records:
        return None

    header, *file_records = records
    field_names = {"meta_info": list(header.keys())}
    if file_records:
        # Field names are taken from the first file record only.
        field_names["file_info"] = list(file_records[0].keys())
    return field_names
  
  
def get_meta_info(folder, filename, meta_fields=None,
                  file_info_fields=None, verbosity=0):
    """Render the contents of a backup metafile as a multi-line string.

    The text consists of one line with the selected header fields, one line
    ``"<n> files"`` and then one line per record after the header.

    Args:
        folder: folder containing the metafile.
        filename: name of the metafile.
        meta_fields: header fields to show; all of them when empty/None.
            Unknown names are skipped with a warning.
        file_info_fields: per-file fields to show; all of them when
            empty/None. Unknown names are skipped with a warning.
        verbosity: accepted but not used.

    Returns:
        The rendered text; ``False`` or ``None`` when ``get_meta`` returns
        ``False`` or ``None`` for the file.
    """
    # get_meta already runs check_folder_filename and returns False when that
    # check fails, so calling it once avoids re-reading the file several times.
    meta = get_meta(folder, filename)
    if meta is False:
        return False
    if not meta:
        return None

    header = meta[0]
    file_records = meta[1:]
    avail_meta = list(header.keys())
    # A metafile holding only the header has no file_info fields; the former
    # act_fields['file_info'] lookup raised KeyError in that case.
    avail_file = list(file_records[0].keys()) if file_records else []

    def _select(requested, available, warn_fmt):
        # Keep the requested names that exist; warn about the others.
        if not requested:
            return available
        chosen = []
        for f in requested:
            if f in available:
                chosen.append(f)
            else:
                warnings.warn(warn_fmt.format(f))
        return chosen

    fields = _select(meta_fields, avail_meta,
                     " requested meta_field {0} not in meta_fields")
    lines = [", ".join("{0}: {1}".format(f, header[f]) for f in fields)]

    nfiles = sum(int(e['rec_type'] == 'file_info') for e in meta)
    lines.append("{0} files".format(nfiles))

    fields = _select(file_info_fields, avail_file,
                     " requested file_info_field {0} not in file_info_fields")
    for i, elem in enumerate(file_records):
        lines.append(", ".join("[{0}]: {1}: {2}".format(i, f, elem[f]) for f in fields))

    # Every line, including the last, is terminated by a newline.
    return "\n".join(lines) + "\n"

In [None]:
def check_make_path(thepath, verbosity=0):
    """Ensure ``thepath`` is a directory, creating it (with parents) if needed.

    A warning is issued whenever the directory did not exist beforehand.

    Returns:
        ``thepath`` unchanged.

    Raises:
        RuntimeError: if the directory still does not exist after creation.
    """
    already_there = os.path.isdir(thepath)
    if not already_there:
        warnings.warn("{0} not a dir".format(thepath))
        if verbosity > 0:
            print("trying to create {0}".format(thepath))
        os.makedirs(thepath)
        if not os.path.isdir(thepath):
            raise RuntimeError("Cannot make dir= '{0}'".format(thepath))
    return thepath

In [None]:
# Ad-hoc check: make sure the hard-coded folder exists, creating it if needed.
# NOTE(review): machine-specific absolute path — confirm before running elsewhere.
check_make_path("C:\\backup\\foo")

In [None]:
def _normalize_exts(exts, argname):
    """Normalize an extension filter to ``None`` or a non-empty list of strings."""
    if exts is None:
        return None
    if isinstance(exts, str):
        exts = [exts]
    if not isinstance(exts, list):
        raise ValueError("{0} should be None or string or list of strings".format(argname))
    return exts if len(exts) > 0 else None


def backup(origpath, destpath,
           include_exts=None,
           exclude_exts=None,
           comp_thresh = 0.9,
           compression=zipfile.ZIP_DEFLATED,
           compresslevel=zlib.Z_DEFAULT_COMPRESSION,
           testing=False,
           verbosity=0):
    """Back up every file under ``origpath`` into ``destpath``.

    A new pickle metafile is created in ``destpath``; its first record
    describes the run (``rec_type == "meta_info"``) and one ``"file_info"``
    record is appended per processed file. Each file is zipped into a
    temporary archive; if the compressed/original size ratio exceeds
    ``comp_thresh`` the original file is stored, otherwise the zip is. The
    stored copy is written as a file named after the SHA-256 of the original,
    inside a folder that mirrors the source folder under ``destpath``.

    Args:
        origpath: root of the tree to back up (a warning is issued if it is
            not absolute).
        destpath: destination folder, created if missing (a warning is issued
            if it is not absolute).
        include_exts: ``None``, a string or a list of extensions (with the
            leading dot, as returned by ``os.path.splitext``); when given,
            only files with these extensions are processed.
        exclude_exts: same format; files with these extensions are skipped.
        comp_thresh: ratio above which the uncompressed original is stored.
        compression: ``zipfile`` compression method for the temporary zip.
        compresslevel: compression level for the temporary zip.
        testing: when true, nothing is copied into the backup tree.
        verbosity: amount of progress output (0 = none).

    Raises:
        ValueError: if an extension filter is not None, a string or a list.
        Exception: errors while processing a file propagate with their
            original type; the temporary zip is removed first.
    """
    if verbosity > 0:
        print("function: {0}".format(whoami()))
        print("origpath= {0} destpath= {1}".format(origpath, destpath))
        print("testing: {0}".format(testing))

    if not PurePath(origpath).is_absolute():
        warnings.warn("origpath must be absolute, {0}".format(origpath))
    if not PurePath(destpath).is_absolute():
        warnings.warn("destpath must be absolute, {0}".format(destpath))

    check_make_path(destpath, verbosity=verbosity)

    # Assign the normalized filters directly: writing through locals() inside
    # a function does not rebind the local variables.
    include_exts = _normalize_exts(include_exts, 'include_exts')
    exclude_exts = _normalize_exts(exclude_exts, 'exclude_exts')
    if verbosity > 1:
        print("{0}: {1}".format('include_exts', include_exts))
        print("{0}: {1}".format('exclude_exts', exclude_exts))

    metafilepath = make_metafilepath(outdir=destpath,
                                     basename="backup_meta",
                                     ext=".pickle",
                                     verbosity=verbosity)
    metafilename = os.path.split(metafilepath)[1]
    header = OrderedDict()
    header['rec_type'] = "meta_info"
    header['comp_thresh'] = comp_thresh
    header['compression'] = compression
    header['compresslevel'] = compresslevel
    header['backup_version'] = __version__
    header['python_version'] = str(sys.version_info)
    header['zlib_version'] = zlib.__version__
    header["now"] = datetime.datetime.now()

    with open(metafilepath, mode='wb') as meta_fp:
        pickle.dump(header, meta_fp)

    dt_fmt = '%Y-%m-%dT%H:%M:%S'

    # Walk the entire folder tree and process the files in each folder.
    for dirpath, dirnames, filenames in os.walk(origpath, topdown=True):
        # Mirror the source folder under destpath, dropping the first path
        # component (the drive/root anchor of an absolute path).
        dirfolder = os.sep.join(PurePath(dirpath).parts[1:])
        this_outpath = os.path.join(destpath, dirfolder)

        if verbosity > 0:
            print("Adding files from '{0}' to '{1}'".format(dirpath, this_outpath))
        for filename in filenames:
            if filename == metafilename:
                continue
            ext = os.path.splitext(filename)[1]
            if include_exts is not None and ext not in include_exts:
                if verbosity > 1:
                    print("  Skipping {0}, {1} not in include_exts".format(filename, ext))
                continue
            if exclude_exts is not None and ext in exclude_exts:
                if verbosity > 1:
                    print("  Skipping {0}, {1} in exclude_exts".format(filename, ext))
                continue
            if filename.endswith('.pickle'):
                continue  # don't backup the backup pickle files

            origfilepath = os.path.join(dirpath, filename)
            # NOTE(review): as before, the list-only shortcut is taken only when
            # testing AND verbosity > 0; with testing and verbosity == 0 the
            # metadata is still computed and written (nothing is copied).
            # Confirm which dry-run behaviour is intended.
            if testing and (verbosity > 0):
                print("  adding {0}".format(filename))
                continue

            zipfilepath = make_tempfilepath(destpath, base="temp", ext=".zip",
                                            verbosity=verbosity)
            try:
                # Honour the caller's compression settings; they were recorded
                # in the header but never passed on.
                create_new_zip(origfilepath, zipfilepath,
                               compression=compression,
                               compresslevel=compresslevel)

                with zipfile.ZipFile(zipfilepath, mode='r') as zfile:
                    members = zfile.infolist()
                if verbosity > 2:
                    for zm in members:
                        print(zm)

                orig_size = os.path.getsize(origfilepath)
                comp_size = os.path.getsize(zipfilepath)
                ddict = OrderedDict()
                ddict['rec_type'] = "file_info"
                ddict['filename'] = filename
                ddict['folder'] = dirpath
                ddict['filepath'] = origfilepath
                ddict['orig_size'] = orig_size
                ddict['comp_size'] = comp_size
                # Name of the (last) archive member, needed to extract it later.
                ddict['zipname'] = members[-1].filename
                ddict['sha256'] = sha_256(origfilepath, size=4096)
                ddict['ctime'] = datetime.datetime.fromtimestamp(
                    os.path.getctime(origfilepath)).strftime(dt_fmt)
                ddict['mtime'] = datetime.datetime.fromtimestamp(
                    os.path.getmtime(origfilepath)).strftime(dt_fmt)

                comp_ratio = np.nan
                if orig_size == 0:
                    warnings.warn("{0} in {1} size is {2}".format(filename, origpath, orig_size))
                else:
                    comp_ratio = float(comp_size)/orig_size
                ddict['comp_ratio'] = comp_ratio

                # A NaN ratio (empty file) compares False, so the zip is stored.
                if comp_ratio > comp_thresh:
                    ddict['compressed'] = False
                    infilepath = origfilepath
                else:
                    ddict['compressed'] = True
                    infilepath = zipfilepath

                if verbosity > 0:
                    print("filename: {0}, filepath: {1}".format(filename, origfilepath))
                    print("osize= {0}, csize= {1}".format(orig_size, comp_size))
                    print("compressed= {0}".format(ddict['compressed']))
                    print("sha_256= {0}".format(ddict['sha256']))

                # write metadata
                with open(metafilepath, mode='ab') as meta_fp:
                    pickle.dump(ddict, meta_fp)

                # write the file
                if not testing:
                    # Create the containing folder and store the copy as a FILE
                    # named after the hash (previously the hash path itself was
                    # created as a directory and the copy landed inside it).
                    check_make_path(this_outpath, verbosity=verbosity)
                    outfilepath = os.path.join(this_outpath, ddict['sha256'])
                    shutil.copy(infilepath, outfilepath)
            finally:
                # remove the temp zipfile, also when processing failed
                if os.path.isfile(zipfilepath):
                    os.remove(zipfilepath)
                else:
                    warnings.warn("can't find zipfile {0}".format(zipfilepath))

    if verbosity > 0:
        print("Done")

In [None]:

# Build a path without drive or root and inspect how PurePath decomposes it.
# NOTE(review): os.getenv("USERNAME") returns None when the variable is unset and
# os.path.join rejects None — confirm this cell only runs where USERNAME is defined.
folder = os.path.join("Users", os.getenv("USERNAME"), "Documents", "GitHub", "covid-model") 
print("folder= {0}".format(folder))
pp = PurePath(folder)
# Path with its first component dropped (value is computed but not displayed).
os.sep.join(pp.parts[1:])
# Only this last expression is shown as the cell output.
pp.is_absolute()

In [None]:
# Run a real backup (testing=False) of a local checkout into C:\backup.
# NOTE(review): Windows-specific, hard-coded paths that depend on USERNAME —
# confirm before running on another machine.
if True:
    folder = os.path.join("C:\\", "Users", os.getenv("USERNAME"), "Documents", "GitHub", "covid-model") 
    print("folder= {0}".format(folder))
    # Show at most the first N directory entries as a sanity check.
    N = 10
    print("files: {0}".format(os.listdir(folder)[:N]))
    backup(origpath=folder, destpath="C:\\backup\\", testing=False, verbosity=1)

In [None]:
# Scratch check of how os.path.join combines a drive with rooted components.
# Per the os.path documentation, an absolute segment discards the segments
# before it; on Windows a rooted segment such as "\\foo" does not reset the drive.
os.path.join("C:", "\\foo", "\\bar")

In [None]:
def _locate_backup_file(folder, file_info):
    """Return the path of the stored copy described by ``file_info``, or None.

    Looks for a file named after the record's SHA-256 directly in ``folder``
    and then in the sub-folder mirroring the record's original folder (first
    path component dropped). A directory with that name holding the stored
    file is accepted as well.
    """
    sha = file_info['sha256']
    candidates = [os.path.join(folder, sha)]
    orig_folder = file_info.get('folder')
    if orig_folder:
        rel = os.sep.join(PurePath(orig_folder).parts[1:])
        candidates.append(os.path.join(folder, rel, sha))
    for cand in candidates:
        if os.path.isfile(cand):
            return cand
        if os.path.isdir(cand):
            # Layout where the hash path was created as a directory and the
            # file was copied into it: take the (first) file found inside.
            inner = sorted(n for n in os.listdir(cand)
                           if os.path.isfile(os.path.join(cand, n)))
            if inner:
                return os.path.join(cand, inner[0])
    return None


def recover(folder, meta_filename, filelist,
            outdir, create_outdir=False,
            chunk_size = 10**6,
            overwrite=False, testing=True, verbosity=0):
    """Restore the files named in ``filelist`` from a backup into ``outdir``.

    Each name is looked up in the metafile (by its ``filename`` field; if the
    name occurs more than once the last record wins). The stored copy is
    located by the record's SHA-256; compressed entries are extracted from
    their zip, others are copied. The restored file is written to
    ``outdir/<filename>``, replacing any existing file.

    Args:
        folder: backup folder containing the metafile and the stored copies.
        meta_filename: name of the metafile inside ``folder``.
        filelist: iterable of file names to restore.
        outdir: folder the restored files are written to.
        create_outdir: create ``outdir`` if it does not exist.
        chunk_size: buffer size in bytes used when extracting from a zip.
        overwrite: accepted for interface compatibility; currently not acted on.
        testing: accepted for interface compatibility; currently not acted on.
        verbosity: amount of progress output (0 = minimal).

    Returns:
        Always ``None``. Problems with individual files are reported through
        warnings and the remaining files are still processed.

    Raises:
        Exception: errors while extracting a compressed entry are reported
            as a warning and then re-raised with their original type.
    """
    if not os.path.isdir(folder):
        warnings.warn("{0} is not a folder".format(folder))
        return None

    meta = get_meta(folder, meta_filename)
    if not meta:
        return None
    if len(meta) == 1:
        warnings.warn("No file_info records")
        return None

    # Without a usable output folder every write below would fail
    # (check_outdir has already issued the warning).
    if check_outdir(outdir, create=create_outdir, verbosity=verbosity) is None:
        return None

    # Map file name -> index of its record in meta (index 0 is the header).
    filemap = {}
    for i, e in enumerate(meta[1:]):
        filemap[e['filename']] = i + 1

    for filename in filelist:
        if filename not in filemap:
            warnings.warn("No entry for {0}".format(filename))
            continue

        ei = filemap[filename]
        print("Found {0} as entry {1}".format(filename, ei))
        file_info = meta[ei]
        if verbosity > 0:
            print(file_info)

        outfilepath = os.path.abspath(os.path.join(outdir, file_info['filename']))
        infilepath = _locate_backup_file(folder, file_info)
        if infilepath is None:
            warnings.warn("Cannot fine backup file {0} in {1}".format(file_info['sha256'], folder))
            continue

        if file_info['compressed']:
            if verbosity > 0:
                print("Unzipping {0} to {1}".format(infilepath, outfilepath))
            try:
                # Stream the member straight into the target FILE.
                # ZipFile.extract(path=...) treats its path as a directory and
                # recreates the archived folder structure below it.
                with zipfile.ZipFile(infilepath, mode='r') as zfile:
                    zipname = file_info['zipname']
                    if verbosity > 0:
                        print("zipname= {0}  outfilepath= {1}".format(zipname, outfilepath))
                    with zfile.open(zipname) as src, open(outfilepath, mode='wb') as dst:
                        shutil.copyfileobj(src, dst, chunk_size)
            except Exception as e:
                warnings.warn("extype= {0}, exval= {1}".format(type(e), e))
                raise
        else:
            try:
                if verbosity > 0:
                    print("copying {0} to {1}".format(infilepath, outfilepath))
                shutil.copy(infilepath, outfilepath)
            except Exception as e:
                warnings.warn("extype= {0}, exval= {1}".format(type(e), e))
                continue

    # Return only after every requested file was handled; the return used to
    # sit inside the loop, which ended it after the first file.
    return None

In [None]:
# List the pickle metafiles found in the backup folder.
# NOTE(review): "backup" is resolved against the current working directory —
# confirm the notebook is started from the folder that contains it.
meta_folder = "backup"
files = os.listdir(meta_folder)
meta_files = [f for f in files if f.endswith("pickle")]
meta_files

In [None]:
# Pick the first metafile in the backup folder and print a summary of it.
meta_folder = "backup"
files = os.listdir(meta_folder)
meta_files = [f for f in files if f.endswith("pickle")]
# Raises IndexError if the folder holds no pickle file.
meta_filename = meta_files[0]
print(meta_filename)
#print_meta_info(folder=meta_folder, filename=meta_filename, fields=[''])
meta_fields = get_meta_fields(meta_folder, meta_filename)
# Not displayed: only the last expression of a cell is echoed.
meta_fields
# All header fields (meta_fields=None), but only three fields per file record.
res = get_meta_info(folder=meta_folder, filename=meta_filename, meta_fields=None, 
              file_info_fields=['filename', 'zipname', 'orig_size'])
print(res)

In [None]:
# Restore one file from the backup into ./recovered, creating that folder if needed.
# Relies on meta_folder and meta_filename set by an earlier cell.
recover(folder=meta_folder, meta_filename=meta_filename,
        filelist=['mail-loop.PNG'], 
            outdir='recovered', create_outdir=True,
            overwrite=True, testing=False, verbosity=1)


In [None]:
# Load a specific metafile and display the record at index 3.
# NOTE(review): the hard-coded name does not follow the timestamped pattern
# produced by make_metafilepath — confirm this file exists in the backup folder.
meta_filename = "backup_meta_1.pickle"
meta_folder = "backup"
meta = import_backup_metafile(folder=meta_folder, filename=meta_filename)
# Raises IndexError if the metafile holds fewer than four records.
meta[3]