##### Parameters
- workspaceName - The name of the workspace where the source warehouse exists
- warehouseName - The name of the warehouse that is to be backed up
- workspaceBackupName - The name of the workspace in which the backup lakehouse exists
- lakehouseBackupsName - The name of the lakehouse used for backups

In [None]:
# Source: the workspace and the warehouse whose tables are backed up.
workspaceName = 'WS_Demo_InternetSales'
warehouseName = 'WH_InternetSales'

# Destination: the workspace and the lakehouse that hold the backup copies.
workspaceBackupName = 'WS_Demo_InternetSales_Backup'
lakehouseBackupsName = 'LH_DW_Backups'

##### Copy the data from the Warehouse delta folders to a separate Lakehouse

In [None]:
import datetime

# https://www.rakirahman.me/directory-recursion-synapse/

def _is_dir(entry) -> bool:
    """Return True when a listing entry represents a directory."""
    is_dir = getattr(entry, "isDir", None)
    if is_dir is None:
        # No explicit flag on this entry: fall back to the size heuristic
        # (directories are reported with size 0).
        return entry.size == 0
    # Tolerate listers that expose isDir as a method rather than an attribute.
    if callable(is_dir):
        is_dir = is_dir()
    return bool(is_dir)


def deep_ls(path: str, max_depth=1, *, ls=None):
    """
    List all files and folders in specified path and
    subfolders within maximum recursion depth.

    Args:
        path: Directory to list.
        max_depth: How many directory levels to descend. With 1, only
            ``path`` itself is listed.
        ls: Optional callable taking a path and returning the entries
            directly under it. Each entry must expose ``path`` and either
            ``isDir`` or ``size``. Defaults to ``mssparkutils.fs.ls``.

    Yields:
        For each listed directory: its files first, then either the
        contents of its subdirectories (while depth remains) or the
        subdirectories themselves (once ``max_depth`` is reached).
    """
    if ls is None:
        ls = mssparkutils.fs.ls

    # Classify on the directory flag rather than on size alone, so that
    # zero-byte files are not mistaken for folders and listed again.
    files, folders = [], []
    for entry in ls(path):
        (folders if _is_dir(entry) else files).append(entry)

    # Return all files
    yield from files

    if max_depth > 1:
        # Depth remaining: descend into each subdirectory.
        for folder in folders:
            yield from deep_ls(folder.path, max_depth - 1, ls=ls)
    else:
        # Depth exhausted: return the folders themselves.
        yield from folders


# Timestamp (UTC, minute resolution) that names this backup run's folder.
nowDatetime = datetime.datetime.now(tz=datetime.timezone.utc)
backupDatetime = nowDatetime.strftime("%Y%m%d_%H%M")
print(f'{workspaceName = }')
print(f'{backupDatetime = }')

# Source: the warehouse's Tables folder, walked two levels deep
# (read below as <schema>/<table>).
sourceTablesPath = f'abfss://{workspaceName}@onelake.dfs.fabric.microsoft.com/{warehouseName}.datawarehouse/Tables'
fileList = deep_ls(sourceTablesPath, max_depth=2)

# Destination: one folder per backup run inside the backup lakehouse.
backupRootPath = f'abfss://{workspaceBackupName}@onelake.dfs.fabric.microsoft.com/{lakehouseBackupsName}.Lakehouse/Files/{workspaceName}/{warehouseName}/{backupDatetime}'

for file in fileList:
    # The parent folder of each listed entry is taken as the schema name.
    schema = file.path.rsplit('/', 2)[-2]
    table = file.name
    print(f'{schema = }, {table = }')

    # Read the entry as a delta table and write it under the run's folder.
    df = spark.read.format('delta').load(file.path)
    deltaWriter = df.write.mode('overwrite').format('delta')
    deltaWriter.save(f'{backupRootPath}/{schema}/{table}')


##### Drop old backups - default backupRetentionCnt is set to 2 meaning it will keep the last 2 backups.

In [None]:
backupList = mssparkutils.fs.ls(f'abfss://{workspaceBackupName}@onelake.dfs.fabric.microsoft.com/{lakehouseBackupsName}.Lakehouse/Files/{workspaceName}/{warehouseName}')
# Number of most recent backups to keep; anything older is removed.
backupRetentionCnt = 2

# Backup folders are named with a %Y%m%d_%H%M timestamp, so sorting by name
# puts them in chronological order. Sort explicitly instead of relying on the
# order the listing happens to return: deletion is irreversible, and an
# unexpected order would remove the newest backups rather than the oldest.
backupsOldestFirst = sorted(backupList, key=lambda entry: entry.name)

# Clamp at zero so nothing is removed when there are fewer backups than the
# retention count.
expiredCnt = max(len(backupsOldestFirst) - backupRetentionCnt, 0)

for file in backupsOldestFirst[:expiredCnt]:
    print(f'Removing directory "{file.path}"')
    mssparkutils.fs.rm(file.path, recurse=True)