# Move data appropriately from one Lakehouse to another to support deployment and testing

In [None]:
from notebookutils import mssparkutils
import pandas as pd
import datetime

# Workspace the data is read from and workspace it is copied into.
source_workspace = 'LandRegistry'
target_workspace = 'LandRegistry_UAT'

# Lakehouse names; the target lakehouse reuses the source lakehouse name.
source_lakehouse = 'PPDOneLake'
target_lakehouse = source_lakehouse

# abfss root of the source lakehouse; section names are appended below this.
base_path = f'abfss://{source_workspace}@onelake.dfs.fabric.microsoft.com/{source_lakehouse}.Lakehouse'

# Set up functions

In [None]:
def get_file_table_list(base_path, data_types=('Tables', 'Files')) -> pd.DataFrame:
    '''
    List the top-level entries of the given sections of a lakehouse.

    Adapted from https://fabric.guru/getting-a-list-of-folders-and-delta-tables-in-the-fabric-lakehouse

    Parameters
    ----------
    base_path : str
        Root path of the lakehouse, without a trailing slash
        (each section name is appended as ``{base_path}/{section}/``).
    data_types : iterable of str, optional
        Section names to list below ``base_path``. Defaults to both
        'Tables' and 'Files'; pass ``['Tables']`` to list tables only.

    Returns
    -------
    pd.DataFrame
        One row per listed entry with the columns ``name``, ``type`` and
        ``path``. ``type`` is the section name lowercased with its last
        character dropped ('Tables' -> 'table', 'Files' -> 'file').
        The frame is empty (but keeps its columns) when nothing is listed.
    '''
    columns = ['name', 'type', 'path']

    # A bare string would otherwise be iterated character by character.
    if isinstance(data_types, str):
        data_types = [data_types]

    rows = []
    for data_type in data_types:
        entry_type = data_type[:-1].lower()
        # List each section exactly once so that name and path always come
        # from the same listing (and the remote call is not repeated).
        items = mssparkutils.fs.ls(f'{base_path}/{data_type}/')
        rows.extend(
            {'name': item.name, 'type': entry_type, 'path': item.path}
            for item in items
        )

    # Fixed columns keep the schema stable even when no rows were found.
    return pd.DataFrame(rows, columns=columns)

Copy the tables one by one. Use a naive replace to insert target locations.

For fancier options, such as copying the data as of a certain timestamp, <br>
review https://learn.microsoft.com/en-us/fabric/security/experience-specific-guidance?source=recommendations#approach-1-using-custom-script-to-copy-lakehouse-delta-tables-and-files <br>
and https://learn.microsoft.com/en-us/azure/synapse-analytics/spark/microsoft-spark-utilities?pivots=programming-language-python#delete-file-or-directory

In [None]:
def copy_tables(table_list):
    '''
    Copy every path in ``table_list.path`` from the source to the target lakehouse.

    The destination of each path is derived by textual substitution using the
    notebook-level variables ``source_workspace``, ``target_workspace``,
    ``source_lakehouse`` and ``target_lakehouse``. A comma-separated header is
    printed first, followed by one comma-separated line per completed copy.

    Parameters
    ----------
    table_list : pd.DataFrame
        Frame with a ``path`` column of abfss paths, e.g. a (filtered) result
        of ``get_file_table_list``.

    Raises
    ------
    ValueError
        If the substitution leaves a path unchanged, i.e. the copy would
        target its own source.
    '''
    # Anchor on the trailing '@' so a workspace whose name merely starts with
    # the source workspace name is not rewritten by accident.
    source_prefix = f'abfss://{source_workspace}@'
    target_prefix = f'abfss://{target_workspace}@'
    source_root = f'{source_lakehouse}.Lakehouse'
    target_root = f'{target_lakehouse}.Lakehouse'
    time_format = '%Y-%m-%d %H:%M:%S'

    print('source, target, start time, end time, elapsed')
    for source in table_list.path:
        # Replace only the first occurrence: later path segments are data
        # names and must be carried over untouched.
        destination = source.replace(source_prefix, target_prefix, 1)
        destination = destination.replace(source_root, target_root, 1)

        # NOTE(review): if the listing ever returns paths that do not contain
        # the workspace/lakehouse names (e.g. ID-based paths), nothing is
        # substituted; fail loudly instead of copying a path onto itself.
        if destination == source:
            raise ValueError(
                f'Could not derive a target location for {source!r}: '
                'the source workspace/lakehouse names were not found in the '
                'path, or source and target are identical.'
            )

        start_time = datetime.datetime.now()
        mssparkutils.fs.cp(source, destination, True)  # True: copy recursively
        end_time = datetime.datetime.now()
        print(
            f'{source}, {destination}, '
            f'{start_time.strftime(time_format)}, '
            f'{end_time.strftime(time_format)}, '
            f'{end_time - start_time}'
        )

# Get List of Lakehouse tables and files
You need to copy them one by one

In [None]:
# List the top-level Tables and Files entries of the source lakehouse
# and show the listing for review before anything is copied.
table_list = get_file_table_list(base_path)
display(table_list)

### Copy Tables

In [None]:
# Copy only the rows of the listing whose type is 'table'.
copy_tables(table_list[table_list['type']=='table'])

### Copy Files

In [None]:
# Copy only the rows of the listing whose type is 'file'.
copy_tables(table_list[table_list['type']=='file'])

### Check copied data

In [None]:
# List the target lakehouse with the same helper and show it, so the result
# can be compared with the source listing.
updated_target_list = get_file_table_list(f'abfss://{target_workspace}@onelake.dfs.fabric.microsoft.com/{target_lakehouse}.Lakehouse')
display(updated_target_list)