### Using Azure Storage File Share client library

- Download files from the file share into a Unity Catalog volume
- Upload files from a Unity Catalog volume to the file share
- List the contents of a file share directory

##### Source
[Azure File Share Python SDK](https://learn.microsoft.com/en-us/python/api/overview/azure/storage-file-share-readme?view=azure-python)

![Storage account connection string](connection-string.png)

In [0]:
%pip install azure-storage-file-share
%pip install aiohttp

In [0]:
import io
from azure.storage.fileshare import ShareFileClient
from azure.storage.fileshare.aio import ShareDirectoryClient

In [0]:
# Azure storage account connection string - can be copied directly from the Azure portal.
# NOTE(review): <accountkey> is a placeholder. Avoid committing a real account key
# in the notebook; consider loading the value from a secret store instead.
connection_string="DefaultEndpointsProtocol=https;AccountName=stadavidhurley;AccountKey=<accountkey>;EndpointSuffix=core.windows.net"

In [0]:
# Azure File Share location: share name and the directory inside it that is
# listed, downloaded from and uploaded to.
file_share_name = "myshare"
file_share_parent_dir = "mydir"
# Unity Catalog volume used as the local landing area; the three parts are
# combined into the path /Volumes/<catalog>/<schema>/<volume>/<file_name>.
catalog = "users"
schema = "david_hurley"
volume = "azure_file_share_landing"

Asynchronously list the contents of an Azure File Share directory

In [0]:
from azure.storage.fileshare.aio import ShareDirectoryClient

parent_dir = ShareDirectoryClient.from_connection_string(
  conn_str=connection_string, 
  share_name=file_share_name, 
  directory_path=file_share_parent_dir
)

my_files = []
async for item in parent_dir.list_directories_and_files():
    my_files.append(item)

file_names = [item['name'] for item in my_files]

print(file_names)


['cost_comparison.csv', 'from_buffer.csv', 'from_buffer234.csv', 'test.csv', 'test123.csv', 'test3.csv', 'test45.csv']


Download each listed file from Azure File Share and write it to a Unity Catalog volume

In [0]:
from azure.storage.fileshare import ShareFileClient
import io

# Copy every listed file from the share directory into the Unity Catalog volume.
for file_name in file_names:
    target_path = f"/Volumes/{catalog}/{schema}/{volume}/{file_name}"

    # The context manager closes the client's HTTP connection after each file.
    with ShareFileClient.from_connection_string(
        conn_str=connection_string,
        share_name=file_share_name,
        file_path=f"{file_share_parent_dir}/{file_name}",
    ) as file_client:
        # stream to unity catalog external volume: readinto() writes the
        # download straight into the open file instead of holding the whole
        # payload (plus a BytesIO copy of it) in memory first
        with open(target_path, "wb") as f:
            file_client.download_file().readinto(f)

Read File From UC Volume and Upload to Azure File Share

In [0]:
# Read each file back from the Unity Catalog volume and upload it to the share.
for file_name in file_names:
    df = spark.read.csv(f"/Volumes/{catalog}/{schema}/{volume}/{file_name}", header=True, inferSchema=True).toPandas()

    # Serialize each file into its own fresh string. A single StringIO shared
    # across iterations and rewound with seek(0) is never truncated, so a CSV
    # shorter than the previous one would be uploaded with the stale tail of
    # the earlier file appended to it.
    csv_text = df.to_csv(index=False)

    # The context manager closes the client's HTTP connection after each upload.
    with ShareFileClient.from_connection_string(
        conn_str=connection_string,
        share_name=file_share_name,
        file_path=f"{file_share_parent_dir}/{file_name}",
    ) as file_client:
        file_client.upload_file(csv_text)