# blobfuse mount init script
Inspired by https://github.com/Azure/azure-storage-fuse, this notebook creates an init script that mounts an Azure Blob Storage container with blobfuse on a cluster:

1. Import this notebook to your workspace.
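2. Edit the variables in the cells below.
3. In the last cell (cell 8), replace the `<script_??>` placeholder with the script variable that matches your authentication type (`script_sas`, `script_key`, `script_spn` or `script_msi`), or with your own script.
4. Run this notebook (clicking **Run All** above). It will generate a script called `blobfuse-mount.sh` in the location you provided.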

To create a cluster with the mount:
1. Configure a cluster with the `blobfuse-mount.sh` cluster-scoped init script [using the UI](https://docs.azuredatabricks.net/user-guide/clusters/init-scripts.html#cluster-scoped-init-scripts), [Databricks CLI](https://docs.azuredatabricks.net/user-guide/dev-tools/databricks-cli.html#databricks-cli), or by [invoking the Clusters API](https://docs.azuredatabricks.net/api/latest/clusters.html#cluster-api). 
2. Start the cluster.

In [0]:
# --- Settings shared by every script template ---------------------------------
# Storage account and container to mount.
accountName = "<storage-account-name-here>"
containerName = "<container-name-here>"
# Directory on the cluster node where the container is mounted.
mountLocation = "<path/to/desired/mountpoint>"
# Folder that will receive the generated blobfuse-mount.sh.
scriptLocation = "<path/to/desired/script/location>"
# Rendered into the --use-adls flag of the blobfuse command line.
is_adls_gen2 = True


# --- Proxy settings (optional) ------------------------------------------------
# NOTE(review): the script templates visible in this notebook do not reference
# these three values yet.

# Absolute path of the CA certificate for the proxy server.
# Example: /etc/ssl/certs/mitmproxy-ca-cert.pem
caCertFile = "/foo/bar/cert.pem"

# Proxy server address. Example: http://10.1.22.4:8080/
# An environment variable can be used instead of this setting:
#   export https_proxy=http://10.1.22.4:8080/
httpsProxy = "https://proxy:port"

# Proxy server address when https is turned off, forcing http.
# Example: http://10.1.22.4:8080/
# An environment variable can be used instead of this setting:
#   export http_proxy=http://10.1.22.4:8080/
httpProxy = "http://proxy:port"

In [0]:
# MUST HAVE when you are using SPN/MSI -- this is not optional: the secret has to be set up in the cluster ENV.
# TODO: reference a Databricks secret instead of a literal value, e.g. ${SPARKPASSWORD} with spark.password {{secrets/testScope/testKey1}}
# NOTE(review): each `!` line below is run as a shell command from the notebook; confirm that these
# `set ENV ...` lines really reach the environment in which the init script runs -- they look like
# placeholders showing which variables must exist, not a working way to define them.

!set ENV AZURE_STORAGE_SPN_CLIENT_SECRET='xxxxxxxxxxxxxxx'
!set ENV MSI_SECRET='yyyyyyyyyyyyyy'

In [0]:
# using sas token
# Init-script template for shared-access-signature authentication (authType SAS).
# Reads accountName, containerName, mountLocation and is_adls_gen2 from the configuration cell.
# Prefer loading the token from a secret store over pasting the literal value here.
sastoken = "<shared-access-token-here>"

# The rendered script installs blobfuse, writes /tmp/connection.cfg and mounts the container.
# `rm -f` is used so that a missing config file (first run on a node) is not reported as an error.
script_sas = """
  wget https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb
  sudo dpkg -i packages-microsoft-prod.deb
  sudo apt-get update -y
  sudo apt-get install blobfuse fuse -y
  mkdir -p /local_disk0/blobfusecache
  mkdir -p {mountLocation}
  rm -f /tmp/connection.cfg
  echo "accountName {accountName} 
sasToken {sastoken}
authType SAS
containerName {containerName}" > /tmp/connection.cfg
  blobfuse {mountLocation} --tmp-path=/local_disk0/blobfusecache -o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 --config-file=/tmp/connection.cfg --log-level=LOG_DEBUG --file-cache-timeout-in-seconds=120 --use-adls={isADLSGen2}
""".format(
    accountName=accountName,
    sastoken=sastoken,
    containerName=containerName,
    mountLocation=mountLocation,
    # The keyword has to match the {isADLSGen2} placeholder exactly, otherwise
    # str.format raises KeyError. blobfuse documents the flag as true/false, so
    # the Python bool is rendered in lower case instead of "True"/"False".
    isADLSGen2=str(is_adls_gen2).lower(),
)

In [0]:
# using key
# Init-script template for storage-account-key authentication (authType Key).
# Reads accountName, containerName, mountLocation and is_adls_gen2 from the configuration cell.

# TODO: load the key from a secret instead of a literal, e.g. ${SPARKPASSWORD} with spark.password {{secrets/testScope/testKey1}}
storagekey = '<storage-account-key>'

# The rendered script installs blobfuse, writes /tmp/connection.cfg and mounts the container.
# `rm -f` is used so that a missing config file (first run on a node) is not reported as an error.
script_key = """
  wget https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb
  sudo dpkg -i packages-microsoft-prod.deb
  sudo apt-get update -y
  sudo apt-get install blobfuse fuse -y
  mkdir -p /local_disk0/blobfusecache
  mkdir -p {mountLocation}
  rm -f /tmp/connection.cfg
  echo "accountName {accountName} 
accountKey {storagekey}
authType Key
containerName {containerName}" > /tmp/connection.cfg
  blobfuse {mountLocation} --tmp-path=/local_disk0/blobfusecache -o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 --config-file=/tmp/connection.cfg --log-level=LOG_DEBUG --file-cache-timeout-in-seconds=120 --use-adls={isADLSGen2}
""".format(
    accountName=accountName,
    storagekey=storagekey,
    containerName=containerName,
    mountLocation=mountLocation,
    # The keyword has to match the {isADLSGen2} placeholder exactly, otherwise
    # str.format raises KeyError. blobfuse documents the flag as true/false, so
    # the Python bool is rendered in lower case instead of "True"/"False".
    isADLSGen2=str(is_adls_gen2).lower(),
)

In [0]:
# using SPN
# Init-script template for service-principal authentication (authType SPN).
# Reads accountName, containerName, mountLocation and is_adls_gen2 from the configuration cell.
# The client secret is not written to the config file; per the notebook's own note it has to be
# provided through the AZURE_STORAGE_SPN_CLIENT_SECRET environment variable on the cluster.

servicePrincipalClientId = '<spn_client_id>'  # Specifies the client ID for your application registration
servicePrincipalTenantId = '<spn_tenant_id>'  # Specifies the tenant ID for your application registration
aadEndpoint = '<spn_aad_endpoint>'


# The rendered script installs blobfuse, writes /tmp/connection.cfg and mounts the container.
# `rm -f` is used so that a missing config file (first run on a node) is not reported as an error.
script_spn = """
  wget https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb
  sudo dpkg -i packages-microsoft-prod.deb
  sudo apt-get update -y
  sudo apt-get install blobfuse fuse -y
  mkdir -p /local_disk0/blobfusecache
  mkdir -p {mountLocation}
  rm -f /tmp/connection.cfg
  echo "accountName {accountName} 
servicePrincipalClientId {servicePrincipalClientId}
servicePrincipalTenantId {servicePrincipalTenantId}
aadEndpoint {aadEndpoint}
authType SPN
containerName {containerName}" > /tmp/connection.cfg
  blobfuse {mountLocation} --tmp-path=/local_disk0/blobfusecache -o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 --config-file=/tmp/connection.cfg --log-level=LOG_DEBUG --file-cache-timeout-in-seconds=120 --use-adls={isADLSGen2}
""".format(
    accountName=accountName,
    containerName=containerName,
    mountLocation=mountLocation,
    # The keyword has to match the {isADLSGen2} placeholder exactly, otherwise
    # str.format raises KeyError. blobfuse documents the flag as true/false, so
    # the Python bool is rendered in lower case instead of "True"/"False".
    isADLSGen2=str(is_adls_gen2).lower(),
    aadEndpoint=aadEndpoint,
    servicePrincipalClientId=servicePrincipalClientId,
    servicePrincipalTenantId=servicePrincipalTenantId,
)

In [0]:
# using MSI
# Init-script template for managed-identity authentication (authType MSI).
# Reads accountName, containerName, mountLocation and is_adls_gen2 from the configuration cell.
# Per the notebook's own note, MSI_SECRET has to be provided through the cluster environment.

identityClientId = '<client_id>'
identityObjectId = '<object_id>'
identityResourceId = '<resource_id>'
msiEndpoint = '<msi_endpoint>'


# The rendered script installs blobfuse, writes /tmp/connection.cfg and mounts the container.
# `rm -f` is used so that a missing config file (first run on a node) is not reported as an error.
script_msi = """
  wget https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb
  sudo dpkg -i packages-microsoft-prod.deb
  sudo apt-get update -y
  sudo apt-get install blobfuse fuse -y
  mkdir -p /local_disk0/blobfusecache
  mkdir -p {mountLocation}
  rm -f /tmp/connection.cfg
  echo "accountName {accountName} 
identityClientId {identityClientId}
identityObjectId {identityObjectId}
identityResourceId {identityResourceId}
msiEndpoint {msiEndpoint}
authType MSI
containerName {containerName}" > /tmp/connection.cfg
  blobfuse {mountLocation} --tmp-path=/local_disk0/blobfusecache -o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 --config-file=/tmp/connection.cfg --log-level=LOG_DEBUG --file-cache-timeout-in-seconds=120 --use-adls={isADLSGen2}
""".format(
    accountName=accountName,
    containerName=containerName,
    mountLocation=mountLocation,
    # The keyword has to match the {isADLSGen2} placeholder exactly, otherwise
    # str.format raises KeyError. blobfuse documents the flag as true/false, so
    # the Python bool is rendered in lower case instead of "True"/"False".
    isADLSGen2=str(is_adls_gen2).lower(),
    identityClientId=identityClientId,
    identityObjectId=identityObjectId,
    identityResourceId=identityResourceId,
    msiEndpoint=msiEndpoint,
)


In [0]:

# Writes the selected init script to <scriptLocation>/blobfuse-mount.sh; the final True is the
# third argument of dbutils.fs.put (its overwrite flag).
# `<script_??>` is a placeholder and is not valid Python: replace it with one of
# script_sas, script_key, script_spn or script_msi (or your own script string) before running.
dbutils.fs.put(scriptLocation+"/blobfuse-mount.sh", <script_??>, True)