### Connect to storage & load libraries

In [0]:
# Read the service-principal credentials from the "data-bricks" secret scope so
# that no secret value is hard-coded in the notebook.
ServicePrincipalID = dbutils.secrets.get(scope="data-bricks", key="SPID")
ServicePrincipalKey = dbutils.secrets.get(scope="data-bricks", key="SPKey")
DirectoryID = dbutils.secrets.get(scope="data-bricks", key="DirectoryID")

# OAuth 2.0 token endpoint for the directory (tenant) identified by DirectoryID.
Directory = f"https://login.microsoftonline.com/{DirectoryID}/oauth2/token"

container_name = 'my-csv'
storage_account_name = 'mainstorageaccountv2'

# ABFSS URL of the storage container and the DBFS path it is mounted under.
url=f'abfss://{container_name}@{storage_account_name}.dfs.core.windows.net'
mnt_path = '/mnt/test'

# Hadoop ABFS settings for client-credentials (service principal) OAuth.
configs = {
  "fs.azure.account.auth.type": "OAuth",
  "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
  "fs.azure.account.oauth2.client.id": ServicePrincipalID,
  "fs.azure.account.oauth2.client.secret": ServicePrincipalKey,
  "fs.azure.account.oauth2.client.endpoint": Directory,
  "fs.azure.createRemoteFileSystemDuringInitialization": "false"
}

# dbutils.fs.mount raises when the mount point is already in use, which made
# this cell fail on every re-run of the notebook. Only mount when the mount
# point is not registered yet.
# NOTE(review): an existing mount is kept as-is even if it points at a
# different source; unmount it first if the source needs to change.
if any(mount.mountPoint == mnt_path for mount in dbutils.fs.mounts()):
  print(f"{mnt_path} is already mounted - skipping dbutils.fs.mount")
else:
  dbutils.fs.mount(
    source = url,
    mount_point = mnt_path,
    extra_configs = configs
  )

In [0]:
# PySpark types, window specs and column functions, plus the standard-library
# datetime names.
# NOTE: the wildcard imports below put every public name of pyspark.sql.types
# and datetime into the notebook namespace.
from pyspark.sql.types import *
from pyspark.sql.window import Window
from pyspark.sql import functions as F
# NOTE: `sum` imported here shadows Python's built-in sum() for the rest of the
# notebook; the same function is also reachable as F.sum.
from pyspark.sql.functions import lit, col, date_format, to_date, add_months, coalesce,sum,substring,date_add,expr,datediff
from datetime import *

### List the root directory of the local (driver) filesystem

In [0]:
%sh
# List the contents of the root directory (/) of the local filesystem.
ls  /

### List mounted directory (storage in our case)

In [0]:
# List the contents of the mounted storage container. Uses the mnt_path
# variable from the mount cell instead of repeating the literal path, so the
# listing always targets the mount point that was actually configured.
dbutils.fs.ls(mnt_path)

In [0]:
# List the top-level entries of the DBFS root.
# NOTE(review): the original comment said the test datasets are under
# dbfs:/FileStore/; Databricks sample datasets are normally found under
# dbfs:/databricks-datasets/ - confirm which location was meant.
dbutils.fs.ls("/")

### Create dataframe and write it to storage

In [0]:
import pyspark
from pyspark.sql import SparkSession

# Get the active Spark session, or create one named 'sparksession'.
spark = (
    SparkSession.builder
    .appName('sparksession')
    .getOrCreate()
)

# Column names, followed by one tuple per row in the same order.
columns = ["firstname", "lastname", "country", "state"]
data = [
    ("James", "Smith", "USA", "CA"),
    ("Michael", "Rose", "USA", "NY"),
    ("Robert", "Williams", "USA", "CA"),
    ("Maria", "Jones", "USA", "FL"),
]

df = spark.createDataFrame(data, columns)
df.show()  # show() truncates long cell values by default

In [0]:
# Database-qualified table name used by the CREATE TABLE / DROP TABLE cells.
table_name = "foo.bar"
# Location under the mounted container where the Delta table files are written.
table_file_path = "/mnt/test/foo.bar"

In [0]:
# Write the demo DataFrame to storage in Delta format.
# NOTE(review): no save mode is set, so Spark's default applies - presumably
# this fails if data already exists at table_file_path; confirm before re-running.
df.write.format('delta').option('mergeSchema', 'true').save(table_file_path)

In [0]:
# Load the Delta table back from storage to confirm the write.
df = spark.read.load(table_file_path, format='delta')
df.show()

In [0]:
# Run DESCRIBE DETAIL against the table path and print the result without
# truncating column values.
df_desc = spark.sql(f'DESCRIBE DETAIL "{table_file_path}"')
df_desc.show(truncate=False)

In [0]:
# Show the table's commit history right after the initial write.
# Long column values are truncated in the printed output.
df_hist = spark.sql(f'DESCRIBE HISTORY "{table_file_path}"')
df_hist.show(truncate=True)

In [0]:
# Run OPTIMIZE on the table path and print the result the command returns,
# without truncating column values.
df_opt = spark.sql(f'OPTIMIZE "{table_file_path}"')
df_opt.show(truncate=False)

In [0]:
# Show the commit history again, now after the OPTIMIZE run, to compare it
# with the history printed before.
df_hist = spark.sql(f'DESCRIBE HISTORY "{table_file_path}"')
df_hist.show(truncate=True)

### Add new column to delta-table

In [0]:
# Add an empty-string 'comment' column and append the result to the table;
# mergeSchema lets the write add the new column to the table schema.
# NOTE: df was loaded from this same table, so appending df_new writes the
# existing rows a second time (now with the extra column).
df_new = df.withColumn('comment', lit(''))
(df_new
 .write
 .format('delta')
 .mode('append')
 .option('mergeSchema','true')
 .save(table_file_path)
)

In [0]:
# Reload the table to see the rows and schema after the append.
df = spark.read.load(table_file_path, format='delta')
df.show()

In [0]:
# Show the commit history once more, now including the append that added the
# 'comment' column.
df_hist = spark.sql(f'DESCRIBE HISTORY "{table_file_path}"')
df_hist.show(truncate=True)

### Get versions of the delta-table

In [0]:
# Time travel: load the table as of version 0.
df = (
    spark.read
    .format('delta')
    .option('versionAsOf', 0)
    .load(table_file_path)
)
df.show()

In [0]:
# Time travel: load the table as of version 2.
# NOTE(review): presumably version 2 is the append with the 'comment' column,
# which assumes the OPTIMIZE run was recorded as version 1 - check the
# DESCRIBE HISTORY output to confirm.
df = (
    spark.read
    .format('delta')
    .option('versionAsOf', 2)
    .load(table_file_path)
)
df.show()

In [0]:
# List the mounted container again; the Delta table directory written above
# should now be present. Uses the mnt_path variable from the mount cell
# instead of repeating the literal path.
dbutils.fs.ls(mnt_path)

### Create database in Databricks

In [0]:
# Name of the database that will hold the demo table.
db_name = 'foo'

# Create the database if it does not exist yet.
# NOTE(review): LOCATION '/user' places the database at a shared top-level
# path - confirm this is intended rather than a dedicated directory.
spark.sql(f"""CREATE DATABASE IF NOT EXISTS {db_name} COMMENT 'This is for test purposes' LOCATION '/user';""")

In [0]:
# Print the extended description of the database without truncating values.
spark.sql(f'DESCRIBE DATABASE EXTENDED {db_name}').show(truncate=False)

### Create table in Databricks

In [0]:
# Register a table named table_name over the Delta files at table_file_path,
# unless a table with that name already exists.
spark.sql(f"create table if not exists {table_name} using delta location '{table_file_path}'")

In [0]:
# Read the registered table by name to confirm it resolves to the Delta data.
df_check = spark.table(table_name)
df_check.show()

### Drop table in Databricks

In [0]:
# Drop the table registration if it exists. The files on storage are removed
# separately by the dbutils.fs.rm call further down.
spark.sql(f"drop table if exists {table_name}")

### Remove table from storage

In [0]:
# Delete the table directory and everything under it from the mounted storage.
dbutils.fs.rm(table_file_path, recurse=True)

### Read csv from storage

In [0]:
# CSV file on the mounted container that the next cell reads.
file_path = '/mnt/test/market_data.csv'
# NOTE(review): db_table is not referenced anywhere else in the visible
# notebook - presumably the intended target table for this data; confirm.
db_table = 'foo.market_data'

In [0]:
# Read the CSV with a header row, comma delimiter and inferred column types.
df_csv = (
    spark.read
    .format("csv")
    .options(header='true', delimiter=',', inferSchema='true')
    .load(file_path)
)

df_csv.show()

### End of the notebook