In [None]:
!pip install delta-sharing

In [None]:
import os
import pandas as pd
import delta_sharing
from pyspark.sql import SparkSession
from delta.pip_utils import configure_spark_with_delta_pip

# Session builder with the Delta Lake SQL extension and catalog registered.
builder = (
    SparkSession.builder
    .appName("DeltaSession")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# Extra Maven packages to put on the Spark classpath.
# The Delta Lake artifact itself is NOT listed: configure_spark_with_delta_pip
# adds the one that matches the installed delta-spark package, and hard-coding
# a second copy risks two different Delta versions on the classpath.
jars = [
    "io.delta:delta-sharing-spark_2.12:0.6.3",  # Delta Sharing Spark connector
]

# Attach the packages to the builder, then create (or reuse) the session.
spark = configure_spark_with_delta_pip(
    spark_session_builder=builder,
    extra_packages=jars,
).getOrCreate()

In [None]:
# Path of the Delta Sharing profile file.
# The file may live on the local file system or on remote storage.
profile_file = "delta-sharing/open-datasets.share"

# Build a SharingClient from that profile file.
client = delta_sharing.SharingClient(profile_file)

In [None]:
# Echo the client object so the notebook shows it as this cell's output.
client

In [None]:
# Ask the client for every shared table it can see and print the result.
shared_tables = client.list_all_tables()
print(shared_tables)

In [None]:
# Tabulate the shared tables: one row per table, holding its share, schema
# and table name.
table_rows = [
    (table.share, table.schema, table.name)
    for table in client.list_all_tables()
]
df = pd.DataFrame(table_rows, columns=['share', 'schema', 'name'])

# Show at most the first 10 entries.
df.head(10)

## load_as_pandas

In [None]:
# Build the URL of a shared table: the profile file path, followed by `#`,
# followed by the fully qualified table name
# (`<share-name>.<schema-name>.<table-name>`).
profile_file = "delta-sharing/open-datasets.share"
table_url = profile_file + "#delta_sharing.default.boston-housing"

# Fetch 10 rows of the table as a pandas DataFrame.
# Limiting the read is a way to sample a table that cannot fit in memory.
data = delta_sharing.load_as_pandas(table_url, limit=10)

# Display the fetched sample.
data.head(15)

## load_as_spark

In [None]:
# URL of the `owid-covid-data` table: profile file path + `#` + fully
# qualified table name.
profile_file = "delta-sharing/open-datasets.share"
table_url = profile_file + "#delta_sharing.default.owid-covid-data"

In [None]:
# Load the shared table through the Spark reader, selecting the
# "deltaSharing" data source format.
df1 = spark.read.load(table_url, format="deltaSharing")

# Pull 10 rows back to the driver as a pandas DataFrame for display.
(
    df1
    .limit(10)
    .toPandas()
)

In [None]:
# Alternative: let `delta_sharing.load_as_spark` load the same table URL
# as a Spark DataFrame.
df2 = delta_sharing.load_as_spark(table_url)

# Pull 10 rows back as a pandas DataFrame for display.
(
    df2
    .limit(10)
    .toPandas()
)