## Sample: reading a Delta table on MinIO

In [1]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from dotenv import load_dotenv
import os


In [2]:
# Load variables from a .env file (if one is present) into the process
# environment so the os.getenv() lookups below can see them.
load_dotenv()

# Fail fast when a required setting is absent. Without this check a missing
# variable would be interpolated into the endpoint as "http://None:9000" and
# None would be handed to Spark as the S3A credentials.
_REQUIRED_ENV_VARS = ("HOST_ADDRESS", "MINIO_ACCESS_KEY", "MINIO_SECRET_KEY")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in the environment or in a .env file."
    )

HOST_ADDRESS = os.getenv("HOST_ADDRESS")
MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY")
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")

# All Spark settings for this notebook, grouped by concern.
_SPARK_SETTINGS = {
    # S3A filesystem pointed at the object store on port 9000 of HOST_ADDRESS.
    "spark.hadoop.fs.s3a.endpoint": f"http://{HOST_ADDRESS}:9000",
    "spark.hadoop.fs.s3a.access.key": MINIO_ACCESS_KEY,
    "spark.hadoop.fs.s3a.secret.key": MINIO_SECRET_KEY,
    # Configuration values are strings, so the boolean is spelled explicitly.
    "spark.hadoop.fs.s3a.path.style.access": "true",
    "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
    "spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
    # Delta Lake SQL extension and catalog implementation.
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    # Thrift URI of the Hive metastore used by enableHiveSupport() below.
    "hive.metastore.uris": "thrift://metastore:9083",
}

conf = SparkConf()
conf.setAppName("Sample read Delta table")
for _key, _value in _SPARK_SETTINGS.items():
    conf.set(_key, _value)

# Reuse an already-running session if there is one, otherwise start a new one.
spark = SparkSession.builder.config(conf=conf).enableHiveSupport().getOrCreate()

In [3]:
# Read the department table stored in Delta format in the "bronze" bucket.
df = (
    spark.read
    .format("delta")
    .load("s3a://bronze/adventure_works/bronze_humanresources_department")
)

In [4]:
# Print the first rows as a text table; these are show()'s default arguments
# (up to 20 rows, long cell values truncated), written out explicitly.
df.show(n=20, truncate=True)

+------------+--------------------+--------------------+-------------------+--------------------+---------+
|departmentid|                name|           groupname|       modifieddate|         last_update|month_key|
+------------+--------------------+--------------------+-------------------+--------------------+---------+
|           1|         Engineering|Research and Deve...|2008-04-30 00:00:00|2024-09-22 23:28:...|   200804|
|           2|         Tool Design|Research and Deve...|2008-04-30 00:00:00|2024-09-22 23:28:...|   200804|
|           3|               Sales| Sales and Marketing|2008-04-30 00:00:00|2024-09-22 23:28:...|   200804|
|           4|           Marketing| Sales and Marketing|2008-04-30 00:00:00|2024-09-22 23:28:...|   200804|
|           5|          Purchasing|Inventory Management|2008-04-30 00:00:00|2024-09-22 23:28:...|   200804|
|           6|Research and Deve...|Research and Deve...|2008-04-30 00:00:00|2024-09-22 23:28:...|   200804|
|           7|          Prod