In [9]:
# Notebook parameters, each looked up by name via oidlUtils.parameters.getParameter.
# The second argument is presumably the fallback used when the parameter is not
# supplied (verify against the oidlUtils docs). Replace the placeholder values
# below with ones that match your environment.

# Root storage URI; the Delta files are written under <oci_bucket>/<p_folder_name>/
oci_bucket = oidlUtils.parameters.getParameter(
    "OCI_BUCKET", "oci://<your-bucket>@<your-namespace>"
)

# Parts of the <catalog>.<schema>.<table> identifier used in the SQL statements
p_catalog_name = oidlUtils.parameters.getParameter("CATALOG_NAME", "default")
p_schema_name = oidlUtils.parameters.getParameter("SCHEMA_NAME", "default")
p_table_name = oidlUtils.parameters.getParameter("TABLE_NAME", "deltatab")

# Folder beneath oci_bucket that holds the Delta table data
p_folder_name = oidlUtils.parameters.getParameter("FOLDER_NAME", "mydata")


In [10]:
import pyspark
from pyspark.sql.types import *
from pyspark.sql.functions import *

In [11]:
# Build a three-row DataFrame with an explicit schema, then persist it in Delta
# format at <oci_bucket>/<p_folder_name>/ using overwrite mode.

# Column layout: four nullable string columns followed by a nullable integer age
schema = (
    StructType()
    .add("firstname", StringType(), True)
    .add("lastname", StringType(), True)
    .add("house", StringType(), True)
    .add("location", StringType(), True)
    .add("age", IntegerType(), True)
)

# One tuple per row, in the same column order as the schema
data = [
    ("Robert", "Baratheon", "Baratheon", "Storms End", 48),
    ("Eddard", "Stark", "Stark", "Winterfell", 46),
    ("Jamie", "Lannister", "Lannister", "Casterly Rock", 29),
]

sample_dataframe = spark.createDataFrame(data=data, schema=schema)

# "overwrite" replaces whatever is already stored at the target path
(
    sample_dataframe.write
    .format("delta")
    .mode("overwrite")
    .save(f"{oci_bucket}/{p_folder_name}/")
)

In [12]:
# Register <catalog>.<schema>.<table> as a Delta table whose LOCATION is the
# folder <oci_bucket>/<p_folder_name>/; "if not exists" makes this a no-op when
# the table is already defined. The trailing ';' hides the cell's return value.
spark.sql(
    f"create table if not exists {p_catalog_name}.{p_schema_name}.{p_table_name} "
    f"USING DELTA LOCATION '{oci_bucket}/{p_folder_name}/'"
);

In [13]:
# Select every row through the catalog table name and print the result
spark.sql(
    f"select * from {p_catalog_name}.{p_schema_name}.{p_table_name}"
).show()

+---------+---------+---------+-------------+---+
|firstname| lastname|    house|     location|age|
+---------+---------+---------+-------------+---+
|    Jamie|Lannister|Lannister|Casterly Rock| 29|
|   Robert|Baratheon|Baratheon|   Storms End| 48|
|   Eddard|    Stark|    Stark|   Winterfell| 46|
+---------+---------+---------+-------------+---+



In [14]:
# Schema evolution: append two rows that carry an extra integer "salary" column
# to the Delta data at <oci_bucket>/<p_folder_name>/.

# Same five columns as before, plus the new nullable integer salary column
schema = (
    StructType()
    .add("firstname", StringType(), True)
    .add("lastname", StringType(), True)
    .add("house", StringType(), True)
    .add("location", StringType(), True)
    .add("age", IntegerType(), True)
    .add("salary", IntegerType(), True)
)

# One tuple per row; the last value is the salary
data = [
    ("Jim", "Benson", "Hillmount", "Glasgow", 34, 100),
    ("Jen", "Oliver", "Sleepy Hollow", "Cheddar", 37, 200),
]

sample_dataframe = spark.createDataFrame(data=data, schema=schema)

# mergeSchema=true asks Delta to accept the additional column on append.
# Note: this cell appends, so running it again adds the same two rows again.
(
    sample_dataframe.write
    .format("delta")
    .mode("append")
    .option("mergeSchema", "true")
    .save(f"{oci_bucket}/{p_folder_name}/")
)