In [32]:
import pyspark
from delta import *

In [33]:
# Assemble the session builder one setting at a time: first the application
# name, then the two Delta-specific options (the SQL extension class and the
# catalog implementation registered under `spark_catalog`).
builder = pyspark.sql.SparkSession.builder.appName("MyApp")
builder = builder.config(
    "spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension"
)
builder = builder.config(
    "spark.sql.catalog.spark_catalog",
    "org.apache.spark.sql.delta.catalog.DeltaCatalog",
)

In [34]:
# Pass the builder through the `delta` package helper, then start the session
# (or reuse one that already exists).
spark = (
    configure_spark_with_delta_pip(builder)
    .getOrCreate()
)

## Create a Delta table

In [59]:
# Start from a clean slate: remove the table if an earlier run left it behind.
spark.sql(
    "drop table if exists `my_cool_table`"
)

DataFrame[]

In [60]:
# Column names for the demo table, in the same order as the tuple fields below.
columns = ["language", "speakers"]

# NOTE(review): the `speakers` values are strings, so the column is stored as a
# string type. Presumably these are counts in billions -- confirm whether a
# numeric column is wanted instead.
data = [("English", "1.5"), ("Mandarin", "1.1"), ("Hindi", "0.6")]

# Build the DataFrame directly from the local list. This replaces the earlier
# `parallelize(...)` + `toDF(...)` detour through an RDD, which is not needed
# for a small in-memory list.
df = spark.createDataFrame(data, columns)

In [62]:
# Persist the DataFrame as a Delta table. The table name is left unqualified so
# it resolves against the current database, exactly like every other statement
# in this notebook that refers to `my_cool_table` (drop, select, alter).
df.write.format("delta").saveAsTable("my_cool_table")

                                                                                

In [63]:
# Read the table back and print its rows.
(
    spark.sql("select * from `my_cool_table`")
    .show()
)

+--------+--------+
|language|speakers|
+--------+--------+
|Mandarin|     1.1|
| English|     1.5|
|   Hindi|     0.6|
+--------+--------+



## Drop a column from the Delta table

In [54]:
# Switch the table to name-based column mapping and set the protocol to
# reader version 2 / writer version 5. Per the Delta Lake documentation,
# column mapping is the prerequisite for the DROP COLUMN issued next.
spark.sql(
    """ALTER TABLE `my_cool_table` SET TBLPROPERTIES (
   'delta.columnMapping.mode' = 'name',
   'delta.minReaderVersion' = '2',
   'delta.minWriterVersion' = '5')"""
)

DataFrame[]

In [55]:
# Remove the `language` column from the table.
spark.sql(
    "alter table `my_cool_table` drop column language"
)

                                                                                

DataFrame[]

In [56]:
# Print the table again to see which columns remain after the drop.
(
    spark.sql("select * from `my_cool_table`")
    .show()
)

+--------+
|speakers|
+--------+
|     1.1|
|     1.5|
|     0.6|
+--------+



## Cleanup

In [64]:
# Cleanup: drop the demo table so the notebook leaves nothing behind.
spark.sql(
    "drop table if exists `my_cool_table`"
)

DataFrame[]