In [0]:
from pyspark.sql import *
from delta.tables import *


## Delta tables allow for DML opperations 

In [0]:
schema = 'locations_db'
table = 'countries'

In [0]:
countries_df = spark.read.table(f"{schema}.{table}") # How we can read a table as a Spark DataFrame. The paid version of Databricks will have a Catalog

In [0]:
display(countries_df.limit(5))

COUNTRY_ID,NAME,NATIONALITY,COUNTRY_CODE,ISO_ALPHA2,CAPITAL,POPULATION,AREA_KM2,REGION_ID,SUB_REGION_ID,INTERMEDIATE_REGION_ID,ORGANIZATION_REGION_ID
1,Afghanistan,Afghan,AFG,AF,Kabul,38041754.0,652230,30,30,,30
2,Albania,Albanian,ALB,AL,Tirana,2880917.0,28748,20,70,,20
3,Algeria,Algerian,DZA,DZ,Algiers,43053054.0,2381741,50,40,,20
4,American Samoa,American Samoan,ASM,AS,Pago Pago,55312.0,199,40,20,,30
5,Andorra,Andorran,AND,AD,Andorra la Vella,77142.0,468,20,70,,20


In [0]:
%sql
Show DATABASES

databaseName
default
locations_db


In [0]:
%sql
USE locations_db

In [0]:
%sql
select * from countries 
limit 5

COUNTRY_ID,NAME,NATIONALITY,COUNTRY_CODE,ISO_ALPHA2,CAPITAL,POPULATION,AREA_KM2,REGION_ID,SUB_REGION_ID,INTERMEDIATE_REGION_ID,ORGANIZATION_REGION_ID
1,Afghanistan,Afghan,AFG,AF,Kabul,38041754.0,652230,30,30,,30
2,Albania,Albanian,ALB,AL,Tirana,2880917.0,28748,20,70,,20
3,Algeria,Algerian,DZA,DZ,Algiers,43053054.0,2381741,50,40,,20
4,American Samoa,American Samoan,ASM,AS,Pago Pago,55312.0,199,40,20,,30
5,Andorra,Andorran,AND,AD,Andorra la Vella,77142.0,468,20,70,,20



### Delete records in SQL

In [0]:
%sql
DELETE FROM 
  countries 
WHERE
  REGION_ID = 30

num_affected_rows
51


### Delete in Python

In [0]:
countries_delta = DeltaTable.forName(spark, f"{schema}.{table}")

In [0]:
'''

This gives you a DeltaTable object, which is part of the Delta Lake API — it gives you extra functionality over Pyspark like:

.update()

.delete()

.merge()

.history()

'''

'\n\nThis gives you a DeltaTable object, which is part of the Delta Lake API — it gives you extra functionality over Pyspark like:\n\n.update()\n\n.delete()\n\n.merge()\n\n.history()\n\n'

In [0]:
countries_delta.delete("region_id = 50 and population > 500000") # With Delta tables, you are updating the table in the catelog, not a copy.

In [0]:
%sql
select * 
from 
  countries
where
  region_id = 50 and POPULATION > 500000

COUNTRY_ID,NAME,NATIONALITY,COUNTRY_CODE,ISO_ALPHA2,CAPITAL,POPULATION,AREA_KM2,REGION_ID,SUB_REGION_ID,INTERMEDIATE_REGION_ID,ORGANIZATION_REGION_ID



### Updating Records in SQL

In [0]:
%sql

UPDATE 
  countries
SET
  COUNTRY_CODE = "xxx"
WHERE
  REGION_ID = 10


num_affected_rows
57


In [0]:
%sql
SELECT
  *
FROM 
  countries
WHERE
  REGION_ID = 10
LIMIT
5

COUNTRY_ID,NAME,NATIONALITY,COUNTRY_CODE,ISO_ALPHA2,CAPITAL,POPULATION,AREA_KM2,REGION_ID,SUB_REGION_ID,INTERMEDIATE_REGION_ID,ORGANIZATION_REGION_ID
7,Anguilla,Anguillan,xxx,AI,The Valley,14869.0,91,10,10,60,40
9,Antigua and Barbuda,Antiguan or Barbudan,xxx,AG,St. John's,97118.0,442,10,10,60,40
10,Argentina,Argentine,xxx,AR,Buenos Aires,44780677.0,2780400,10,10,40,40
12,Aruba,Aruban,xxx,AW,Oranjestad,106314.0,180,10,10,60,40
16,Bahamas,Bahamian,xxx,BS,Nassau,389482.0,13943,10,10,60,40


In [0]:
countries_delta.update("region_id = 10",{'country_code':"'xxx'"}) # Note, passing a literal string as a sting, so that is why there are double quotes around xxx. THere needs to be single quotes as SQL would require. 

In [0]:
countries_delta.update("region_id = 20 and area_km2 > 600000",{'country_code':"'YYY'"})

In [0]:
%sql
SELECT
  *
FROM 
  countries
WHERE
  REGION_ID = 20  and AREA_KM2 > 6000000
LIMIT
5

COUNTRY_ID,NAME,NATIONALITY,COUNTRY_CODE,ISO_ALPHA2,CAPITAL,POPULATION,AREA_KM2,REGION_ID,SUB_REGION_ID,INTERMEDIATE_REGION_ID,ORGANIZATION_REGION_ID
181,Russian Federation,Russian,YYY,RU,Moscow,145872256.0,17098242,20,140,,20
