In [None]:
import os
from datetime import datetime

from pyspark.sql import functions as F
from pyspark.sql.types import *
from delta.tables import *

In [None]:
# Notebook-level labels: presumably the source system and the destination
# layer of this ingestion (TODO confirm against the storage layout).
SOURCE = "postgre"
DEST = "landing"

In [None]:
# JDBC connection settings for the PostgreSQL source.
# Each value can be overridden with an environment variable so that real
# credentials never have to be written into the notebook. The fallbacks are
# the values this notebook has always used.
url = os.environ.get("PG_JDBC_URL", "jdbc:postgresql://localhost:5432/tabelas")
user = os.environ.get("PG_USER", "postgres")
# NOTE(review): the fallback password is kept only for backward compatibility;
# set PG_PASSWORD anywhere this notebook runs against a real database.
password = os.environ.get("PG_PASSWORD", "123456")
driver = "org.postgresql.Driver"

In [None]:
def get_jdbc_data(qry):
  """Run ``qry`` against the JDBC source and return the resulting DataFrame.

  The connection settings are read from the notebook-level ``driver``,
  ``url``, ``user`` and ``password`` variables, and the read goes through
  the active ``spark`` session. No ``fetchSize`` option is set.
  """
  reader_options = (
    ("driver", driver),
    ("url", url),
    ("query", qry),
    ("user", user),
    ("password", password),
  )

  reader = spark.read.format("jdbc")
  for option_name, option_value in reader_options:
    reader = reader.option(option_name, option_value)

  return reader.load()

In [None]:
def ingest_data(df, table, pk_col, partition, mode):
  """Write ``df`` into the Delta table ``table`` using the requested mode.

  An ``INGESTION_TIME`` column holding the timestamp taken at call time is
  added to ``df`` before it is written.

  Args:
    df: Spark DataFrame to ingest.
    table: Name of the target table (passed to ``saveAsTable`` for
      append/overwrite and to ``DeltaTable.forName`` for merge).
    pk_col: Column used in the merge condition
      ``target.<pk_col> = update.<pk_col>``; not used by the other modes.
    partition: Column(s) passed to ``partitionBy`` for append and
      overwrite; not used by merge.
    mode: ``"append"``, ``"overwrite"`` or ``"merge"`` (case-insensitive).

  Raises:
    ValueError: If ``mode`` is not one of the supported modes.
  """
  mode = mode.lower()

  # Schema-evolution option that goes with each plain write mode.
  schema_options = {"append": "mergeSchema", "overwrite": "overwriteSchema"}

  # Fail loudly before touching Spark: an unknown mode used to fall through
  # every branch and silently write nothing.
  if mode not in schema_options and mode != "merge":
    raise ValueError(
      f"Unsupported mode {mode!r}; expected 'append', 'overwrite' or 'merge'"
    )

  df = df.withColumn('INGESTION_TIME', F.lit(datetime.now()))

  if mode == "merge":
    deltaTable = DeltaTable.forName(spark, table)
    print(f"Merge table: {table}")
    (deltaTable.alias("target")
     .merge(df.alias("update"), f"target.{pk_col} = update.{pk_col}")
     .whenMatchedUpdateAll()
     .whenNotMatchedInsertAll()
     .execute()  # the builder does nothing until execute() is called
    )
  else:
    # Prints "Append table: ..." or "Overwrite table: ...".
    print(f"{mode.capitalize()} table: {table}")
    (df.write
     .format("delta")
     .mode(mode)
     .option(schema_options[mode], "true")
     .partitionBy(partition)
     .saveAsTable(table)
    )


In [None]:
# Load the "people" rows whose id is >= 1000 over JDBC, then hand them to
# ingest_data in append mode, passing "city" as the partition column.
table = "people"
pk_col = "id"
# The table and key names are interpolated straight into the SQL text.
qry = f"select * from {table} where {pk_col} >= 1000"
df = get_jdbc_data(qry)
ingest_data(df, table, pk_col, "city", mode="append")