In [0]:
import requests
import json
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from delta.tables import DeltaTable

In [0]:
# Declare a text widget named "start_date" (default "2020-04-22") and read its
# current value. The value is later interpolated into the `min_date` query
# parameter of the API request made by get_state_raw_data.
# NOTE(review): presumably expected in yyyy-MM-dd form, since the request
# appends "T00:00:00.000Z" to it — no validation happens here.
dbutils.widgets.text("start_date", "2020-04-22")
start_date = dbutils.widgets.get("start_date")

In [0]:
# Declare a text widget named "terminate_date" (default "2020-04-27") and read
# its current value into `end_date`. Note the widget name differs from the
# variable name: callers setting notebook parameters must use "terminate_date".
# The value is later interpolated into the `max_date` query parameter of the
# API request made by get_state_raw_data.
dbutils.widgets.text("terminate_date", "2020-04-27")
end_date = dbutils.widgets.get("terminate_date")

In [0]:
# Root folders (with trailing slash) of the bronze / silver / gold layers on
# the mounted storage account. Paths are concatenated directly with a dataset
# name, so the trailing slash is significant.
bronze_folder_path, silver_folder_path, gold_folder_path = (
    "dbfs:/mnt/marcelodevstaccount/bronze/",
    "dbfs:/mnt/marcelodevstaccount/silver/",
    "dbfs:/mnt/marcelodevstaccount/gold/",
)

In [0]:
def get_state_raw_data(state,schema):
    """Fetch one US state's records from the COVID-19 REST API and land them in bronze.

    The request window is taken from the notebook-level ``start_date`` and
    ``end_date`` values. The response is turned into a Spark DataFrame using
    ``schema``, a ``CHAVE_DATA_DIA`` column (``date`` formatted as yyyyMMdd) is
    added, and the result is persisted under
    ``bronze_folder_path + state`` (spaces removed from the state name):

    * if that path is already a Delta table, rows are upserted on
      (CHAVE_DATA_DIA, COUNTY);
    * otherwise the path is overwritten with JSON files partitioned by
      CHAVE_DATA_DIA.

    Args:
        state: State name; title-cased for the API query and stripped of
            spaces for the storage path.
        schema: Schema passed to ``spark.createDataFrame`` for the API
            response. It must provide a ``date`` column; the Delta merge
            additionally references a ``COUNTY`` column.

    Returns:
        A DataFrame read back from the bronze path for this state.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    url = (
        "https://webhooks.mongodb-stitch.com/api/client/v2.0/app/covid-19-qppza"
        "/service/REST-API/incoming_webhook/global_and_us"
        f"?country=US&state={state.title()}"
        f"&min_date={start_date}T00:00:00.000Z&max_date={end_date}T00:00:00.000Z"
    )
    # Without a timeout a stalled connection would block the job indefinitely.
    response = requests.get(url, timeout=60)
    # Fail fast on an error status instead of feeding an error payload into
    # createDataFrame (and possibly overwriting bronze data with it).
    response.raise_for_status()

    api_df = spark.createDataFrame(response.json(), schema)
    raw_df = api_df.withColumn('CHAVE_DATA_DIA', date_format(col('date'), "yyyyMMdd"))

    # Single source of truth for this state's bronze location.
    state_path = f"{bronze_folder_path}{state.replace(' ', '')}"

    if DeltaTable.isDeltaTable(spark, state_path):
        # Upsert: refresh rows already stored for the same day/county and
        # append the ones that are new.
        (
            DeltaTable.forPath(spark, state_path).alias("tgt")
            .merge(
                raw_df.alias("src"),
                "tgt.CHAVE_DATA_DIA = src.CHAVE_DATA_DIA AND tgt.COUNTY = src.COUNTY",
            )
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute()
        )
        stored_format = "delta"
    else:
        # NOTE(review): this fallback writes plain JSON, so a path created by
        # this function never becomes a Delta table and the merge branch above
        # is only reached if the path was made Delta by other means. Every run
        # through this branch replaces previously stored data. Confirm whether
        # bronze was meant to be written with format("delta").
        (
            raw_df.write.mode("overwrite")
            .partitionBy("CHAVE_DATA_DIA")
            .format("json")
            .save(state_path)
        )
        stored_format = "json"

    # Read back using the format that is actually stored at the path; reading
    # a Delta table directory with the JSON reader would not return its rows.
    return spark.read.format(stored_format).load(state_path)