In [0]:
import os

import requests
from pyspark.sql.functions import to_date, year, month, dayofmonth
from pyspark.sql.types import StructType, StructField, StringType, DoubleType

In [0]:
def extract_data(data, base="BRL", api_key=None, timeout=30):
    """Fetch exchange rates from the APILayer Exchange Rates Data API.

    Args:
        data: Path segment appended to the endpoint URL. The notebook passes
            a date string such as "2023-01-01".
        base: Currency code sent as the ``base`` query parameter.
        api_key: Key sent in the ``apikey`` request header. When omitted, the
            ``APILAYER_API_KEY`` environment variable is used instead.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If no API key was passed and ``APILAYER_API_KEY`` is unset.
        requests.HTTPError: If the response status is anything other than 200.
            Connection and timeout errors raised by ``requests`` propagate
            unchanged.
    """
    # Credentials must not live in notebook source: notebooks are shared,
    # exported and committed together with their code.
    # NOTE(review): the key that used to be hard-coded here should be treated
    # as leaked and rotated.
    key = api_key or os.environ.get("APILAYER_API_KEY")
    if not key:
        raise RuntimeError(
            "No API key available: pass api_key=... or set the "
            "APILAYER_API_KEY environment variable."
        )

    # An explicit timeout keeps a scheduled run from hanging forever on a
    # stalled connection (requests waits indefinitely by default).
    response = requests.get(
        f"https://api.apilayer.com/exchangerates_data/{data}",
        headers={"apikey": key},
        params={"base": base},
        timeout=timeout,
    )

    # Any status other than 200 is still treated as a failure, but the error
    # now carries the real status code and the start of the body instead of
    # a blanket "Not Found".
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Exchange rates request for {data!r} failed with status "
            f"{response.status_code}: {response.text[:200]}",
            response=response,
        )

    return response.json()

In [0]:
def save_to_delta(extraction, path="/Volumes/airflow_databricks/default/bronze"):
    """Write one exchange-rate extraction to a date-partitioned Delta table.

    Each entry of ``extraction["rates"]`` becomes one row with the columns
    ``moeda`` (currency code), ``taxa`` (rate as a double), ``base`` and
    ``data_dt`` (the extraction date parsed by ``to_date``), plus the
    partition columns ``ano``, ``mes`` and ``dia`` derived from ``data_dt``.

    Args:
        extraction: Mapping with the keys ``"base"``, ``"date"`` and
            ``"rates"``, where ``"rates"`` maps a currency code to a value
            accepted by ``float()``.
        path: Location of the Delta table. Defaults to the bronze location
            that was previously hard-coded.

    Raises:
        KeyError: If ``extraction`` lacks one of the required keys.
    """
    base_currency = extraction["base"]
    rate_date = extraction["date"]

    rows = [
        {"moeda": currency, "taxa": float(rate), "base": base_currency, "data": rate_date}
        for currency, rate in extraction["rates"].items()
    ]

    schema = StructType([
        StructField("moeda", StringType(), True),
        StructField("taxa", DoubleType(), True),
        StructField("base", StringType(), True),
        StructField("data", StringType(), True),
    ])

    # `spark` is not defined in this notebook; it is expected to be the
    # session object provided by the notebook runtime.
    df = (
        spark.createDataFrame(rows, schema=schema)
        .withColumn("data_dt", to_date("data"))
        .withColumn("ano", year("data_dt"))
        .withColumn("mes", month("data_dt"))
        .withColumn("dia", dayofmonth("data_dt"))
        .drop("data")  # the raw string is redundant once data_dt exists
    )

    # A plain overwrite replaces the WHOLE table, so loading one date would
    # discard every date loaded earlier. Dynamic partition overwrite replaces
    # only the (ano, mes, dia) partitions present in `df`, which keeps a
    # re-run for the same date idempotent while preserving other dates.
    (
        df.write
        .format("delta")
        .mode("overwrite")
        .option("partitionOverwriteMode", "dynamic")
        .partitionBy("ano", "mes", "dia")
        .save(path)
    )

    print(f"Data saved to delta lake at {path}")

In [0]:
# Fetch the rates for 2023-01-01 (default base currency) and persist them
# to the Delta table.
cotacoes = extract_data(data="2023-01-01")
save_to_delta(extraction=cotacoes)
