In [2]:
import logging
import json
import os
import shutil
import requests
import pandas as pd
from datetime import datetime

# Use a logger instead of simple print statements for better compatibility with
# orchestrators and logging services (like AWS CloudWatch).
# basicConfig configures the root logger; the level is set to INFO here.
logging.basicConfig(level=logging.INFO)
# Logger named after the current module (__name__); the functions and cells below log through it.
log = logging.getLogger(__name__)

In [None]:
# Base URL of the exchange-rate API; the requested date is appended directly to it.
API_ROOT = "https://api.frankfurter.app/"

# Currency code -> human-readable currency name.
CURRENCIES = {
    "AUD": "Australian Dollar",
    "BGN": "Bulgarian Lev",
    "BRL": "Brazilian Real",
    "CAD": "Canadian Dollar",
    "CHF": "Swiss Franc",
    "CNY": "Chinese Renminbi Yuan",
    "CZK": "Czech Koruna",
    "DKK": "Danish Krone",
    "EUR": "Euro",
    "GBP": "British Pound",
    "HKD": "Hong Kong Dollar",
    "HUF": "Hungarian Forint",
    "IDR": "Indonesian Rupiah",
    "ILS": "Israeli New Sheqel",
    "INR": "Indian Rupee",
    "ISK": "Icelandic Króna",
    "JPY": "Japanese Yen",
    "KRW": "South Korean Won",
    "MXN": "Mexican Peso",
    "MYR": "Malaysian Ringgit",
    "NOK": "Norwegian Krone",
    "NZD": "New Zealand Dollar",
    "PHP": "Philippine Peso",
    "PLN": "Polish Złoty",
    "RON": "Romanian Leu",
    "SEK": "Swedish Krona",
    "SGD": "Singapore Dollar",
    "THB": "Thai Baht",
    "TRY": "Turkish Lira",
    "USD": "United States Dollar",
    "ZAR": "South African Rand",
}

# Currency every rate is converted into.
TO_CURRENCY = "EUR"
# The target currency is left out of the sources because it cannot be converted to itself.
FROM_CURRENCIES = [code for code in CURRENCIES if code != TO_CURRENCY]

def get_exchange_rates(api_root:str, date:str, from_currency:str, to_currency:str, timeout:float=10.0) -> str:
    '''
    Returns the exchange rate between two currencies on a given date.

    Parameters:
        api_root: Base URL of the API; the date is appended directly to it.
        date: Date string appended to api_root to select the day.
        from_currency: Currency code sent as the "from" query parameter.
        to_currency: Currency code sent as the "to" query parameter.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the export indefinitely.

    Returns:
        A JSON string with the keys "date", "from_currency", "amount",
        "to_currency" and "exchange_rate".

    Raises:
        requests.RequestException: on connection errors, timeouts, or an HTTP
            error status (via raise_for_status).
        ValueError: if the response contains no rates.
    '''
    url = api_root + date
    # Let requests build and encode the query string instead of concatenating it by hand
    response = requests.get(
        url,
        params={"from": from_currency, "to": to_currency},
        timeout=timeout,
    )
    # Logged before the status check so the status code is recorded for failed requests too
    log.info(f'Requested url: {response.url} with HTTP status code {response.status_code}')
    # Fail with a clear HTTP error rather than a confusing KeyError on an error payload
    response.raise_for_status()

    #Extract and transform the data
    data = response.json()
    rates = data.get("rates") or {}
    if not rates:
        raise ValueError(f'No rates returned for {from_currency} to {to_currency} on {date}')
    # Use the first (currency, rate) pair of the response; a separate name keeps
    # the to_currency parameter from being shadowed
    rate_currency, rate_value = next(iter(rates.items()))
    output_data = {
        "date": data["date"],
        "from_currency": data["base"],
        "amount": data["amount"],
        "to_currency": rate_currency,
        "exchange_rate": rate_value
    }
    return json.dumps(output_data)

def store_exchange_rates(api_response:str, path:str):
    '''
    Stores the exchange rate between two currencies on a given date as a CSV file.

    Parameters:
        api_response: JSON string with at least the keys "date", "from_currency"
            and "to_currency" (the format returned by get_exchange_rates).
        path: Directory to write the file into; it is created if it does not exist.

    The file is written to <path>/<from_currency>_<to_currency>_<date>.csv and
    holds a single row with the JSON fields plus an "updated_at" column.
    '''
    record = json.loads(api_response)
    ds = record["date"]
    from_currency = record["from_currency"]
    to_currency = record["to_currency"]

    # Make sure the target directory exists so the write does not fail on a fresh path
    if path:
        os.makedirs(path, exist_ok=True)

    #Use Pandas to convert to the required file format
    df = pd.json_normalize(record)
    #Add an ETL timestamp to know when the data was processed
    # NOTE(review): datetime.now() is a naive local-time timestamp - confirm whether UTC is wanted
    df = df.assign(updated_at=datetime.now())
    # I would use the parquet file format, but for simplicity's sake I'm using csv
    output_path = os.path.join(path, f'{from_currency}_{to_currency}_{ds}.csv')
    df.to_csv(output_path, index=False)

    log.info(f'Stored exchange rates for {from_currency} to {to_currency} on {ds} in {output_path}')

In [None]:
# This notebook cell performs a one-time export for all the dates
dates = ['2024-02-12', '2024-02-13']

data_path = '../data'
rates_path = f'{data_path}/exchange_rates'

for ds in dates:
    # Create or replace a folder named after the date. Using the date would allow partitioning the data in the future for performance
    # Named date_dir (not dir) so the builtin dir() stays usable in the rest of the notebook
    date_dir = f'{rates_path}/{ds}'
    if os.path.exists(date_dir):
        shutil.rmtree(date_dir)
    os.makedirs(date_dir)
    for currency in FROM_CURRENCIES:
        try:
            rates_json = get_exchange_rates(API_ROOT, ds, currency, TO_CURRENCY)
            store_exchange_rates(rates_json, date_dir)
        except Exception:
            # Best effort: one failing currency must not stop the export. log.exception keeps
            # the traceback and the message records which currency and date failed.
            log.exception(f'Failed to export {currency} to {TO_CURRENCY} for {ds}')