# In [8]:
import boto3
import logging
import os
import requests
import xml.etree.ElementTree as ET
from io import BytesIO
from zipfile import ZipFile
import csv


def download_xml():
    """Return the download link of the first DLTINS file listed by ESMA.

    Queries the ESMA FIRDS Solr endpoint (publication date 2022-01-01,
    instrument type DERIV, XML response) and scans the returned ``doc``
    entries in order.

    Returns:
        The text of the ``download_link`` field of the first ``doc`` entry
        whose ``file_type`` field is ``DLTINS``.

    Raises:
        requests.HTTPError: If the register answers with an error status.
        requests.Timeout: If the register does not answer in time.
        ValueError: If no DLTINS entry with a download link is present.
    """
    url = "https://registers.esma.europa.eu/solr/esma_registers_firds_files/select?q=*&fq=publication_date:%5B2022-01-01T00:00:00Z%20TO%202022-01-01T23:59:59Z%5D&fq=instrument_type:DERIV&wt=xml&indent=true&start=0&rows=100"
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    res = requests.get(url, timeout=30)
    # Fail here rather than trying to parse an HTTP error page as the index.
    res.raise_for_status()
    root = ET.fromstring(res.content)
    for doc in root.iter('doc'):
        # findtext() yields None for a missing field instead of raising.
        if doc.findtext("str[@name='file_type']") != 'DLTINS':
            continue
        download_url = doc.findtext("str[@name='download_link']")
        if download_url:
            return download_url
    # Previously this path fell through to an UnboundLocalError.
    raise ValueError("No DLTINS entry with a download link found in the ESMA response")


def download_zip(download_url):
    """Download a zip archive and open it from memory.

    Args:
        download_url: URL of the zip archive to fetch.

    Returns:
        A ``ZipFile`` backed by an in-memory copy of the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer in time.
        zipfile.BadZipFile: If the response body is not a valid zip archive.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    res = requests.get(download_url, timeout=30)
    # Surface HTTP errors directly instead of a misleading BadZipFile
    # raised while trying to open an error page as an archive.
    res.raise_for_status()
    return ZipFile(BytesIO(res.content))


def _local_name(tag):
    """Return an element tag without its ``{namespace}`` prefix."""
    # Non-string tags (e.g. comments) never match a field name.
    return tag.rsplit('}', 1)[-1] if isinstance(tag, str) else ''


def _child_text(parent, name):
    """Return the text of the first direct child called *name*, else None."""
    if parent is None:
        return None
    for child in parent:
        if _local_name(child.tag) == name:
            return child.text
    return None


def convert_to_csv(zipfile):
    """Extract one row per ``FinInstrmGnlAttrbts`` element from a zipped XML.

    The first archive member whose name ends with ``.xml`` is parsed. Element
    names are compared without their namespace, so both namespaced and plain
    documents are handled.

    Args:
        zipfile: An open ``ZipFile`` containing at least one ``.xml`` member.

    Returns:
        A list of rows in document order. Each row is
        ``[Id, FullNm, ClssfctnTp, CmmdtyDerivInd, NtnlCcy, Issr]``, where the
        first five values are children of the ``FinInstrmGnlAttrbts`` element
        and ``Issr`` is its sibling. A missing or empty element yields None.

    Raises:
        ValueError: If the archive contains no ``.xml`` member.
    """
    xml_names = [name for name in zipfile.namelist() if name.endswith('.xml')]
    if not xml_names:
        raise ValueError("Zip archive does not contain an .xml file")
    root = ET.fromstring(zipfile.read(xml_names[0]))

    # ElementTree elements carry no parent pointer, and find("../Issr") called
    # on an element cannot climb above that element (it always returns None),
    # so the sibling lookup needs an explicit child -> parent map.
    parent_of = {child: parent for parent in root.iter() for child in parent}

    rows = []
    for element in root.iter():
        # Skipping elements without a parent excludes the root, matching the
        # descendants-only semantics of the original ".//" search.
        if element not in parent_of:
            continue
        if _local_name(element.tag) != "FinInstrmGnlAttrbts":
            continue
        rows.append([
            _child_text(element, "Id"),
            _child_text(element, "FullNm"),
            _child_text(element, "ClssfctnTp"),
            _child_text(element, "CmmdtyDerivInd"),
            _child_text(element, "NtnlCcy"),
            _child_text(parent_of[element], "Issr"),
        ])
    return rows


def upload_to_s3(rows):
    """Write *rows* to a CSV file and upload it to S3 as ``data.csv``.

    The destination bucket is read from the ``BUCKET_NAME`` environment
    variable. The CSV starts with a fixed header row followed by *rows*.

    Args:
        rows: Iterable of rows, each holding six values in header order.

    Raises:
        RuntimeError: If ``BUCKET_NAME`` is not set or is empty.
    """
    import tempfile

    # Check configuration first so a missing bucket fails with a clear
    # message before any file is written or any AWS call is made.
    bucket_name = os.environ.get('BUCKET_NAME')
    if not bucket_name:
        raise RuntimeError("BUCKET_NAME environment variable is not set")

    s3 = boto3.resource('s3')
    filename = 'data.csv'
    header = [
        'FinInstrmGnlAttrbts.Id',
        'FinInstrmGnlAttrbts.FullNm',
        'FinInstrmGnlAttrbts.ClssfctnTp',
        'FinInstrmGnlAttrbts.CmmdtyDerivInd',
        'FinInstrmGnlAttrbts.NtnlCcy',
        'Issr',
    ]
    # Stage the file in a temporary directory: the current working directory
    # may be read-only, and the staged copy is removed once uploaded.
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = os.path.join(tmp_dir, filename)
        # Explicit UTF-8 keeps the output independent of the host locale.
        with open(local_path, 'w', newline='', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(header)
            csvwriter.writerows(rows)
        s3.Bucket(bucket_name).upload_file(local_path, filename)

def main(event, context):
    """Run the pipeline end to end: locate, download, convert, upload.

    Calls ``download_xml``, ``download_zip``, ``convert_to_csv`` and
    ``upload_to_s3`` in that order, emitting a progress message through
    ``logging.info`` around each step.

    Args:
        event: Not read by this function.
        context: Not read by this function.
    """
    # NOTE(review): messages go out at INFO level via the root logger; they
    # are only visible if logging is configured to allow INFO - confirm.
    log = logging.info

    log("Starting execution...")

    link = download_xml()
    log(f"Download URL: {link}")

    archive = download_zip(link)
    log("Downloaded zip file")

    records = convert_to_csv(archive)
    log(f"Converted XML to CSV with {len(records)} rows")

    upload_to_s3(records)
    log("Uploaded to S3")

    log("Execution complete")