In [1]:
import xml.etree.ElementTree as ET  # For parsing XML files
import urllib.request  # For making HTTP requests
import zipfile  # For handling zip files
import boto3  # For interacting with AWS S3
import pandas as pd  # For data manipulation and analysis
import csv  # For reading and writing CSV files
import logging  # For logging and capturing error messages


# Logging setup: INFO and above, each record prefixed with a timestamp and its level
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

# Logger used by the functions and cells in this notebook
logger = logging.getLogger(__name__)


def download_xml_file(download_link, zip_path="file.zip"):
    """
    Downloads the zip archive behind the given download link.

    Args:
        download_link (str): The URL of the zip archive to download.
        zip_path (str): Local path the archive is saved to. Defaults to
            "file.zip" in the current working directory.

    Returns:
        str | None: The path the archive was saved to, or None when the
        download failed (the error is logged, not raised).
    """
    try:
        urllib.request.urlretrieve(download_link, zip_path)
        logger.info("XML file downloaded from: %s", download_link)
        return zip_path
    except Exception as e:
        # Best effort: callers check for None instead of handling exceptions.
        logger.error("Failed to download XML file: %s", e)
        return None

def extract_xml_file(zip_file_path, xml_member="DLTINS_20210117_01of01.xml", extract_dir=None):
    """
    Extracts one XML member from the given zip file.

    Args:
        zip_file_path (str): The file path of the zip file containing the XML file.
        xml_member (str): Name of the archive member to extract. Defaults to
            "DLTINS_20210117_01of01.xml".
        extract_dir (str | None): Directory to extract into. None (the default)
            extracts into the current working directory.

    Returns:
        str | None: The file path of the extracted XML file, or None when the
        archive could not be opened or the member could not be extracted
        (the error is logged, not raised).
    """
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            xml_file = zip_ref.extract(xml_member, path=extract_dir)
            logger.info("XML file extracted: %s", xml_file)
            return xml_file
    except Exception as e:
        # Best effort: callers check for None instead of handling exceptions.
        logger.error("Failed to extract XML file from zip: %s", e)
        return None

if __name__ == '__main__':
    # Load the local index XML that lists the downloadable files
    tree = ET.parse('file.xml')
    root = tree.getroot()

    # Find the first download link whose file_type is DLTINS.
    # findtext() returns None for a missing element instead of raising, so an
    # index without a usable entry falls through to the warning below.
    download_link = None
    for doc in root.iterfind('.//doc'):
        file_type = (doc.findtext('str[@name="file_type"]') or '').strip()
        if file_type != 'DLTINS':
            continue
        candidate = (doc.findtext('str[@name="download_link"]') or '').strip()
        if candidate:
            download_link = candidate
            break

    # Download and extract the XML file
    if download_link:
        zip_file_path = download_xml_file(download_link)
        if zip_file_path:
            xml_file_path = extract_xml_file(zip_file_path)
            if xml_file_path:
                # Perform further processing with the extracted XML file
                pass
            else:
                logger.error("Failed to extract XML file.")
    else:
        logger.warning("No download link found for XML file.")


2023-04-23 11:28:04 - INFO - XML file downloaded from: http://firds.esma.europa.eu/firds/DLTINS_20210117_01of01.zip
2023-04-23 11:28:05 - INFO - XML file extracted: C:\Users\adity\Data Enginner Assingment\DLTINS_20210117_01of01.xml


In [2]:

# Logging is configured once by logging.basicConfig() in the first cell. Calling
# basicConfig() again has no effect once the root logger has handlers, so the
# duplicate call is dropped here and only the logger handle is bound.
logger = logging.getLogger(__name__)

def remove_namespace(element):
    """
    Remove the namespace prefix from an Element object's tag, in place.

    A namespaced tag has the form "{namespace-uri}LocalName"; after the call the
    tag is just "LocalName". Tags without a namespace are left unchanged.
    Attribute names are not touched.

    Args:
        element (Element): The Element object whose namespace prefix needs to be removed.

    Returns:
        None
    """
    tag = element.tag
    # Comment and processing-instruction nodes carry a function as their tag,
    # not a string; there is nothing to strip, so skip them instead of failing.
    if not isinstance(tag, str):
        return
    _, closing_brace, local_name = tag.partition('}')
    if closing_brace:
        element.tag = local_name

try:
    # Load the XML file
    tree = ET.parse('DLTINS_20210117_01of01.xml')
    root = tree.getroot()

    # Strip the namespace from every tag so the plain element paths below match
    for element in root.iter():
        remove_namespace(element)

    # Write the modified XML document to a new file
    tree.write('no_namespace.xml')

    logger.info("XML file processed and new XML file with no namespace is created successfully.")

    # (CSV column name, element path relative to each FinInstrm element).
    # Only values under TermntdRcrd are read; a FinInstrm without that child
    # yields a row of empty strings.
    csv_columns = [
        ('FinInstrmGnlAttrbts.Id', 'TermntdRcrd/FinInstrmGnlAttrbts/Id'),
        ('FinInstrmGnlAttrbts.FullNm', 'TermntdRcrd/FinInstrmGnlAttrbts/FullNm'),
        ('FinInstrmGnlAttrbts.ClssfctnTp', 'TermntdRcrd/FinInstrmGnlAttrbts/ClssfctnTp'),
        ('FinInstrmGnlAttrbts.CmmdtyDerivInd', 'TermntdRcrd/FinInstrmGnlAttrbts/CmmdtyDerivInd'),
        ('FinInstrmGnlAttrbts.NtnlCcy', 'TermntdRcrd/FinInstrmGnlAttrbts/NtnlCcy'),
        ('Issr', 'TermntdRcrd/Issr'),
    ]

    # Create a CSV file and write the header followed by one row per FinInstrm
    with open('output.csv', 'w', encoding='utf-8', errors='replace', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([column_name for column_name, _ in csv_columns])

        for elem in root.iterfind('.//FinInstrm'):
            # findtext() yields '' for a missing or empty element, which is what
            # ends up in the CSV cell; no notebook-level name shadows a builtin.
            writer.writerow([elem.findtext(path, default='') for _, path in csv_columns])

    logger.info("CSV file created successfully.")

except Exception as e:
    # Best effort: report the failure in the log rather than aborting the notebook run.
    logger.error("An error occurred: %s", e)


2023-04-23 11:28:45 - INFO - XML file processed and new XML file with no namespace is created successfully.
2023-04-23 11:28:50 - INFO - CSV file created successfully.


In [4]:
# Read the generated CSV back into a DataFrame and preview its first rows
csv_file = pd.read_csv(
    'output.csv',
    low_memory=False,
)
logger.info("CSV file loaded successfully.")
csv_file.head()

2023-04-23 11:29:25 - INFO - CSV file loaded successfully.


Unnamed: 0,FinInstrmGnlAttrbts.Id,FinInstrmGnlAttrbts.FullNm,FinInstrmGnlAttrbts.ClssfctnTp,FinInstrmGnlAttrbts.CmmdtyDerivInd,FinInstrmGnlAttrbts.NtnlCcy,Issr
0,DE000A1R07V3,Kreditanst.f.Wiederaufbau Anl.v.2014 (2021),DBFTFB,False,EUR,549300GDPG70E3MBBU98
1,DE000A1R07V3,KFW 1 5/8 01/15/21,DBFTFB,False,EUR,549300GDPG70E3MBBU98
2,DE000A1R07V3,Kreditanst.f.Wiederaufbau Anl.v.2014 (2021),DBFTFB,False,EUR,549300GDPG70E3MBBU98
3,DE000A1R07V3,Kreditanst.f.Wiederaufbau Anl.v.2014 (2021),DBFTFB,False,EUR,549300GDPG70E3MBBU98
4,DE000A1X3J56,IKB Deutsche Industriebank AG Stufenz.MTN-IHS ...,DTVUFB,False,EUR,PWEFG14QWWESISQ84C69


In [4]:
def create_s3_bucket(bucket_name, s3_client=None, region_name=None):
    """
    Creates an S3 bucket with the given bucket name.

    No access keys are embedded in the code. When no client is supplied, one is
    created with boto3.client('s3'), which looks up credentials through boto3's
    own configuration (for example environment variables or the shared
    credentials file).

    Args:
        bucket_name (str): The name of the S3 bucket to create.
        s3_client: Optional, already configured S3 client to use. When None a
            new client is created.
        region_name (str | None): Optional region for the bucket. For any region
            other than 'us-east-1' the region is sent as the bucket's
            LocationConstraint. None keeps the plain create_bucket call.

    Returns:
        None. Failures are logged, not raised.
    """
    # Same logger name as the notebook-level `logger`; looked up locally so the
    # function does not depend on another cell having bound that name.
    log = logging.getLogger(__name__)
    try:
        if s3_client is None:
            s3_client = boto3.client('s3', region_name=region_name)

        request = {'Bucket': bucket_name}
        if region_name and region_name != 'us-east-1':
            # us-east-1 must not be given as a LocationConstraint; every other
            # region has to be.
            request['CreateBucketConfiguration'] = {'LocationConstraint': region_name}

        # Create the S3 bucket
        s3_client.create_bucket(**request)
        log.info('S3 bucket %s created successfully.', bucket_name)
    except Exception as e:
        log.error('Error creating S3 bucket %s: %s', bucket_name, e)

def upload_csv_to_s3(bucket_name, file_path, s3_key, s3_client=None):
    """
    Uploads a CSV file to the specified S3 bucket.

    No access keys are embedded in the code. When no client is supplied, one is
    created with boto3.client('s3'), which looks up credentials through boto3's
    own configuration (for example environment variables or the shared
    credentials file).

    Args:
        bucket_name (str): The name of the S3 bucket to upload the CSV file to.
        file_path (str): The local file path of the CSV file to upload.
        s3_key (str): The S3 key or object name to use for the uploaded file.
        s3_client: Optional, already configured S3 client to use. When None a
            new client is created.

    Returns:
        None. Failures are logged, not raised.
    """
    # Same logger name as the notebook-level `logger`; looked up locally so the
    # function does not depend on another cell having bound that name.
    log = logging.getLogger(__name__)
    try:
        if s3_client is None:
            s3_client = boto3.client('s3')

        # Upload the CSV file to S3
        s3_client.upload_file(file_path, bucket_name, s3_key)
        log.info('File %s uploaded to S3 bucket %s with key %s.', file_path, bucket_name, s3_key)
    except Exception as e:
        log.error('Error uploading file %s to S3 bucket %s: %s', file_path, bucket_name, e)

if __name__ == '__main__':
    # Target bucket, plus the local CSV and the object key it is stored under
    # (the file keeps the same name in S3 as on disk)
    bucket_name = 'adimani'
    file_path = s3_key = 'output.csv'

    # Create the bucket first, then upload the CSV into it
    create_s3_bucket(bucket_name)
    upload_csv_to_s3(bucket_name, file_path, s3_key)


2023-04-22 23:00:05 - INFO - S3 bucket adimani created successfully.
2023-04-22 23:00:08 - INFO - File output.csv uploaded to S3 bucket adimani with key output.csv.
