In [1]:
import csv
import requests
import xml.etree.ElementTree as ET
import zipfile
import pandas as pd
import boto3
from io import StringIO
import logging

'''The Lambda class takes one parameter:

url: the URL of the XML document that is parsed to find the first download
link whose file_type is DLTINS.

The class has three methods:

download_link: downloads the XML found at the URL and parses it to produce
the download link.
zip_extraction: downloads the zip file and extracts the XML file from it.
xml_to_csv: parses the XML file and converts it to tabular (CSV-ready) form.'''
class Lambda:
    """Locate, download and flatten a ``DLTINS`` XML file.

    The three public methods are meant to be called in sequence:

    1. ``download_link`` fetches the XML document at ``self.url`` and returns
       the ``download_link`` of the first ``<doc>`` whose ``file_type`` is
       ``DLTINS``.
    2. ``zip_extraction`` downloads the zip archive behind that link, extracts
       it into the current working directory and returns the XML member name.
    3. ``xml_to_csv`` parses that XML file and returns a ``pandas.DataFrame``
       with one row per ``TermntdRcrd`` element.

    :param url: URL of the XML index document to search for the DLTINS link.
    """

    # Seconds to wait on the remote server before a request is abandoned;
    # without a timeout ``requests`` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 60

    def __init__(self, url=None):
        self.url = url
        self.logger = logging.getLogger('lambda_function')
        self.logger.setLevel(logging.INFO)

    @staticmethod
    def _local_name(tag):
        """Return an element tag without its ``{namespace}`` prefix."""
        if not isinstance(tag, str):
            # Comments / processing instructions carry a non-string tag.
            return ''
        return tag.rsplit('}', 1)[-1]

    def download_link(self):
        """Return the download link of the first ``DLTINS`` entry.

        The response body is saved to ``registers.xml`` in the current working
        directory and then parsed from that file.

        :return: text of the matching ``<str name="download_link">`` element.
        :raises Exception: on HTTP/parse failure or when no ``DLTINS`` entry
            with a download link exists. The error is logged and re-raised.
        """
        try:
            resp = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
            # Fail here on 4xx/5xx instead of trying to parse an error page.
            resp.raise_for_status()
            with open('registers.xml', 'wb') as f:
                f.write(resp.content)
            root = ET.parse('registers.xml').getroot()
            for item in root.iter("doc"):
                file_type = item.find("str[@name='file_type']")
                # A <doc> without a file_type entry is skipped, not fatal.
                if file_type is None or file_type.text != 'DLTINS':
                    continue
                target = item.find("str[@name='download_link']")
                if target is not None and target.text:
                    return target.text
            raise Exception('Could not find download link for file_type DLTINS')
        except Exception as e:
            self.logger.error(f"Error in download_link: {e}")
            raise

    def zip_extraction(self, link=None):
        """Download the zip archive at ``link`` and extract it.

        The archive is saved as ``zip_file.zip`` and all of its members are
        extracted into the current working directory.

        :param link: URL of the zip archive.
        :return: name of the first ``.xml`` member, or of the first member
            when none has an ``.xml`` extension.
        :raises Exception: on HTTP failure, a corrupt archive, or an empty
            archive. The error is logged and re-raised.
        """
        try:
            zip_file = requests.get(link, timeout=self.REQUEST_TIMEOUT)
            zip_file.raise_for_status()
            with open('zip_file.zip', 'wb') as f:
                f.write(zip_file.content)
            with zipfile.ZipFile('zip_file.zip', 'r') as archive:
                members = archive.namelist()
                # Check before indexing so an empty archive reports the
                # intended message rather than an IndexError.
                if not members:
                    raise Exception('Could not extract xml file from zip')
                xml_file = next(
                    (name for name in members if name.lower().endswith('.xml')),
                    members[0],
                )
                archive.extractall('')
            return xml_file
        except Exception as e:
            self.logger.error(f"Error in zip_extraction: {e}")
            raise

    def xml_to_csv(self, xml=None):
        """Flatten every ``TermntdRcrd`` element of ``xml`` into a DataFrame.

        For each record, the ``Id``, ``FullNm``, ``ClssfctnTp``,
        ``CmmdtyDerivInd`` and ``NtnlCcy`` children of its
        ``FinInstrmGnlAttrbts`` child, plus the text of its ``Issr`` child,
        become one row. Values that are absent stay ``None``.

        Tags are compared by exact local name (namespace stripped), so an
        element whose name merely contains a wanted name (for example
        ``IdExtra``) cannot overwrite the real value.

        :param xml: path or file object accepted by ``ET.parse``.
        :return: DataFrame with columns ``FinInstrmGnlAttrbts.<child>`` for
            each child above, followed by ``Issr``.
        :raises Exception: when the XML cannot be read or parsed. The error is
            logged and re-raised.
        """
        pattern = 'FinInstrmGnlAttrbts'
        children = ['Id', 'FullNm', 'ClssfctnTp', 'CmmdtyDerivInd', 'NtnlCcy']
        tag = 'Issr'
        parent = 'TermntdRcrd'
        cols = [pattern + '.' + k for k in children]
        cols.append(tag)
        # Child name -> position in the row, replacing the nested index scan.
        column_of = {name: idx for idx, name in enumerate(children)}
        try:
            root = ET.parse(xml).getroot()
            rows = []
            for record in root.iter():
                if self._local_name(record.tag) != parent:
                    continue
                entry = [None] * len(cols)
                for child in record:
                    name = self._local_name(child.tag)
                    if name == pattern:
                        for attribute in child:
                            idx = column_of.get(self._local_name(attribute.tag))
                            if idx is not None:
                                entry[idx] = attribute.text
                    elif name == tag:
                        # Issr is always the last column.
                        entry[-1] = child.text
                rows.append(entry)
            return pd.DataFrame(rows, columns=cols)
        except Exception as e:
            self.logger.error(f"Error in xml_to_csv: {e}")
            raise

if __name__ == '__main__':
    # Requirement 1: query the register for the publication-date window below;
    # the XML response is downloaded and searched for the DLTINS download link.
    # The literal is split only for readability; the resulting URL is unchanged.
    url = (
        "https://registers.esma.europa.eu/solr/esma_registers_firds_files/select"
        "?q=*&fq=publication_date:%5B2021-01-17T00:00:00Z+TO+2021-01-19T23:59:59Z%5D"
        "&wt=xml&indent=true&start=0&rows=100"
    )
    p = Lambda(url)
    zip_link = p.download_link()
    xml_file = p.zip_extraction(zip_link)
    df = p.xml_to_csv(xml_file)
    # index=False keeps the local file identical in shape to the copy uploaded
    # to S3; otherwise the row index is written as an extra unnamed column.
    df.to_csv('output.csv', index=False)

In [2]:
# Preview the first rows of the extracted DataFrame as a sanity check.
df.head()

Unnamed: 0,FinInstrmGnlAttrbts.Id,FinInstrmGnlAttrbts.FullNm,FinInstrmGnlAttrbts.ClssfctnTp,FinInstrmGnlAttrbts.CmmdtyDerivInd,FinInstrmGnlAttrbts.NtnlCcy,Issr
0,DE000A1R07V3,Kreditanst.f.Wiederaufbau Anl.v.2014 (2021),DBFTFB,False,EUR,549300GDPG70E3MBBU98
1,DE000A1R07V3,KFW 1 5/8 01/15/21,DBFTFB,False,EUR,549300GDPG70E3MBBU98
2,DE000A1R07V3,Kreditanst.f.Wiederaufbau Anl.v.2014 (2021),DBFTFB,False,EUR,549300GDPG70E3MBBU98
3,DE000A1R07V3,Kreditanst.f.Wiederaufbau Anl.v.2014 (2021),DBFTFB,False,EUR,549300GDPG70E3MBBU98
4,DE000A1X3J56,IKB Deutsche Industriebank AG Stufenz.MTN-IHS ...,DTVUFB,False,EUR,PWEFG14QWWESISQ84C69


In [3]:
# Store the csv from step 4) in an AWS S3 bucket


In [4]:
# SECURITY: AWS keys must never be embedded in a notebook. boto3 resolves
# credentials through its default chain (the AWS_ACCESS_KEY_ID /
# AWS_SECRET_ACCESS_KEY environment variables, the shared ~/.aws/credentials
# file, or an attached IAM role), so none are passed here.
# The key pair that used to be hard-coded in this cell has been exposed and
# must be revoked and rotated in IAM.
s3 = boto3.client("s3")

# Serialise the DataFrame to CSV in memory (no index column) and upload it.
csv_buf = StringIO()
df.to_csv(csv_buf, header=True, index=False)
# getvalue() returns the whole buffer regardless of the stream position, so no
# seek(0) is needed before reading it.
s3.put_object(Bucket="steeleyeproject", Body=csv_buf.getvalue(), Key='output.csv')

{'ResponseMetadata': {'RequestId': 'TDMFJ31CPKQ7H06N',
  'HostId': '5cc7FWUbvULJpj9tZUtWBDkr0hIHT1YTSfq7xYs1Fm49EzJhGQQCNxC5Ox9qIF3eZcoS5Zs9HT5sYWpYQ9NPDg==',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '5cc7FWUbvULJpj9tZUtWBDkr0hIHT1YTSfq7xYs1Fm49EzJhGQQCNxC5Ox9qIF3eZcoS5Zs9HT5sYWpYQ9NPDg==',
   'x-amz-request-id': 'TDMFJ31CPKQ7H06N',
   'date': 'Sun, 23 Apr 2023 16:33:13 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"79c7b9a6e53e6602aebff67d3e616ff3"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 1},
 'ETag': '"79c7b9a6e53e6602aebff67d3e616ff3"',
 'ServerSideEncryption': 'AES256'}

In [5]:
# Sign-off cell: prints a name and a numeric ID (presumably the author's; confirm).
print("Utkarsh singh")
print("12008260")


Utkarsh singh
12008260


In [2]:
# Print a label followed by the URL of output.csv in the steeleyeproject
# S3 bucket (ap-south-1 endpoint).
print("AWS S3 Bucket Link")
print('https://steeleyeproject.s3.ap-south-1.amazonaws.com/output.csv')

AWS S3 Bucket Link
https://steeleyeproject.s3.ap-south-1.amazonaws.com/output.csv


In [3]:
# Closing message.
print("thank you ")

thank you 
