[BeautifulSoup](https://pypi.org/project/beautifulsoup4/) 

**Version:** 0.5

**Description:** Adds the MX records of the banks in Angola to the dataset.

This code uses the `dns.resolver` module from dnspython to retrieve the MX records for each naked domain (the host name of the bank's website with a leading `www.` removed). The MX records are stored in the `mx_records` column of the CSV file.

This updated code handles the common DNS resolution failures `NXDOMAIN`, `NoNameservers`, and `NoAnswer` by writing "No MX records found" to the `mx_records` field. Any other `DNSException` is captured as well, and its error message is stored in the field as `Error: <message>`.

In [None]:
import csv
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import dns.resolver
import requests

# Send a GET request to the webpage. An explicit timeout is required because
# requests applies none by default, so an unresponsive server would otherwise
# block the script indefinitely.
url = "https://www.abanc.ao/sistema-financeiro/instituicoes-bancarias-autorizadas/"
response = requests.get(url, timeout=30)

# Fail loudly on an HTTP error status (4xx/5xx) instead of parsing an error
# page as if it were the bank list.
response.raise_for_status()

# Create a BeautifulSoup object to parse the HTML content. The raw bytes are
# passed so that BeautifulSoup performs the character-set detection itself.
soup = BeautifulSoup(response.content, "html.parser")

# Find all bank information div elements; each "cmsAccordion" div is treated
# as one bank entry by the loop below.
bank_divs = soup.find_all("div", class_="cmsAccordion")

# Initialize an empty list to store the bank details (one dict per bank)
bank_details = []

# Iterate over each bank div and append one record to bank_details with the
# keys "name", "website", "naked_domain" and "mx_records". Fields that cannot
# be determined are left as None.
for bank_div in bank_divs:
    # Extract bank name. find() returns None when the div has no heading
    # paragraph, so guard before touching .text instead of crashing the run.
    heading = bank_div.find("p", class_="heading")
    bank_name = heading.text.strip() if heading is not None else None

    # Extract bank info divs
    bank_info_divs = bank_div.find_all("div", class_="content")

    # Extract bank website: the first anchor, in document order, that actually
    # carries an href attribute. href=True skips anchors without a target,
    # which would otherwise raise KeyError on the ["href"] lookup. Every
    # "content" div is visited here, so no separate fallback pass is needed.
    bank_website = None
    for bank_info_div in bank_info_divs:
        bank_website_element = bank_info_div.find("a", href=True)
        if bank_website_element:
            bank_website = bank_website_element["href"].strip()
            break

    # Extract the naked domain from the bank website URL
    naked_domain = None
    if bank_website:
        try:
            # .hostname (unlike .netloc) drops any port or user-info part and
            # lower-cases the host, yielding a name that is valid for DNS.
            hostname = urlparse(bank_website).hostname
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. unbalanced IPv6
            # brackets); treat those as "no usable domain".
            hostname = None
        if hostname:
            if hostname.startswith("www."):
                naked_domain = hostname[4:]  # Remove "www." from the domain
            else:
                naked_domain = hostname

    # Retrieve MX records for the naked domain. The result is a list of
    # exchange host names on success, or a message string on failure.
    mx_records = None
    if naked_domain:
        try:
            answers = dns.resolver.resolve(naked_domain, "MX")
        except (dns.resolver.NXDOMAIN, dns.resolver.NoNameservers, dns.resolver.NoAnswer):
            mx_records = "No MX records found"
        except dns.exception.DNSException as e:
            # Any other resolver failure (timeouts, malformed names, ...)
            mx_records = f"Error: {str(e)}"
        else:
            # DNS servers may return records in any order; sort by preference
            # (then name) so repeated runs produce the same output.
            ordered = sorted(answers, key=lambda r: (r.preference, str(r.exchange)))
            # Exchange names are absolute; strip the trailing root dot.
            mx_records = [str(r.exchange).rstrip(".") for r in ordered]

    # Create a dictionary to store the bank details
    bank = {
        "name": bank_name,
        "website": bank_website,
        "naked_domain": naked_domain,
        "mx_records": mx_records,
    }

    # Append the bank details to the list
    bank_details.append(bank)

# Destination path of the generated CSV file
csv_file = "bank_details-0.5.csv"

# Column order of the output; these match the keys of each bank dictionary
fieldnames = ["name", "website", "naked_domain", "mx_records"]

# Write a header row followed by one row per collected bank. newline="" lets
# the csv module control line endings itself. Values that are not strings
# (such as a list of MX hosts) are written using their str() form.
with open(csv_file, mode="w", newline="", encoding="utf-8") as out_file:
    csv_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
    csv_writer.writeheader()
    for row in bank_details:
        csv_writer.writerow(row)

print("Bank details written to", csv_file)