In [13]:
# read csv file

import csv
import numpy as np
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string
from bs4 import BeautifulSoup
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import re

In [4]:
tree = ET.parse('PV-9-2023-11-21-RCV_DE.xml')
root = tree.getroot()
print(root.tag)
root.attrib

PV.RollCallVoteResults


{'Sitting.Identifier': '2196',
 'Sitting.Date': '2023-11-21',
 'EP.Reference': 'P9_PV(2023)11-21(RCV)',
 'EP.Number': 'PE 756.572',
 'Document.Language': 'XL'}

In [11]:
leg = []
date = []
in_favour = []
against = []
abstains = []
EPP_total = 176
SD_total = 144
threshold_r = 0.8 * EPP_total
threshold_l = 0.8 * SD_total
rm_in_favour = []
rm_against = []
rm_abstains = []
lm_in_favour = []
lm_against = []
lm_abstains = []
consensus = []
count = 1


for child in root.iter():
    # Extract date if available
    if "Date" in child.attrib:
        date.append(child.attrib["Date"])

    # Extract legislative text
    if child.tag.endswith('Description.Text') and child.text is not None:
        leg.append(child.text)

    if child.tag.endswith('Result.For'):
        for c in child[1:]:
            if c.tag.endswith('Group.List') and c.attrib["Identifier"] == "PPE":
                votes = len(c)
                if votes > threshold_r:
                    rm_in_favour.append(1)
                    rm_against.append(0)
                else:
                    rm_in_favour.append(0)
                    rm_against.append(1)
            if c.tag.endswith('Group.List') and c.attrib["Identifier"] == "S&D":
                votes = len(c)
                if votes > threshold_l:
                    lm_in_favour.append(1)
                    lm_against.append(0)
                else:
                    lm_in_favour.append(0)
                    lm_against.append(1)

    # extract sum of votes
    if child.tag.endswith("RollCallVote.Result"):
        for c in child:
            
            if c.tag.endswith('For'):
                in_favour.append(c.attrib["Number"])
            #print(len(in_favour))
            if c.tag.endswith('Against'):
                against.append(c.attrib["Number"])
            #print(len(against))
            if c.tag.endswith('Abstention'):
                abstains.append(c.attrib["Number"])
            #print(len(abstains))
        # if length of in_favour is not equal to count, add 0 to in_favour
        if len(in_favour) != count:
            in_favour.append(0)
        # if length of against is not equal to count, add 0 to against
        if len(against) != count:
            against.append(0)
        # if length of abstains is not equal to count, add 0 to abstains
        if len(abstains) != count:
            abstains.append(0)
        count += 1

# Create a dataframe
df = pd.DataFrame(list(zip(date, leg, in_favour, against, abstains, rm_in_favour, 
                           rm_against, lm_in_favour, lm_against)),
                  columns = ["Date", "Title", "In_Favour", "Against", "Abstentions", 
                             "Right In Favour", "Right Against", 
                             "Left In Favour", "Left Against"]
                             )

# Add columns to dataframe
df["RM"] = np.where((df["Right In Favour"] == 1) & (df["Left In Favour"] == 0), 1, 0)
df["LM"] = np.where((df["Right In Favour"] == 0) & (df["Left In Favour"] == 1), 1, 0)
df["Consensus"] = np.where((df["Right In Favour"] == 1) & (df["Left In Favour"] == 1), 1, 0)
df["Rejected"] = np.where((df["Right Against"] == 1) & (df["Left Against"] == 1), 1, 0)
display(df)

Unnamed: 0,Date,Title,In_Favour,Against,Abstentions,Right In Favour,Right Against,Left In Favour,Left Against,RM,LM,Consensus,Rejected
0,2023-11-21 12:17:20,Hohes gemeinsames Cybersicherheitsniveau in de...,557,27,0,1,0,1,0,0,0,1,0
1,2023-11-21 12:17:46,Fangdokumentationsregelung für Roten Thun (Thu...,600,1,5,1,0,1,0,0,0,1,0
2,2023-11-21 12:18:19,Gemeinsame Vorschriften zur Förderung der Repa...,591,11,12,1,0,1,0,0,0,1,0
3,2023-11-21 12:19:26,A9-0316/2023 - René Repasi - Vorschlag der Kom...,590,15,15,1,0,1,0,0,0,1,0
4,2023-11-21 12:21:14,Rahmen für Maßnahmen zur Stärkung des europäis...,413,146,72,1,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,2023-11-21 13:23:47,B9-0462/2023 - § 22/3,394,146,74,0,1,0,1,0,0,0,1
132,2023-11-21 13:23:56,B9-0462/2023 - § 22/4,402,136,73,0,1,1,0,0,1,0,0
133,2023-11-21 13:24:05,B9-0462/2023 - § 22/5,386,146,70,0,1,0,1,0,0,0,1
134,2023-11-21 13:24:13,B9-0462/2023 - § 22/6,470,91,40,0,1,0,1,0,0,0,1


In [12]:
# Sum RM, LM, Consensus and Rejected
rm = df["RM"].sum()
lm = df["LM"].sum()
cons = df["Consensus"].sum()
rej = df["Rejected"].sum()

print(f"RM: {rm}")
print(f"LM: {lm}")
print(f"Consensus: {cons}")
print(f"Rejected: {rej}")

RM: 31
LM: 15
Consensus: 22
Rejected: 68


In [15]:
# Add summaries to the dataframe

response = requests.get("https://www.europarl.europa.eu/doceo/document/PV-9-2023-11-21-VOT_EN.html")
soup = BeautifulSoup(response.text, "html.parser")

# Find the element using a regex to flexibly match the string
report_paragraph = soup.find('p')
#print(report_paragraph)

report_links = []

for item in report_paragraph:
    if item.name == 'a':
        # join the link with the base url
        report_link = ("https://www.europarl.europa.eu/" + item['href'])
        report_links.append(report_link)

# Extract the procedure links from the report links
procedure_links = []
links = []
for link in report_links:
    response = requests.get(link)
    soup = BeautifulSoup(response.text, "html.parser")
    # Find the element using a regex to flexibly match the string
    document_info = soup.find('p')
    for item in document_info:
        if item.name == 'a':
            links.append(item['href'])
            procedure_link = links[-1]
    procedure_links.append(procedure_link)

print(procedure_links)

['https://oeil.secure.europarl.europa.eu/oeil/popups/ficheprocedure.do?lang=en&reference=2022/0085(COD)']


In [27]:
# Extract the summary links from the procedure links
summary_links = []

for link in procedure_links:
    response = requests.get(link)
    soup = BeautifulSoup(response.text, "html.parser")
    # Find the button by its id
    button = soup.find('button', id='summary')
    # Extract the 'onclick' attribute and parse it
    if button and 'onclick' in button.attrs:
        onclick_content = button['onclick']
        summary_link = onclick_content.split("'")[1]
        summary_link = "https://oeil.secure.europarl.europa.eu" + summary_link
        summary_links.append(summary_link)

print(summary_links)


['https://oeil.secure.europarl.europa.eu/oeil/popups/summary.do?id=1697722&t=e&l=en']


In [58]:
# Extract summary text from the summary links
summaries = []
items = []
for link in summary_links:
    response = requests.get(link)
    soup = BeautifulSoup(response.text, "html.parser")
    # Find the element using a regex to flexibly match the string
    summary = soup.find_all('span', lang="EN-GB")
    for item in summary:
        # strip the text of any leading or trailing whitespace
        item = item.text.strip()
        item_text = re.sub(r'\s+', ' ', item)
        # join all the text together
        items.append(item_text)
summaries = " ".join(items)

print(summaries)

PURPOSE: to establish measures to ensure a high common level of cybersecurity in the Union institutions, bodies and agencies. PROPOSED ACT: Regulation of the European Parliament and of the Council. ROLE OF THE EUROPEAN PARLIAMENT: the European Parliament decides in accordance with the ordinary legislative procedure and on an equal footing with the Council. BACKGROUND: evolving technology and increased complexity and interconnectedness of digital systems amplify cybersecurity risks making the Union administration more vulnerable to cyber threats and incidents. From 2019 to 2021, the number of significant incidents affecting Union institutions, bodies and agencies, authored by advanced persistent threat actors, has surged dramatically. The first half of 2021 saw the equivalent in significant incidents as in the whole of 2020. The Centre for Cybersecurity of the EU Institutions, Bodies and Agencies (CERT-EU) has assessed the main cyber threats to which the EU institutions, bodies and agen

PURPOSE: to establish measures to ensure a high common level of cybersecurity in the Union institutions, bodies and agencies. PROPOSED ACT: Regulation of the European Parliament and of the Council. ROLE OF THE EUROPEAN PARLIAMENT: the European Parliament decides in accordance with the ordinary legislative procedure and on an equal footing with the Council. BACKGROUND: evolving technology and increased complexity and interconnectedness of digital systems amplify cybersecurity risks making the Union administration more vulnerable to cyber threats and incidents. From 2019 to 2021, the number of significant incidents affecting Union institutions, bodies and agencies, authored by advanced persistent threat actors, has surged dramatically. The first half of 2021 saw the equivalent in significant incidents as in the whole of 2020. The Centre for Cybersecurity of the EU Institutions, Bodies and Agencies (CERT-EU) has assessed the main cyber threats to which the EU institutions, bodies and agencies are currently exposed or are likely to be exposed in the foreseeable future. The analysis examined the influence of major ongoing shifts affecting the ways in which the EU institutions manage and use their IT infrastructures and services. These shifts include the increase in teleworking, the migration of systems to the cloud and the increased outsourcing of IT services. The analysis of the 20 Union institutions, bodies and agencies shows that their governance, cyber-hygiene, overall capability and maturity vary over a broad spectrum. Therefore, requiring all Union institutions, bodies and agencies to implement a baseline of cybersecurity measures is instrumental to address this disparity in maturity and to bring all Union institutions, bodies and agencies to a high common level of cybersecurity. This proposal builds on the EU Strategy for the Security Union and the EU’s Cybersecurity Strategy for the Digital Decade. CONTENT: this proposal establishes a framework to ensure common rules and measures on cybersecurity within the Union institutions, bodies, offices and agencies to enable them to perform their respective tasks in an open, efficient and independent manner. It aims to improve all entities’ resilience and incident response capacities. The proposed Regulation: - obliges the Union institutions, bodies, offices and agencies to (i) establish an internal framework for the management, governance and control of cybersecurity risks, ensuring effective and prudent management of all such risks, (ii) adopt a cybersecurity baseline to address the risks identified through this framework, (iii) carry out a cybersecurity maturity assessment covering all elements of its IT environment at least every three years, and (iv) adopt a cyber security plan; - establishes an inter-institutional cybersecurity board to monitor the implementation of this Regulation by the Union institutions, bodies, offices and agencies, as well to supervise the implementation of general priorities and objectives by CERT-EU and providing strategic direction to CERT-EU; - defines the task and missions of CERT-EU as an autonomous inter-institutional cybersecurity centre at the service of all EU institutions, bodies, offices and agencies. CERT-EU will contribute to the security of the unclassified IT environment of all Union institutions, bodies and agencies by advising them on cybersecurity, by helping them to prevent, detect, mitigate and respond to incidents and by acting as their cybersecurity information exchange and incident response coordination hub; - ensures cooperation and the exchange of information among CERT-EU, and the Union institutions, bodies and agencies to develop trust and confidence. To this end CERT-EU may request Union institutions, bodies and agencies to provide it with relevant information and CERT-EU may exchange incident-specific information with Union institutions, bodies and agencies to facilitate detection of similar cyber threats or incidents without the consent of the affected constituent. CERT-EU may only exchange incident-specific information which reveals the identity of the target of the cybersecurity incident with the consent of the affected constituent; - obliges all EU institutions, bodies, offices and agencies to notify CERT-EU of significant cyber threats, significant vulnerabilities and significant incidents without undue delay and in any event no later than 24 hours after becoming aware of them. Budgetary implications According to studies, direct cybersecurity spending has tended to vary between 4 and 7% of the aggregated IT expenditures of organisations. However, the threat analysis undertaken by CERT-EU in support of this legislative proposal indicates that international bodies and political organisations face increased risks and therefore a level of 10% of IT spending on cybersecurity would seem a more adequate target. The exact cost of such efforts cannot be determined due to the lack of detailed information on IT expenditure of the Union institutions, bodies and agencies and the relevant share of cybersecurity spending. CERT-EU will require additional resources to fulfil its expanded role and these resources should be reallocated from the Union institutions, bodies and agencies benefitting from CERT-EU’s services.

In [49]:
# scrape website
response = requests.get("https://www.trackmyeu.org/votings.php") 

soup = BeautifulSoup(response.content, 'html.parser')

# find all div class="voting-box_date"
entries = soup.find_all('div', class_='col-12')

print(len(entries))

for entry in entries:
    votes = soup.find_all('div', class_='voting-box')

print(len(votes))
voting_dates = []
voting_ref = []
voting_titles = []


for vote in votes:
    print(vote)
    # find all dates
    voting_dates_gen = vote.find_all('div', class_='voting-box-date')
    print(voting_dates_gen)
    for date in voting_dates_gen:
        voting_dates.append(date)
    # find all reference numbers
    voting_ref_gen = vote.find_all('div', class_='voting-box-ref')
    for ref in voting_ref_gen:
        voting_ref.append(ref)
    # find all descriptions
    voting_titles_gen = vote.find_all('div', class_='voting-box-name')
    for title in voting_titles_gen:
        voting_titles.append(title)

# create dataframe from lists
df = pd.DataFrame(list(zip(voting_dates, voting_ref, voting_titles)), columns =['Date', 'Reference', 'Title'])
df.to_csv('votes.csv', index=False)

display(df)

2
1
<div @click="selectVoting(v)" class="voting-box">
<div class="voting-box-date">{{ v.date }}</div>
<div class="voting-box-ref" v-if="v.ref">{{ v.ref.join() }}</div>
<div class="voting-box-name" v-if="v.mainTitle">{{ v.mainTitle }}</div>
<div class="voting-box-name" v-else="">{{ v.desc }}</div>
<div class="voting-box-name2" v-if="v.secondaryTitle">{{ v.secondaryTitle }}</div>
</div>
[<div class="voting-box-date">{{ v.date }}</div>]


Unnamed: 0,Date,Reference,Title
0,[{{ v.date }}],[{{ v.ref.join() }}],[{{ v.mainTitle }}]


In [6]:
# read csv file

with open('baseline_test.csv', newline='') as baseline:
    baseline = csv.reader(baseline, delimiter=',', quotechar='"')
    for row in baseline:
        print(', '.join(row))

Topic;Key words;Original;ChatGPT4
green bonds;bond climate change policy disclosure of information financial legislation economic instrument for the environment economic activity issue of securities Eurobond green economy financial supervision;"European green bond standard
  
SUMMARY OF: 
Regulation (EU) 2023/2631 on European green bonds and optional disclosures for bonds marketed as environmentally sustainable and for sustainability-linked bonds 
WHAT IS THE AIM OF THE REGULATION? 
The regulation:
•	lays down uniform requirements for issuers who wish to use the designation ‘European green bond’ or ‘EuGB’; 
•	establishes a registration and supervisory system for external reviewers of EuGBs; 
•	provides disclosure templates,  notably for pre-issuance disclosures and allocation reports linked to EuGBs. 
KEY POINTS 
Eligibility 
To be able to use the designation European green bond or EuGB,  issuers:
•	must invest the proceeds from these bonds in full,  before the bond reaches maturity,  

In [None]:
# create vector representation of documents with fastext
# https://fasttext.cc/docs/en/supervised-tutorial.html

In [19]:
corpus = ["""European green bond standard
  
SUMMARY OF: 
Regulation (EU) 2023/2631 on European green bonds and optional disclosures for bonds marketed as environmentally sustainable and for sustainability-linked bonds 
WHAT IS THE AIM OF THE REGULATION? 
The regulation:
•	lays down uniform requirements for issuers who wish to use the designation ‘European green bond’ or ‘EuGB’; 
•	establishes a registration and supervisory system for external reviewers of EuGBs; 
•	provides disclosure templates, notably for pre-issuance disclosures and allocation reports linked to EuGBs. 
KEY POINTS 
Eligibility 
To be able to use the designation European green bond or EuGB, issuers:
•	must invest the proceeds from these bonds in full, before the bond reaches maturity, in sustainable economic activities covered by the European Union’s (EU) taxonomy* legislation (Regulation (EU) 2020/852 – see summary). These include fixed assets, capital and operating expenditures, and assets and expenditure of households (this is known as the gradual approach); 
•	can alternatively allocate the proceeds from these bonds to a portfolio of fixed assets or financial assets in accordance with taxonomy requirements (portfolio approach); 
•	may, under flexibility rules, invest up to 15% in economic activities that meet, but are not officially covered by, the taxonomy requirements. 
Transparency 
Issuers of green bonds must:
•	complete the European green bond factsheet (Annex I) and ensure that an external reviewer has approved it before issuing a bond (pre-issuance review); 
•	provide, until the proceeds are fully invested, an allocation report (Annex II) every 12 months on where the funds are being directed; 
•	receive post-issuance reviews from an external reviewer; 
•	draw up and make public an environmental impact report (Annex III) on the use of the funds, at least once during the lifetime of the bond; 
•	publish a prospectus in line with Regulation (EU) 2017/1129 (see summary), using the term European green bond or EuGB throughout (exceptions apply to sovereigns); 
•	make the factsheet, prospectus, various reviews and other information freely available on their website for at least a year after the bond matures. 
Securitisation* bonds 
The following rules apply:
•	bonds issued for synthetic securitisation* cannot be termed European green bond or EuGB; 
•	securitised exposures may: 
o	not be used to finance the exploration, mining, extraction, production, processing, storage, refining, distribution or transport of fossil fuels,
o	be used to finance electricity from fossil fuels or the co-generation or production of heating/cooling and power from fossil fuels, provided this meets the ‘no significant harm’ test;
•	originators of securitisation bonds designated European green bond or EuGB: 
o	must state the nature of the bond in their prospectus,
o	confirm that they are responsible for how the proceeds are used,
o	provide additional information on the economic activities being supported.
Optional disclosure templates 
The European Commission will publish templates by 21 December 2024 for voluntary pre-issuance and post-issuance disclosures for bonds marketed as environmentally sustainable* or sustainability-linked*.
Oversight 
External reviewers must:
•	be registered with the European Securities and Markets Authority (ESMA); 
•	satisfy practical and professional requirements; 
•	employ appropriate systems, resources and procedures to carry out their work; 
•	ensure that their analysts, employees and other staff have the necessary knowledge, experience and training; 
•	maintain a permanent, independent and effective system of compliance; 
•	implement internal due diligence policies and procedures to prevent conflicts of interest; 
•	ensure that their reviews are based on a thorough analysis of all available and relevant information; 
•	correct any methodological errors and immediately disclose them to ESMA and the issuers of the European green bonds involved; 
•	ensure that third-party service providers, to whom they may outsource some, but not all, activities, can carry out reliable and professional assessments for which external reviewers remain responsible; 
•	keep adequate records; 
•	identify, eliminate, manage and disclose any actual or potential conflicts of interest. 
Reviews 
•	External reviewers must: 
o	not suggest that ESMA or any competent authority endorses their review;
o	make their pre- and post-issuance and impact report reviews freely available on their website throughout the bond’s lifetime.
•	Non-EU external reviewers may provide their services under the regulation, provided that the Commission has issued an equivalence decision and that they are registered with ESMA, which may, with well-founded reasons, withdraw that approval. 
Supervision 
National competent authorities: 
•	supervise issuers of European green bonds and their use of the common templates; 
•	have extensive supervisory and investigatory powers; 
•	cooperate with each other on investigations, supervision, enforcement and the exchange of information; 
•	regularly communicate relevant information to ESMA. 
ESMA: 
•	may request any information it needs from external reviewers; 
•	has the power to carry out on-site inspections, examine records, data, procedures and other material and to interview individuals during investigations; 
•	may temporarily or permanently remove an external reviewer’s rights and impose fines ranging from €20,000 to €200,000, along with occasional penalties; 
•	charges external reviewers fees for their registration, recognition and supervision and any other costs it may incur; 
•	maintains a publicly accessible register of external reviewers on its website; 
•	will draft various regulatory technical standards needed for the implementation of the regulation. 
The Commission: 
•	may adopt delegated acts; 
•	will, by 21 December 2026, publish a report on whether there is a need to regulate sustainability-linked bonds; 
•	will, by 21 December 2028, and every 3 years thereafter, submit a report to the European Parliament and the Council of the European Union on the regulation’s implementation. 
FROM WHEN DOES THE REGULATION APPLY? 
It applies from 21 December 2024.
BACKGROUND 
Green bonds are one of the main ways of financing investment in green technology, energy and resource efficiency and sustainable transport and research infrastructure. They help implement the EU’s transition to a climate-neutral, resource-efficient economy.
The regulation fosters consistency and comparability in the green bond market and reduces the risk of greenwashing, for the benefit of both issuers and investors. 
KEY TERMS 
Taxonomy. An investment classification system containing a list of environmentally sustainable economic activities.
Securitisation. The practice of pooling together various types of debt and selling them as bonds to investors.
Synthetic securitisation. The transfer of risk by using credit derivatives or guarantees with exposure remaining with the originator.
Environmentally sustainable. A bond with a commitment that the proceeds go to environmental activities.
Sustainability-linked. A bond with defined environmental sustainability objectives.""", 
"""Summary of the Legislation:
Title: "Act to Promote Green Bonds in the European Union"
Background: Green bonds are financial instruments aimed at raising capital for projects with environmental benefits, such as renewable energy and pollution control. Amid increasing urgency to address climate change, this legislation seeks to integrate these bonds into the broader financial system of the EU, promoting a transition towards a sustainable green economy.
Aims: The legislation is designed to facilitate the funding of environmentally sustainable projects through the issuance of green bonds, aiming to attract both private and public sector investment.
Scope and Addressees: The law targets financial institutions, corporate issuers, and public sector entities across the EU that are involved in the issuance, investment, and regulation of securities, specifically green bonds.
Key Provisions:
1.	Definition and Eligibility Criteria:
o	Set strict guidelines defining eligible green bond projects, focusing on environmental sustainability and climate impact.
o	Involve the European Investment Bank (EIB) in validating project eligibility.
2.	Disclosure and Reporting Requirements:
o	Mandate issuers to provide detailed disclosures on the use of proceeds and expected environmental benefits.
o	Require annual impact reports to be reviewed by the European Securities and Markets Authority (ESMA).
3.	Regulatory Oversight:
o	Designate ESMA to oversee the compliance of green bond issuances, ensuring adherence to EU standards.
o	Establish penalties for non-compliance to be enforced by national financial regulators, within the framework set by ESMA.
4.	Market Incentives:
o	Propose tax incentives for investors in green bonds, subject to approval and implementation by individual member states.
o	Encourage member states to align national tax benefits with these EU guidelines to ensure consistency.
5.	International Standards Integration:
o	Align the EU green bond criteria with international standards to enhance market compatibility and investor confidence.
o	Facilitate cross-border investment in green bonds within the EU and globally.
Entry into Force: The legislation is scheduled to come into effect on January 1, 2026, providing time for issuers and regulators to adapt to the new regulations.
European and National Jurisdiction Considerations:
•	The EU sets overarching regulatory frameworks and standards, but implementation of specific tax incentives remains under the jurisdiction of individual member states.
•	The EU cannot mandate specific environmental projects or local economic activities; it only provides a framework and standards for what qualifies as a green bond eligible project.
"""]

corpus2 = ["""Summary of the Legislation:
Title: "European Green Bond Framework Act"
Background: Green bonds are a growing financial tool used to fund projects that have positive environmental impacts, such as renewable energy, clean transport, and pollution control. The push for such legislation reflects a growing recognition of the urgent need to finance projects that support the EU's climate change objectives and transition to a green economy.
Aims: The legislation is designed to support sustainable economic activities through the development and promotion of green bonds, ensuring that investments align with broader environmental goals. It aims to mobilize private and public capital for projects that mitigate the effects of climate change and promote ecological sustainability.
Scope and Addressees: This law targets financial institutions, corporate and public sector bond issuers, and investors interested in funding environmentally beneficial projects. It outlines responsibilities and guidelines for these entities to foster a robust market for green bonds within the EU.
Key Provisions:
1.	Criteria for Green Bonds: Define what qualifies as a green bond, including the environmental standards projects must meet to be eligible for funding through such bonds.
2.	Disclosure Requirements: Mandate that issuers provide detailed, transparent information regarding the use of proceeds, expected environmental benefits, and measures in place to ensure accountability.
3.	Regulatory Oversight: Establish guidelines for regulatory bodies to supervise and verify the adherence to green bond principles, safeguarding against greenwashing.
4.	Support Mechanisms: Introduce incentives, possibly including tax benefits or subsidies, for projects financed by green bonds to enhance their attractiveness to investors.
5.	Integration with International Standards: Ensure that the EU green bond standards are harmonious with global practices to attract international investors and facilitate cross-border investments.
Entry into Force: The legislation would typically be scheduled to take effect one to two years after passage to allow adequate time for adaptation by market participants. If approved in 2024, it could be expected to become operative by 2025 or 2026.
The support from left-wing groups, social democrats, some non-attached members, and the Greens indicates a strong environmental and social agenda driving the legislation. Opposition from more conservative and right-wing factions may stem from concerns about the financial impact on businesses and the broader economy. Nonetheless, the passage of this act by the supporting parties suggests a significant commitment to leveraging financial markets for environmental sustainability.""", 
"""Summary of the Legislation:
Title: "European Green Bond Enhancement Act"
Background: Green bonds are specialized financial instruments designed to raise funds for projects that benefit the environment, such as clean energy, sustainable transportation, and waste management. The legislation is influenced by a desire to integrate financial markets with the EU's environmental goals, particularly in light of increasing pressures to address climate change effectively.
Aims: The primary aim of this legislation is to foster the growth of the green bond market, encouraging the financing of environmentally friendly projects while maintaining a favorable economic climate for investors and issuers. This reflects a balance between environmental concerns and economic growth, appealing to the more conservative and right-leaning parties' focus on market-friendly policies.
Scope and Addressees: The law is directed at financial institutions, corporations issuing bonds, and government entities, as well as investors interested in green finance. It provides a framework for the issuance and management of green bonds, ensuring that these bonds are attractive investment opportunities that also contribute to environmental sustainability.
Key Provisions:
1.	Clear Definition of Green Bonds: Set forth detailed criteria for what projects can be financed by green bonds, focusing on their capacity to generate economic returns alongside environmental benefits.
2.	Transparency and Disclosure: Require issuers to provide comprehensive information on how the funds will be used, the expected environmental impacts, and regular performance updates.
3.	Regulatory Framework: Outline the responsibilities of financial regulators to monitor the market, ensuring that green bonds fulfill their intended purpose and that issuers meet strict compliance guidelines.
4.	Market Incentives: Introduce incentives such as lower tax rates or reduced regulatory requirements for eligible projects, aimed at boosting investment in green bonds.
5.	Alignment with International Standards: Ensure that EU regulations align with international green bond principles to attract global investors and facilitate the integration of the European market with international financial markets.
Entry into Force: The law would typically be enacted within one year of its passage, allowing for a period of adjustment for market participants. If passed in 2024, it might be expected to come into effect by 2025.
The legislation, supported by parties typically aligned with more conservative economic policies, likely emphasizes market efficiency and economic incentives over more stringent environmental criteria, which might be why left-leaning parties, social democrats, and the Greens opposed it. This alignment underscores a practical approach to environmental finance, balancing the need for sustainability with economic viability and appeal to conservative investors and businesses.
"""
]

In [22]:
def preprocess_document(doc):
    """Preprocess a single document by converting to lowercase, removing punctuation,
    stopwords, and stemming."""
    # Convert to lowercase
    doc = doc.lower()
    # Remove punctuation
    doc = ''.join([char for char in doc if char not in string.punctuation])
    # Tokenize
    tokens = nltk.word_tokenize(doc)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [token for token in tokens if token not in stop_words]
    # Stemming
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(token) for token in filtered_tokens]
    # Return as a string
    return ' '.join(stemmed_tokens)

def compute_cosine_similarity(corpus):
    """Compute cosine similarity between documents in the corpus."""
    # Preprocess documents
    preprocessed_corpus = [preprocess_document(doc) for doc in corpus]
    # Vectorize documents
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(preprocessed_corpus)
    # Compute cosine similarity
    cosine_sim_matrix = cosine_similarity(tfidf_matrix)
    return cosine_sim_matrix

# Compute cosine similarity
cosine_sim_matrix = compute_cosine_similarity(corpus)
cosine_sim_matrix2 = compute_cosine_similarity(corpus2)
# Print cosine similarity matrix
print(cosine_sim_matrix)
print(cosine_sim_matrix2)

[[1.         0.45609539]
 [0.45609539 1.        ]]
[[1.         0.70411472]
 [0.70411472 1.        ]]


In [23]:
from nltk.corpus import stopwords
from gensim.models import KeyedVectors


def preprocess_document(doc):
    """Preprocess a single document by converting to lowercase, removing punctuation,
    and stopwords."""
    # Convert to lowercase
    doc = doc.lower()
    # Remove punctuation
    doc = ''.join([char for char in doc if char not in string.punctuation])
    # Tokenize
    tokens = nltk.word_tokenize(doc)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [token for token in tokens if token not in stop_words]
    return filtered_tokens

def document_vector(doc, model):
    """Create a document vector by averaging the word vectors of the words contained in the document."""
    # Preprocess the document
    words = preprocess_document(doc)
    # Filter words that are in the model's vocabulary
    words = [word for word in words if word in model.key_to_index]
    if not words:
        return np.zeros(model.vector_size)
    # Average the word vectors
    word_vectors = np.array([model[word] for word in words])
    return word_vectors.mean(axis=0)

def compute_cosine_similarity(corpus, model):
    """Compute cosine similarity between documents in the corpus using a FastText model."""
    # Compute document vectors
    doc_vectors = np.array([document_vector(doc, model) for doc in corpus])
    # Compute cosine similarity
    cosine_sim_matrix = cosine_similarity(doc_vectors)
    return cosine_sim_matrix

# Load a pre-trained FastText model
# Note: You need to download a FastText pre-trained model and load it here
# For example, using Gensim to load 'wiki-news-300d-1M-subword.vec'
model_path = 'path_to_model/wiki-news-300d-1M-subword.vec'
fasttext_model = KeyedVectors.load_word2vec_format(model_path, binary=False)

# Example corpus
corpus = [
    "The quick brown fox jumps over the lazy dog.",
    "Never jump over the lazy dog quickly.",
    "Dogs are not lazy when they are jumping."
]

# Compute cosine similarity
cosine_sim_matrix = compute_cosine_similarity(corpus, fasttext_model)

# Print cosine similarity matrix
print(cosine_sim_matrix)


FileNotFoundError: [Errno 2] No such file or directory: 'path_to_model/wiki-news-300d-1M-subword.vec'