In [1]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
import spacy
from spacy import displacy

In [2]:
# Source article for the entity/relationship extraction below.
url = "https://www.nbr.org/publication/u-s-australia-energy-cooperation-in-the-indo-pacific/"
# Bound the wait so an unresponsive server cannot hang the kernel, and fail
# fast on an HTTP error status instead of parsing an error page further down.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [3]:
# Parse the downloaded bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.content, features="html.parser")

In [4]:
# Show only the beginning of the prettified markup: enough to sanity-check the
# download without flooding the notebook with the entire page.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html class="no-js" lang="en">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <title>
   U.S.-Australia Energy Cooperation in the Indo-Pacific | The National Bureau of Asian Research (NBR)
  </title>
  <link href="https://www.nbr.org/xmlrpc.php" rel="pingback"/>
  <link href="https://www.nbr.org/wp-content/themes/nbr-theme/build/css/main.css" rel="stylesheet" type="text/css"/>
  <link href="https://www.nbr.org/wp-content/themes/nbr-theme/style.css" media="screen" rel="stylesheet" type="text/css"/>
  <script crossorigin="anonymous" defer="" integrity="sha384-3yBLeJ4waqGSAf4A8pjZ13UF7GuhgbdKnBQvIp/TkWoXtQbtwjlIPNjkDRJ46UCn" src="https://pro.fontawesome.com/releases/v5.5.0/js/all.js">
  </script>
  <meta content="max-image-preview:large" name="robots"/>
  <link href="//code.jquery.com" rel="dns-prefetch"/>
  <s

In [5]:
# Locate the article body; find() returns None when no such <div> exists.
main_content = soup.find('div', {'class': 'publication-content'})
if main_content is None:
    raise ValueError(
        "No <div class='publication-content'> found; the page layout may have changed."
    )

# Remove every <a> and <em> element (tag and enclosed text) from the body
# before the paragraph text is extracted.
for element in main_content.find_all(['a', 'em']):
    element.decompose()

In [6]:
# Collect paragraph texts in document order, up to and including the paragraph
# that contains the closing author-bio sentence.
text_list = []
stop_text = "She also serves as the official U.S. delegate to the Energy Research Institute Network, an East Asia Summit–linked network whose inputs are designed to inform the formal East Asia Summit process."
for paragraph in main_content.find_all('p'):
    paragraph_text = paragraph.get_text(separator=' ', strip=False)
    text_list.append(paragraph_text)

    # The marker paragraph is kept, but nothing after it is read.
    if stop_text in paragraph_text:
        break

In [7]:
# One row per collected paragraph, also written to disk as CSV (no index column).
df = pd.DataFrame({"Paragraph Text": text_list})
df.to_csv(path_or_buf='09_paragraphs.csv', index=False)

In [8]:
# Display the paragraph table built in the previous cell.
df

Unnamed: 0,Paragraph Text
0,Can fierce competitors make for the best of al...
1,Both countries have substantial natural resour...
2,"Despite these competing interests, the United ..."
3,An important cornerstone of any collaboration ...
4,In February 2018 the Trump and Morrison admini...
5,The United States currently has only three str...
6,Open-ended flexibility is the hallmark of a su...
7,According to estimates by the U.S. Energy Info...
8,"Recognizing this, the U.S.-Australia Strategic..."
9,"Meanwhile, the second identified area of coope..."


In [9]:
# Load the small English spaCy pipeline used by every NLP step below.
# NOTE: the cells that install spaCy and this model appear further down in the
# notebook; on a fresh environment they have to be run before this one.
nlp = spacy.load(name="en_core_web_sm")

In [10]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [11]:
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [14]:
# Lemmas treated as signalling a relationship between entities; tokens are
# matched against this set through their spaCy lemma.
relationship_keywords = {
    'agreement',
    'collaboration',
    'cooperation',
    'develop',
    'effort',
    'engage',
    'enhance',
    'export',
    'finance',
    'fund',
    'import',
    'interest',
    'invest',
    'partner',
    'promote',
    'share',
    'supply',
    'support',
    'transition',
}

In [15]:
def extract_entities_and_relationships(text):
    """Extract named entities and keyword-based relationships from ``text``.

    The text is parsed with the module-level ``nlp`` pipeline.

    Returns:
        A ``(entities, relationships)`` pair where

        * ``entities`` is a list of ``(entity_text, label)`` tuples, restricted
          to the labels ``ORG``, ``GPE`` and ``FAC``;
        * ``relationships`` is a list of ``(subject, keyword, object)`` text
          triples, one for each token whose lemma is in
          ``relationship_keywords`` and that has both an ``nsubj`` child and a
          ``dobj`` child (the first of each is used).
    """
    parsed = nlp(text)

    wanted_labels = ['ORG', 'GPE', 'FAC']
    entities = []
    for span in parsed.ents:
        if span.label_ in wanted_labels:
            entities.append((span.text, span.label_))

    relationships = []
    for tok in parsed:
        # Only tokens whose lemma is a relationship keyword are considered.
        if tok.lemma_ not in relationship_keywords:
            continue

        # Split the token's direct dependents into subjects and direct objects.
        subjects = []
        objects = []
        for child in tok.children:
            if child.dep_ == "nsubj":
                subjects.append(child)
            elif child.dep_ == "dobj":
                objects.append(child)

        # A triple is emitted only when both sides are present.
        if subjects and objects:
            relationships.append((subjects[0].text, tok.text, objects[0].text))

    return entities, relationships

In [16]:
# Run the extractor once per paragraph; each result is an (entities, relationships) pair.
extraction_results = df['Paragraph Text'].apply(extract_entities_and_relationships)

# Split the pairs into two columns. Building the columns explicitly (instead of
# unpacking zip(*...)) also works when the frame has no rows, where the
# two-target unpacking would raise ValueError.
df['Entities'] = [entities for entities, _ in extraction_results]
df['Relationships'] = [relationships for _, relationships in extraction_results]

In [17]:
# Show each paragraph next to what was extracted from it.
df.loc[:, ['Paragraph Text', 'Entities', 'Relationships']]

Unnamed: 0,Paragraph Text,Entities,Relationships
0,Can fierce competitors make for the best of al...,"[(the United States, GPE), (Australia, GPE)]",[]
1,Both countries have substantial natural resour...,"[(U.S., GPE), (the Indo-Pacific, ORG), (The In...",[]
2,"Despite these competing interests, the United ...","[(the United States, GPE), (Australia, GPE), (...",[]
3,An important cornerstone of any collaboration ...,"[(the United States, GPE), (Australia, GPE), (...",[]
4,In February 2018 the Trump and Morrison admini...,"[(Trump, ORG), (Australia, GPE)]",[]
5,The United States currently has only three str...,"[(The United States, GPE), (the Indo-Pacific, ...",[]
6,Open-ended flexibility is the hallmark of a su...,"[(the United States, GPE), (Australia, GPE)]",[]
7,According to estimates by the U.S. Energy Info...,"[(the U.S. Energy Information Administration, ...",[]
8,"Recognizing this, the U.S.-Australia Strategic...",[(the U.S.-Australia Strategic Energy Partners...,[]
9,"Meanwhile, the second identified area of coope...","[(LNG, ORG), (the United States, GPE), (Austra...",[]


In [18]:
# For every paragraph: print it, render spaCy's dependency and entity views
# sentence by sentence, then print the values stored in the DataFrame.
for row_label, record in df.iterrows():
    paragraph = record['Paragraph Text']
    print(f"\nParagraph {row_label + 1}:")
    print(paragraph)

    # Parse again to obtain sentence spans for displaCy.
    sentences = [sentence for sentence in nlp(paragraph).sents]

    # Dependency arcs first (compact layout), entity highlights second.
    displacy.render(sentences, style='dep', jupyter=True, options={'compact': True})
    displacy.render(sentences, style="ent", jupyter=True)

    # Values computed earlier by extract_entities_and_relationships.
    print("Entities:", record['Entities'])
    print("Relationships:", record['Relationships'])


Paragraph 1:
Can fierce competitors make for the best of allies? If the two powers involved are the United States and Australia, that answer is seemingly yes.




Entities: [('the United States', 'GPE'), ('Australia', 'GPE')]
Relationships: []

Paragraph 2:
Both countries have substantial natural resource endowments (particularly in coal and natural gas) and significant competitive strengths in the export of these resources as well as technologies associated with solar, wind, and more efficient power generation. Government officials and industry leaders in both countries also regularly tout trade in energy and energy technologies as a critical driver of their respective country’s economic potential—and the market that U.S. and Australian exporters are most aggressively pursuing is one and the same: the Indo-Pacific. The Institute Of Energy Economics, Japan, now projects that by 2050 80% of all energy traded globally will be consumed in Asia.  This makes the Indo-Pacific the essential region for exporters in both countries.


Entities: [('U.S.', 'GPE'), ('the Indo-Pacific', 'ORG'), ('The Institute Of Energy Economics', 'ORG'), ('Japan', 'GPE'), ('the Indo-Pacific', 'ORG')]
Relationships: []

Paragraph 3:
Despite these competing interests, the United States and Australia have maintained a close partnership on regional energy cooperation. This brief explores the dynamics of this partnership and assesses the overall outlook for future cooperation in the Indo-Pacific. Specifically, it highlights areas in which the United States and Australia are already working together, as well as those where renewed or expanded cooperation could play an important role in increasing regional economic prosperity.


Entities: [('the United States', 'GPE'), ('Australia', 'GPE'), ('the Indo-Pacific', 'ORG'), ('the United States', 'GPE'), ('Australia', 'GPE')]
Relationships: []

Paragraph 4:
An important cornerstone of any collaboration is shared values. In broad terms, both the United States and Australia follow a market-based approach to energy policymaking that is backed by liberal democratic values. Both countries generally understand that the government’s role in the energy sector is to deploy a range of tools (e.g., regulations, standards, and investments) to nudge sectorial decisions into alignment with high-level goals such as promoting economic growth, combating air pollution, or guaranteeing labor safeguards.  Historically, this has meant resisting the urge to deploy resource advantages as a geopolitical weapon, lest it undermine confidence in international trade. When this approach is coupled with broader commitments to fostering open markets, the result to date has been an environment whe

Entities: [('the United States', 'GPE'), ('Australia', 'GPE'), ('U.S.', 'GPE'), ('Australia', 'GPE'), ('the United States', 'GPE')]
Relationships: []

Paragraph 5:
In February 2018 the Trump and Morrison administrations launched the Australia-U.S. Strategic Partnership on Energy in the Indo-Pacific. The framework is designed to both build on ongoing areas of collaboration between the two countries (including R&D, investment, and market development) and to articulate meaningful ways in which further cooperation could advance the national security interests of both countries.  It also nominally prioritizes cooperation in three geographic subregions—Southeast Asia, South Asia, and Southwest Asia—although specific efforts since the announcement have also focused on the Pacific Islands.


Entities: [('Trump', 'ORG'), ('Australia', 'GPE')]
Relationships: []

Paragraph 6:
The United States currently has only three strategic partnerships on energy within the Indo-Pacific (with the other two involving Japan and India). The partnerships with India and Japan include a relatively greater emphasis on the value of collaboration as a means for addressing energy insecurity as well as tackling needs within a bilateral context.  By contrast, the U.S.-Australia partnership is more outward-looking in terms of the envisioned benefits, focusing on joint and multi-party collaboration to address regional (rather than bilateral) needs. Moreover, the goal of cooperation is not necessarily limited to energy security


Entities: [('The United States', 'GPE'), ('the Indo-Pacific', 'ORG'), ('Japan', 'GPE'), ('India', 'GPE'), ('India', 'GPE'), ('Japan', 'GPE'), ('U.S.-Australia', 'GPE')]
Relationships: []

Paragraph 7:
Open-ended flexibility is the hallmark of a successfully managed alliance. However, for such cooperation to be effective it must also be linked to mechanisms that can advance concrete outcomes. The following discussion identifies arenas where the United States and Australia are working to achieve specific, actionable goals.


Entities: [('the United States', 'GPE'), ('Australia', 'GPE')]
Relationships: []

Paragraph 8:
According to estimates by the U.S. Energy Information Administration, developing Asia is anticipated to account for most of the global increase in energy use between 2018 and 2050.  Even though Asia is a key driver of global expansion in the deployment of renewable energy, its level of unmet energy needs is still such that the IEA projects that the demand for virtually all energy types will continue to grow. Thus, meeting the region’s energy requirements raises important economic and environmental questions. For a number of countries—particularly in Southeast Asia—it will also mean grappling with the added implications of becoming a net importer for the first time in history. Such dynamics can create powerful anxieties that, if not well-managed, could further contribute to rising resource nationalism and geopolitical tension.


Entities: [('the U.S. Energy Information Administration', 'ORG'), ('IEA', 'ORG')]
Relationships: []

Paragraph 9:
Recognizing this, the U.S.-Australia Strategic Energy Partnership formally calls attention to two priority areas where the United States and Australia can partner in informing how the Indo-Pacific might meet its demands for energy: first, through supporting the deployment of low-emission technologies; and second, through promoting natural gas market development. Although the framework is not explicit on what might count as low-emission technologies, similar language in other strategic energy partnerships has touched on wind, solar, nuclear, and advanced coal technologies (which in turn, would mirror previous U.S.-Australia collaboration during the Obama and Turnbull administrations). To date, both countries have received criticism for not exploring cooperation in this arena more aggressively. Yet, regular and robust efforts do exist via state-supported labs and development 

Entities: [('the U.S.-Australia Strategic Energy Partnership', 'ORG'), ('the United States', 'GPE'), ('Australia', 'GPE'), ('the Indo-Pacific', 'ORG'), ('Turnbull', 'GPE')]
Relationships: []

Paragraph 10:
Meanwhile, the second identified area of cooperation—natural gas market development—has nominally captured greater policymaker attention, and not without reason. As two of the world’s largest exporters of liquefied natural gas (LNG), both the United States and Australia are eager to expand their exports within the region, especially given the number of countries across developing Asia that have set ambitious targets for increasing their natural gas consumption. This is an area where policy can play a critical enabling role. An ongoing uncertainty is whether gas markets in these countries will be set up in a way that allows them to absorb and utilize supplies; for example, this might involve a need to mobilize financing to develop new import terminals or to reform monopolistic state-o

Entities: [('LNG', 'ORG'), ('the United States', 'GPE'), ('Australia', 'GPE'), ('U.S.-Australia', 'GPE'), ('Australia', 'GPE')]
Relationships: []

Paragraph 11:
In tandem with the expectation that Asia’s demand for energy will continue to rise is the expectation that the availability and overall capacity of energy-related infrastructure will need to increase dramatically. The Asian Development Bank (ADB) has estimated that developing Asia will require $26 trillion in infrastructure investment between 2016 and 2030 (or roughly $1.7 trillion a year), with $23 billion specifically linked to power and transportation sector infrastructure.  Of that, ADB estimates that existing commitments will meet just over half of this target.


Entities: [('The Asian Development Bank', 'ORG'), ('ADB', 'ORG')]
Relationships: []

Paragraph 12:
With this in mind, in mid-2018 Australia and the United States announced that, along with Japan, they would enter into a Trilateral Partnership for Infrastructure Investment in the Indo-Pacific. In addition to investing significant financial and technical resources into directly helping countries build infrastructure, the partnership emphasizes capacity building, such as developing best practices for construction and procurement.


Entities: [('Australia', 'GPE'), ('the United States', 'GPE'), ('Japan', 'GPE'), ('Trilateral Partnership for Infrastructure Investment', 'ORG')]
Relationships: []

Paragraph 13:
In terms of specific execution of these goals, in June 2019 the three partners announced that they would commit more than $1 billion to an LNG infrastructure project in Papua New Guinea. Furthermore, in November 2019 they announced their intention to once again expand the scope of their partnership, creating a “blue dot” network. Though many specifics are still forthcoming, the network is expected to produce a certification process or other avenue to enable decision-makers to assess whether particular projects have been developed in line with the G-20 standards on infrastructure development. More broadly, Australia has committed to expanding the current funding of its Export Finance and Insurance Corporation alongside similar U.S. commitments to increasing the resources of its own Overseas Private Investment C

Entities: [('LNG', 'ORG'), ('Papua New Guinea', 'GPE'), ('Australia', 'GPE'), ('Export Finance', 'ORG'), ('Insurance Corporation', 'ORG'), ('U.S.', 'GPE'), ('Overseas Private Investment Cooperation', 'ORG'), ('the U.S. International Development Finance Corporation', 'ORG')]
Relationships: []

Paragraph 14:
However, closing the gap between infrastructure needs and financing will likely require private-sector investment to triple. As NBR’s Pacific Energy Summit has regularly noted, the challenge here is not identifying who might have the resources to fill this gap. Rather, it is how to increase the attractiveness of current or proposed projects so that groups feel confident investing. For example, several countries in developing Asia have histories of revolving debt in their power sectors, which can undercut interest from the private sector in financing new infrastructure.


Entities: [('NBR', 'ORG'), ('Pacific Energy Summit', 'ORG')]
Relationships: []

Paragraph 15:
A key role that the United States and Australia can play in addressing this challenge is by helping countries identify and champion reforms that could change the risk profile of urgently needed projects. This includes specific tasks, such as reducing fuel subsidies, as well as large undertakings, such as sunshine campaigns designed to improve market transparency and ultimately reduce corruption. As the United States looks at how to expand support provided by the U.S. International Development Finance Corporation in line with new authorities under the Better Utilization of Investments Leading to Development (BUILD) Act, this is an area where U.S. and Australian counterparts might strengthen their work together.


Entities: [('the United States', 'GPE'), ('Australia', 'GPE'), ('the United States', 'GPE'), ('the U.S. International Development Finance Corporation', 'ORG'), ('the Better Utilization of Investments Leading to Development (BUILD) Act', 'ORG'), ('U.S.', 'GPE')]
Relationships: []

Paragraph 16:
Both the United States and Australia already play an important role in reducing energy insecurities in Northeast Asia, and they continue to expand their ambitions in Southeast Asia, South Asia, and the Pacific. Yet at the same time, neither country could meet all of Asia’s projected energy demands on its own (or even in tandem). As noted by the head of the IEA, just meeting Asia’s rising demand for natural gas would require the United States to build one new LNG export terminal every five months.


Entities: [('the United States', 'GPE'), ('Australia', 'GPE'), ('IEA', 'ORG'), ('the United States', 'GPE'), ('LNG', 'ORG')]
Relationships: []

Paragraph 17:
Consequently, even as both countries look at how they might directly meet Asia’s energy requirements, this cannot replace the broader requirement for healthy, well-functioning regional and global markets. And in turn, fostering such markets will require dedicated, long-term support for sustained reform efforts. These include ongoing initiatives to liberalize markets, discourage the politicization of energy, and strengthen regional energy architecture to reduce any one country’s exposure to risks stemming from price or supply shocks. And there is only so much of this that can be done in the bilateral, or even trilateral, realm.


Entities: []
Relationships: []

Paragraph 18:
Good alliances are adaptive, flexible, and contain breathing room that allows them to think creatively about how to best prepare for contingencies. The United States and Australia already have such an alliance on energy cooperation. But taking this to new heights as part of a strategy truly focused on the Indo-Pacific will require greater commitment.


Entities: [('The United States', 'GPE'), ('Australia', 'GPE'), ('the Indo-Pacific', 'ORG')]
Relationships: []

Paragraph 19:
Clara Gillispie is a CFR International Affairs Fellow at Carnegie India and Senior Advisor to the National Bureau of Asian Research. She also serves as the official U.S. delegate to the Energy Research Institute Network, an East Asia Summit–linked network whose inputs are designed to inform the formal East Asia Summit process.


Entities: [('CFR International Affairs Fellow', 'ORG'), ('Carnegie India', 'ORG'), ('the National Bureau of Asian Research', 'ORG'), ('U.S.', 'GPE'), ('the Energy Research Institute Network', 'ORG')]
Relationships: []


In [19]:
# Five parallel accumulators, each its own empty list; the extraction loop
# appends one value to every list per detected relationship.
entity1_list, relationship_list, entity2_list, date_list, money_list = (
    [] for _ in range(5)
)

In [20]:
# Build sentence-level relationship rows: a sentence contributes one row when
# it contains at least two retained entities and at least one relationship keyword.

# Start from empty accumulators so that re-running this cell does not append
# the same rows a second time.
for accumulator in (entity1_list, relationship_list, entity2_list, date_list, money_list):
    accumulator.clear()

# Iterate through each paragraph
for _, row in df.iterrows():
    doc = nlp(row['Paragraph Text'])  # Process the paragraph with spaCy

    for sent in doc.sents:
        # Extract named entities (countries, organizations)
        entities = []
        money_in_sentence = []

        for ent in sent.ents:
            if 'initiative' in ent.text.lower():
                # Treat "... Initiative" entities as policies. The comparison is
                # against lower-cased text, so the needle must be lower case.
                # A new span over the same tokens carries the custom label.
                ent = spacy.tokens.Span(doc, ent.start, ent.end, label='POL')
                entities.append(ent)
            elif ent.label_ in ['GPE', 'ORG', 'POL', 'MONEY']:
                # NOTE(review): MONEY spans are kept as entity candidates here, so
                # an amount can end up as Entity1/Entity2 - confirm this is intended.
                entities.append(ent)

            # Collect monetary amounts
            if ent.label_ == "MONEY":
                money_in_sentence.append(ent.text)

        # Look for verbs or specific relationship nouns in the sentence
        verbs = [token for token in sent if token.pos_ == "VERB"]
        keywords_in_sentence = [token.text for token in sent if token.lemma_ in relationship_keywords]

        # Extract dates
        dates = [ent.text for ent in sent.ents if ent.label_ == "DATE"]

        # If we find two entities and a relationship keyword, we assume a relationship
        if len(entities) >= 2 and keywords_in_sentence:
            entity1_list.append(entities[0].text)

            # Concatenate the relationship noun and verb (if available) into one string
            relationship_info = keywords_in_sentence[0]
            if verbs:
                relationship_info += " (verb: " + verbs[0].lemma_ + ")"

            relationship_list.append(relationship_info)  # Store concatenated info
            entity2_list.append(entities[1].text)
            # Only the first date / amount of the sentence is recorded; "n/a" marks none.
            date_list.append(dates[0] if dates else "n/a")
            money_list.append(money_in_sentence[0] if money_in_sentence else "n/a")

In [21]:
# Assemble the parallel accumulator lists into one table, one column per list.
relationships_df = pd.DataFrame(
    data=dict(
        Entity1=entity1_list,
        Relationship=relationship_list,
        Entity2=entity2_list,
        Date=date_list,
        Money=money_list,
    )
)

In [22]:
# Display the assembled relationship table.
relationships_df

Unnamed: 0,Entity1,Relationship,Entity2,Date,Money
0,the United States,interests (verb: compete),Australia,,
1,the United States,cooperation (verb: highlight),Australia,,
2,U.S.,engage (verb: couple),Australia,,
3,India,collaboration (verb: include),Japan,,
4,the U.S.-Australia Strategic Energy Partnership,partner (verb: recognize),the United States,,
5,LNG,exports (verb: liquefy),the United States,,
6,The Asian Development Bank,developing (verb: estimate),$26 trillion,between 2016 and 2030,$26 trillion
7,more than $1 billion,partners (verb: announce),LNG,June 2019,more than $1 billion
8,the United States,support (verb: look),the U.S. International Development Finance Cor...,,
9,IEA,export (verb: note),the United States,every five months,


In [23]:
# Persist the relationship table as CSV, leaving out the index column.
relationships_df.to_csv(path_or_buf='09_relationships.csv', index=False)