In [30]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
import spacy
from spacy import displacy

In [31]:
url = "https://www.nbr.org/publication/enhancing-clean-energy-cooperation-in-the-indo-pacific/"
response = requests.get(url)

In [32]:
soup=BeautifulSoup(response.content, "html.parser")

In [33]:
print(soup.prettify())

<!DOCTYPE html>
<html class="no-js" lang="en">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <title>
   Enhancing Clean Energy Cooperation in the Indo-Pacific | The National Bureau of Asian Research (NBR)
  </title>
  <link href="https://www.nbr.org/xmlrpc.php" rel="pingback"/>
  <link href="https://www.nbr.org/wp-content/themes/nbr-theme/build/css/main.css" rel="stylesheet" type="text/css"/>
  <link href="https://www.nbr.org/wp-content/themes/nbr-theme/style.css" media="screen" rel="stylesheet" type="text/css"/>
  <script crossorigin="anonymous" defer="" integrity="sha384-3yBLeJ4waqGSAf4A8pjZ13UF7GuhgbdKnBQvIp/TkWoXtQbtwjlIPNjkDRJ46UCn" src="https://pro.fontawesome.com/releases/v5.5.0/js/all.js">
  </script>
  <meta content="max-image-preview:large" name="robots"/>
  <link href="//code.jquery.com" rel="dns-prefetch"/>
  <

In [34]:
main_content = soup.find('main')
for element in main_content(['a', 'em']):
    element.decompose()

In [35]:
text_list = []
stop_text = "James Bowen is a Policy Fellow at the Perth USAsia Centre."
for paragraph in main_content.find_all('p'):
    paragraph_text = paragraph.get_text(separator=' ', strip=False)

    if stop_text in paragraph_text:
        text_list.append(paragraph_text)
        break
    text_list.append(paragraph_text)

In [36]:
df = pd.DataFrame(text_list, columns=["Paragraph Text"])
df.to_csv('02_paragraphs.csv', index=False)

In [37]:
df

Unnamed: 0,Paragraph Text
0,James Bowen argues that clean energy cooperati...
1,Ensuring a rapid global transition to clean en...
2,The Intergovernmental Panel on Climate Change’...
3,Regional coordination of clean energy commitme...
4,Existing Indo-Pacific clean energy relationshi...
5,The current clean energy engagement of the Uni...
6,Key U.S. regional ally Australia is prominentl...
7,"Japan, meanwhile, has emerged as the dominant ..."
8,"Like Japan, South Korea is a major regional hy..."
9,The potential game-changer for Indo-Pacific cl...


In [38]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [39]:
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /Users/loogyee/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/loogyee/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [41]:
nlp = spacy.load("en_core_web_sm")

In [42]:
df = pd.read_csv('02_paragraphs.csv')

In [43]:
# Preprocessing 
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    # Lowercase the entire text
    text = text.lower()
    
    # Tokenize and lemmatize
    lemmatized_words = []
    for word in text.split():  # Split the text into words
        lemmatized_words.append(lemmatizer.lemmatize(word))  # Lemmatize each word
    
    # Join lemmatized words back into a single string
    cleaned_text = ' '.join(lemmatized_words)
    
    return cleaned_text

In [44]:
df['Processed Text'] = df['Paragraph Text'].apply(preprocess_text)

In [45]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [46]:
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [47]:
relationship_keywords = {'collaboration', 'cooperation', 'partner', 'engage', 
                         'support', 'fund', 'finance', 'invest', 'develop', 
                         'supply', 'promote', 'transition', 'export', 'agreement', 
                         'interest', 'share', 'enhance', 'effort', 'import'}

In [48]:
def extract_entities_and_relationships(text):
    doc = nlp(text)  # Process text with spaCy
    entities = [(ent.text, ent.label_) for ent in doc.ents if ent.label_ in ['ORG', 'GPE']]  # Extract only ORG and GPE entities

    # Find relationships based on keywords
    relationships = []
    for token in doc:
        if token.text in relationship_keywords:  # Check if the token matches any relationship keyword
            subject = [w for w in token.children if w.dep_ == "nsubj"]  # Find the subject
            object_ = [w for w in token.children if w.dep_ == "dobj"]  # Find the direct object
            if subject and object_:
                relationships.append((subject[0].text, token.text, object_[0].text))  # (subject, relationship keyword, object)

    return entities, relationships

In [49]:
df['Entities'], df['Relationships'] = zip(*df['Processed Text'].apply(extract_entities_and_relationships))

In [50]:
df[['Processed Text', 'Entities', 'Relationships']]

Unnamed: 0,Processed Text,Entities,Relationships
0,james bowen argues that clean energy cooperati...,"[(the united state, ORG)]",[]
1,ensuring a rapid global transition to clean en...,"[(un, ORG), (the united state, ORG)]",[]
2,the intergovernmental panel on climate change’...,[],[]
3,regional coordination of clean energy commitme...,[(national clean energy transition require mas...,[]
4,existing indo-pacific clean energy relationshi...,"[(china, GPE), (china, GPE), (china, GPE), (be...",[]
5,the current clean energy engagement of the uni...,"[(the united state, ORG)]",[]
6,key u.s. regional ally australia is prominentl...,"[(u.s., GPE), (australia, GPE), (australia, GP...",[]
7,"japan, meanwhile, ha emerged a the dominant su...","[(japan, GPE), (australia, GPE), (tokyo, GPE),...","[(investment, enhance, security)]"
8,"like japan, south korea is a major regional hy...","[(japan, GPE), (south korea, GPE), (seoul, GPE...",[]
9,the potential game-changer for indo-pacific cl...,"[(india, GPE), (china, GPE), (india, GPE), (in...",[]


In [51]:
for index, row in df.iterrows():
    print(f"\nParagraph {index + 1}:")
    print(row['Paragraph Text'])

    # Display dependency parsing
    doc = nlp(row['Processed Text'])
    sentence_spans = list(doc.sents)
    displacy.render(sentence_spans, style='dep', jupyter=True, options={'compact': True})
    
    # Display named entities
    displacy.render(sentence_spans, style="ent", jupyter=True)

    # Print extracted entities and relationships
    print("Entities:", row['Entities'])


Paragraph 1:
James Bowen argues that clean energy cooperation would be a win for both the climate and stability of the long-standing Indo-Pacific order and urges the United States and other advanced regional economies to revive the spirit of common cause that followed past energy crises.


Entities: [('the united state', 'ORG')]

Paragraph 2:
Ensuring a rapid global transition to clean energy systems is the overriding priority of international climate action. Cross-border cooperation in this space is critical, yet it has proved exceedingly difficult at the all-inclusive UN-led level. Smaller avenues of parallel activity could ultimately deliver more meaningful progress. Collaborative efforts that simultaneously allow space for the advancement of national economic and strategic positions are a prominent feature of current Indo-Pacific relations. They merit sustained commitment from the United States and its regional allies and partners, particularly at a time of upheaval in global energy markets.




Entities: [('un', 'ORG'), ('the united state', 'ORG')]

Paragraph 3:
The Intergovernmental Panel on Climate Change’s April 2022 report noted significant “geophysical, environmental-ecological, technological, economic and, especially, institutional and socio-cultural” barriers to an otherwise feasible transition to clean energy and broader economic systems.  All these obstacles are evident to typically high degrees in Indo-Pacific countries, whose dominant emissions pathways guarantee the region’s ongoing centrality in global decarbonization efforts.


Entities: []

Paragraph 4:
Regional coordination of clean energy commitments is essential to breaking down barriers for four key reasons. First, many Indo-Pacific countries will struggle to meet their decarbonization goals without access to international trade in clean electricity and other clean energy carriers, such as hydrogen. Second, the changing economics of clean energy versus fossil fuels will reorder trade for some commodities and goods, with value-adding shifts to new low-cost markets. Third, clean energy systems generate a much higher demand for unevenly distributed “critical minerals”—such as lithium, cobalt, nickel, and rare earth minerals—than do hydrocarbon equivalents. Fourth, national clean energy transitions require mass deployment of novel technologies and infrastructure, which calls for significant external financial and technical assistance for developing economies.


Entities: [('national clean energy transition require mass deployment', 'ORG')]

Paragraph 5:
Existing Indo-Pacific clean energy relationships are dominated by China. Chinese companies have leading positions in key areas, including solar and wind, high-voltage transmission lines, and electric vehicle manufacturing. Chinese interests additionally control the value chains of many critical minerals. China’s Belt and Road Initiative is reportedly also being retooled to provide more clean energy infrastructure finance.  These interventions have helped increase the availability and affordability of Indo-Pacific clean energy. Yet they have simultaneously consolidated China’s often problematic position at the center of regional affairs. There are unavoidable risks of Beijing weaponizing some interdependencies, as autocratic fossil fuel superpowers have done in the past.


Entities: [('china', 'GPE'), ('china', 'GPE'), ('china', 'GPE'), ('beijing', 'GPE')]

Paragraph 6:
The current clean energy engagement of the United States and its Indo-Pacific allies and partners is comparatively weak. In recent years, however, governments have begun to catch up, deploying a combination of strong domestic industrial policy and internationally coordinated geoeconomic instruments. This process could hasten regional decarbonization. It could also reinforce strategic alignments that are critical to managing broader geopolitical challenges.


Entities: [('the united state', 'ORG')]

Paragraph 7:
Key U.S. regional ally Australia is prominently seeking to adapt its position as a purely fossil fuel–based energy superpower to new decarbonization realities. For instance, it has implemented domestic strategies to support the development of hydrogen and critical minerals. Australia has formed “low emission technology partnerships” with India, Singapore, Japan, and South Korea to foster joint financing and development of these prospects. The deal that Canberra struck with Seoul also looks to accelerate development of low emissions and potentially “green” iron ore and steel utilizing renewable inputs.  Australia has additional capacity to emerge as a major provider of direct renewable electricity to Asia. The privately funded Sun Cable consortium is already developing a 5,000-kilometer-long subsea transmission line connecting solar output in the north of the country to Singapore and other potential markets.


Entities: [('u.s.', 'GPE'), ('australia', 'GPE'), ('australia', 'GPE'), ('india', 'GPE'), ('singapore', 'GPE'), ('japan', 'GPE'), ('south korea', 'GPE'), ('seoul', 'ORG'), ('australia', 'GPE'), ('singapore', 'GPE')]

Paragraph 8:
Japan, meanwhile, has emerged as the dominant supply-side force in developing the Indo-Pacific hydrogen market. The government and businesses have worked in close concert and with foreign counterparts in countries such as Australia and Brunei to develop new supply chains and industry standards. Mirroring Tokyo’s past interventions in the liquefied natural gas (LNG) market, its hydrogen efforts should aid Japan’s own energy transition as well as that of other Indo-Pacific countries. The Japan Bank for International Cooperation has, for example, designated hydrogen an “essential resource” to increase self-sufficiency and lower carbon emissions.  This could pave the way for future involvement in developing regions such as Southeast Asia. Japanese investment in ne

Entities: [('japan', 'GPE'), ('australia', 'GPE'), ('tokyo', 'GPE'), ('japan', 'GPE'), ('the japan bank', 'ORG')]

Paragraph 9:
Like Japan, South Korea is a major regional hydrogen player. It seeks to become not only a major consumer but a major supplier of fuel cells and vehicles that might see growing market share alongside battery electric vehicles in coming years. Both Northeast Asian nations have ample opportunities to broaden their clean energy interactions with less advanced Indo-Pacific nations. This is evident in a recent $9 billion agreement between an LG-led Korean consortium and Indonesian partners on development of a “mine-to-manufacturing” electric vehicle project. Seoul and Jakarta have a memorandum of understanding in place to help foster more of the same type of activity.


Entities: [('japan', 'GPE'), ('south korea', 'GPE'), ('seoul', 'GPE'), ('jakarta', 'GPE')]

Paragraph 10:
The potential game-changer for Indo-Pacific clean energy cooperation is undoubtedly India. No other regional economy has the potential to play anywhere near the role that China has in areas such as large-scale technology manufacturing and provision of cross-border energy supplies. National officials are cognizant of this potential, as revealed by India’s National Hydrogen Mission released in 2021, which aims to transform the country into a global hub for fuel production and export.  India has also taken tentative steps toward embracing the type of economic openness that will be necessary to fulfill its potential in the clean energy space. One of the headline provisions of the trade agreement New Delhi signed with Canberra earlier in 2022—its first with an advanced economy for more than a decade—was increased access to Australian critical minerals.


Entities: [('india', 'GPE'), ('china', 'GPE'), ('india', 'GPE'), ('india', 'GPE'), ('new delhi', 'GPE')]

Paragraph 11:
The United States is already playing an important role in supporting Indo-Pacific clean energy cooperation. In December 2021, for example, the International Development Finance Corporation announced that it would lend $500 million to a U.S. firm building a solar panel factory in India.  Washington has also taken a notable interest in shoring up regional production of critical minerals. In March 2022, U.S. commerce secretary Gina Raimondo committed to rule changes that would allow U.S. export finance agencies to help fund the development of Australian projects in this area.  U.S. regional infrastructure provision is likewise set to take on a greener hue following President Joe Biden’s request in late 2021 that government agencies stop funding foreign carbon-intensive projects.


Entities: [('the united state', 'ORG'), ('u.s.', 'GPE'), ('india', 'GPE'), ('washington', 'GPE'), ('u.s.', 'GPE'), ('commerce', 'ORG'), ('u.s.', 'GPE'), ('u.s.', 'GPE')]

Paragraph 12:
The most meaningful attempt to coordinate regional clean energy efforts has come through the Quad grouping of the United States, Japan, India, and Australia, which has made engagement on these issues a priority of outreach to developing economies in the Indo-Pacific. Australia will take an important step in advancing this process when it partners with the International Energy Agency (IEA) to host the Indo-Pacific Clean Energy Supply Chain Forum in July. This meeting will ideally provide a comprehensive assessment of activity to date and help identify future policy priorities for key parties.


Entities: [('the united states', 'GPE'), ('japan', 'GPE'), ('india', 'GPE'), ('australia', 'GPE'), ('australia', 'GPE')]

Paragraph 13:
Chinese interests are likely to fight any efforts to break Beijing’s control of clean energy positions. There is evidence of this already occurring in the recent merger of three large mining conglomerates to create the China Rare Earth Group,  which aims to reassert and even grow Chinese state control over production and processing activity. The result could be value chains that increasingly focus on servicing domestic demand and concurrently diminish international access.


Entities: [('beijing', 'GPE')]

Paragraph 14:
Attempted clean energy uncoupling from China that goes too hard and too fast could simultaneously jeopardize critical inputs to systems before alternatives are widely available. This could frustrate the regional energy transition and the broader processes of strategic integration that flow from it.


Entities: [('china', 'GPE')]

Paragraph 15:
The bigger contemporary challenge is that Indo-Pacific clean energy progress might be derailed by the geopolitics of the old hydrocarbon economy. Russia’s war in Ukraine has improved the resolve of supporters of the U.S.-led international order and accelerated European desire to transition away from dependence on Russian energy and fossil fuels in aggregate. Yet a combination of severe short-term energy shortages and domestic price pressures have simultaneously revived support for non-Russian fossil fuel interests elsewhere, particularly in the United States. The U.S. Export-Import Bank has, for example, approved new financing arrangements for export LNG terminals.


Entities: [('russia', 'GPE'), ('the united states', 'GPE'), ('u.s.', 'GPE')]

Paragraph 16:
There is a need to ensure that short-term geopolitical exigencies do not derail longer-term imperatives, not least in the Indo-Pacific. Indeed, there is an even more pressing requirement for the United States and its partners to cooperate on meeting regional decarbonization needs than existed prior to the Ukraine invasion. Such cooperation is now also critical to heading off any attempts by Russia to transfer its European fossil fuel interdependencies to the energy-hungry Indo-Pacific economies. This would be a disaster for not only the climate but the future geopolitical orientation of the region.


Entities: [('the united state', 'ORG'), ('russia', 'GPE')]

Paragraph 17:
The United States and other advanced regional economies should revive the spirit of common cause that followed past energy crises. The legacy of the 1973 Arab oil embargo, most notably, included formation of the IEA and its various commitments to collective energy security. Washington and its allies should in turn recognize the new realities of the growing decarbonization imperative and the shift of global energy interactions to the Indo-Pacific. They should commit sustained resources to working alongside one another and with regional partners to connect clean energy needs and abilities.


Entities: [('the united state', 'ORG'), ('washington', 'GPE')]

Paragraph 18:
The Biden administration’s Indo-Pacific Economic Framework would be an ideal instrument for shepherding progress. Regional interlocutors have already expressed strong interest in the new framework’s potential offerings in this space.  Seizing on the imperative for clean energy cooperation would be a win for both the climate and stability of the long-standing Indo-Pacific order.


Entities: []

Paragraph 19:
James Bowen is a Policy Fellow at the Perth USAsia Centre.


Entities: [('the perth usasia centre', 'ORG')]


In [52]:
entity1_list = []
relationship_list = []
entity2_list = []
date_list = []
money_list = []

In [53]:
# Iterate through each paragraph
for index, row in df.iterrows():
    doc = nlp(row['Paragraph Text'])  # Process the paragraph with spaCy

    for sent in doc.sents:
        # Extract named entities (countries, organizations)
        entities = []
        money_in_sentence = []

        for ent in sent.ents:
            if ent.label_ == 'Initiative' in ent.text:
                entities.append(ent._replace(label_='POL'))  # Change label for specific case
            elif ent.label_ in ['GPE', 'ORG', 'POL', 'MONEY']:
                entities.append(ent)

            # Collect monetary amounts
            if ent.label_ == "MONEY":
                money_in_sentence.append(ent.text)
        
        # Look for verbs or specific relationship nouns in the sentence
        verbs = [token for token in sent if token.pos_ == "VERB"]
        keywords_in_sentence = [token.text for token in sent if token.lemma_ in relationship_keywords]
        
        # Extract dates
        dates = [ent.text for ent in sent.ents if ent.label_ == "DATE"]

        # If we find two entities and a relationship keyword, we assume a relationship
        if len(entities) >= 2 and keywords_in_sentence:
            entity1_list.append(entities[0].text)

            # Concatenate the relationship noun and verb (if available) into one string
            relationship_info = keywords_in_sentence[0]
            if verbs:
                relationship_info += " (verb: " + verbs[0].lemma_ + ")"

            relationship_list.append(relationship_info)  # Store concatenated info
            entity2_list.append(entities[1].text)
            date_list.append(dates[0] if dates else "n/a")
            money_list.append(money_in_sentence[0] if money_in_sentence else "n/a")

In [54]:
# DataFrame
extracted_data = pd.DataFrame({
    "Entity1": entity1_list,
    "Relationship": relationship_list,
    "Entity2": entity2_list,
    "Date": date_list,
    "Money": money_list
})

In [55]:
extracted_data

Unnamed: 0,Entity1,Relationship,Entity2,Date,Money
0,Indo-Pacific,cooperation (verb: argue),the United States,,
1,the United States,partners,Indo-Pacific,,
2,Sun Cable,funded (verb: fund),Singapore,,
3,Australia,develop (verb: work),Brunei,,
4,Tokyo,efforts (verb: mirror),LNG,,
5,Indo-Pacific,cooperation,India,,
6,India,export (verb: reveal),National Hydrogen Mission,2021,
7,New Delhi,agreement (verb: sign),Canberra,2022,
8,The United States,supporting (verb: play),Indo-Pacific,,
9,U.S.,export (verb: commit),U.S.,March 2022,


In [56]:
# Export to csv
extracted_data.to_csv('02_relationship.csv', index=False)