In [1]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
import spacy
from spacy import displacy

In [4]:
url = "https://www.aspistrategist.org.au/the-geopolitics-of-the-energy-transition-in-the-indo-pacific/"
response = requests.get(url)

In [5]:
soup=BeautifulSoup(response.content, "html.parser")

In [6]:
print(soup.prettify())

<!DOCTYPE html>
<!--[if lt IE 7 ]><html class="lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if IE 7 ]><html class="lt-ie9" lang="en"> <![endif]-->
<!--[if IE 8 ]><html class="lt-ie9" lang="en"> <![endif]-->
<!--[if (gte IE 9)|!(IE)]><!-->
<html class="no-js" lang="en-AU" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
 <!--<![endif]-->
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <title>
   The geopolitics of the energy transition in the Indo-Pacific | The Strategist
  </title>
  <link href="https://www.aspistrategist.org.au/wp-content/themes/aspi/css/foundation.css?v=1.7" rel="stylesheet"/>
  <link href="https://www.aspistrategist.org.au/wp-content/themes/aspi/css/print.css?v=1.0.4" media="print" rel="stylesheet" type="text/css"/>
  <link href="https://www.aspistrategist.org.au/wp-content/themes/aspi/assets/img/icons/favicon.ico" rel="icon" type="image/x-icon"/>
  <link href="https://www.aspistrategist.org.au/w

In [7]:
# Extract main content
main_content = soup.find('div', {'class': 'entry-content'})
for element in main_content(['a', 'em', 'span']):
    element.decompose()

In [8]:
text_list = []
for paragraph in main_content.find_all('p'):
    paragraph_text = paragraph.get_text(separator=' ', strip=True)
    text_list.append(paragraph_text)

In [10]:
df = pd.DataFrame({"Paragraph Text": text_list})
df.to_csv('13_paragraphs.csv', index=False) # Save as csv file

In [11]:
df

Unnamed: 0,Paragraph Text
0,
1,While the forces shaping the Indo-Pacific’s en...
2,* * *
3,It is the year 2035. The global transformation...
4,Already struggling players are losing out. A l...
5,"By 2035, energy systems across the whole regio..."
6,Southeast Asia is transforming from a ‘maritim...
7,India’s presence in the region has grown consi...
8,The region’s dependency on China’s supply of r...
9,The region has thus seen several geopolitical ...


In [12]:
nlp = spacy.load("en_core_web_sm")

In [13]:
relationship_keywords = {'collaboration', 'cooperation', 'partner', 'engage', 
                         'support', 'fund', 'finance', 'invest', 'develop', 
                         'supply', 'promote', 'transition', 'export', 'agreement', 
                         'interest', 'share', 'enhance', 'effort', 'import'}

In [14]:
def extract_entities_and_relationships(text):
    doc = nlp(text)  # Process text with spaCy
    entities = [(ent.text, ent.label_) for ent in doc.ents if ent.label_ in ['ORG', 'GPE', 'FAC']]

    # Find relationships based on keywords
    relationships = []
    for token in doc:
        if token.lemma_ in relationship_keywords:  # Use lemma_ to check relationship keywords
            subject = [w for w in token.children if w.dep_ == "nsubj"]  # Find subject
            object_ = [w for w in token.children if w.dep_ == "dobj"]  # Find object
            if subject and object_:
                relationships.append((subject[0].text, token.text, object_[0].text))  # (subject, relationship, object)

    return entities, relationships

In [15]:
df['Entities'], df['Relationships'] = zip(*df['Paragraph Text'].apply(extract_entities_and_relationships))

In [16]:
df[['Paragraph Text', 'Entities', 'Relationships']]

Unnamed: 0,Paragraph Text,Entities,Relationships
0,,[],[]
1,While the forces shaping the Indo-Pacific’s en...,"[(the Indo-Pacific’s, ORG)]",[]
2,* * *,[],[]
3,It is the year 2035. The global transformation...,"[(OECD, ORG), (China, GPE), (India, GPE)]",[]
4,Already struggling players are losing out. A l...,"[(Cambodia, GPE), (Laos, GPE), (Myanmar, GPE),...",[]
5,"By 2035, energy systems across the whole regio...","[(Bangkok, GPE), (Jakarta, GPE), (Ho Chi Minh,...",[]
6,Southeast Asia is transforming from a ‘maritim...,"[(China, GPE), (China, GPE)]",[]
7,India’s presence in the region has grown consi...,"[(India, GPE), (India, GPE), (OECD, ORG)]",[]
8,The region’s dependency on China’s supply of r...,"[(China, GPE), (Malaysia, GPE), (India, GPE), ...",[]
9,The region has thus seen several geopolitical ...,"[(China, GPE), (India, GPE)]",[]


In [17]:
for index, row in df.iterrows():
    print(f"\nParagraph {index + 1}:")
    print(row['Paragraph Text'])

    # Display dependency parsing
    doc = nlp(row['Paragraph Text'])
    sentence_spans = list(doc.sents)
    displacy.render(sentence_spans, style='dep', jupyter=True, options={'compact': True})
    
    # Display named entities
    displacy.render(sentence_spans, style="ent", jupyter=True)
    
    # Print extracted entities and relationships
    print("Entities:", row['Entities'])
    print("Relationships:", row['Relationships'])


Paragraph 1:



Entities: []
Relationships: []

Paragraph 2:
While the forces shaping the Indo-Pacific’s energy transition out to 2035 are complex, some macro trends are clear. Energy demand in the region is projected to grow drastically and remain concentrated in urban areas, and power infrastructure will remain highly vulnerable to climate impacts. The uneven availability of finance to countries attempting to make the transition from fossil fuels to renewable-based energy systems is perhaps the most significant risk. Without major policy shifts, these trends prefigure widespread energy insecurity in 2035, with far-reaching implications for regional security, as the following projection to 2035 illustrates.




Entities: [('the Indo-Pacific’s', 'ORG')]
Relationships: []

Paragraph 3:
* * *


Entities: []
Relationships: []

Paragraph 4:
It is the year 2035. The global transformation of energy systems is well underway, yet only the most mature markets in Asia and the Pacific are able to use the ongoing shifts to their economic and (geo)political benefit. Continued in the levelised cost of electricity generated from solar and wind power spur a rapid shift of finance flows into clean energy. Approximately US$1.2 trillion was annually in clean electricity supply between 2023 and 2026, yet it was disproportionately focused on OECD countries. In Asia and the Pacific, China and India are large centres of investment, boasting both rapid deployment of renewable-based generation and manufacturing capacity.


Entities: [('OECD', 'ORG'), ('China', 'GPE'), ('India', 'GPE')]
Relationships: []

Paragraph 5:
Already struggling players are losing out. A lack of finance means Cambodia, Laos, Myanmar and Thailand have been unable to significantly ramp up clean power generation capacities. The logic that stymies the transition in vulnerable countries is also often perverse. For example, power-supply disruptions to major Asian markets, most notably China, simultaneously deliver short-term boosts to the fossil-fuel markets of several Southeast Asian producers, most notably gas exporters Indonesia, Malaysia, Myanmar and Brunei. Faced with a lack of private- and public-sector finance to pursue alternative—and more resilient paths—these countries opt for boosting domestic oil, coal and gas production.


Entities: [('Cambodia', 'GPE'), ('Laos', 'GPE'), ('Myanmar', 'GPE'), ('Thailand', 'GPE'), ('China', 'GPE'), ('Indonesia', 'GPE'), ('Malaysia', 'GPE'), ('Myanmar', 'GPE'), ('Brunei', 'GPE')]
Relationships: []

Paragraph 6:
By 2035, energy systems across the whole region are under extreme physical and economic strain due to the severe and intensifying impacts of global warming, which has now reached 1.5°C above pre-industrial levels. The largest energy demand centres (Bangkok, Jakarta, Ho Chi Minh, Yangon) and other coastal cities are in the middle of a massive relocation inland, as neither the centralised power-supply systems nor cooling and heating appliances can withstand the pressure of repeated floods. This trend dramatically accelerated after the sequential El Niño – La Niña events in 2022–23.


Entities: [('Bangkok', 'GPE'), ('Jakarta', 'GPE'), ('Ho Chi Minh', 'GPE'), ('Yangon', 'GPE'), ('2022–23', 'FAC')]
Relationships: []

Paragraph 7:
Southeast Asia is transforming from a ‘maritime periphery’ for the major regional powers to a place of great-power competition on energy standards, finance and political influence. China, which has been historically dominant in Southeast Asia’s energy system planning through close links to regional governments and energy companies, has managed to establish itself as the standard-bearer in the increasingly decarbonised energy sector. The successful energy transition and climate resilience of its southern region is one of China’s security priorities, as climate migration leads to tensions at its southern borders. Moreover, its major low-carbon technology production centres are increasingly exposed to climate impacts, threatening Chinese companies with supply disruptions and loss of capital.


Entities: [('China', 'GPE'), ('China', 'GPE')]
Relationships: []

Paragraph 8:
India’s presence in the region has grown considerably over the past decade, particularly within the India–ASEAN Power Grid connectivity initiative and within various power-grid cooperative initiatives. The third-largest manufacturing centre for photovoltaic cells and producer of over one-quarter of solar panels globally, India has become the primary supplier of solar-power equipment to OECD countries that are looking to diversify away from Chinese products and is in a fierce competition for dominance with Chinese equipment providers in Southeast Asia’s renewable-power markets.


Entities: [('India', 'GPE'), ('India', 'GPE'), ('OECD', 'ORG')]
Relationships: []

Paragraph 9:
The region’s dependency on China’s supply of raw materials remains extremely high, however, as demand for lithium, silicon, nickel, copper and rare metals is on a rapid and steady rise. Other parts of the renewables supply chains have been redistributed, and Malaysia, India and Vietnam have joined the regional market as major solar-power equipment suppliers.


Entities: [('China', 'GPE'), ('Malaysia', 'GPE'), ('India', 'GPE'), ('Vietnam', 'GPE')]
Relationships: []

Paragraph 10:
The region has thus seen several geopolitical shifts as China cements its energy dominance, India rises as a major regional energy player and a handful of smaller powers manage to carve out renewable production niches.


Entities: [('China', 'GPE'), ('India', 'GPE')]
Relationships: []

Paragraph 11:
There remains a relative lack of major support from OECD countries and international finance institutions. This absence is being filled by China, using its historically established cooperation ties and its mature domestic renewables market. Japan, by contrast, remains a prominent regional provider of fossil-fuel finance, locking many countries in the region into several decades of growing gas and oil consumption. Much of this finance will eventually translate into stranded assets.


Entities: [('OECD', 'ORG'), ('China', 'GPE'), ('Japan', 'GPE')]
Relationships: []

Paragraph 12:
* * *


Entities: []
Relationships: []

Paragraph 13:
The scenario just outlined is among the most probable outcomes unless decisive policy shifts take place in the countries of Southeast Asia and sufficient financial support from developed countries is granted. Sticking to a ‘business as usual’ scenario won’t be possible in years to come. The extreme weather events, unchecked loss and damage, and increased vulnerability of energy systems to the impacts of climate change no longer leave any room for status quo assumptions.


Entities: []
Relationships: []

Paragraph 14:
Measures that can help to rapidly boost the resilience of the region’s energy systems to climate impacts, and alleviate the impact of market shocks and energy supply crunches, already exist in 2022. Regional cooperation in risk-sharing and risk-management mechanisms and disaster preparedness, which is embedded in a broader international cooperation framework, is key.


Entities: []
Relationships: []

Paragraph 15:
We also know that a massive shift of finance flows will need to take place to unlock access to low-carbon growth for the developing economies of Southeast Asia, with public finance playing a key role in support of regulatory and policy action. And, globally, it’s critical for policymakers to ensure that the geopolitical shifts result in a race to the top rather than a clash of standards on a range of critical topics, including the diversification and resilience of low-carbon supply chains and the parallel critical-materials markets.


Entities: []
Relationships: []

Paragraph 16:
The policy challenges involved in managing the regional energy transition are complex and defy traditional policy silos, but governments must start acting now to avoid the worst outcomes that are possible as soon as 2035.


Entities: []
Relationships: []


In [18]:
entity1_list = []
relationship_list = []
entity2_list = []
date_list = []
money_list = []

In [19]:
# Iterate through each paragraph
for index, row in df.iterrows():
    doc = nlp(row['Paragraph Text'])  # Process the paragraph with spaCy

    for sent in doc.sents:
        # Extract named entities (countries, organizations)
        entities = []
        money_in_sentence = []

        for ent in sent.ents:
            if 'Inititative' in ent.text.lower(): 
                ent = ent._replace(label_='POL')
                entities.append(ent)
            elif ent.label_ in ['GPE', 'ORG', 'POL', 'MONEY']:
                entities.append(ent)

            # Collect monetary amounts
            if ent.label_ == "MONEY":
                money_in_sentence.append(ent.text)
        
        # Look for verbs or specific relationship nouns in the sentence
        verbs = [token for token in sent if token.pos_ == "VERB"]
        keywords_in_sentence = [token.text for token in sent if token.lemma_ in relationship_keywords]
        
        # Extract dates
        dates = [ent.text for ent in sent.ents if ent.label_ == "DATE"]

        # If we find two entities and a relationship keyword, we assume a relationship
        if len(entities) >= 2 and keywords_in_sentence:
            entity1_list.append(entities[0].text)

            # Concatenate the relationship noun and verb (if available) into one string
            relationship_info = keywords_in_sentence[0]
            if verbs:
                relationship_info += " (verb: " + verbs[0].lemma_ + ")"

            relationship_list.append(relationship_info)  # Store concatenated info
            entity2_list.append(entities[1].text)
            date_list.append(dates[0] if dates else "n/a")
            money_list.append(money_in_sentence[0] if money_in_sentence else "n/a")

In [20]:
# DataFrame
relationships_df = pd.DataFrame({
    "Entity1": entity1_list,
    "Relationship": relationship_list,
    "Entity2": entity2_list,
    "Date": date_list,
    "Money": money_list
})

In [21]:
relationships_df

Unnamed: 0,Entity1,Relationship,Entity2,Date,Money
0,Approximately US$1.2 trillion,supply (verb: focus),OECD,annually,Approximately US$1.2 trillion
1,Cambodia,finance (verb: mean),Laos,,
2,China,supply (verb: deliver),Indonesia,,
3,Bangkok,supply (verb: cool),Jakarta,,
4,Malaysia,supply (verb: redistribute),India,,


In [22]:
# Export to csv
relationships_df.to_csv('13_relationships.csv', index=False)