In [1]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
import spacy
from spacy import displacy

In [2]:
url = "https://www.aspistrategist.org.au/chinas-investment-in-renewable-energy-in-the-indo-pacific-brings-risks-and-opportunities/"
response = requests.get(url)

In [3]:
soup=BeautifulSoup(response.content, "html.parser")

In [4]:
print(soup.prettify())

<!DOCTYPE html>
<!--[if lt IE 7 ]><html class="lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if IE 7 ]><html class="lt-ie9" lang="en"> <![endif]-->
<!--[if IE 8 ]><html class="lt-ie9" lang="en"> <![endif]-->
<!--[if (gte IE 9)|!(IE)]><!-->
<html class="no-js" lang="en-AU" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
 <!--<![endif]-->
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <title>
   China’s investment in renewable energy in the Indo-Pacific brings risks—and opportunities | The Strategist
  </title>
  <link href="https://www.aspistrategist.org.au/wp-content/themes/aspi/css/foundation.css?v=1.7" rel="stylesheet"/>
  <link href="https://www.aspistrategist.org.au/wp-content/themes/aspi/css/print.css?v=1.0.4" media="print" rel="stylesheet" type="text/css"/>
  <link href="https://www.aspistrategist.org.au/wp-content/themes/aspi/assets/img/icons/favicon.ico" rel="icon" type="image/x-icon"/>
  <link href="https:

In [5]:
main_content = soup.find('div', {'class': 'entry-content'})
for element in main_content(['span']):
    element.decompose()

In [6]:
text_list = []
for paragraph in main_content.find_all('p'):
    paragraph_text = paragraph.get_text(separator=' ', strip=True)
    text_list.append(paragraph_text)

In [7]:
df = pd.DataFrame(text_list, columns=["Paragraph Text"])
df.to_csv('05_paragraphs.csv', index=False)

In [8]:
df

Unnamed: 0,Paragraph Text
0,"Next week’s meeting of the G20, hosted by Indo..."
1,The expansion of the partnership to include In...
2,China is the largest financer of renewable ene...
3,These concerns are heightened by China’s domin...
4,The country also produces about 85% of the wor...
5,China’s leverage is particularly pronounced in...
6,China has pledged to use the dams to ensure a ...
7,But it’s important to keep in mind that China’...
8,China’s financing of renewable energy projects...
9,China’s multidimensional engagement in the reg...


In [9]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [10]:
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /Users/loogyee/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/loogyee/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [11]:
nlp = spacy.load("en_core_web_sm")

In [12]:
df = pd.read_csv('05_paragraphs.csv')

In [13]:
# Preprocessing 
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    # Lowercase the entire text
    text = text.lower()
    
    # Tokenize and lemmatize
    lemmatized_words = []
    for word in text.split():  # Split the text into words
        lemmatized_words.append(lemmatizer.lemmatize(word))  # Lemmatize each word
    
    # Join lemmatized words back into a single string
    cleaned_text = ' '.join(lemmatized_words)
    
    return cleaned_text

In [14]:
df['Processed Text'] = df['Paragraph Text'].apply(preprocess_text)

In [15]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [16]:
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [17]:
relationship_keywords = {'collaboration', 'cooperation', 'partner', 'engage', 
                         'support', 'fund', 'finance', 'invest', 'develop', 
                         'supply', 'promote', 'transition', 'export', 'agreement', 
                         'interest', 'share', 'enhance', 'effort', 'import'}

In [18]:
def extract_entities_and_relationships(text):
    doc = nlp(text)  # Process text with spaCy
    entities = [(ent.text, ent.label_) for ent in doc.ents if ent.label_ in ['ORG', 'GPE']]

    # Find relationships based on keywords
    relationships = []
    for token in doc:
        if token.text in relationship_keywords:  # Check if the token matches any relationship keyword
            subject = [w for w in token.children if w.dep_ == "nsubj"]  # Find the subject
            object_ = [w for w in token.children if w.dep_ == "dobj"]  # Find the direct object
            if subject and object_:
                relationships.append((subject[0].text, token.text, object_[0].text))  # (subject, relationship keyword, object)

    return entities, relationships

In [19]:
df['Entities'], df['Relationships'] = zip(*df['Processed Text'].apply(extract_entities_and_relationships))

In [20]:
df[['Processed Text', 'Entities', 'Relationships']]

Unnamed: 0,Processed Text,Entities,Relationships
0,"next week’s meeting of the g20, hosted by indo...","[(indonesia, GPE), (jakarta, GPE), (us, GPE), ...",[]
1,the expansion of the partnership to include in...,"[(indonesia, GPE), (south africa, GPE), (paris...",[]
2,china is the largest financer of renewable ene...,"[(china, GPE), (beijing, GPE), (china, GPE)]",[]
3,these concern are heightened by china’s domina...,"[(china, GPE), (china, GPE)]",[]
4,the country also produce about 85% of the worl...,"[(china, GPE), (japan, GPE)]",[]
5,china’s leverage is particularly pronounced in...,"[(china, GPE), (myanmar, GPE), (laos, GPE), (c...",[]
6,china ha pledged to use the dam to ensure a mo...,"[(china, GPE), (china, GPE)]",[]
7,but it’s important to keep in mind that china’...,"[(china, GPE), (china, GPE), (beijing, GPE)]",[]
8,china’s financing of renewable energy project ...,"[(china, GPE), (indonesia, GPE), (china, GPE),...",[]
9,china’s multidimensional engagement in the reg...,"[(china, GPE), (paris, GPE), (china, GPE)]",[]


In [21]:
for index, row in df.iterrows():
    print(f"\nParagraph {index + 1}:")
    print(row['Paragraph Text'])

    # Display dependency parsing
    doc = nlp(row['Processed Text'])
    sentence_spans = list(doc.sents)
    displacy.render(sentence_spans, style='dep', jupyter=True, options={'compact': True})
    
    # Display named entities
    displacy.render(sentence_spans, style="ent", jupyter=True)

    # Print extracted entities and relationships
    print("Entities:", row['Entities'])


Paragraph 1:
Next week’s meeting of the G20, hosted by Indonesia, will feature discussions on climate change and the need to accelerate the global energy transition from fossil fuels to renewables. Jakarta is also expected formally to announce that it’s joining the Just Energy Transition Partnership , a multibillion-dollar G7 initiative led by the US, France, Germany, the UK and the EU that’s designed to assist emerging economies with accelerating their transitions to renewables.


Entities: [('indonesia', 'GPE'), ('jakarta', 'GPE'), ('us', 'GPE'), ('france', 'GPE'), ('germany', 'GPE'), ('uk', 'GPE')]

Paragraph 2:
The expansion of the partnership to include Indonesia (South Africa joined last year) reflects the G7’s concern that the Paris climate agreement’s targets won’t be achieved without major financing and other support for the energy transition in emerging markets. But it also reflects the group’s attempt to counter China’s increasing prominence in renewable energy investments, particularly in Asia.


Entities: [('indonesia', 'GPE'), ('south africa', 'GPE'), ('paris', 'GPE'), ('china', 'GPE')]

Paragraph 3:
China is the largest financer of renewable energy projects in the Indo-Pacific. More than half of its overseas energy investments under the Belt and Road initiative , amounting to US$20 billion in 2020, are in the renewables sector. This significant funding is critical to supporting the regional energy transition. But G7 leaders have also expressed concerns that Beijing will leverage this financing for political influence and strategic advantage , including by creating financial and technological dependence on China.




Entities: [('china', 'GPE'), ('beijing', 'GPE'), ('china', 'GPE')]

Paragraph 4:
These concerns are heightened by China’s dominance of renewable energy products and control over the supply chains for rare-earth mineral s and the processing capacity to produce them. The International Energy Agency estimates, for example, that China’s global share in all the key manufacturing stages of solar panels, which currently exceeds 80%, will rise to more than 95% in the coming years.


Entities: [('china', 'GPE'), ('china', 'GPE')]

Paragraph 5:
The country also produces about 85% of the world’s rare-earth oxides and about 90% of rare-earth metals, alloys and permanent magnets. And China has demonstrated its willingness to use its control of the supply chain in pursuit of its wider geopolitical interests, most notably in 2010 when in retaliation for a maritime dispute with Japan it restricted rare-earth mineral exports to that country.


Entities: [('china', 'GPE'), ('japan', 'GPE')]

Paragraph 6:
China’s leverage is particularly pronounced in the Mekong region. Myanmar, Laos, Cambodia and Vietnam rely on hydroelectricity produced along the Mekong River system for more than 50% of their electricity production, and China is their main source of financing for hydropower dams. More importantly, though, China’s upstream dams increasingly enable it to control the flow of water to its downstream neighbours, where 60 million people depend on the Mekong for their livelihoods.


Entities: [('china', 'GPE'), ('myanmar', 'GPE'), ('laos', 'GPE'), ('cambodia', 'GPE'), ('vietnam', 'GPE'), ('china', 'GPE'), ('china', 'GPE')]

Paragraph 7:
China has pledged to use the dams to ensure a more even distribution of water during dry and wet seasons. But recent analysis suggests that on at least one occasion, during the extreme drought affecting the region in 2019, it did not do so. At least one expert in the region saw China’s control of Mekong water during the drought as an act of political manipulation.


Entities: [('china', 'GPE'), ('china', 'GPE')]

Paragraph 8:
But it’s important to keep in mind that China’s energy investments are motivated by multiple, overlapping objectives, including to secure economic advantage, achieve energy security, build greater geopolitical influence and leverage, and reduce climate risks. All of these objectives are on display in the Mekong, including in the key role hydropower plays in China’s plan to become carbon-neutral by mid-century. Chinese objectives are also evolving. As Huong Le Thu has observed with respect to Beijing’s investments in the Mekong, ‘Energy security has become almost secondary as dams gain geopolitical in addition to economic importance.’


Entities: [('china', 'GPE'), ('china', 'GPE'), ('beijing', 'GPE')]

Paragraph 9:
China’s financing of renewable energy projects in the Indo-Pacific has increased markedly, but the investments may not be delivering the broader political benefits one would expect. Despite significant recent investments in Indonesia, for example, a 2021 public opinion poll found that 60% of Indonesians agreed that ‘Indonesia should join with other countries to limit China’s influence’, an increase of 10 percentage points since 2011. A recent survey of Southeast Asian elites found that China was the most distrusted power in the region. Similarly, public opinion surveys in other Asian subregions that have received significant Chinese investments, such as Central Asia , have noted a steady decrease in positive public sentiment towards China.


Entities: [('china', 'GPE'), ('indonesia', 'GPE'), ('china', 'GPE'), ('china', 'GPE'), ('china', 'GPE')]

Paragraph 10:
China’s multidimensional engagement in the region’s renewable energy systems has some important implications. One clearly positive implication is that Chinese domestic and regional investments in renewable energy are a fundamentally important contribution to efforts to prevent dangerous climate change. Indeed, it will be impossible to achieve the objectives in the Paris agreement without China’s financing, support and engagement.


Entities: [('china', 'GPE'), ('paris', 'GPE'), ('china', 'GPE')]

Paragraph 11:
Both the Covid-19 crisis and the Russian invasion of the Ukraine have starkly demonstrated the importance of securing critical supply chains. In this respect, China’s domination of the renewable energy market is a significant risk. And that risk may be growing; for example, recent reporting suggests that China sees Russia’s disruption of gas supplies to Europe as a major opportunity for China’s wind and solar energy firms to expand their European markets and presence.


Entities: [('china', 'GPE'), ('china', 'GPE'), ('russia', 'GPE'), ('china', 'GPE')]

Paragraph 12:
There are at least two dimensions to the supply-chain risk. The first is the opportunity it affords China to leverage its control over resources and technologies to extract geopolitical concessions from its competitors or adversaries, as it attempted to do with Japan in 2010. The second is the exposure and vulnerability of China’s own domestic renewable energy infrastructure to disasters that are rapidly intensifying due to climate change, which is a risk China itself should seek to reduce. As the International Energy Agency has observed:


Entities: [('china', 'GPE'), ('japan', 'GPE'), ('china', 'GPE'), ('china', 'GPE')]

Paragraph 13:
Today, China’s Xinjiang province accounts for 40% of global polysilicon manufacturing. Moreover, one out of every seven [solar] panels produced worldwide is manufactured by a single facility. This level of concentration in any global supply chain would represent a considerable vulnerability; solar PV is no exception.


Entities: [('china', 'GPE'), ('xinjiang province', 'GPE')]

Paragraph 14:
Clearly, it must be a high priority for Australia and like-minded countries to diversify the renewable energy supply chain as rapidly as possible. China is playing the pivotal role in the transition from fossil fuels to renewables, including in the Indo-Pacific. Australia and its allies must find ways to engage with China in accelerating the transition to reduce the increasing risk of catastrophic climate change. Identifying opportunities to do so at a time when China is increasingly challenging the geostrategic order, and without undermining other important interests, is an urgent, daunting and fundamentally important task. As the Brookings Institution recently observed concerning the US and China:


Entities: [('china', 'GPE'), ('australia', 'GPE'), ('china', 'GPE'), ('china', 'GPE'), ('china', 'GPE')]

Paragraph 15:
The relationship remains too consequential to people in both countries and the rest of the world to be guided by a fatalistic acceptance of deepening enmity. And while competition resides at the core of the relationship, it is a mistake to view the relationship solely through the lens of rivalry. Doing so limits tools available to Washington for developing a more durable, productive relationship that serves America’s interests.


Entities: [('washington', 'GPE'), ('america', 'GPE')]


In [22]:
entity1_list = []
relationship_list = []
entity2_list = []
date_list = []
money_list = []

In [23]:
# Iterate through each paragraph
for index, row in df.iterrows():
    doc = nlp(row['Paragraph Text'])  # Process the paragraph with spaCy

    for sent in doc.sents:
        # Extract named entities (countries, organizations)
        entities = []
        money_in_sentence = []

        for ent in sent.ents:
            if 'Inititative' in ent.text.lower(): 
                ent = ent._replace(label_='POL')
                entities.append(ent)
            elif ent.label_ in ['GPE', 'ORG', 'POL', 'MONEY']:
                entities.append(ent)

            # Collect monetary amounts
            if ent.label_ == "MONEY":
                money_in_sentence.append(ent.text)
        
        # Look for verbs or specific relationship nouns in the sentence
        verbs = [token for token in sent if token.pos_ == "VERB"]
        keywords_in_sentence = [token.text for token in sent if token.lemma_ in relationship_keywords]
        
        # Extract dates
        dates = [ent.text for ent in sent.ents if ent.label_ == "DATE"]

        # If we find two entities and a relationship keyword, we assume a relationship
        if len(entities) >= 2 and keywords_in_sentence:
            entity1_list.append(entities[0].text)

            # Concatenate the relationship noun and verb (if available) into one string
            relationship_info = keywords_in_sentence[0]
            if verbs:
                relationship_info += " (verb: " + verbs[0].lemma_ + ")"

            relationship_list.append(relationship_info)  # Store concatenated info
            entity2_list.append(entities[1].text)
            date_list.append(dates[0] if dates else "n/a")
            money_list.append(money_in_sentence[0] if money_in_sentence else "n/a")

In [24]:
# DataFrame
extracted_data = pd.DataFrame({
    "Entity1": entity1_list,
    "Relationship": relationship_list,
    "Entity2": entity2_list,
    "Date": date_list,
    "Money": money_list
})

In [25]:
extracted_data

Unnamed: 0,Entity1,Relationship,Entity2,Date,Money
0,G20,transition (verb: host),Indonesia,Next week’s,
1,Jakarta,transitions (verb: expect),the Just Energy Transition Partnership,,multibillion-dollar
2,Indonesia,agreement (verb: include),South Africa,last year,
3,The International Energy Agency,share (verb: estimate),China,the coming years,
4,China,supply (verb: demonstrate),Japan,2010,
5,Paris,agreement (verb: achieve),China,,
6,China,supplies (verb: grow),Russia,,
7,China,transition (verb: play),the Indo-Pacific,,
8,Australia,engage (verb: find),China,,
9,Washington,developing (verb: do),America,,


In [26]:
# Export to csv
extracted_data.to_csv('05_relationships.csv', index=False)