In [11]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install googletrans==4.0.0-rc1

Note: you may need to restart the kernel to use updated packages.


In [19]:
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [13]:
import requests 
from bs4 import BeautifulSoup
from googletrans import Translator
import pandas as pd
import spacy
from spacy import displacy

In [4]:
# Page to scrape.
url = "https://www.mri.co.jp/knowledge/insight/20231221.html"
# The timeout keeps the cell from hanging indefinitely on a stalled
# connection; raise_for_status() stops the notebook on a 4xx/5xx response
# instead of letting later cells parse an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [5]:
# Parse the raw response bytes into a navigable tree using the "html.parser" backend.
soup = BeautifulSoup(response.content, features="html.parser")

In [6]:
# Sanity-check the download by previewing only the first 2000 characters of
# the pretty-printed document; printing the whole page floods the notebook
# output and bloats the saved file.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html lang="ja">
 <meta content="width=device-width,initial-scale=1" name="viewport"/>
 <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
 <head>
  <script src="https://www3.gred.jp/saas/gred_checker.js?sid=17127&amp;lang=ja&amp;redirect_url=http://sorry.mri.co.jp/" type="text/javascript">
  </script>
  <meta charset="utf-8">
   <title>
    インドネシアの脱炭素化での原子力活用期待される日本の役割 | MRI 三菱総合研究所
   </title>
   <meta content="ASEANのカーボンニュートラル達成のカギは最大級のエネルギー需要国であるインドネシアだ。本コラムではインドネシアの脱炭素化について、原子力発電に焦点をあて、日本の果たす役割を考える。" name="description">
    <meta content="https://www.mri.co.jp/dia6ou0000016csk-img/og_img.png" property="og:image"/>
    <meta content="MRI 三菱総合研究所" property="og:site_name"/>
    <meta content="https://www.mri.co.jp/knowledge/insight/20231221.html" property="og:url"/>
    <meta content="ASEANのカーボンニュートラル達成のカギは最大級のエネルギー需要国であるインドネシアだ。本コラムではインドネシアの脱炭素化について、原子力発電に焦点をあて、日本の果たす役割を考える。" property="og:description"/>
    <meta content="article" property="og:type"/>
  

In [7]:
# Extract main content: the <div id="contents_inner"> container of the page.
main_content = soup.find('div', {'id': 'contents_inner'})
if main_content is None:
    # Without this guard a changed page layout surfaces in the loop below as
    # an opaque "'NoneType' object is not callable".
    raise ValueError("Could not find <div id='contents_inner'> in the downloaded page")

# Remove every <a>, <em> and <span> element together with the text inside it.
# NOTE(review): dropping link text leaves holes in sentences (e.g. an empty
# 「 」 where a linked title used to be) - confirm this is intended; unwrap()
# would keep the text while removing only the tag.
for element in main_content.find_all(['a', 'em', 'span']):
    element.decompose()

In [8]:
# Collect the text of every <div class="txt"> inside the main content, one
# string per div, with inner fragments stripped and joined by single spaces.
text_list = [
    txt_div.get_text(separator=' ', strip=True)
    for txt_div in main_content.find_all('div', {'class': 'txt'})
]

In [14]:
# Translate non-English text: create the googletrans client that the
# following cell uses to translate each paragraph.
translator = Translator()

In [15]:
# Translate each scraped paragraph from Japanese ('ja') to English ('en'),
# keeping the same order as text_list.
translated_texts = [
    translator.translate(source_text, src='ja', dest='en').text
    for source_text in text_list
]

In [16]:
# Pair every original paragraph with its translation (one row per paragraph)
# and persist the table as CSV without the index column.
column_data = {
    "Paragraph Text": text_list,
    "Translated Text": translated_texts,
}
df = pd.DataFrame(column_data)
df.to_csv('12_paragraphs.csv', index=False)

In [17]:
# Display the paragraph / translation table built in the previous cell.
df

Unnamed: 0,Paragraph Text,Translated Text
0,ASEANのカーボンニュートラル達成のカギは最大級のエネルギー需要国であるインドネシアだ。石...,The key to ASEAN's achievement of carbon necto...
1,2023年8月のMRIエコノミックレビュー「 」では、インドネシアはASEANの中で最大のC...,"In August 2023, MRI Economic Review """" Indones..."
2,現在インドネシアでは原子力発電の導入について盛んに議論されている。同国のような化石燃料輸出国...,"Currently, in Indonesia, the introduction of n..."
3,インドネシア国有電力会社は、2030年時点でも石炭火力発電の比率が約60%を占めると予想する...,Indonesian National Power Company expects that...
4,インドネシアは1万を超える島々で構成されており、図2に示した通り2030年、2060年時点の...,"Indonesia is composed of more than 10,000 isla..."
5,石炭はインドネシアにとって国内のエネルギー供給の主力であるだけでなく、輸出資源としての位置付...,Coal is not only the mainstay of domestic ener...
6,原子力の導入時、核不拡散、過酷事故防止、放射性廃棄物の処理・処分などクリアしなければならない...,At the time of the introduction of nuclear pow...


In [18]:
# Load the "en_core_web_sm" spaCy pipeline; `nlp` is used by every later cell
# that parses the translated text.
nlp = spacy.load("en_core_web_sm")

In [20]:
# Lemmas that mark a sentence (or token) as expressing a relationship.
# Lookups are done against token.lemma_, so entries are base forms.
relationship_keywords = {
    'agreement',
    'collaboration',
    'cooperation',
    'develop',
    'effort',
    'engage',
    'enhance',
    'export',
    'finance',
    'fund',
    'import',
    'interest',
    'invest',
    'partner',
    'promote',
    'share',
    'supply',
    'support',
    'transition',
}

In [21]:
def extract_entities_and_relationships(text):
    """Pull named entities and keyword-anchored relations out of one text.

    The text is parsed with the module-level spaCy pipeline ``nlp``.

    Args:
        text: The string to analyse.

    Returns:
        A tuple ``(entities, relationships)`` where

        * ``entities`` is a list of ``(text, label)`` pairs for every entity
          labelled ``ORG``, ``GPE`` or ``FAC``, in document order;
        * ``relationships`` is a list of ``(subject, keyword, object)`` text
          triples, one for each token whose lemma is in
          ``relationship_keywords`` and which has at least one ``nsubj`` child
          and at least one ``dobj`` child (the first of each is reported).
    """
    wanted_labels = ['ORG', 'GPE', 'FAC']
    parsed = nlp(text)

    entities = []
    for span in parsed.ents:
        if span.label_ in wanted_labels:
            entities.append((span.text, span.label_))

    relationships = []
    for tok in parsed:
        # Match on the lemma so inflected forms ("invested") hit the keyword.
        if tok.lemma_ not in relationship_keywords:
            continue

        # Sort the token's direct dependents into subjects and direct objects.
        subjects = []
        objects = []
        for child in tok.children:
            if child.dep_ == "nsubj":
                subjects.append(child)
            elif child.dep_ == "dobj":
                objects.append(child)

        # A triple is only emitted when both sides are present.
        if not subjects or not objects:
            continue
        relationships.append((subjects[0].text, tok.text, objects[0].text))

    return entities, relationships

In [23]:
# Run the extractor once per translated paragraph; each result is an
# (entities, relationships) pair, which is split into two new columns.
# Building the columns with comprehensions (rather than unpacking zip(*...))
# also works when df has no rows, where the unpacking would fail with
# "not enough values to unpack".
extraction_results = df['Translated Text'].apply(extract_entities_and_relationships)
df['Entities'] = [entities for entities, _ in extraction_results]
df['Relationships'] = [relationships for _, relationships in extraction_results]

In [24]:
# Display the translated text next to the extracted entities and relationships.
df[['Translated Text', 'Entities', 'Relationships']]

Unnamed: 0,Translated Text,Entities,Relationships
0,The key to ASEAN's achievement of carbon necto...,"[(ASEAN, ORG), (Indonesia, GPE), (Indonesia, G...","[(Indonesia, promote, carbonization)]"
1,"In August 2023, MRI Economic Review """" Indones...","[(MRI Economic Review, ORG), (Indonesia, GPE),...","[(government, invested, subsidies)]"
2,"Currently, in Indonesia, the introduction of n...","[(Indonesia, GPE), (Japan, GPE), (GW, ORG)]",[]
3,Indonesian National Power Company expects that...,"[(Indonesian National Power Company, ORG), (In...",[]
4,"Indonesia is composed of more than 10,000 isla...","[(Indonesia, GPE), (kW, GPE), (SMR, ORG), (kW,...",[]
5,Coal is not only the mainstay of domestic ener...,"[(Indonesia, GPE), (Indonesia, GPE)]","[(government, developing, furnace)]"
6,At the time of the introduction of nuclear pow...,"[(Japan, GPE), (Indonesia, GPE), (Japan, GPE),...",[]


In [27]:
# For every paragraph: print the translation, render its dependency parse and
# its named entities with displaCy, then print the stored extraction results.
paragraph_rows = zip(df.index, df['Translated Text'], df['Entities'], df['Relationships'])
for index, translated_text, row_entities, row_relationships in paragraph_rows:
    print(f"\nParagraph {index + 1}:")
    print(translated_text)

    # Parse the paragraph and split it into sentence spans for rendering.
    sentence_spans = list(nlp(translated_text).sents)

    # Dependency trees (compact layout), then highlighted named entities.
    displacy.render(sentence_spans, style='dep', jupyter=True, options={'compact': True})
    displacy.render(sentence_spans, style="ent", jupyter=True)

    # Results computed earlier and stored on the DataFrame.
    print("Entities:", row_entities)
    print("Relationships:", row_relationships)


Paragraph 1:
The key to ASEAN's achievement of carbon nector is Indonesia, the largest energy demand.There is also a situation in which coal -related is one of the main exports industries.The way for Indonesia to promote carbonization while protecting its industry.Focus on nuclear power and think about the role of Japan.


Entities: [('ASEAN', 'ORG'), ('Indonesia', 'GPE'), ('Indonesia', 'GPE'), ('Japan', 'GPE')]
Relationships: [('Indonesia', 'promote', 'carbonization')]

Paragraph 2:
In August 2023, MRI Economic Review "" Indonesia has the largest CO2 emissions and population in ASEAN, and the increased energy demand for future economic growth will increase, and to make ASEAN decarbonization.He stated that Indonesia was the point.To achieve Indonesia's decarbonization, it will be a major challenge to respond to the increase in energy consumption in the future and the conversion of coal -related industries, which is a major exports of the country.Indonesia has announced the realization of carbon neutral (CN) up to 2060, and as a power configuration in 2060, solar 361 gigawat (GW), hydrochlower 83 GW, bio 37 GW, nuclear 35GW, geothermal 18 GW, sea current 13.4 GW, 13.4 GW.The aim is to realize the power supply of carbonization with a total of about 587 GW equipment capacity (Fig. 1).As of 2020, thermal pow

Entities: [('MRI Economic Review', 'ORG'), ('Indonesia', 'GPE'), ('ASEAN', 'ORG'), ('ASEAN', 'ORG'), ('Indonesia', 'GPE'), ('Indonesia', 'GPE'), ('Indonesia', 'GPE'), ('GW', 'ORG')]
Relationships: [('government', 'invested', 'subsidies')]

Paragraph 3:
Currently, in Indonesia, the introduction of nuclear power is actively discussing.It is believed that fossil fuel exports, such as the country, have a great merit in terms of a major change in power sources in order to realize CN while reducing the economic burden.Currently, a plan to introduce nuclear power generation of 8 GW by 2035 and 35 GW by 2060 by 2060.Since the number of nuclear power plants in Japan in 2022 is about 10 GW, the size of its introduction can be inferred.There is also a boost for people's will.Approximately 76%of Indonesian people are positive to the introduction of nuclear power * 2.On the other hand, it is not easy to change the power supply configuration that requires a large amount of investment.Not only the vi

Entities: [('Indonesia', 'GPE'), ('Japan', 'GPE'), ('GW', 'ORG')]
Relationships: []

Paragraph 4:
Indonesian National Power Company expects that the ratio of coal -fired power generation will account for about 60%as of 2030 * 3.In reducing coal -fired power, which accounts for more than half of the power supply configuration, the impact on the industry is inevitable, and it is necessary to create employment under a new power supply configuration.This is not the only issue in Indonesia, but also a US study issue with 20%of coal -fired power generation as of 2022, and the Ministry of Energy (DOE) has announced the effects of coal -fired power to nuclear power.It is * 4.According to DOE, in the United States, the construction cost of nuclear power plants may be reduced by 15 to 35%, since the foundation of the power system, road maintenance, and buildings have already been completed.In addition, some of the skills and knowledge of workers working at coal -fired power plants can be used in

Entities: [('Indonesian National Power Company', 'ORG'), ('Indonesia', 'GPE'), ('US', 'GPE'), ('the Ministry of Energy', 'ORG'), ('the United States', 'GPE')]
Relationships: []

Paragraph 5:
Indonesia is composed of more than 10,000 islands, and as shown in Fig. 2, power demand in 2030 and 2060 is very different for major islands, and the supply sources are diverse.The transmission network is also restricted in cooperation between the islands.Since the potential of the introduction of renewable energy is also different, power supply according to the characteristics of each island is required.Based on these characteristics, the introduction of the conventional large light water reactor (300,000 kW or more) may be excessively supply, and small module furnace (SMR) and 1 of less than 300,000 kW, which have been in R & D in recent years.The introduction of micro furnace under 10,000 kW is also promising.In Canada, which has the same task of transmission networks and power supply and demand

Entities: [('Indonesia', 'GPE'), ('kW', 'GPE'), ('SMR', 'ORG'), ('kW', 'GPE'), ('kW', 'GPE'), ('Canada', 'GPE'), ('SMR', 'ORG'), ('SMR', 'ORG')]
Relationships: []

Paragraph 6:
Coal is not only the mainstay of domestic energy supply for Indonesia, but also as export resources, and it is an important issue to explore new alternative candidates.In contrast, exports of hydrogen manufactured by nuclear power have the potential to be part of the solution.Currently, the Japanese government is developing a demonstration furnace for high -temperature gas reactors, but is about 12 yen / NM3 (1NM3 1m3 in a gas standard state) as the development target for hydrogen production * 5 * 5.When Indonesia has introduced high -temperature gas reactors and excessive power supply, there may be a strategy of switching from power generation to hydrogen production and exporting products.


Entities: [('Indonesia', 'GPE'), ('Indonesia', 'GPE')]
Relationships: [('government', 'developing', 'furnace')]

Paragraph 7:
At the time of the introduction of nuclear power, there are of course issues that must be cleared, such as nuclear non -diffusion, severe accidents prevention, and radioactive waste processing and disposal.However, as the Indonesian government has expected to nuclear power as mentioned above, Japan can fulfill the role of Indonesia in the use of safety measures that Japan has cultivated so far, and the cooperation of human resource development.Is big.Japanese earthquakes and tsunami countermeasures based on the accident at the Fukushima Daiichi Nuclear Power Plant TEPCO are also required by Indonesia in the earthquake.In the development of high -temperature gas reactors, Japan is leading the world, and it is necessary to cooperate in both technology development and human resource development.In the development of human resources in nuclear fields in general, the

Entities: [('Japan', 'GPE'), ('Indonesia', 'GPE'), ('Japan', 'GPE'), ('the Fukushima Daiichi Nuclear Power Plant TEPCO', 'FAC'), ('Indonesia', 'GPE'), ('Japan', 'GPE'), ('the Japan Nuclear Research and Development Organization', 'ORG'), ('Indonesia', 'GPE'), ('Japan', 'GPE'), ('Japan', 'GPE'), ('Indonesia', 'GPE'), ('Indonesia', 'GPE'), ('Indonesia', 'GPE')]
Relationships: []


In [28]:
# Column accumulators for the relationship table. The extraction loop appends
# to all five together, so position i in each list belongs to the same row.
entity1_list, relationship_list, entity2_list, date_list, money_list = (
    [] for _ in range(5)
)

In [30]:
# Iterate through each paragraph
for index, row in df.iterrows():
    doc = nlp(row['Translated Text'])  # Process the paragraph with spaCy

    for sent in doc.sents:
        # Extract named entities (countries, organizations)
        entities = []
        money_in_sentence = []

        for ent in sent.ents:
            if 'Inititative' in ent.text.lower(): 
                ent = ent._replace(label_='POL')
                entities.append(ent)
            elif ent.label_ in ['GPE', 'ORG', 'POL', 'MONEY']:
                entities.append(ent)

            # Collect monetary amounts
            if ent.label_ == "MONEY":
                money_in_sentence.append(ent.text)
        
        # Look for verbs or specific relationship nouns in the sentence
        verbs = [token for token in sent if token.pos_ == "VERB"]
        keywords_in_sentence = [token.text for token in sent if token.lemma_ in relationship_keywords]
        
        # Extract dates
        dates = [ent.text for ent in sent.ents if ent.label_ == "DATE"]

        # If we find two entities and a relationship keyword, we assume a relationship
        if len(entities) >= 2 and keywords_in_sentence:
            entity1_list.append(entities[0].text)

            # Concatenate the relationship noun and verb (if available) into one string
            relationship_info = keywords_in_sentence[0]
            if verbs:
                relationship_info += " (verb: " + verbs[0].lemma_ + ")"

            relationship_list.append(relationship_info)  # Store concatenated info
            entity2_list.append(entities[1].text)
            date_list.append(dates[0] if dates else "n/a")
            money_list.append(money_in_sentence[0] if money_in_sentence else "n/a")

In [31]:
# Assemble the per-sentence accumulators into one table, one column per list.
relationship_columns = {
    "Entity1": entity1_list,
    "Relationship": relationship_list,
    "Entity2": entity2_list,
    "Date": date_list,
    "Money": money_list,
}
relationships_df = pd.DataFrame(relationship_columns)

In [32]:
# Display the extracted relationship table.
relationships_df

Unnamed: 0,Entity1,Relationship,Entity2,Date,Money
0,kW,supply (verb: base),SMR,recent years,
1,Canada,supply (verb: have),SMR,,
2,Japan,cooperation (verb: expect),Indonesia,,
3,the Japan Nuclear Research and Development Org...,develop (verb: work),Indonesia,,
4,Japan,collaboration (verb: spend),Indonesia,,


In [34]:
# Export the relationship table to CSV; index=False leaves out the row index column.
relationships_df.to_csv('12_relationships.csv', index=False)