In [1]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
import spacy
from spacy import displacy

In [4]:
url = "https://www.societegenerale.asia/en/newsroom/success-stories/success-stories-details/news/novel-financing-solution-for-japanes-solar-energy-ambitions/"
response = requests.get(url)

In [5]:
soup=BeautifulSoup(response.content, "html.parser")

In [6]:
print(soup.prettify())

<!DOCTYPE html>
<html dir="ltr" lang="en-Us">
 <head>
  <meta charset="utf-8"/>
  <base href="https://www.societegenerale.asia/"/>
  <meta content="user-scalable=no, initial-scale=1.0, maximum-scale=1, width=device-width" name="viewport">
   <meta content="summary" name="twitter:card"/>
   <meta content="telephone=no" name="format-detection"/>
   <title>
    A novel financing solution for Japan’s solar energy ambitions
   </title>
   <meta content="A novel financing solution for Japan’s solar energy ambitions" name="title"/>
   <!-- HrefLang&Canonic -->
   <link href="https://www.societegenerale.asia/jp/newsroom/%E3%82%B5%E3%82%AF%E3%82%BB%E3%82%B9%E3%82%B9%E3%83%88%E3%83%BC%E3%83%AA%E3%83%BC%E3%83%93%E3%83%A5%E3%83%BC%E3%83%9D%E3%82%A4%E3%83%B3%E3%83%88/translate-japan-success-stories-details/news/novel-financing-solution-for-japanes-solar-energy-ambitions/" hreflang="jp" rel="alternate"/>
   <link href="https://www.societegenerale.asia/en/newsroom/success-stories/success-stories-deta

In [7]:
# Extract main content
main_content = soup.find('div', {'class': 'sgnews_single_content'})
for element in main_content(['a', 'em', 'span']):
    element.decompose()

In [8]:
text_list = []
for paragraph in main_content.find_all('p'):
    paragraph_text = paragraph.get_text(separator=' ', strip=True)
    text_list.append(paragraph_text)

In [9]:
df = pd.DataFrame({"Paragraph Text": text_list})
df.to_csv('15_paragraphs.csv', index=False) # Save as csv file

In [10]:
df

Unnamed: 0,Paragraph Text
0,Canadian Solar has sealed a JPY 24.5 billion (...
1,Societe Generale recently acted as Mandated Le...
2,In Japan’s updated energy plan announced in Ap...
3,"Japan’s solar ambitions, however, are a challe..."
4,Putting finance in place
5,While the FiT and FiP mechanisms provide a str...
6,"Unusually for a long-term power asset, this de..."
7,“This funding model has real potential to help...
8,Canadian Solar is one of the biggest solar pow...
9,“Canadian Solar is committed to working with a...


In [11]:
nlp = spacy.load("en_core_web_sm")

In [12]:
relationship_keywords = {'collaboration', 'cooperation', 'partner', 'engage', 
                         'support', 'fund', 'finance', 'invest', 'develop', 
                         'supply', 'promote', 'transition', 'export', 'agreement', 
                         'interest', 'share', 'enhance', 'effort', 'import'}

In [13]:
def extract_entities_and_relationships(text):
    doc = nlp(text)  # Process text with spaCy
    entities = [(ent.text, ent.label_) for ent in doc.ents if ent.label_ in ['ORG', 'GPE', 'FAC']]

    # Find relationships based on keywords
    relationships = []
    for token in doc:
        if token.lemma_ in relationship_keywords:  # Use lemma_ to check relationship keywords
            subject = [w for w in token.children if w.dep_ == "nsubj"]  # Find subject
            object_ = [w for w in token.children if w.dep_ == "dobj"]  # Find object
            if subject and object_:
                relationships.append((subject[0].text, token.text, object_[0].text))  # (subject, relationship, object)

    return entities, relationships

In [14]:
df['Entities'], df['Relationships'] = zip(*df['Paragraph Text'].apply(extract_entities_and_relationships))

In [15]:
df[['Paragraph Text', 'Entities', 'Relationships']]

Unnamed: 0,Paragraph Text,Entities,Relationships
0,Canadian Solar has sealed a JPY 24.5 billion (...,"[(Japan, GPE), (Japan, GPE), (Tohoku, GPE)]",[]
1,Societe Generale recently acted as Mandated Le...,"[(Societe Generale, ORG)]",[]
2,In Japan’s updated energy plan announced in Ap...,"[(Japan, GPE), (GHG, ORG), (Renewables, ORG)]",[]
3,"Japan’s solar ambitions, however, are a challe...","[(Japan, GPE), (Japan, GPE), (FiP, ORG)]",[]
4,Putting finance in place,[],[]
5,While the FiT and FiP mechanisms provide a str...,"[(FiT, ORG), (FiP, ORG)]",[]
6,"Unusually for a long-term power asset, this de...","[(the Canadian Solar Infrastructure Fund, ORG)]",[]
7,“This funding model has real potential to help...,"[(Japan, GPE), (Director - Energy Finance & Ad...",[]
8,Canadian Solar is one of the biggest solar pow...,"[(Japan, GPE), (FiT, ORG), (Tokyo, GPE), (Cana...",[]
9,“Canadian Solar is committed to working with a...,"[(Japan, GPE), (Canadian Solar Inc., ORG)]",[]


In [16]:
for index, row in df.iterrows():
    print(f"\nParagraph {index + 1}:")
    print(row['Paragraph Text'])

    # Display dependency parsing
    doc = nlp(row['Paragraph Text'])
    sentence_spans = list(doc.sents)
    displacy.render(sentence_spans, style='dep', jupyter=True, options={'compact': True})
    
    # Display named entities
    displacy.render(sentence_spans, style="ent", jupyter=True)
    
    # Print extracted entities and relationships
    print("Entities:", row['Entities'])
    print("Relationships:", row['Relationships'])


Paragraph 1:
Canadian Solar has sealed a JPY 24.5 billion (EUR 185 million) financing for the 100 megawatt Azuma Kofuji Solar power project, which counts among the largest solar projects in the country, and is Canadian Solar’s flagship project in Japan. The deal harnesses an inventive financing approach to advance Japan’s clean energy ambitions and help revitalising the Fukushima prefecture, reinvigorating the local economy in a region devastated by the 2011 Tohoku earthquake.


Entities: [('Japan', 'GPE'), ('Japan', 'GPE'), ('Tohoku', 'GPE')]
Relationships: []

Paragraph 2:
Societe Generale recently acted as Mandated Lead Arranger (MLA) in the financing of Canadian Solar’s Azuma Kofuji Solar photovoltaic project.


Entities: [('Societe Generale', 'ORG')]
Relationships: []

Paragraph 3:
In Japan’s updated energy plan announced in April 2021, its government committed to a 2030 target of cutting greenhouse gas (GHG) emissions by at least 46% from 2013 levels. Renewables will make up between 36% and 38% of overall generation by then. 1 Of this target, of solar’s share is set to be 15% while wind (predominantly offshore wind) will account for 6%.


Entities: [('Japan', 'GPE'), ('GHG', 'ORG'), ('Renewables', 'ORG')]
Relationships: []

Paragraph 4:
Japan’s solar ambitions, however, are a challenge in a country where land use is at a premium and geography restricts available options for solar installations. In addition, solar sites are relatively complex, often requiring capital-intensive civil works. Japan’s feed-in tariff (FiT), introduced in 2012, has helped to meet these challenges by guaranteeing a fixed purchase price for renewable power. It boosted non-hydro renewables’ growth rate from 9% CAGR between 2009-12 to 29% between 2012 and 2015. 2 A new Feed-in-Premium (FiP) is due to be introduced in 2022 which will further incentivise renewables generators to sell their power to the grid.




Entities: [('Japan', 'GPE'), ('Japan', 'GPE'), ('FiP', 'ORG')]
Relationships: []

Paragraph 5:
Putting finance in place


Entities: []
Relationships: []

Paragraph 6:
While the FiT and FiP mechanisms provide a strong ‘pull’ factor for installing solar generation, suitable financial solutions are needed to support construction and commissioning.


Entities: [('FiT', 'ORG'), ('FiP', 'ORG')]
Relationships: []

Paragraph 7:
Unusually for a long-term power asset, this deal is structured as a hard mini perm, a short-term facility with a maturity one year after construction is complete. This solution allows for the debt to be taken out by capital markets funding once the plant is up and running: after the solar farm is completed in 2023, it is expected to be acquired by the Canadian Solar Infrastructure Fund, which will subsequently receive the revenues from its operations at a predictable rate guaranteed by the FiT.


Entities: [('the Canadian Solar Infrastructure Fund', 'ORG')]
Relationships: []

Paragraph 8:
“This funding model has real potential to help accelerate Japan’s energy transition, providing additional financing options to address the country’s specific challenges and enabling the project sponsors to benefit from a wider range of funding schemes. It’s a great example of how international and local institutions, as well as commercial debt and institutional funding, can make a difference through innovative financing,” said Cedric Chatel, Managing Director - Energy Finance & Advisory, Societe Generale.


Entities: [('Japan', 'GPE'), ('Director - Energy Finance & Advisory', 'ORG'), ('Societe Generale', 'ORG')]
Relationships: []

Paragraph 9:
Canadian Solar is one of the biggest solar power developers in Japan, having successfully bid for projects at every auction since the FiT programme was launched in 2017. It is also the sponsor of the Tokyo-listed Canadian Solar Infrastructure Fund, which offers access to efficient capital markets funding for long-term infrastructure assets.


Entities: [('Japan', 'GPE'), ('FiT', 'ORG'), ('Tokyo', 'GPE'), ('Canadian Solar Infrastructure Fund', 'ORG')]
Relationships: []

Paragraph 10:
“Canadian Solar is committed to working with all stakeholders in furthering Japan’s renewable energy journey. Obtaining financing that suits the country’s specific circumstances is key to enabling the construction phase of solar projects by tapping into the government’s strategic incentive programmes,” said Dr. Shawn Qu, Chairman and Chief Executive Officer of Canadian Solar Inc.


Entities: [('Japan', 'GPE'), ('Canadian Solar Inc.', 'ORG')]
Relationships: []

Paragraph 11:
The reduced tenor and capital markets-friendly structure of the Azuma Kofuji financing is unusual in Japan but is a regular feature in the US, applying Canadian Solar’s and Societe Generale’s global experience to the local market.


Entities: [('Japan', 'GPE'), ('US', 'GPE'), ('Societe Generale’s', 'ORG')]
Relationships: []

Paragraph 12:
This is Societe Generale’s first financing for a domestic Japanese solar project, having entered the Japanese renewables market in 2020 with financing for the Marubeni Corporation’s 140 MW Akita offshore wind project. 3 The Azuma Kofuji facility will be one of the largest solar projects in the country to date, critical to the meeting the country’s energy transition commitments, as well as playing a substantial role in the reconstruction of the Fukushima prefecture.


Entities: [('Societe Generale’s', 'ORG'), ('the Marubeni Corporation’s', 'ORG'), ('MW Akita', 'ORG')]
Relationships: []

Paragraph 13:



Entities: []
Relationships: []

Paragraph 14:



Entities: []
Relationships: []


In [17]:
entity1_list = []
relationship_list = []
entity2_list = []
date_list = []
money_list = []

In [18]:
# Iterate through each paragraph
for index, row in df.iterrows():
    doc = nlp(row['Paragraph Text'])  # Process the paragraph with spaCy

    for sent in doc.sents:
        # Extract named entities (countries, organizations)
        entities = []
        money_in_sentence = []

        for ent in sent.ents:
            if 'Inititative' in ent.text.lower(): 
                ent = ent._replace(label_='POL')
                entities.append(ent)
            elif ent.label_ in ['GPE', 'ORG', 'POL', 'MONEY']:
                entities.append(ent)

            # Collect monetary amounts
            if ent.label_ == "MONEY":
                money_in_sentence.append(ent.text)
        
        # Look for verbs or specific relationship nouns in the sentence
        verbs = [token for token in sent if token.pos_ == "VERB"]
        keywords_in_sentence = [token.text for token in sent if token.lemma_ in relationship_keywords]
        
        # Extract dates
        dates = [ent.text for ent in sent.ents if ent.label_ == "DATE"]

        # If we find two entities and a relationship keyword, we assume a relationship
        if len(entities) >= 2 and keywords_in_sentence:
            entity1_list.append(entities[0].text)

            # Concatenate the relationship noun and verb (if available) into one string
            relationship_info = keywords_in_sentence[0]
            if verbs:
                relationship_info += " (verb: " + verbs[0].lemma_ + ")"

            relationship_list.append(relationship_info)  # Store concatenated info
            entity2_list.append(entities[1].text)
            date_list.append(dates[0] if dates else "n/a")
            money_list.append(money_in_sentence[0] if money_in_sentence else "n/a")

In [19]:
# DataFrame
relationships_df = pd.DataFrame({
    "Entity1": entity1_list,
    "Relationship": relationship_list,
    "Entity2": entity2_list,
    "Date": date_list,
    "Money": money_list
})

In [20]:
relationships_df

Unnamed: 0,Entity1,Relationship,Entity2,Date,Money
0,FiT,support (verb: provide),FiP,,
1,Tokyo,funding (verb: list),Canadian Solar Infrastructure Fund,,


In [21]:
# Export to csv
relationships_df.to_csv('15_relationships.csv', index=False)