## WEB SCRAPING S&P

https://www.spglobal.com/esg/solutions/data-intelligence-esg-scores

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


In [13]:
# Radar-chart criteria to extract, per industry.
# Keys are compared with the text read from the page's "company-industry"
# element; each listed criterion is searched for as a substring of the radar
# chart points' aria-labels by the scraping cell.
INDUSTRY_FIELDS = dict(
    [
        (
            "COM Construction Materials",
            [
                "Occupational Health & Safety",
                "Operational Eco-Efficiency",
                "Human Rights",
                "Business Ethics",
                "Risk & Crisis Management",
                "Talent Attraction & Retention",
                "Water Related Risks",
                "Climate Strategy",
            ],
        ),
        (
            "AUT Automobiles",
            [
                "Corporate Governance",
                "Human Capital Development",
                "Talent Attraction & Retention",
                "Operational Eco-Efficiency",
                "Low Carbon Strategy",
                "Occupational Health & Safety",
                "Climate Strategy",
                "Supply Chain Management",
                "Innovation Management",
            ],
        ),
        (
            "ELC Electric Utilities",
            [
                "Market Opportunities",
                "Electricity Generation",
                "Business Ethics",
                "Talent Attraction & Retention",
                "Climate Strategy",
                "Stakeholder Engagement",
                "Operational Eco-Efficiency",
                "Risk & Crisis Management",
                "Occupational Health & Safety",
            ],
        ),
    ]
)

In [16]:
%%time
# Scrape the S&P Global ESG scores page for one company: the overall ESG score,
# the three dimension scores, the score history and the industry-specific
# criteria shown in the radar chart. Each result is printed as soon as it is read.
COMPANY = "Iberdrola, S.A."  # other names tried: "CEMEX, S.A.B. de C.V.", "Ford Motor Company"

# Direct results page for CEMEX, kept for reference:
# https://www.spglobal.com/esg/scores/results?cid=4077095
SEARCH_URL = "https://www.spglobal.com/esg/solutions/data-intelligence-esg-scores"
RESULTS_TIMEOUT_SECONDS = 10

# Data points of the "Score History" line chart (Highcharts SVG markers).
HISTORY_POINTS_XPATH = (
    "//div[@id='line-series-chart']/div[@class='highcharts-container ']"
    "/*[name()='svg']/*[name()='g' and @class='highcharts-series-group']"
    "/*[name()='g' and contains(@class, 'highcharts-markers') and contains(@class, 'highcharts-tracker')]"
    "/*[name()='path' and @class='highcharts-point']"
)
# Data points of the first of the three series drawn in the radar chart.
SPIDER_POINTS_XPATH = (
    "//div[@id='spiderchartcontainer']/div[@class='highcharts-container ']"
    "/*[name()='svg']/*[name()='g' and @class='highcharts-series-group']"
    "/*[name()='g' and contains(@aria-label, 'series 1 of 3')]"
    "/*[name()='path' and @class='highcharts-point']"
)


def _dimension_score_xpath(chart_number):
    """Return the XPath of the first score label of dimension chart `chart_number` (1, 2 or 3)."""
    return (
        f"//div[@class='dimention-chart{chart_number}']"
        "/div[@class='DimensionScore__label']/ul/li[1]/span"
    )


def _point_labels(active_driver, xpath):
    """Return the aria-label of every element matching `xpath`, read once each.

    Elements without an aria-label are skipped so that later parsing does not
    fail on `None`.
    """
    labels = (point.get_attribute("aria-label") for point in active_driver.find_elements(By.XPATH, xpath))
    return [label for label in labels if label]


def _point_label_fields(aria_label):
    """Split a chart point's aria-label into its stripped, comma-separated fields.

    Only the text between the first and second "." is used.
    NOTE(review): labels are assumed to look like "<n>. <name>, <value>. ..." —
    inferred from how they are indexed here and from the printed results;
    confirm against the live page.
    """
    return [part.strip() for part in aria_label.split(".")[1].split(",")]


driver = webdriver.Chrome()
try:
    driver.get(SEARCH_URL)

    search_input = driver.find_element(By.CLASS_NAME, "banner-search__input")
    search_input.send_keys(COMPANY)
    search_input.send_keys(Keys.RETURN)

    # Pull the numeric values out one by one, by their position in the page.

    # Wait for the company's results to load before reading any field.
    # `until` returns the located element, so no second lookup is needed.
    # 1. ESG score: id="esg-score"
    esg_score = WebDriverWait(driver, RESULTS_TIMEOUT_SECONDS).until(
        EC.presence_of_element_located((By.ID, "esg-score"))
    )
    esg_score_value = esg_score.text
    print(f"ESG Score: {esg_score_value}")

    # Image 1 (3 line charts): figure class="highcharts-figure"
    # 2. Environmental score: "Environmental: CEMEX S.A.B. de C.V. 89"
    # (variable name keeps its original spelling in case later cells use it)
    enviromental_score_value = driver.find_element(By.XPATH, _dimension_score_xpath(1)).text
    print(f"Environmental Score: {enviromental_score_value}")
    # 3. Social score: "Social: CEMEX S.A.B. de C.V. 71"
    social_score_value = driver.find_element(By.XPATH, _dimension_score_xpath(2)).text
    print(f"Social Score: {social_score_value}")
    # 4. Governance & economic score: "Governance & Economic: CEMEX S.A.B. de C.V. 68"
    gov_eco_score_value = driver.find_element(By.XPATH, _dimension_score_xpath(3)).text
    print(f"Governance and economic Score: {gov_eco_score_value}")

    # Image 2 (line chart)
    # 5. Score History: "Score History" -> list of (first field, second field) pairs.
    # NOTE(review): only the ESG score element is waited for; the charts are
    # assumed to be rendered by then — confirm, or add an explicit wait.
    history_score_values = []
    for label in _point_labels(driver, HISTORY_POINTS_XPATH):
        fields = _point_label_fields(label)
        history_score_values.append((fields[0], fields[1]))
    print(f"Historic ESG Scores: {history_score_values}")

    # Image 3 (radar plot). The criteria shown depend on the company's industry.
    spider_labels = _point_labels(driver, SPIDER_POINTS_XPATH)
    industry = driver.find_element(By.ID, "company-industry").text.split(":")[1].strip()
    if industry not in INDUSTRY_FIELDS:
        raise KeyError(f"No radar-chart fields configured in INDUSTRY_FIELDS for industry {industry!r}")

    dimensions = {}
    for field in INDUSTRY_FIELDS[industry]:
        # First radar point whose label mentions the criterion name.
        matching_label = next((label for label in spider_labels if field in label), None)
        if matching_label is None:
            raise LookupError(f"Radar chart has no point for field {field!r} (industry {industry!r})")
        dimensions[field] = _point_label_fields(matching_label)[1]
    for key, value in dimensions.items():
        print(f"{key}: {value}")
finally:
    # Always end the browser session, even if scraping failed part-way, so
    # repeated runs of this cell do not accumulate Chrome processes.
    driver.quit()


ESG Score: 89
Environmental Score: 89
Social Score: 90
Governance and economic Score: 89
Historic ESG Scores: [('2018', '87'), ('2019', '86'), ('2020', '87'), ('2021', '89'), ('2022', '89')]
Market Opportunities: 100
Electricity Generation: 92
Business Ethics: 93
Talent Attraction & Retention: 90
Climate Strategy: 93
Stakeholder Engagement: 100
Operational Eco-Efficiency: 90
Risk & Crisis Management: 100
Occupational Health & Safety: 72
CPU times: total: 78.1 ms
Wall time: 7.33 s
