In [1]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

### scrape one stock

Send the request without a custom User-Agent header (Yahoo Finance answers with a 404).

In [19]:
url = 'https://finance.yahoo.com/quote/AAPL/sustainability'
# No headers are passed, so requests sends its default User-Agent.
# The timeout stops the cell from blocking forever if the server never answers.
response = requests.get(url, timeout=10)
response

<Response [404]>

In [20]:
# Show the headers that were actually sent with the request above,
# including the User-Agent value.
response.request.headers

{'User-Agent': 'python-requests/2.28.2', 'Accept-Encoding': 'gzip, deflate, br', 'Accept': '*/*', 'Connection': 'keep-alive'}

Send the request again, this time with a browser-like User-Agent header.

In [8]:
url = 'https://finance.yahoo.com/quote/AAPL/sustainability'
# Present the request as coming from a desktop Chrome browser.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
# The timeout stops the cell from blocking forever if the server never answers.
response = requests.get(url, headers={'User-Agent': user_agent}, timeout=10)
response

<Response [200]>

In [9]:
# Parse the response body with Python's built-in 'html.parser' backend.
soup = BeautifulSoup(response.text, 'html.parser')

option 1: use soup.select

In [10]:
# CSS-selector lookup: each class is chained with '.', and the parentheses
# inside the class names are backslash-escaped so the selector parser treats
# them as literal characters.
ESG_risk_score = soup.select("div.Fz\\(36px\\).Fw\\(600\\).D\\(ib\\).Mend\\(5px\\)")
# select() returns a list of matches; take the text of the first one.
print('Total ESG Risk Score is: ', ESG_risk_score[0].text)

Total ESG Risk Score is:  17


option 2: use soup.find_all

In [14]:
# Attribute lookup: match <div> elements by their full class string.
ESG_risk_score = soup.find_all('div', {'class':'Fz(36px) Fw(600) D(ib) Mend(5px)'})
# find_all() returns a list of matches; take the text of the first one.
print('Total ESG Risk Score is: ', ESG_risk_score[0].text)

Total ESG Risk Score is:  17


option 3: use soup.find

In [15]:
# Same attribute lookup, but find() returns a single element instead of a
# list, so no indexing is needed before reading .text.
ESG_risk_score = soup.find('div', {'class':'Fz(36px) Fw(600) D(ib) Mend(5px)'})
print('Total ESG Risk Score is: ', ESG_risk_score.text)

Total ESG Risk Score is:  17


### scrape many stocks

Create a function that parses a Yahoo Finance sustainability page and extracts the ESG scores.

In [68]:
def parse_soup(soup):
    """Extract the ESG-related scores from a parsed sustainability page.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs)`` and ``find_all(name, attrs)``
        Typically a ``BeautifulSoup`` document for a
        ``https://finance.yahoo.com/quote/<ticker>/sustainability`` page.

    Returns
    -------
    list of str
        ``[esg_score, env_score, soc_score, gov_score, ctr_score]``.
        Any score whose element is absent from the page is reported as
        the string ``'N/A'``.
    """

    def _text_or_na(tag):
        # find() yields None when the element is absent from the page.
        return 'N/A' if tag is None else tag.text

    # ESG score may be unavailable for some tickers
    # e.g. https://finance.yahoo.com/quote/alk/sustainability
    esg_score = _text_or_na(soup.find('div', {'class':'Fz(36px) Fw(600) D(ib) Mend(5px)'}))
    ctr_score = _text_or_na(soup.find('div', {'class':'D(ib) Fz(36px) Fw(500)'}))

    # The environment / social / governance scores share one class string and
    # are read positionally. Pad with 'N/A' so a page exposing fewer than
    # three of them does not raise IndexError.
    other_scores = soup.find_all('div', {'class':'D(ib) Fz(23px) smartphone_Fz(22px) Fw(600)'})
    pillar_scores = [tag.text for tag in other_scores[:3]]
    pillar_scores += ['N/A'] * (3 - len(pillar_scores))
    env_score, soc_score, gov_score = pillar_scores

    return [esg_score, env_score, soc_score, gov_score, ctr_score]

Get the list of S&P 500 tickers from Wikipedia.

In [16]:
# Download the page; the timeout stops the cell from hanging on a stalled connection.
wiki_response = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies', timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
wiki_response.raise_for_status()
wiki_page = wiki_response.text
# Wrap the HTML in StringIO: passing a literal HTML string to read_html is
# deprecated in recent pandas releases.
sp_500 = pd.read_html(StringIO(wiki_page))

In [45]:
# Take the 'Symbol' column of the first table parsed from the page
# and convert it to a plain Python list.
tickers = sp_500[0]['Symbol'].to_list()
tickers

['MMM',
 'AOS',
 'ABT',
 'ABBV',
 'ACN',
 'ATVI',
 'ADM',
 'ADBE',
 'ADP',
 'AAP',
 'AES',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AMD',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'AON',
 'APA',
 'AAPL',
 'AMAT',
 'APTV',
 'ACGL',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'AZO',
 'AVB',
 'AVY',
 'BKR',
 'BALL',
 'BAC',
 'BBWI',
 'BAX',
 'BDX',
 'WRB',
 'BRK.B',
 'BBY',
 'BIO',
 'TECH',
 'BIIB',
 'BLK',
 'BK',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'BF.B',
 'BG',
 'CHRW',
 'CDNS',
 'CZR',
 'CPT',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CTLT',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'CE',
 'CNC',
 'CNP',
 'CDAY',
 'CF',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'CL',
 'CMCSA'

Loop through all the tickers. Note: the list is long, so you may want to test with a smaller sample first, e.g. `tickers[:10]` for the first 10 tickers.

In [71]:
# Request settings that do not change between tickers are set up once.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'

scores_list = []
# (ticker, reason) pairs for every ticker that produced no row, so that
# skipped tickers are visible after the run instead of silently missing.
failed_tickers = []

# A single session reuses the underlying connection across requests.
with requests.Session() as session:
    session.headers.update({'User-Agent': user_agent})
    for ticker in tickers:
        # The ticker list writes share classes with a dot (e.g. BRK.B);
        # Yahoo Finance quote URLs use a hyphen instead (BRK-B).
        yahoo_symbol = ticker.replace('.', '-')
        url = 'https://finance.yahoo.com/quote/' + yahoo_symbol + '/sustainability'
        print(url) # for debugging
        try:
            # The timeout keeps one stalled request from blocking the whole run.
            response = session.get(url, timeout=10)
        except requests.RequestException as exc:
            # A network error for one ticker should not abort the others.
            failed_tickers.append((ticker, repr(exc)))
            continue
        if response.status_code != 200:
            failed_tickers.append((ticker, response.status_code))
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        scores = parse_soup(soup)
        # Keep the original ticker spelling as the row key.
        scores.insert(0, ticker)
        scores_list.append(scores)

https://finance.yahoo.com/quote/MMM/sustainability
https://finance.yahoo.com/quote/AOS/sustainability
https://finance.yahoo.com/quote/ABT/sustainability
https://finance.yahoo.com/quote/ABBV/sustainability
https://finance.yahoo.com/quote/ACN/sustainability
https://finance.yahoo.com/quote/ATVI/sustainability
https://finance.yahoo.com/quote/ADM/sustainability
https://finance.yahoo.com/quote/ADBE/sustainability
https://finance.yahoo.com/quote/ADP/sustainability
https://finance.yahoo.com/quote/AAP/sustainability
https://finance.yahoo.com/quote/AES/sustainability
https://finance.yahoo.com/quote/AFL/sustainability
https://finance.yahoo.com/quote/A/sustainability
https://finance.yahoo.com/quote/APD/sustainability
https://finance.yahoo.com/quote/AKAM/sustainability
https://finance.yahoo.com/quote/ALK/sustainability
https://finance.yahoo.com/quote/ALB/sustainability
https://finance.yahoo.com/quote/ARE/sustainability
https://finance.yahoo.com/quote/ALGN/sustainability
https://finance.yahoo.com/qu

https://finance.yahoo.com/quote/DD/sustainability
https://finance.yahoo.com/quote/DXC/sustainability
https://finance.yahoo.com/quote/EMN/sustainability
https://finance.yahoo.com/quote/ETN/sustainability
https://finance.yahoo.com/quote/EBAY/sustainability
https://finance.yahoo.com/quote/ECL/sustainability
https://finance.yahoo.com/quote/EIX/sustainability
https://finance.yahoo.com/quote/EW/sustainability
https://finance.yahoo.com/quote/EA/sustainability
https://finance.yahoo.com/quote/ELV/sustainability
https://finance.yahoo.com/quote/LLY/sustainability
https://finance.yahoo.com/quote/EMR/sustainability
https://finance.yahoo.com/quote/ENPH/sustainability
https://finance.yahoo.com/quote/ETR/sustainability
https://finance.yahoo.com/quote/EOG/sustainability
https://finance.yahoo.com/quote/EPAM/sustainability
https://finance.yahoo.com/quote/EQT/sustainability
https://finance.yahoo.com/quote/EFX/sustainability
https://finance.yahoo.com/quote/EQIX/sustainability
https://finance.yahoo.com/quot

https://finance.yahoo.com/quote/MHK/sustainability
https://finance.yahoo.com/quote/MOH/sustainability
https://finance.yahoo.com/quote/TAP/sustainability
https://finance.yahoo.com/quote/MDLZ/sustainability
https://finance.yahoo.com/quote/MPWR/sustainability
https://finance.yahoo.com/quote/MNST/sustainability
https://finance.yahoo.com/quote/MCO/sustainability
https://finance.yahoo.com/quote/MS/sustainability
https://finance.yahoo.com/quote/MOS/sustainability
https://finance.yahoo.com/quote/MSI/sustainability
https://finance.yahoo.com/quote/MSCI/sustainability
https://finance.yahoo.com/quote/NDAQ/sustainability
https://finance.yahoo.com/quote/NTAP/sustainability
https://finance.yahoo.com/quote/NFLX/sustainability
https://finance.yahoo.com/quote/NWL/sustainability
https://finance.yahoo.com/quote/NEM/sustainability
https://finance.yahoo.com/quote/NWSA/sustainability
https://finance.yahoo.com/quote/NWS/sustainability
https://finance.yahoo.com/quote/NEE/sustainability
https://finance.yahoo.co

KeyboardInterrupt: 

In [None]:
# Inspect the raw rows accumulated in scores_list.
scores_list

In [72]:
# Turn the list of per-ticker rows into a table with one named column per score.
# NOTE(review): 'Gov_score' and 'Ctr_score' use a lowercase 's' while the other
# columns use 'Score' — confirm whether downstream users rely on these names
# before normalising them.
df_scores = pd.DataFrame(scores_list, columns=['Ticker', 'ESG_Score', 'Env_Score', 'Soc_Score', 'Gov_score', 'Ctr_score'])

In [73]:
# Display the assembled scores table.
df_scores

Unnamed: 0,Ticker,ESG_Score,Env_Score,Soc_Score,Gov_score,Ctr_score
0,MMM,34,12.3,13.6,7.7,3
1,AOS,25,7.3,11.9,6.3,0
2,ABT,25,3.0,13.6,8.4,3
3,ABBV,28,1.1,16.8,9.9,3
4,ACN,10,0.3,4.6,4.8,2
...,...,...,...,...,...,...
425,LUV,32,11,15,6,2
426,SWK,26,6,13,8,2
427,SBUX,25,6,14,4,3
428,STT,22,2,11,9,2


In [74]:
# Write the table to an Excel workbook in the current working directory;
# index=False leaves the DataFrame's row index out of the file.
df_scores.to_excel('ESG scores.xlsx', index=False)