### Scraping examples

https://youtu.be/7sFCOunKL_Y

In [10]:
import requests
from bs4 import BeautifulSoup
import json

url = 'https://finance.yahoo.com/quote/AAPL'

headers =  {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36'}
r = requests.get(url, headers=headers)

print(r.status_code)
#print(r.text)


200


### Bring site text into soup data

In [11]:
soup = BeautifulSoup(r.text, 'html.parser')

#print(soup.title.text)

### Query parts in data you need

#### Finding direct tags

In [12]:
#<fin-streamer class="Fw(b) Fz(36px) Mb(-4px) D(ib)" data-symbol="AAPL" data-test="qsp-price" data-field="regularMarketPrice" data-trend="none" data-pricehint="2" value="151.21" active="">151.21</fin-streamer>
price = soup.find('fin-streamer', {'class': 'Fw(b) Fz(36px) Mb(-4px) D(ib)'}).text

#<fin-streamer class="Fw(500) Pstart(8px) Fz(24px)" data-symbol="AAPL" data-test="qsp-price-change" data-field="regularMarketChange" data-trend="txt" data-pricehint="2" value="2.5" active=""><span class="C($positiveColor)">+2.50</span></fin-streamer>
change = soup.find('fin-streamer', {'class': 'Fw(500) Pstart(8px) Fz(24px)'}).text

print(price, change)

145.38 -5.83


#### Finding under sub tag

In [13]:
#<div class="D(ib) Mend(20px)"><fin-streamer class="Fw(b) Fz(36px) Mb(-4px) D(ib)" data-symbol="DIS" data-test="qsp-price" data-field="regularMarketPrice" data-trend="none" data-pricehint="2" value="110.87" active="">110.87</fin-streamer><fin-streamer class="Fw(500) Pstart(8px) Fz(24px)" data-symbol="DIS" data-test="qsp-price-change" data-field="regularMarketChange" data-trend="txt" data-pricehint="2" value="1.6800003" active=""><span class="C($positiveColor)">+1.68</span></fin-streamer> <fin-streamer class="Fw(500) Pstart(8px) Fz(24px)" data-symbol="DIS" data-field="regularMarketChangePercent" data-trend="txt" data-pricehint="2" data-template="({fmt})" value="0.015386026" active=""><span class="C($positiveColor)">(+1.54%)</span></fin-streamer><fin-streamer class="D(n)" data-symbol="DIS" changeev="regularTimeChange" data-field="regularMarketTime" data-trend="none" value="" active="true"></fin-streamer><fin-streamer class="D(n)" data-symbol="DIS" changeev="marketState" data-field="marketState" data-trend="none" value="" active="true"></fin-streamer><div id="quote-market-notice" class="C($tertiaryColor) D(b) Fz(12px) Fw(n) Mstart(0)--mobpsm Mt(6px)--mobpsm Whs(n)"><span>At close:  04:04PM EDT</span></div></div>
streamers = soup.find('div', {'class': 'D(ib) Mend(20px)'}).find_all('fin-streamer')
price = streamers[0].text
change = streamers[1].text

print(price, change)

145.38 -5.83


### Another way to get the sub tags

In [14]:
datatag = soup.find('div', {'class': 'D(ib) Mend(20px)'})
price = datatag.find('fin-streamer', {'class': 'Fw(b) Fz(36px) Mb(-4px) D(ib)'}).text
change = datatag.find('fin-streamer', {'class': 'Fw(500) Pstart(8px) Fz(24px)'}).text

print(price, change)

145.38 -5.83


### Put it all under function

In [15]:
def getData(symbol):
    print('Getting %s' % symbol)
    url = f'https://finance.yahoo.com/quote/{symbol}'
    headers =  {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36'}
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    streamers = soup.find('div', {'class': 'D(ib) Mend(20px)'}).find_all('fin-streamer')
    stock = {
        'symbol': symbol,
        'price': streamers[0].text,
        'change': streamers[1].text
    }
    return stock

### Function usage

In [16]:
mystocks = {
'ARGFX',
'RYBHX',
'PRGSX',
'VMFXX',
'VUSXX',
'VITAX',
'DFDIX',
'DIS',
'SPAXX',
'VBK'}
stockdata = []
for mystock in mystocks:
    stockdata.append(getData(mystock))


Getting DFDIX
Getting PRGSX
Getting ARGFX
Getting SPAXX
Getting VMFXX
Getting VUSXX
Getting VITAX
Getting VBK
Getting DIS
Getting RYBHX


### Output into JSON format

In [17]:
with open('stockdata.json', 'w') as f:
    json.dump(stockdata, f)

### Trying with lxml instead of html

In [18]:
web_content = BeautifulSoup(r.text, 'lxml')
price = web_content.find('fin-streamer', {'class': 'Fw(b) Fz(36px) Mb(-4px) D(ib)'})
price

<fin-streamer active="" class="Fw(b) Fz(36px) Mb(-4px) D(ib)" data-field="regularMarketPrice" data-pricehint="2" data-symbol="AAPL" data-test="qsp-price" data-trend="none" value="145.38">145.38</fin-streamer>