In [2]:
from bs4 import BeautifulSoup

In [3]:
html = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>First HTML Page</title>
</head>
<body>
  <div id="first">
    <h3 data-example="yes">hi</h3>
    <p>more text.</p>
  </div>
  <ol>
    <li class="special">This list item is special.</li>
    <li class="special">This list item is also special.</li>
    <li>This list item is not special.</li>
  </ol>
  <div data-example="yes">bye</div>
</body>
</html>
"""

In [4]:
soup = BeautifulSoup(html, "html.parser")
type(soup)

bs4.BeautifulSoup

In [7]:
print(soup.body.p)

<p>more text.</p>


In [9]:
print(soup.find_all('div'))

[<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>, <div data-example="yes">bye</div>]


In [10]:
print(soup.find_all(class_='special'))

[<li class="special">This list item is special.</li>, <li class="special">This list item is also special.</li>]


In [12]:
print(soup.find_all(attrs={'data-example': 'yes'}))

[<h3 data-example="yes">hi</h3>, <div data-example="yes">bye</div>]


In [13]:
print(soup.select('#first'))

[<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>]


In [14]:
print(soup.select('.special'))

[<li class="special">This list item is special.</li>, <li class="special">This list item is also special.</li>]


In [15]:
print(soup.select('ol li'))

[<li class="special">This list item is special.</li>, <li class="special">This list item is also special.</li>, <li>This list item is not special.</li>]


In [16]:
print(soup.select('[data-example]'))

[<h3 data-example="yes">hi</h3>, <div data-example="yes">bye</div>]


In [18]:
stuff = soup.select('ol li')
for li in stuff:
    print(li.get_text())

This list item is special.
This list item is also special.
This list item is not special.


In [23]:
stuff = soup.select('.special')
stuff[0].get_text()

'This list item is special.'

In [25]:
for element in stuff:
    print(element.name, ':', element.get_text())

li : This list item is special.
li : This list item is also special.


In [26]:
for element in stuff:
    print(f'{element.name} with attributes {element.attrs} contains text: {element.get_text()}')

li with attributes {'class': ['special']} contains text: This list item is special.
li with attributes {'class': ['special']} contains text: This list item is also special.


In [29]:
soup.select('#first')[0].attrs['id']

'first'

In [30]:
soup.body.contents

['\n',
 <div id="first">
 <h3 data-example="yes">hi</h3>
 <p>more text.</p>
 </div>,
 '\n',
 <ol>
 <li class="special">This list item is special.</li>
 <li class="special">This list item is also special.</li>
 <li>This list item is not special.</li>
 </ol>,
 '\n',
 <div data-example="yes">bye</div>,
 '\n']

In [31]:
listas = soup.body.ol
listas

<ol>
<li class="special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>

In [37]:
from bs4.element import Tag

for item in listas.children:
    if type(item) == Tag:
        print(item)

<li class="special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>


In [40]:
item.parent.parent

<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>
<div data-example="yes">bye</div>
</body>

In [46]:
pirmas_li = soup.find('li')
print(pirmas_li)
print(li.find_next_sibling())
print(li.find_next_siblings())

<li class="special">This list item is special.</li>
<li>This list item is not special.</li>
[<li>This list item is not special.</li>]


In [52]:
import requests
html = requests.get('https://www.aic.lt/ispardavimas').content

In [53]:
aic_sales_page = BeautifulSoup(html, "html.parser")
print(aic_sales_page)

<!DOCTYPE html>

<html lang="lt">
<head>
<meta charset="utf-8"/>
<meta content="ie=edge" http-equiv="x-ua-compatible"/>
<title>Sumažinta kaina</title>
<script data-keepinline="true">
    var ajaxGetProductUrl = '//www.aic.lt/modulis/cdc_googletagmanager/async';
    var ajaxShippingEvent = 1;
    var ajaxPaymentEvent = 1;

/* datalayer */
dataLayer = window.dataLayer || [];
    let cdcDatalayer = {"pageCategory":"pricesdrop","event":null,"ecommerce":{"currency":"EUR"}};
    dataLayer.push(cdcDatalayer);

/* call to GTM Tag */
(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
})(window,document,'script','dataLayer','GTM-M75MWG');

/* async call to avoid cache system for dynamic data */
var cdcgtmreq = new XMLHttpRequest();
cdcgtmreq.onreadystatechange = fu

In [56]:
aic_sales_page.select('#products')

[<section id="products">
 <div>
 <div class="row products-selection my-3" id="js-product-list-top">
 <div class="col-lg-6 visible--desktop total-products">
 <p class="product__quantity">Yra 187 produktai</p>
 </div>
 <div class="col-lg-6">
 <div class="sort-by-row justify-content-md-end">
 <div class="form-inline">
 <div class="form-group mb-0">
 <label class="visible--desktop" for="select-sort-order">Rikiuoti pagal:</label>
 <select class="custom-select ml-sm-2" id="select-sort-order">
 <option value="https://www.aic.lt/ispardavimas?order=product.position.asc">Numatytasis rūšiavimas</option>
 <option value="https://www.aic.lt/ispardavimas?order=product.date_add.desc">Naujausios prekės</option>
 <option value="https://www.aic.lt/ispardavimas?order=product.name.asc">Pavadinimas, nuo A iki Z</option>
 <option value="https://www.aic.lt/ispardavimas?order=product.name.desc">Pavadinimas, nuo Z iki A</option>
 <option selected="selected" value="https://www.aic.lt/ispardavimas?order=product.p

In [57]:
products = aic_sales_page.select('#products h2')
products

[<h2 class="h3 product-title text-left"><a href="https://www.aic.lt/sublimuotas-maistas/1166-meatmakers-tactical-original-pork-jerky-vytinta-kiauliena-477902797209.html">MeatMakers Tactical Original Pork Jerky (vytinta kiauliena)</a></h2>,
 <h2 class="h3 product-title text-left"><a href="https://www.aic.lt/ivairus-laikikliai-karabinai/7678-nite-ize-tvirtinimas-gear-tie-6-juoda-094664018037.html">NITE-IZE tvirtinimas Gear Tie 6' (juoda)</a></h2>,
 <h2 class="h3 product-title text-left"><a href="https://www.aic.lt/isgyvenimo-reikmenys/1514-hipotermine-antklode-rothco-613902103207.html">Hipoterminė išgyvenimo antklodė Rothco</a></h2>,
 <h2 class="h3 product-title text-left"><a href="https://www.aic.lt/ivairus-laikikliai-karabinai/1997-nite-ize-plastik-karabinas-sbp6-juoda-094664019263.html">NITE-IZE plastik. karabinas SBP6 (juoda)</a></h2>,
 <h2 class="h3 product-title text-left"><a href="https://www.aic.lt/ivairus-laikikliai-karabinai/1983-nite-ize-tvirtinimas-gear-tie-12-juoda-094664019

In [64]:
for product in products:
    kaina = product.find_next(class_='price')
    print(kaina.get_text(), product.get_text())

1,50 € MeatMakers Tactical Original Pork Jerky (vytinta kiauliena)
2,50 € NITE-IZE tvirtinimas Gear Tie 6' (juoda)
3,00 € Hipoterminė išgyvenimo antklodė Rothco
3,50 € NITE-IZE plastik. karabinas SBP6 (juoda)
3,50 € NITE-IZE tvirtinimas Gear Tie 12' (juoda) 2 vnt
4,00 € NITE-IZE tvirtinimas Gear Tie 18' (juoda) 2 vnt
5,50 € Rankšluostis JR GEAR Quick Dry, L-XL, Baltic (mėlyna)
14,00 € Rankšluostis JR GEAR Quick Dry, L-XL, Beetle (žalia)
6,00 € Krepšelis TT Dump Pouch Light (juoda)
6,00 € NITE-IZE tel. dėklas CashBack Wallet
4,50 € ALOKSAK maišeliai 2vnt.13x12cm (ALOK2-5x4)
9,00 € NITE-IZE tvirtinimo virvė CamJam 6 ft.
10,00 € NITE-IZE įrankis Doohickey Ratchet Tool
10,00 € NITE-IZE magnetas Steelie Orbiter magnetic socket and metal plate
10,00 € NITE-IZE tvirtinimo virvė CamJam 12 ft.
11,00 € Pirštinės ARSENAL Pilot (Nomex)
6,50 € ALOKSAK maišeliai 2vnt 16x16cm (ALOK2-6x6)
14,00 € NITE-IZE tvirtinimo virvė CamJam 18 ft.
7,00 € OPSAK maišeliai 2vnt (EU-OPD2-7x7) 17.1 x 17.8cm
15,00 € Ne