In [2]:
# YouTube Link: https://www.youtube.com/watch?v=oDtLJEc5Ako
from bs4 import BeautifulSoup

# Sample HTML used by the cells below. It is defined inline to keep things simple
# and reproducible. Note that the snippet ends without closing </body> and </html>
# tags; the parsed document printed in the next cell shows them filled in.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; their names:
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>

<b class="boldest">Extremely bold</b>
<blockquote class="boldest">Extremely bold</blockquote>
<b id="1">Test 1</b>
<b another-attribute="1" id="verybold">Test 2</b>
"""

In [3]:
# Write the sample markup to index.html. The encoding is passed explicitly so the
# bytes written do not depend on the platform's default locale encoding.
with open('index.html', 'w', encoding='utf-8') as f:
    f.write(html_doc)

# Parse the in-memory string (not the file just written) with the lxml parser.
soup = BeautifulSoup(html_doc, "lxml")

#print(soup.prettify())  # indented view, for the broad structure
print(soup)             # condensed view of the parsed document

<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; their names:
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
<b class="boldest">Extremely bold</b>
<blockquote class="boldest">Extremely bold</blockquote>
<b id="1">Test 1</b>
<b another-attribute="1" id="verybold">Test 2</b>
</body></html>


## Tag:

In [4]:
print(soup.b)              # Attribute-style access returns the first <b> (bold) tag in the document.
print(soup.find('b'))      # find('b') does the same: it returns only the
                           # FIRST <b> tag that occurs in the HTML doc.

print(soup.find_all('b'))  # To get every <b> tag on the page instead of just the first,
                           # use find_all('b'); the result prints as a list of tags.

print(soup.b.name)         # NAME: the name of the tag. For a <b> tag, the name is "b".

<b>The Dormouse's story</b>
<b>The Dormouse's story</b>
[<b>The Dormouse's story</b>, <b class="boldest">Extremely bold</b>, <b id="1">Test 1</b>, <b another-attribute="1" id="verybold">Test 2</b>]
b


## We can alter a tag's name and have that change reflected in the parsed document. For instance:

In [5]:
tag = soup.b               # The first <b> tag in the document.
print(tag)
tag.name = "blockquote"    # Rename the tag; "blockquote" is an arbitrary name chosen for the demo.
print(tag)
# NOTE: this renames the element inside `soup` itself, not a copy. From here on the
# first <b> is a <blockquote>, so find_all('b') returns one fewer tag in later cells.

<b>The Dormouse's story</b>
<blockquote>The Dormouse's story</blockquote>


## Attributes:

In [6]:
# The first <b> was renamed to <blockquote> in the previous cell, so only three <b>
# tags remain and "Test 2" is at index 2 here -- one smaller than the index used in
# the video at time index 27:04.
tag=soup.find_all('b')[2]
print(tag)

# This specific tag has the attribute "id", which can be accessed like a dictionary key:
#print(tag['id'])

# Index 3 raises an error here because only indices 0-2 exist after the rename above.
# Presumably the video (time index 27:00-30:00) still had four <b> tags at this point,
# which would explain why it does not fail there -- TODO confirm against the video.
#tag=soup.find_all('b')[3]
#print(tag)

<b another-attribute="1" id="verybold">Test 2</b>


## We can even access multiple attributes that are non-standard HTML attributes:

In [7]:
# Look the tag up at the start of the cell so that the prints below do not depend
# on whatever `tag` happened to be left over from an earlier cell.
tag = soup.find_all('b')[2]

# Attributes are read with dictionary-style indexing, including non-standard ones.
print("id", tag['id'])
print("another-attribute", tag['another-attribute'])

# If we want to see all attributes, we can access them as a dictionary object:
print(tag)
print(tag.attrs)

print(tag)                 # These attributes are mutable; they can be reassigned like dict items:
#tag['another-attribute']=2
#print(tag)

# Python's del statement removes an attribute, the same way it removes a dict key:
#del tag['id']
#del tag['another-attribute']
#print(tag)

id verybold
another-attribute 1
<b another-attribute="1" id="verybold">Test 2</b>
{'another-attribute': '1', 'id': 'verybold'}
<b another-attribute="1" id="verybold">Test 2</b>


In [9]:
# Multi-valued Attributes
# NOTE(review): the code in this cell does not touch multi-valued attributes; it
# shows reading and replacing the text of a tag through .string.
tag = soup.find_all('b')[2]
print(tag)
print(tag.string)

# We can use the "replace_with" function to replace
# the content of the string with something different.
# The tag printed afterwards contains the new text:
tag.string.replace_with("This is another string")
print(tag)

# Topic placeholders (headings only, no code follows them in this cell):
# NavigableString

# BeautifulSoup

# Comments


<b another-attribute="1" id="verybold">Test 2</b>
Test 2
<b another-attribute="1" id="verybold">This is another string</b>
