In [None]:
Here are some commonly used BeautifulSoup methods and attributes for web scraping, with a short explanation of each:

soup.find(): This method is used to find the first occurrence of a specified HTML tag in the BeautifulSoup object. You can search for tags based on their name, attributes, and text content.

soup.find_all(): This method is used to find all occurrences of a specified HTML tag in the BeautifulSoup object. You can search for tags based on their name, attributes, and text content.

soup.select(): This method is used to select HTML elements based on CSS selectors. You can use CSS selectors to select tags based on their attributes, class names, and parent-child relationships.

soup.get_text(): This method is used to extract the text content of an HTML document. It returns all the human-readable text in the document (or beneath the tag it is called on) as a single string, with the HTML markup removed.

soup.prettify(): This method is used to format the HTML document in a human-readable way. It adds line breaks and indentation to the document to make it easier to read.

soup.contents: This attribute (not a method, so it is accessed without parentheses) holds a list of all the direct children of an HTML tag, including text nodes as well as child tags. It can be used to iterate over the children of a tag and extract their contents.

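soup.parent: This attribute (accessed without parentheses, for example tag.parent) holds the parent element of an HTML tag. It can be used to navigate up the HTML document and extract information from parent elements. The top-level BeautifulSoup object itself has no parent, so its .parent is None.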

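soup.next_sibling: This attribute (accessed without parentheses) holds the next sibling of an HTML tag, i.e. the next node at the same level of the tree. It can be used to navigate to the next sibling and extract information from it. Note that the sibling is often a whitespace text node rather than a tag; use find_next_sibling() to jump straight to the next tag.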

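soup.previous_sibling: This attribute (accessed without parentheses) holds the previous sibling of an HTML tag, i.e. the preceding node at the same level of the tree. It can be used to navigate to the previous sibling and extract information from it. Note that the sibling is often a whitespace text node rather than a tag; use find_previous_sibling() to jump straight to the previous tag.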

These are just some of the many methods available in BeautifulSoup. Using these methods, you can extract information from HTML documents and scrape data from websites.


In [None]:
# find(): look up the first tag that matches a tag name and attribute filters.
from bs4 import BeautifulSoup

html_doc = """
<html>
<head>
<title>My Website</title>
</head>
<body>
<div id="content">
<h1>Welcome to my website!</h1>
<p>This is a paragraph of text.</p>
<a href="http://www.google.com">Google</a>
</div>
</body>
</html>
"""
soup = BeautifulSoup(html_doc, 'html.parser')

# Fetch the first <div> whose id attribute is "content". Passing id= as a
# keyword argument is the same filter as the attrs dictionary {'id': 'content'}.
div_tag = soup.find('div', id='content')
print(div_tag)


<div id="content">
<h1>Welcome to my website!</h1>
<p>This is a paragraph of text.</p>
<a href="http://www.google.com">Google</a>
</div>


In [None]:
# find_all() method: This method returns a list of all matching tags, given the tag name and attributes.
from bs4 import BeautifulSoup

html_doc = """
<html>
<head>
<title>My Website</title>
</head>
<body>
<div class="article">
<h2>Article Title</h2>
<p>This is the first paragraph.</p>
<p>This is the second paragraph.</p>
</div>
<div class="article">
<h2>Another Article Title</h2>
<p>This is another paragraph.</p>
</div>
</body>
</html>
"""

soup = BeautifulSoup(html_doc, 'html.parser')

# Collect every <h2> heading in the document and print each one.
# To filter on attributes instead of only the tag name, pass them as a second
# argument, e.g. soup.find_all('div', {'class': 'article'}) for the article <div> tags.
h2_tags = soup.find_all('h2')
for h2_tag in h2_tags:
    print(h2_tag)


<h2>Article Title</h2>
<h2>Another Article Title</h2>


In [None]:
# select() method: This method allows you to use CSS selectors to find matching tags.
from bs4 import BeautifulSoup

html_doc = """
<html>
<head>
<title>My Website</title>
</head>
<body>
<div class="article">
<h2>Article Title</h2>
<p>This is the first paragraph.</p>
<p>This is the second paragraph.</p>
</div>
<div class="article">
<h2>Another Article Title</h2>
<p>This is another paragraph.</p>
</div>
</body>
</html>
"""

soup = BeautifulSoup(html_doc, 'html.parser')

# Select every <h2> heading with the CSS selector 'h2' and print each one.
# Any CSS selector works here, e.g. soup.select('div.article') for the
# <div> tags that carry the "article" class.
h2_tags = soup.select('h2')
for h2_tag in h2_tags:
    print(h2_tag)


<h2>Article Title</h2>
<h2>Another Article Title</h2>


In [None]:
# Accessing tag attributes: You can use the dictionary-like syntax to access the attributes of a tag.
# The import sits on its own line so that it actually executes; when it was
# appended to the comment above, this cell only worked if an earlier cell had
# already imported BeautifulSoup.
from bs4 import BeautifulSoup

html_doc = """
<html>
<head>
<title>My Website</title>
</head>
<body>
<a href="http://www.google.com">Google</a>
</body>
</html>
"""

soup = BeautifulSoup(html_doc, 'html.parser')

# Find the <a> tag and print its href attribute
a_tag = soup.find('a')
print(a_tag['href'])


http://www.google.com


In [None]:
# Tag text: the .text property of a tag gives the text content it contains.
from bs4 import BeautifulSoup

html_doc = """
<html>
<head>
<title>My Website</title>
</head>
<body>
<div id="content">
<h1>Welcome to my website!</h1>
<p>This is a paragraph of text.</p>
<a href="http://www.google.com">Google</a>
</div>
</body>
</html>
"""

soup = BeautifulSoup(html_doc, 'html.parser')

# Locate the first <h1> in the document, then print the text inside it
# (the heading text without the surrounding <h1> markup).
heading = soup.find('h1')
print(heading.text)


Welcome to my website!


In [None]:
from bs4 import BeautifulSoup
import requests

# TimesJobs search-results page (keywords "Python", location "Mumbai").
url = "https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=Python&txtLocation=Mumbai&cboWorkExp1=0"

# Download the page as text. The timeout keeps the cell from hanging forever
# if the server never answers, and raise_for_status() raises on an HTTP error
# response (4xx/5xx) instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
page_html = response.text

# Parse the HTML with the lxml parser and print the parsed document.
soup = BeautifulSoup(page_html, 'lxml')
print(soup)

<!DOCTYPE html>
<html><head>
<link href="https://fonts.googleapis.com/css?family=Poppins:400,500,600,700" rel="stylesheet"/>
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet"/>
<link href="https://static.timesjobs.com/newtj_css//css/tj_css/usability/materialize.css?v=7.1.7" media="all" rel="stylesheet" type="text/css"/>
<link href="https://static.timesjobs.com/newtj_css//css/tj_css/usability/global-usability.css?v=7.1.7" media="all" rel="stylesheet" type="text/css"/>
<link href="https://static.timesjobs.com/newtj_css//css/tj_css/usability/srp-usability.css?v=7.1.7" media="all" rel="stylesheet" type="text/css"/>
<script src="https://static.timesjobs.com/newtj_js/scripts/tj_scripts/usability/jquery-3.3.1.min.js" type="text/javascript"></script>
<script src="https://static.timesjobs.com/newtj_js/scripts/jquery.tokeninput.js" type="text/javascript"></script>
<!-- <script type="text/javascript" src="https://code.angularjs.org/1.7.0/angular.min.js"></scrip

In [None]:
from bs4 import BeautifulSoup
import requests

# TimesJobs search-results page (keywords "python", location "mumbai").
url = "https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=mumbai&cboWorkExp1=0"

# Download the page as text. The timeout keeps the cell from hanging forever
# if the server never answers, and raise_for_status() raises on an HTTP error
# response (4xx/5xx) instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
page_html = response.text

# Parse the HTML; `soup` is reused by the cells that follow.
soup = BeautifulSoup(page_html, 'lxml')

# Print the text of every <span> element on the page, one per print() call.
span_tags = soup.find_all('span')
for span_tag in span_tags:
    print(span_tag.text)


close

Live Chat (10am to 6pm IST)
1

Job Alerts(10)
3 hrs ago




                Morgan Stanley Pvt Ltd
                
(12)

                Crimson Interactive Pvt Ltd
                
(3)

                Hiotron India Pvt Ltd
                
(3)

                Verisk Analytics India Private..
                
(3)

                Boyen Haddin Consultants Llp
                
(2)

                Dg7
                
(2)

                Interactive Brokers
                
(2)

                Larsen & Toubro Infotech L..
                
(2)

                Pagalguy
                
(2)

                Publicis Groupe
                
(2)

                Rangam Infotech Pvt. Ltd.
                
(2)

                Tac Security
                
(2)

                World Resources Institute
                
(2)

                Abc Consultants Pvt Ltd
                
(1)

                Axionconnect Infosolutions Pvt..
                
(

In [None]:
# Print the text of every <ul> list in the page parsed into `soup` by the
# previous cell.
ul_tags = soup.find_all('ul')
for ul_tag in ul_tags:
    print(ul_tag.text)


TimesJobs
JobBuzz

Gulf Jobs
Career Insight


call0120 6358222

Live Chat (10am to 6pm IST)







notifications_none
1 
	



To see all recommended jobs, profile views, recruiter's contacted information, updates and more...

Login
or
Register





Login to View Notifications!




Forgot Password?




       	      Login-ID/Password cannot be left blank. Enter the full email ID (like: ram@gmail.com), if you are using registered email-id for login. 
	      








Job Alerts(10)

5 Matching Jobs 3 hrs ago










Login


Register




Job Alerts(10)

5 Matching Jobs 3 hrs ago








Companies







                Morgan Stanley Pvt Ltd
                 (12)





                Verisk Analytics India Private..
                 (4)





                Crimson Interactive Pvt Ltd
                 (3)





                Hiotron India Pvt Ltd
                 (3)





                Boyen Haddin Consultants Llp
                 (2)





                Dg7
         

In [None]:
# Narrow the search to the <ul> lists whose class attribute is
# "list-job-dtl clearfix" and print the text of each; `soup` comes from the
# earlier cell that downloaded and parsed the page.
job_detail_lists = soup.find_all('ul', class_="list-job-dtl clearfix")
for job_detail_list in job_detail_lists:
    print(job_detail_list.text)



Job Description:
Maxgen technologies pvt ltd offers live project internships in Mumbai. We are offering internships in core python  , django and machine learning.benefit to join maxgen ?Colleg... More Details


KeySkills:

python  ,  Css
        
       
directions_walk
Walk-in Details 10-Apr-2023 - 08-Jun-2023  09:00 AM - 04:00 PM   Maxgen Technologies Pvt Ltd.
Ambiance 1505,Sector 19 D,
Opposite R.T.O,
Backside Of Fortune Hotel,
Vashi , Navi Mumbai



Job Description:
Maxgen technologies pvt ltd offers live project internships in Mumbai. We are offering internships in python  , django and machine learning.Reasons behind choosing us?1 - Real... More Details


KeySkills:

python  ,  new
        
       
directions_walk
Walk-in Details 13-Mar-2023 - 12-May-2023  09:30 AM - 05:00 PM   Maxgen Technologies Pvt Ltd.
Ambiance 1505,Sector 19 D,
Opposite R.T.O,
Backside Of Fortune Hotel,
Vashi , Navi Mumbai



Job Description:
Maxgen technologies pvt ltd offers live project internship

In [None]:
from bs4 import BeautifulSoup
import requests

# TimesJobs search-results page (keywords "python", location "mumbai").
url = "https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=python&txtLocation=mumbai&cboWorkExp1=0"

# Download the page as text. The timeout keeps the cell from hanging forever
# if the server never answers, and raise_for_status() raises on an HTTP error
# response (4xx/5xx) instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
page_html = response.text

soup = BeautifulSoup(page_html, 'lxml')

# Find the first <h3 class="joblist-comp-name"> element and print its text
# with the surrounding whitespace stripped.
company = soup.find('h3', class_="joblist-comp-name")
if company is None:
    # find() found no matching element, so there is no .text to read.
    print("No <h3 class='joblist-comp-name'> element was found on the page.")
else:
    print(company.text.strip())

<!DOCTYPE html>
<html><head>
<link href="https://fonts.googleapis.com/css?family=Poppins:400,500,600,700" rel="stylesheet"/>
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet"/>
<link href="https://static.timesjobs.com/newtj_css//css/tj_css/usability/materialize.css?v=7.1.7" media="all" rel="stylesheet" type="text/css"/>
<link href="https://static.timesjobs.com/newtj_css//css/tj_css/usability/global-usability.css?v=7.1.7" media="all" rel="stylesheet" type="text/css"/>
<link href="https://static.timesjobs.com/newtj_css//css/tj_css/usability/srp-usability.css?v=7.1.7" media="all" rel="stylesheet" type="text/css"/>
<script src="https://static.timesjobs.com/newtj_js/scripts/tj_scripts/usability/jquery-3.3.1.min.js" type="text/javascript"></script>
<script src="https://static.timesjobs.com/newtj_js/scripts/jquery.tokeninput.js" type="text/javascript"></script>
<!-- <script type="text/javascript" src="https://code.angularjs.org/1.7.0/angular.min.js"></scrip