### https://realpython.com/beautiful-soup-web-scraper-python/#step-3-parse-html-code-with-beautiful-soup

In [1]:
import requests
from bs4 import BeautifulSoup

In [85]:
def get_soup(URL: str, timeout: float = 10.0):
    """
    Download a web page and parse it into a BeautifulSoup object.

    Args:
        URL: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout the request can block indefinitely.

    Returns:
        The BeautifulSoup document built from the response body with the
        "html.parser" backend.

    Raises:
        requests.exceptions.RequestException: if the connection fails, the
            request times out, or the server answers with an HTTP error
            status.
    """
    page = requests.get(URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    page.raise_for_status()

    return BeautifulSoup(page.content, "html.parser")

    

In [12]:
# Page to scrape; it is fetched and parsed once here so later cells can query `soup`.
URL  = "https://realpython.github.io/fake-jobs/"
soup = get_soup(URL) 

In [42]:
# Print the title (<h2>) and the subtitle (<h3>) of the fake job posting at index i.
i = 1
print(soup.find_all('h2')[i].text.strip())    # Title
print(soup.find_all('h3')[i].text.strip())    # Subtitle

Energy engineer
Vasquez-Davidson


## List all elements by tag

In [39]:
def get_element_by_tag(soup, tag: str, class_: str = None) -> list:
    """
    Return the stripped text of every ``tag`` element found in ``soup``.

    When ``class_`` is given it is forwarded to ``find_all`` as the
    ``class_`` filter; otherwise the search uses the tag name alone.
    """
    # Only forward the class filter when the caller actually supplied one.
    filters = {} if class_ is None else {"class_": class_}

    texts = []
    for node in soup.find_all(tag, **filters):
        texts.append(node.text.strip())
    return texts
    

In [40]:
# Collect the stripped text of every <h2>, every <h3>, and every <p class="location"> on the page.
Title = get_element_by_tag(soup, 'h2')
Sub_title = get_element_by_tag(soup, 'h3')
location = get_element_by_tag(soup, 'p', class_="location")  

## Find Elements by Class Name and Text Content

In [56]:
def get_element_by_keyword(soup, tag: str, string: str = None) -> list:
    """
    List the text of every ``tag`` element whose string contains a keyword.

    Args:
        soup: Parsed document (anything exposing ``find_all``).
        tag: Name of the elements to look for, e.g. ``"h2"``.
        string: Keyword to search for, matched case-insensitively as a
            substring. When ``None``, every ``tag`` element is returned.

    Returns:
        The stripped text of each matching element, in document order.
    """
    if string is None:
        tags = soup.find_all(tag)
    else:
        keyword = string.lower()  # compare case-insensitively

        def contains_keyword(text) -> bool:
            # BeautifulSoup hands the filter the element's `.string`, which
            # is None when the element has no single string child; treat
            # that as "no match" instead of crashing on None.lower().
            return text is not None and keyword in text.lower()

        tags = soup.find_all(tag, string=contains_keyword)

    return [element.text.strip() for element in tags]
    

In [57]:
get_element_by_keyword(soup, 'h2', string="Python") 

['Senior Python Developer',
 'Software Engineer (Python)',
 'Python Programmer (Entry-Level)',
 'Python Programmer (Entry-Level)',
 'Software Developer (Python)',
 'Python Developer',
 'Back-End Web Developer (Python, Django)',
 'Back-End Web Developer (Python, Django)',
 'Python Programmer (Entry-Level)',
 'Software Developer (Python)']

In [76]:
def get_data_by_jobcard(soup, tag: str, string: str = None) -> list:
    """
    List the card elements that contain a matching ``tag`` element.

    Args:
        soup: Parsed document (anything exposing ``find_all``).
        tag: Name of the elements to look for, e.g. ``"h2"``.
        string: Keyword matched case-insensitively as a substring of the
            element's string. When ``None``, every ``tag`` element is used.

    Returns:
        For each matching element, its great-grandparent element, in
        document order.
    """
    if string is None:
        tags = soup.find_all(tag)
    else:
        keyword = string.lower()  # compare case-insensitively
        # The filter can receive None (element without a single string
        # child); treat that as "no match" rather than raising.
        tags = soup.find_all(
            tag,
            string=lambda text: text is not None and keyword in text.lower(),
        )

    # Climb three levels from each match; this assumes the card wrapper is
    # the great-grandparent of the matched element in the page layout —
    # TODO confirm against the target page if the markup changes.
    return [element.parent.parent.parent for element in tags]

In [77]:
# List all card content related to Python: search the <h2> job titles and
# pass the keyword as `string` (the second positional argument is the tag name).
python_job_elements = get_data_by_jobcard(soup, 'h2', string='Python')

In [83]:
def list_job_url(job_list: list) -> list:
    """
    Collect the ``href`` of every link found inside the given job cards.

    Args:
        job_list: Elements to search; each must expose ``find_all``.

    Returns:
        The ``href`` values of all ``<a>`` elements, card by card, in the
        order they are found. Anchors without an ``href`` are skipped.
    """
    job_url = []

    # Iterate over the argument (not a notebook global) so the function
    # works for whatever list of cards the caller passes in.
    for job in job_list:
        for link in job.find_all("a"):
            link_url = link.get("href")
            if link_url is not None:  # anchor without a target: nothing to list
                job_url.append(link_url)
    return job_url

In [84]:
# Gather the link URLs contained in the job cards held in python_job_elements.
job_list = list_job_url(python_job_elements)