# Importações

In [5]:
import os
import re
import time

import numpy as np
import pandas as pd
import seaborn as sns

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

# testing
from traceback import print_stack
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import *
from utilities.custom_logger import CustomLogger
import logging

# Classes e Funções

In [6]:
class Job():
    """A class used to represent a job opportunity."""

    def __init__(self, position, company, location, posted_date, no_applicants, date_collected, type_workplace,
                 required_skills, competitive_advantages, level, worktype, description, link_application, link_linkedin):
        """
        Parameters
        ----------
        position: str
            The name of the position
        company: str or Company
            The hiring company (the scraper in this file passes a Company object)
        location: str
            The location of the company
        posted_date: datetime
            When the job was posted approximately (see ``calculate_date``;
            may be ``np.nan`` when the posting date could not be parsed)
        no_applicants: int
            How many people applied to the job
        date_collected: datetime
            When the job opportunity was registered by our system
        type_workplace: str
            If it is hybrid, remote or presential
        required_skills: str
            Which skills are considered essential for consideration of the candidate
        competitive_advantages: str
            Which skills would benefit the applicant but are not essential
        level: str
            The level of the job, i. e. junior, senior etc.
        worktype: str
            If it is full-time job, a contract etc.
        description: str
            The text that describes the various attributes of the opportunity, generally including
            information about the company, required skills, and competitive advantages.
        link_application: str
            Link to the application of the candidate.
        link_linkedin: str
            Link to the opportunity in linkedin.
        """

        self.position = position
        self.company = company
        self.location = location
        self.posted_date = posted_date
        self.no_applicants = no_applicants
        self.date_collected = date_collected
        self.type_workplace = type_workplace
        self.required_skills = required_skills
        self.competitive_advantages = competitive_advantages
        self.level = level
        self.worktype = worktype
        self.description = description
        self.link_application = link_application
        self.link_linkedin = link_linkedin

    @staticmethod
    def calculate_date(date):
        """Convert Linkedin's relative posting date into an approximate datetime.

        Linkedin reports the posting date as a relative expression such as
        'há 3 semanas'. The text must have exactly three whitespace-separated
        tokens: a prefix (ignored), an integer quantity written in digits, and
        a unit (minuto, hora, dia, semana, mês, ano and their plurals).

        Parameters
        ----------
        date: str
            The relative date. Examples: 'há 3 semanas', 'há 2 dias', 'há 45 minutos'.
            Quantities spelled out in words (e.g. 'há três semanas') are not parsed.

        Returns
        -------
        datetime or float
            The approximate date of the job posting, or ``np.nan`` whenever the
            text cannot be interpreted (wrong type, wrong number of tokens,
            non-numeric quantity, unknown unit or out-of-range quantity).
        """

        try:
            # The first token (e.g. 'há') is discarded; exactly two must remain.
            quantity, type_of_date = date.split()[1:]
            quantity = int(quantity)
        except (AttributeError, TypeError, ValueError):
            return np.nan

        # Accept both 'mês' and 'mes' by dropping the circumflex.
        unit = type_of_date.replace('ê', 'e')
        now = datetime.today()

        try:
            if 'minuto' in unit:
                return now - timedelta(minutes=quantity)
            if 'hora' in unit:
                return now - timedelta(hours=quantity)
            if 'dia' in unit:
                return now - timedelta(days=quantity)
            if 'semana' in unit:
                return now - timedelta(weeks=quantity)
            if 'mes' in unit:
                return now - relativedelta(months=quantity)
            if 'ano' in unit:
                return now - relativedelta(years=quantity)
        except (OverflowError, ValueError):
            # The quantity is too large to be represented as a date.
            return np.nan

        # Unknown unit: report "missing" the same way as every other failure.
        return np.nan

    def to_dict(self):
        """Returns a conversion of the Job object to dict, so it can be used in dataframes for example.

        Returns
        -------
        dict
            Attributes of the object converted to dict
        """

        return {
            'position': self.position,
            'company': self.company,
            'location': self.location,
            'posted_date': self.posted_date,
            'no_applicants': self.no_applicants,
            'date_collected': self.date_collected,
            'type_workplace': self.type_workplace,
            'required_skills': self.required_skills,
            'competitive_advantages': self.competitive_advantages,
            'level': self.level,
            'worktype': self.worktype,
            'description': self.description,
            'link_application': self.link_application,
            'link_linkedin': self.link_linkedin
        }
    
class Company():
    """Plain container describing a company that is hiring."""

    def __init__(self, name, size, sector):
        """Store the descriptive attributes of the company.

        Parameters
        ----------
        name: str
            Name of the company
        size: str
            How many employees it has
        sector: str
            Which sector it belongs to. Example: Technology etc.
        """

        self.name, self.size, self.sector = name, size, sector

    def to_dict(self):
        """Expose the company attributes as a dict (handy for building dataframes).

        Returns
        -------
        dict
            Mapping with the keys 'name', 'size' and 'sector', in that order
        """

        exported_fields = ('name', 'size', 'sector')
        return {field: getattr(self, field) for field in exported_fields}


In [7]:
class SeleniumDriver():
    """Object with the main methods of Selenium adapted for easier use and to avoid duplication of the code.

    Most helpers accept either a ready WebElement (``element=``) or a
    ``locator``/``locatorType`` pair. They log what they do and, on failure,
    report the problem through the logger instead of raising.

    Source:
        https://github.com/Shivam23Thaman/POM-Project/blob/master/base/selenium_driver.py
        https://medium.com/analytics-vidhya/creating-our-own-selenium-driver-class-in-python3-x-and-important-python-concepts-8bf92d702230
    """

    log = CustomLogger(logging.DEBUG)

    # Short locator names accepted by the helpers, mapped to Selenium strategies.
    _LOCATOR_TYPES = {
        "id": By.ID,
        "name": By.NAME,
        "xpath": By.XPATH,
        "css": By.CSS_SELECTOR,
        "class": By.CLASS_NAME,
        "link": By.LINK_TEXT,
        "tag": By.TAG_NAME,
    }

    def __init__(self, driver):
        """
        Parameters
        ----------
        driver: WebDriver
            Driver object generated by Selenium; usually initiated by the method initialize_driver()
        """
        self.driver = driver

    @staticmethod
    def initialize_driver():
        """This method initializes the driver object.

        It is a static method, so it can be called on the class (as the
        scraping script does) as well as on an instance.

        Returns
        -------
        Webdriver
            Object that allows using the Chrome browser.
        """

        # Options is used for saving the User Data, so it is not necessary to log in Linkedin every time
        options = webdriver.ChromeOptions()
        options.add_argument(r"--user-data-dir=C:\Users\alexa\AppData\Local\Google\Chrome\User Data\\")
        # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
        # Service object; kept as-is to match the Selenium version this file targets.
        driver = webdriver.Chrome(executable_path='./resources/chromedriver.exe', options=options)
        return driver

    def screen_shot(self, resultMessage):
        """
        Takes screenshot of the current open web page

        The file is named '<resultMessage>.<epoch millis>.png' and stored in
        '../screenshots/' relative to this module's directory (or to the
        current working directory when ``__file__`` is not defined, as happens
        inside a notebook). Failures are logged, not raised.
        """
        fileName = resultMessage + "." + str(round(time.time() * 1000)) + ".png"
        screenshotDirectory = "../screenshots/"
        relativeFileName = screenshotDirectory + fileName
        # `__file__` does not exist in notebook cells; fall back to a relative path.
        currentDirectory = os.path.dirname(globals().get("__file__", ""))
        destinationFile = os.path.join(currentDirectory, relativeFileName)
        destinationDirectory = os.path.join(currentDirectory, screenshotDirectory)

        try:
            if not os.path.exists(destinationDirectory):
                os.makedirs(destinationDirectory)
            self.driver.save_screenshot(destinationFile)
            self.log.info("Screenshot save to directory: " + destinationFile)
        except Exception:
            self.log.error("### Exception Occurred when taking screenshot")
            print_stack()

    def getTitle(self):
        """Return the title of the page currently loaded in the driver."""
        return self.driver.title

    def get_by_type(self, locatorType):
        """Translate a short locator name into the Selenium ``By`` strategy.

        Supported names (case-insensitive): id, name, xpath, css, class, link, tag.

        Returns
        -------
        str or bool
            The ``By`` constant, or ``False`` (after logging) when the name is not supported.
        """
        locatorType = locatorType.lower()
        byType = self._LOCATOR_TYPES.get(locatorType)
        if byType is None:
            self.log.info("Locator type " + locatorType +
                          " not correct/supported")
            return False
        return byType

    def get_element(self, locator, locatorType="id", parent=True, parent_element=None):
        """Find a single element.

        Parameters
        ----------
        locator: str
            Value used to locate the element
        locatorType: str
            One of the names accepted by ``get_by_type``
        parent: bool
            When True the search starts at the driver (whole page); when False
            it starts at ``parent_element``
        parent_element: WebElement, optional
            Element to search within when ``parent`` is False

        Returns
        -------
        WebElement or None
            The element, or None when it could not be found (the failure is only logged)
        """
        element = None
        try:
            locatorType = locatorType.lower()
            byType = self.get_by_type(locatorType)
            if parent:
                element = self.driver.find_element(byType, locator)
            else:
                element = parent_element.find_element(byType, locator)
            self.log.info("Element found with locator: " + locator +
                          " and  locatorType: " + locatorType)
        except Exception:
            self.log.info("Element not found with locator: " + locator +
                          " and locatorType: " + locatorType)
        return element

    def get_element_list(self, locator, locatorType="id", parent=True, parent_element=None):
        """
        Get list of elements

        Same parameters as ``get_element``. Unlike ``get_element`` this method
        does not catch errors raised by the underlying lookup.
        """
        locatorType = locatorType.lower()
        byType = self.get_by_type(locatorType)
        if parent:
            elements = self.driver.find_elements(byType, locator)
        else:
            elements = parent_element.find_elements(byType, locator)
        if len(elements) > 0:
            self.log.info("Element list FOUND with locator: " + locator +
                          " and locatorType: " + locatorType)
        else:
            self.log.info("Element list NOT FOUND with locator: " + locator +
                          " and locatorType: " + locatorType)
        return elements

    def click_element(self, locator="", locatorType="id", element=None):
        """
        Click on an element -> MODIFIED
        Either provide element or a combination of locator and locatorType
        """
        try:
            if locator:  # This means if locator is not empty
                element = self.get_element(locator, locatorType)
            element.click()
            self.log.info("Clicked on element with locator: " + locator +
                          " locatorType: " + locatorType)
        except Exception:
            self.log.info("Cannot click on the element with locator: " + locator +
                          " locatorType: " + locatorType)
            print_stack()

    def send_keys(self, data, locator="", locatorType="id", element=None):
        """
        Send keys to an element -> MODIFIED
        Either provide element or a combination of locator and locatorType
        """
        try:
            if locator:  # This means if locator is not empty
                element = self.get_element(locator, locatorType)
            element.send_keys(data)
            self.log.info("Sent data on element with locator: " + locator +
                          " locatorType: " + locatorType)
        except Exception:
            self.log.info("Cannot send data on the element with locator: " + locator +
                          " locatorType: " + locatorType)
            print_stack()

    def clear_field(self, locator="", locatorType="id"):
        """
        Clear an element field
        """
        element = self.get_element(locator, locatorType)
        element.clear()
        self.log.info("Clear field with locator: " + locator +
                      " locatorType: " + locatorType)

    def get_text(self, locator="", locatorType="id", element=None, info=""):
        """
        NEW METHOD
        Get 'Text' on an element
        Either provide element or a combination of locator and locatorType

        Falls back to the 'innerText' attribute when ``element.text`` is empty.
        Returns the stripped text, or None when there is no element or the
        text could not be read.
        """
        try:
            if locator:  # This means if locator is not empty
                element = self.get_element(locator, locatorType)
            if not element:
                return None

            text = element.text

            if len(text) == 0:
                text = element.get_attribute("innerText")
            if len(text) != 0:
                self.log.info("Getting text on element :: " + info)
                self.log.info("The text is :: '" + text + "'")
                text = text.strip()
        except Exception:
            self.log.info("Failed to get text on element " + info)
            print_stack()
            text = None
        return text

    def is_element_present(self, locator="", locatorType="id", element=None):
        """
        Check if element is present -> MODIFIED
        Either provide element or a combination of locator and locatorType
        """
        try:
            if locator:  # This means if locator is not empty
                element = self.get_element(locator, locatorType)
            if element is not None:
                self.log.info("Element present with locator: " + locator +
                              " locatorType: " + locatorType)
                return True
            else:
                self.log.info("Element not present with locator: " + locator +
                              " locatorType: " + locatorType)
                return False
        except Exception:
            print("Element not found")
            return False

    def is_element_displayed(self, locator="", locatorType="id", element=None):
        """
        NEW METHOD
        Check if element is displayed
        Either provide element or a combination of locator and locatorType
        """
        isDisplayed = False
        try:
            if locator:  # This means if locator is not empty
                element = self.get_element(locator, locatorType)
            if element is not None:
                isDisplayed = element.is_displayed()
                self.log.info("Element is displayed")
            else:
                self.log.info("Element not displayed")
            return isDisplayed
        except Exception:
            print("Element not found")
            return False

    def element_presence_check(self, locator, byType):
        """
        Check if element is present

        Unlike ``is_element_present`` this takes a Selenium ``By`` strategy
        directly (``byType``) instead of a short locator name.
        """
        try:
            elementList = self.driver.find_elements(byType, locator)
            if len(elementList) > 0:
                self.log.info("Element present with locator: " + locator +
                              " locatorType: " + str(byType))
                return True
            else:
                self.log.info("Element not present with locator: " + locator +
                              " locatorType: " + str(byType))
                return False
        except Exception:
            self.log.info("Element not found")
            return False

    def wait_for_element(self, locator, locatorType="id",
                         timeout=10, pollFrequency=0.5):
        """Wait until the element is clickable.

        Parameters
        ----------
        locator: str
            Value used to locate the element
        locatorType: str
            One of the names accepted by ``get_by_type``
        timeout: int or float
            Maximum number of seconds to wait
        pollFrequency: float
            Seconds between two consecutive checks

        Returns
        -------
        WebElement or None
            The element once clickable, or None when the wait failed (the failure is only logged)
        """
        element = None
        try:
            byType = self.get_by_type(locatorType)
            self.log.info("Waiting for maximum :: " + str(timeout) +
                          " :: seconds for element to be clickable")
            wait = WebDriverWait(self.driver, timeout=timeout,
                                 poll_frequency=pollFrequency,
                                 ignored_exceptions=[NoSuchElementException,
                                                     ElementNotVisibleException,
                                                     ElementNotSelectableException])
            element = wait.until(EC.element_to_be_clickable((byType, locator)))
            self.log.info("Element appeared on the web page")
        except Exception:
            self.log.info("Element not appeared on the web page")
            print_stack()
        return element

    def web_scroll(self, direction="up", px=800):
        """
        NEW METHOD
        Scroll the window by ``px`` pixels "up" or "down"; any other direction does nothing.
        """
        if direction == "up":
            # Scroll Up
            self.driver.execute_script(f"window.scrollBy(0, -{px});")

        if direction == "down":
            # Scroll Down
            self.driver.execute_script(f"window.scrollBy(0, {px});")

    def switch_to_frame(self, id="", name="", title='', index=None):
        """
        Switch to an iframe; the first non-empty argument among id, name and
        title is used, otherwise the index.

        Parameters:
            1. Required:
                None
            2. Optional:
                1. id    - id of the iframe
                2. name  - name of the iframe
                3. title - title of the iframe
                4. index - index of the iframe
        Returns:
            None
        Exception:
            None
        """
        if id:
            self.driver.switch_to.frame(id)
        elif name:
            self.driver.switch_to.frame(name)
        elif title:
            self.driver.switch_to.frame(title)
        else:
            self.driver.switch_to.frame(index)

    def switch_to_default_content(self):
        """
        Switch to default content

        Parameters:
            None
        Returns:
            None
        Exception:
            None
        """
        self.driver.switch_to.default_content()

    def get_element_attribute_value(self, attribute, element=None, locator="", locatorType="id"):
        """
        Get value of the attribute of element

        Parameters:
            1. Required:
                1. attribute - attribute whose value to find

            2. Optional:
                1. element   - Element whose attribute need to find
                2. locator   - Locator of the element
                3. locatorType - Locator Type to find the element

        Returns:
            Value of the attribute
        Exception:
            AttributeError when no element was given or found
        """
        if locator:
            element = self.get_element(locator=locator, locatorType=locatorType)
        value = element.get_attribute(attribute)
        return value

    def is_enabled(self, locator, locatorType="id", info=""):
        """
        Check if element is enabled

        Parameters:
            1. Required:
                1. locator - Locator of the element to check
            2. Optional:
                1. locatorType - Type of the locator(id(default), xpath, css, className, linkText)
                2. info - Information about the element, label/name of the element
        Returns:
            boolean (False when the state could not be determined)
        Exception:
            None
        """
        element = self.get_element(locator, locatorType=locatorType)
        enabled = False
        try:
            attributeValue = self.get_element_attribute_value(element=element, attribute="disabled")
            if attributeValue is not None:
                # The element carries a 'disabled' attribute: let Selenium decide.
                enabled = element.is_enabled()
            else:
                # No 'disabled' attribute: look for a 'disabled' marker in the CSS classes.
                value = self.get_element_attribute_value(element=element, attribute="class")
                self.log.info("Attribute value From Application Web UI --> :: " + value)
                enabled = not ("disabled" in value)
            if enabled:
                self.log.info("Element :: '" + info + "' is enabled")
            else:
                self.log.info("Element :: '" + info + "' is not enabled")
        except Exception:
            self.log.info("Element :: '" + info + "' state could not be found")
        return enabled

In [8]:
class LinkedinScraper(SeleniumDriver):
    """This class has the main methods used to scrape Linkedin. It is a subclass of SeleniumDriver, so it has the methods of Selenium."""

    def __init__(self, driver, searched_job):
        """
        Parameters
        ----------
        driver: WebDriver
            Driver object generated by Selenium
        searched_job: str
            Keywords of the job search; inserted as-is into the search URL
        """
        super().__init__(driver)
        self.searched_job = searched_job
        self.final_link = f'https://www.linkedin.com/jobs/search/?keywords={searched_job}&location=Brasil&geoId=106057199'

    def get_linkedin(self, page=0):
        """Access the Linkedin job opportunities page.

        Parameters
        ----------
        page: int
            Allows to access different pages of the job opportunities page
            (the 'start' offset sent to Linkedin is ``page * 25``)
        """

        start = page * 25
        url = f'{self.final_link}&start={start}'
        self.driver.get(url)
        # Print exactly the URL that was requested.
        print(url)

    def _get_job_links(self):
        """Return the anchor elements of the job-list sidebar ([] when the sidebar is missing)."""
        container = self.get_element('scaffold-layout__list-container', 'class')
        if container is None:
            return []
        return self.get_element_list('a', 'tag', False, parent_element=container)

    def get_data_from_linkedin_page(self, limit=0):
        """Access the list of jobs from a Linkedin page and get its data.

        Jobs whose data extraction raises are logged and skipped.

        Parameters
        ----------
        limit: int, optional
            For faster testing, you can limit the amount of jobs that it will get from a page
            (0 or less means no limit)

        Returns
        -------
        list
            List of Job objects with its data (empty when the page lists no jobs)
        """

        # Waits for loading of the sidebar with the jobs
        self.wait_for_element('scaffold-layout__list-container', 'class')
        job_list = self._get_job_links()
        if not job_list:
            self.log.info('No jobs found on the current page')
            return []

        # Scrolls down the sidebar so the jobs can be loaded
        for _ in range(10):
            self.send_keys(Keys.PAGE_DOWN, element=job_list[0])
        # Get the data of the jobs again, but now complete
        job_list = self._get_job_links()
        if limit > 0:
            job_list = job_list[:limit]

        job_collection = []
        for i, job in enumerate(job_list):
            # Access the job and waits for loading
            try:
                job_collection.append(self.get_job_data(job, i))
            except Exception as e:
                self.log.info(f'-------> {e}')

        return job_collection

    def get_job_data(self, job, i):
        """Get data from a specific job of Linkedin

        Parameters
        ----------
        job: SeleniumWebElement
            Info of a given job from Linkedin
        i: int
            Position of the job in the list; the job at position 0 is not
            clicked before reading the details pane

        Returns
        -------
        Job object
            Info about the job as attributes of the Job object
        """
        if i != 0:
            job.click()
            time.sleep(1)
        # Waits for loading
        self.wait_for_element('jobs-unified-top-card__job-insight', 'class', timeout=20)
        self.wait_for_element('jobs-unified-top-card__posted-date', 'class', timeout=20)
        self.wait_for_element('//*[@id="job-details"]/span', 'xpath', timeout=20)
        job_content = self.get_element('jobs-unified-top-card__content--two-pane', 'class')

        new_job = Job(
            position=self.get_text(element=self.get_element('h2', 'tag', False, job_content)),
            company=Company(
                self.get_text(element=self.get_element('jobs-unified-top-card__company-name', 'class', False, job_content)),
                size=self.get_company_size(job_content),
                sector=self.get_company_sector(job_content)
            ),
            location=self.get_text(element=self.get_element('jobs-unified-top-card__bullet', 'class', False, job_content)),
            posted_date=Job.calculate_date(self.get_text(element=self.get_element('jobs-unified-top-card__posted-date', 'class', False, job_content))),
            no_applicants=self.get_text(element=self.get_element('jobs-unified-top-card__applicant-count', 'class', False, job_content)),
            date_collected=datetime.now(),
            type_workplace=self.get_text(element=self.get_element('jobs-unified-top-card__workplace-type', 'class', False, job_content)),
            required_skills=self.get_skills(job_content),
            competitive_advantages=None,
            level=self.get_level(job_content),
            worktype=self.get_worktype(job_content),
            description=self.get_text(element=self.get_element('job-details', 'id')),
            link_application=self.get_link_application(job_content),
            link_linkedin=self.get_link_linkedin(job_content)
        )
        return new_job

    def get_job_insights(self, job_content):
        """Gets the information from the session 'insights', where Linkedin describes some characteristics of the job

        Returns
        -------
        list
            Text of every 'li' item of the insights section ([] when the section is not found)
        """
        self.wait_for_element('//*[@id="main"]/div/div[2]/div/div[2]/div[1]/div/div[1]/div/div[1]/div[1]/div[3]/ul/li[2]', 'xpath', timeout=20)
        # NOTE(review): an XPath starting with '//' is presumably resolved from the
        # document root, so `job_content` does not narrow this lookup - confirm.
        job_insight = self.get_element('//*[@id="main"]/div/div[2]/div/div[2]/div[1]/div/div[1]/div/div[1]/div[1]/div[3]', 'xpath', False, job_content)
        if job_insight is None:
            return []
        job_insights = self.get_element_list('li', 'tag', False, job_insight)
        return [self.get_text(element=insight) for insight in job_insights]

    def _get_insight_parts(self, job_content, index):
        """Return the stripped '·'-separated parts of insight `index` ([] when it is unavailable)."""
        job_insights = self.get_job_insights(job_content)
        if index >= len(job_insights) or job_insights[index] is None:
            return []
        return [part.strip() for part in job_insights[index].split('·')]

    def get_company_size(self, job_content):
        """Gets the company size: the first part of the second insight (np.nan when unavailable)"""
        parts = self._get_insight_parts(job_content, 1)
        return parts[0] if parts else np.nan

    def get_company_sector(self, job_content):
        """Gets the company sector: the second part of the second insight (np.nan when unavailable)"""
        parts = self._get_insight_parts(job_content, 1)
        return parts[1] if len(parts) > 1 else np.nan

    def get_skills(self, job_content):
        """Opens the skills dialog, reads the listed skills and closes the dialog.

        Returns
        -------
        list or None
            Non-empty skill texts, or None when any step fails
        """
        try:
            self.wait_for_element('jobs-unified-top-card__job-insight-text-button', 'class')
            btn_skills = self.get_element('jobs-unified-top-card__job-insight-text-button', 'class')
            btn_skills.click()
            self.wait_for_element('job-details-skill-match-status-list__unmatched-skill', 'class')
            list_skills = self.get_element('job-details-skill-match-status-list', 'class')
            list_skills = self.get_element_list('display-flex', 'class', False, list_skills)
            btn_close = self.get_element('artdeco-button', 'class')
            # Read each text once (get_text logs on every call).
            skill_texts = [self.get_text(element=skill) for skill in list_skills]
            list_skills = [text for text in skill_texts if text != ""]
            btn_close.click()
            return list_skills
        except Exception:
            return None

    def get_worktype(self, job_content):
        """Gets the information about the worktype: the first part of the first insight (None when unavailable)"""
        parts = self._get_insight_parts(job_content, 0)
        return parts[0] if parts else None

    def get_level(self, job_content):
        """Gets the information about the level of the job: the second part of the first insight (None when unavailable)"""
        parts = self._get_insight_parts(job_content, 0)
        return parts[1] if len(parts) > 1 else None

    def get_link_application(self, job_content, timeout=5):
        """Gets the link where the candidate applies to the job.

        For 'Candidatura simplificada' jobs the Linkedin link itself is
        returned. Otherwise the apply button is clicked, the URL of the window
        it opens is read, that window is closed and the driver goes back to
        the original window.

        Parameters
        ----------
        job_content: WebElement
            Details pane of the selected job
        timeout: int or float, optional
            Maximum number of seconds to wait for the new window to open

        Returns
        -------
        str or None
            The application URL, or None when there is no apply button or no
            new window was opened
        """
        btn_application = self.get_element('jobs-apply-button--top-card', 'class')
        if btn_application is None:
            return None
        if self.get_text(element=btn_application) == 'Candidatura simplificada':
            return self.get_link_linkedin(job_content)

        # Remember where we are before the click can open anything.
        original_window = self.driver.current_window_handle
        handles_before = self.driver.window_handles
        btn_application.click()
        try:
            # The new window may take a moment to show up in window_handles.
            WebDriverWait(self.driver, timeout).until(EC.new_window_is_opened(handles_before))
        except TimeoutException:
            pass

        new_html = None
        try:
            for window_handle in self.driver.window_handles:
                if window_handle != original_window:
                    self.driver.switch_to.window(window_handle)
                    new_html = self.driver.current_url
                    self.driver.close()
                    break
        finally:
            # Always give the focus back to the job list window.
            self.driver.switch_to.window(original_window)
        return new_html

    def get_link_linkedin(self, job_content):
        """Gets the 'href' of the first link found inside the job details pane."""
        link = self.get_element('a', 'tag', False, job_content)
        return self.get_element_attribute_value('href', link)
    

# Data Extraction   

A página de vagas do LinkedIn é dividida em dois painéis: um com a lista de vagas e outro com a descrição da vaga selecionada (inicialmente, a primeira da lista). A lista de vagas é carregada à medida que descemos por ela; por isso, o scraper envia a tecla PAGE_DOWN repetidas vezes à lista, de modo a carregar todas as vagas da página.

Em seguida, guardamos todas as vagas numa lista de WebElements.

In [9]:
# Scrape the first 40 result pages for the searched job and gather one row per job.
scraper = LinkedinScraper(LinkedinScraper.initialize_driver(), 'analista de dados')
job_rows = []
try:
    for page in range(40):
        print('scraping page', page)
        scraper.get_linkedin(page)
        jobs = scraper.get_data_from_linkedin_page()
        job_rows.extend(job.to_dict() for job in jobs)
finally:
    # Build the frame once (instead of concatenating per page), even if a page
    # fails, so the jobs scraped so far are not lost and the index is unique.
    df_jobs = pd.DataFrame(job_rows)
# Replace each Company object by its dict form and expose its fields as flat columns.
df_jobs['company'] = df_jobs['company'].map(Company.to_dict)
df_jobs['company_name'] = df_jobs['company'].map(lambda x: x['name'])
df_jobs['company_size'] = df_jobs['company'].map(lambda x: x['size'])
df_jobs['company_sector'] = df_jobs['company'].map(lambda x: x['sector'])

  driver = webdriver.Chrome(executable_path='./resources/chromedriver.exe', options=options)


scraping page 0
https://www.linkedin.com/jobs/search/?keywords=analista de dados&location=Brasil&geoId=106057199+&start=0


  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\alexa\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\alexa\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1043, in launch_instance
    app.start()
  File "C:\Users\alexa\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 725, in start
    self.io_loop.start()
  File "C:\Users\alexa\AppData\Roaming\Python\Python311\site-packages\tornado\platform\asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "c:\Users\alexa\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 607, in run_forever
    self._run_once()
  File "c:\Users\alexa\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 1922, in _run_once
    handle._run()
  File "c:\Users\alexa\AppData\Loc

scraping page 1


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=114.0.5735.110)
Stacktrace:
Backtrace:
	GetHandleVerifier [0x00F68893+48451]
	(No symbol) [0x00EFB8A1]
	(No symbol) [0x00E05058]
	(No symbol) [0x00DED073]
	(No symbol) [0x00E4DEBB]
	(No symbol) [0x00E5BFD3]
	(No symbol) [0x00E4A0B6]
	(No symbol) [0x00E27E08]
	(No symbol) [0x00E28F2D]
	GetHandleVerifier [0x011C8E3A+2540266]
	GetHandleVerifier [0x01208959+2801161]
	GetHandleVerifier [0x0120295C+2776588]
	GetHandleVerifier [0x00FF2280+612144]
	(No symbol) [0x00F04F6C]
	(No symbol) [0x00F011D8]
	(No symbol) [0x00F012BB]
	(No symbol) [0x00EF4857]
	BaseThreadInitThunk [0x75D57D59+25]
	RtlInitializeExceptionChain [0x7721B74B+107]
	RtlClearBits [0x7721B6CF+191]


In [46]:
# Inspect the (rows, columns) shape of the collected jobs table.
df_jobs.shape

(757, 17)

In [48]:
# Saving the data
# The file name carries the collection timestamp ("data" is Portuguese for "date").
data = datetime.now()
data = f'{data.year}.{data.month}.{data.day} - {data.hour}-{data.minute}'
# to_csv does not create missing folders, so make sure the target one exists.
os.makedirs('data', exist_ok=True)
df_jobs.to_csv(f'data/data_jobs_{data}.csv', sep=';', index=False)