<a href="https://colab.research.google.com/github/joekakone/Web-Scraping-with-Python/blob/master/Extrcact_jobs_offers_with_BeautifuSoup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Collect job offers online with BeautifulSoup
Prepared by [Joseph Konka](https://www.linkedin.com/in/joseph-koami-konka/)

## Packages

In [None]:
import json

import requests
from bs4 import BeautifulSoup
import pandas as pd

## Paths & Config

In [None]:
# Listing URL; "{}" is filled with the page number via str.format.
BASE_URL = "https://www.emploi.tg/recherche-jobs-togo?page={}"

# Naming note: "BALISE" is French for an HTML tag name. Each *_BALISE / *_CLASS
# pair is a (tag, CSS class) selector handed to BeautifulSoup's find / find_all.

# Container of the search results (not referenced by the scraper code below).
MOTHER_BALISE, MOTHER_CLASS = "div", "search-results jobsearch-results"

# One job card on a listing page, and the pieces read from inside it.
ITEM_BALISE, ITEM_CLASS = "div", "job-description-wrapper"
ITEM_TITLE_BALISE = "h5"
ITEM_LINK_BALISE = "a"
ITEM_RECRUTER_BALISE, ITEM_RECRUTER_CLASS = "p", "job-recruiter"
ITEM_OVERVIEW_BALISE, ITEM_OVERVIEW_CLASS = "div", "search-description"

# Separator between the date and the recruiter name in the recruiter line.
SPLITER = "|"

# Selectors used on (or reserved for) the detail page of a single offer.
WEBSITE_BALISE, WEBSITE_CLASS = "td", "website-url"
FIELD_BALISE, FIELD_CLASS = "div", "field-item even"
POSITION_MOTHER_BALISE, POSITION_MOTHER_CLASS = "div", "ad-ss-title"
POSITION_BALISE = "strong"
CONTENT_BALISE, CONTENT_CLASS = "div", "jobs-ad-details"

# Workbook written by the save step and read back at the end of the notebook.
OUTPUT_FILE = "data.xlsx"

## Functions & Classes

In [None]:
class BaseScraper(object):
    """Common plumbing shared by site-specific scrapers.

    Provides page download/parsing and Excel export. ``extract_item_details``
    and ``get_items`` are placeholders that raise ``NotImplementedError`` and
    are meant to be supplied by subclasses.
    """

    def __init__(self, params=None):
        # Stored as-is; nothing in this class reads it.
        self.params = params

    @staticmethod
    def get_source(url, timeout=30):
        """Download ``url`` and return its HTML parsed with BeautifulSoup.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds ``requests`` waits for the server before raising.
                Without a timeout a stalled connection would block the
                notebook indefinitely.

        Returns:
            A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

        Note:
            The HTTP status code is not checked: whatever body the server
            returns is parsed.
        """
        # Retrieve the page source code
        response = requests.get(url, timeout=timeout)

        return BeautifulSoup(response.text, "html.parser")

    def extract_item_details(self):
        """Placeholder to be overridden by subclasses."""
        raise NotImplementedError

    def get_items(self):
        """Placeholder to be overridden by subclasses."""
        raise NotImplementedError

    @staticmethod
    def save(data, path):
        """Write ``data`` to an Excel workbook at ``path``.

        Args:
            data: Object exposing ``to_excel`` (a pandas DataFrame in this
                notebook).
            path: Destination of the workbook.

        The rows go to a sheet named "Offers" without the index column.
        """
        print(f"Saving data at {path}")
        # The context manager closes the writer on exit, which is what
        # actually saves the workbook and releases the file handle.
        with pd.ExcelWriter(path) as writer:
            data.to_excel(writer, index=False, sheet_name="Offers")

In [None]:
class EmploiTogoScraper(BaseScraper):
    """Scraper for the job listing pages addressed by ``BASE_URL``."""

    def __init__(self, params=None):
        super().__init__(params=params)

    @staticmethod
    def _text_of(node, strip=False):
        """Return the text of a parsed node, or "" when the node is missing.

        ``find`` returns ``None`` when nothing matches; calling ``.text`` on it
        would raise ``AttributeError`` and abort the crawl on one odd page.
        """
        if node is None:
            return ""
        text = node.text
        return text.strip() if strip else text

    def get_item_details(self, url):
        """Fetch the detail page of one offer.

        Args:
            url: Address of the offer's detail page.

        Returns:
            dict with keys ``"field"`` and ``"content"`` (stripped text of the
            content element). A value is "" when its element is not found.
        """
        print("->", url)
        soup = self.get_source(url)
        field = self._text_of(soup.find(FIELD_BALISE, {"class": FIELD_CLASS}))
        content = self._text_of(
            soup.find(CONTENT_BALISE, {"class": CONTENT_CLASS}), strip=True
        )

        return {
            "field": field,
            "content": content
        }

    def get_items(self, soup):
        """Extract every job card found in a parsed listing page.

        Args:
            soup: Parsed listing page (as returned by ``get_source``).

        Returns:
            list of dicts with keys ``title``, ``url``, ``recruter``, ``date``,
            ``overview`` and ``content``; empty when the page has no job card.
            One detail page is downloaded per card that carries a URL.
        """
        items = []
        for soup_item in soup.find_all(ITEM_BALISE, {"class": ITEM_CLASS}):
            title = self._text_of(soup_item.find(ITEM_TITLE_BALISE))

            # The recruiter line reads "<date> | <recruiter>". partition splits
            # on the first separator only, so a recruiter name containing "|"
            # or a line with no separator no longer raises ValueError.
            recruiter_line = self._text_of(
                soup_item.find(ITEM_RECRUTER_BALISE, {"class": ITEM_RECRUTER_CLASS})
            )
            date, _, recruter = recruiter_line.partition(SPLITER)
            date, recruter = date.strip().replace(".", "/"), recruter.strip()

            url = soup_item.get("data-href")
            overview = self._text_of(
                soup_item.find(ITEM_OVERVIEW_BALISE, {"class": ITEM_OVERVIEW_CLASS})
            )

            # Without a link there is no detail page to download; keep the
            # card with an empty content instead of requesting ``None``.
            if url:
                details = self.get_item_details(url)
            else:
                details = {"field": "", "content": ""}

            items.append(
                {
                    "title": title,
                    "url": url,
                    "recruter": recruter,
                    "date": date,
                    "overview": overview,
                    "content": details["content"]
                }
            )
        return items

## Start collecting data

In [None]:
# Single scraper instance reused by the collection and save cells below.
scraper = EmploiTogoScraper(params=None)

In [None]:
# Walk the listing pages, starting at page 0, until one yields no offer.
jobs = []
page = 0
while True:
    soup = scraper.get_source(BASE_URL.format(page))
    try:
        items = scraper.get_items(soup)
    except Exception as exc:
        # Best effort: keep what was collected so far, but say why the crawl
        # stopped instead of hiding the error. KeyboardInterrupt is not caught,
        # so the cell can still be interrupted by hand.
        print(f"Stopping at page {page}: {exc!r}")
        break
    if not items:
        # An empty page marks the end of the pagination.
        break
    jobs.extend(items)
    page += 1


-> https://www.emploi.tg/offre-emploi-togo/techniciens-agricoles-163821
-> https://www.emploi.tg/offre-emploi-togo/charge-mission-ressources-humaines-etablissements-sante-hf-53421
-> https://www.emploi.tg/offre-emploi-togo/gestionnaire-facturation-166671
-> https://www.emploi.tg/offre-emploi-togo/data-scientist-dimensionnement-radio-166611
-> https://www.emploi.tg/offre-emploi-togo/chef-projet-ran-166603
-> https://www.emploi.tg/offre-emploi-togo/agent-accueil-operations-champions-166236
-> https://www.emploi.tg/offre-emploi-togo/developpeur-125049
-> https://www.emploi.tg/offre-emploi-togo/technicien-technicienne-informatique-hf-166421
-> https://www.emploi.tg/offre-emploi-togo/developpeur-front-end-165789
-> https://www.emploi.tg/offre-emploi-togo/agent-telemarketing-166234
-> https://www.emploi.tg/offre-emploi-togo/analyste-programmeur-developpeur-165776
-> https://www.emploi.tg/offre-emploi-togo/administrateur-systemes-reseaux-165778
-> https://www.emploi.tg/offre-emploi-togo/comme

## Transform into dataframe

In [None]:
# One row per collected offer; columns come from the keys of each job dict.
data = pd.DataFrame(data=jobs)
data.head(5)

Unnamed: 0,title,url,recruter,date,overview,content
0,Techniciens Agricoles,https://www.emploi.tg/offre-emploi-togo/techni...,GEBANA TOGO,01/09/2021,Entreprise : Gebana Togo – Production et expor...,Détails de l'annonce\n\nPoste proposé : Techni...
1,Chargé Mission Ressources Humaines pour Etabli...,https://www.emploi.tg/offre-emploi-togo/charge...,OIGH,27/09/2021,Pour venir en appui à des établissements de sa...,Détails de l'annonce\n\nPoste proposé : Chargé...
2,Gestionnaire Facturation,https://www.emploi.tg/offre-emploi-togo/gestio...,T&T CONSULTING,23/09/2021,Au sein de la direction de Production du Clien...,Détails de l'annonce\n\nPoste proposé : Gestio...
3,Data Scientist - Dimensionnement Radio,https://www.emploi.tg/offre-emploi-togo/data-s...,T&T CONSULTING,23/09/2021,ACTIVITES PRINCIPALES :Vous déterminez les cri...,Détails de l'annonce\n\nPoste proposé : Data S...
4,Chef Projet RAN,https://www.emploi.tg/offre-emploi-togo/chef-p...,T&T CONSULTING,22/09/2021,Descriptif de la mission :• Être le porteur du...,Détails de l'annonce\n\nPoste proposé : Chef P...


## Save data into an Excel file

In [None]:
# Export the collected offers to the workbook configured in OUTPUT_FILE.
scraper.save(data=data, path=OUTPUT_FILE)

Saving data at data.xlsx


## Load the Excel file

In [None]:
# The workbook is written with index=False, so its first column is "title",
# not a saved index: read it back without index_col to keep every column.
dt = pd.read_excel(OUTPUT_FILE)
dt.head()

Unnamed: 0,title,url,recruter,date,overview,content
0,Techniciens Agricoles,https://www.emploi.tg/offre-emploi-togo/techni...,GEBANA TOGO,01/09/2021,Entreprise : Gebana Togo – Production et expor...,\n\nDétails de l'annonce\n\nPoste proposé : Te...
1,Chargé Mission Ressources Humaines pour Etabli...,https://www.emploi.tg/offre-emploi-togo/charge...,OIGH,27/09/2021,Pour venir en appui à des établissements de sa...,\n\nDétails de l'annonce\n\nPoste proposé : Ch...
2,Gestionnaire Facturation,https://www.emploi.tg/offre-emploi-togo/gestio...,T&T CONSULTING,23/09/2021,Au sein de la direction de Production du Clien...,\n\nDétails de l'annonce\n\nPoste proposé : Ge...
3,Data Scientist - Dimensionnement Radio,https://www.emploi.tg/offre-emploi-togo/data-s...,T&T CONSULTING,23/09/2021,ACTIVITES PRINCIPALES :Vous déterminez les cri...,\n\nDétails de l'annonce\n\nPoste proposé : Da...
4,Chef Projet RAN,https://www.emploi.tg/offre-emploi-togo/chef-p...,T&T CONSULTING,22/09/2021,Descriptif de la mission :• Être le porteur du...,\n\nDétails de l'annonce\n\nPoste proposé : Ch...


## Let's get in touch
[![Github Badge](https://img.shields.io/badge/-Github-000?style=flat-square&logo=Github&logoColor=white&link=https://github.com/joekakone)](https://github.com/joekakone) [![Facebook Badge](https://img.shields.io/badge/-Facebook-blue?style=flat-square&logo=Facebook&logoColor=white&link=https://www.facebook.com/josephkonka1999)](https://www.facebook.com/josephkonka1999) [![Linkedin Badge](https://img.shields.io/badge/-LinkedIn-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/joseph-koami-konka/)](https://www.linkedin.com/in/joseph-koami-konka/) [![Twitter Badge](https://img.shields.io/badge/-Twitter-blue?style=flat-square&logo=Twitter&logoColor=white&link=https://www.twitter.com/joekakone)](https://www.twitter.com/joekakone) [![Gmail Badge](https://img.shields.io/badge/-Gmail-c14438?style=flat-square&logo=Gmail&logoColor=white&link=mailto:joseph.kakone@gmail.com)](mailto:joseph.kakone@gmail.com)