<a href="https://colab.research.google.com/github/franklinokech/CeKe-Master-Database/blob/main/web_scrapping_brighter_monday_jobs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import csv
from datetime import datetime
import requests
from bs4 import BeautifulSoup
import sys
import pandas as pd

In [2]:
template = 'https://www.brightermonday.co.ke/jobs'

In [3]:
url = template

# Send a Get Request

In [4]:
response = requests.get(url)

In [5]:
response

<Response [200]>

In [6]:
response.reason

'OK'

In [7]:
soup = BeautifulSoup(response.text, 'html.parser')

In [8]:
cards = soup.find_all('header', 'search-result__header')

In [9]:
len(cards)

20

# Prototype The Model with a single record

In [10]:
card = cards[0]

In [11]:
card

<header class="search-result__header"><div class="customer-card--column"><div class="wrapper--inline-flex justify--space-between padding-lr-20"><div class="flex--3 wrapper--inline-flex align--center direction--row"><a class="search-result__job-title metrics-apply-now " href="https://www.brightermonday.co.ke/listings/client-relations-manager-must-be-fluent-in-portuguese-6kmgmd" onclick="window.dataLayer &amp;&amp; window.dataLayer.push({'listing_type': 'Normal'});" title="Client Relations Manager – (Must be fluent in Portuguese) "><h3>Client Relations Manager – (Must be fluent in Portuguese) </h3></a></div><div class="flex--2 wrapper--inline-flex justify--flex-end align--center direction--row width--120 max-width--260"></div></div><div class="if-content-panel padding-lr-20 flex-direction-top-to-bottom--under-lg align--start--under-lg search-result__job-meta"><a href="/company/brightermonday-consulting">
BrighterMonday Consulting
</a></div><div class="if-content-panel align--center paddi

In [12]:
heading = card.find('h3')

In [13]:
heading_data = heading.text.strip()

In [14]:
print(heading_data)

Client Relations Manager – (Must be fluent in Portuguese)


In [15]:
company = card.find('div', 'if-content-panel').text.strip()

In [16]:
company

'BrighterMonday Consulting'

In [17]:
location = card.find('div', 'search-result__location').text.strip()

In [18]:
location

'Nairobi'

In [19]:
job_type = card.find('span', 'search-result__job-type').text.strip()

In [20]:
job_salary_obj = card.find('div', 'search-result__job-salary').text.strip()
job_salary_obj

'KSh\n\nConfidential'

In [21]:
job_function = card.find('div', 'if-wrapper-row').text.strip()
job_function

'Job Function:\n\nCustomer Service & Support'

In [22]:
post_date = card.find('div', 'top-jobs__content__time').text.strip()
post_date

'14h'

# Generalise the Model with a function

In [23]:
def get_record(card):
  """Extract job data from a single record"""
  job_title_data = card.find('h3')
  job_title = job_title_data.text.strip()
  # job_url = 'https://www.indeed.com' + atag.get('href')
  company = card.find('div', 'if-content-panel').text.strip()
  job_type = card.find('span', 'search-result__job-type').text.strip()
  location = card.find('div', 'search-result__location').text.strip()
  job_salary = card.find('div', 'search-result__job-salary').text.strip()
  job_function = card.find('div', 'if-wrapper-row').text.strip()
  post_date = card.find('div', 'top-jobs__content__time').text.strip()
  today = datetime.today().strftime('%Y-%m-%d')


  record = (job_title, company, location, post_date, today, job_type, job_salary, job_function)

  return record

In [24]:
records = []

for card in cards:
  record = get_record(card)
  records.append(record)


In [25]:
len(records)

20

# Next page

In [26]:
page = 1
while page > 0:
  try:
    url = 'https://www.brightermonday.co.ke/jobs?page={}'.format(page)
    page += 1

    response = requests.get(url)

    if response.reason != 'OK':
      break
    else:
      soup = BeautifulSoup(response.text, 'html.parser')
      cards = soup.find_all('header', 'search-result__header')
      
      for card in cards:
        record = get_record(card)
        records.append(record)

  except :
    print('error occured')
    break
  

  

In [27]:
len(records)

969

In [28]:
len(records)

969

In [29]:
  # Save the job
  with open('kenyan_jobs.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)    
    writer.writerow(['job_title', 'company_name', 'location', 'post_date', 'ExtractDate', 'job_type', 'job_salary', 'job_function'])
    writer.writerows(records)

# Export to pandas df

In [30]:
type(records)
columns_name = ['job_title', 'company_name', 'location', 'post_date', 'ExtractDate', 'job_type', 'job_salary', 'job_function']
df = pd.DataFrame(records, columns=columns_name)

df.head()

Unnamed: 0,job_title,company_name,location,post_date,ExtractDate,job_type,job_salary,job_function
0,Client Relations Manager – (Must be fluent in ...,BrighterMonday Consulting,Nairobi,14h,2021-04-29,Full Time,KSh\n\nConfidential,Job Function:\n\nCustomer Service & Support
1,Client Relations Regional Representative – Afr...,BrighterMonday Consulting,Nairobi,14h,2021-04-29,Full Time,KSh\n \nConfidential,Job Function:\n\nCustomer Service & Support
2,Full Time Lecturer,i Manage Limited,Nairobi,14h,2021-04-29,Full Time,KSh\n\nConfidential,"Job Function:\n\nResearch, Teaching & Training"
3,Writing and Document Management Consultancy,Agri Experience Ltd,Nairobi,16h,2021-04-29,Contract,KSh\n\nConfidential,Job Function:\n\nConsulting & Strategy
4,Automotive Technician,Confidential,Nairobi,16h,2021-04-29,Full Time,KSh\n\nConfidential,Job Function:\n\nEngineering & Technology


# Filter Outside Kenya

In [31]:
df = df[df['location'] != 'Outside Kenya']

df

Unnamed: 0,job_title,company_name,location,post_date,ExtractDate,job_type,job_salary,job_function
0,Client Relations Manager – (Must be fluent in ...,BrighterMonday Consulting,Nairobi,14h,2021-04-29,Full Time,KSh\n\nConfidential,Job Function:\n\nCustomer Service & Support
1,Client Relations Regional Representative – Afr...,BrighterMonday Consulting,Nairobi,14h,2021-04-29,Full Time,KSh\n \nConfidential,Job Function:\n\nCustomer Service & Support
2,Full Time Lecturer,i Manage Limited,Nairobi,14h,2021-04-29,Full Time,KSh\n\nConfidential,"Job Function:\n\nResearch, Teaching & Training"
3,Writing and Document Management Consultancy,Agri Experience Ltd,Nairobi,16h,2021-04-29,Contract,KSh\n\nConfidential,Job Function:\n\nConsulting & Strategy
4,Automotive Technician,Confidential,Nairobi,16h,2021-04-29,Full Time,KSh\n\nConfidential,Job Function:\n\nEngineering & Technology
...,...,...,...,...,...,...,...,...
964,Chemicals Sales Executive,BrighterMonday Consulting,Nairobi,1mo,2021-04-29,Full Time,KSh\n\nConfidential,Job Function:\n\nSales
965,Digital Marketing Intern,Confidential,Nairobi,1mo,2021-04-29,Internship & Graduate,"KSh\n\n15,000 - 30,000",Job Function:\n\nMarketing & Communications
966,CYBERCRIME ANTI-PIRACY ANALYST,BrighterMonday Consulting,Nairobi,1mo,2021-04-29,Full Time,KSh\n\nConfidential,Job Function:\n\nSoftware & Data
967,ANTI-PIRACY AND SECURITY INVESTIGATOR,BrighterMonday Consulting,Nairobi,1mo,2021-04-29,Full Time,KSh\n\nConfidential,Job Function:\n\nProduct & Project Management


In [32]:
df.to_csv('kenya_only.csv', index=False)

# Putting It All Together

In [33]:
# Put is all tpgethr