# <font color='#2F4F4F'>Web Scraping with Python Project

## <font color='#2F4F4F'>Prerequisites</font>

In [12]:
# We first import the required libraries
# ---
#
import pandas as pd             # library for data manupation
import requests                 # library for fetching a web page 
from bs4 import BeautifulSoup   # library for extrating contents from a webpage 

## <font color='#2F4F4F'>Step 1: Obtaining our Data</font>

In [13]:
# PigiaMe: https://www.pigiame.co.ke/it-software-jobs
# ---
#
pigia_me = requests.get('https://www.pigiame.co.ke/it-software-jobs')
pigia_me

<Response [200]>

In [14]:
# MyJobMag: https://www.myjobmag.co.ke/jobs-by-field/information-technology
# ---
#
my_job_mag = requests.get('https://www.myjobmag.co.ke/jobs-by-field/information-technology')
my_job_mag

<Response [200]>

In [15]:
# KenyanJob: https://www.kenyajob.com/job-vacancies-search-kenya?f%5B0%5D=im_field_offre_secteur%3A133
# ---
#
kenyan_job = requests.get('https://www.kenyajob.com/job-vacancies-search-kenya?f%5B0%5D=im_field_offre_secteur%3A133')
kenyan_job

<Response [200]>

## <font color='#2F4F4F'>Step 2: Parsing</font>

In [16]:
# Parsing our document: pigia_me
# ---
# 
soup_pm = BeautifulSoup(pigia_me.content, "html.parser")

In [17]:
# Parsing our document: my_job_mag
# ---
#  
soup_mjm = BeautifulSoup(my_job_mag.text, "html.parser")

In [18]:
# Parsing our document: kenyan_job
# ---
# 
soup_kj = BeautifulSoup(kenyan_job.text, "html.parser")

## <font color='#2F4F4F'>Step 3: Extracting Required Elements</font>

In [19]:
# 1. Extracting job titles and links: pigia me
# ---
# 
jobs = soup_pm.find_all("div", "listings-cards__list-item")

titles = []
links = []

for job in jobs:
  title = job.find('div', 'listing-card__header__title').get_text().strip()
  link =  job.a.get('href')
  titles.append(title)
  links.append(link)

# Create a Pandas dataframe from the lists
# df = pd.DataFrame({'Title': titles, 'Link': links})
# df
  



In [20]:
# 2. Extracting job titles: my_job_mag
# ---
# 

jobs_mgm = soup_mjm.find_all("li", "job-info")

titles_mjm = []
links_mjm = []

for job in jobs_mgm:
  title = job.h2.a.get_text().strip()
  link = 'https://www.myjobmag.co.ke/jobs-by-field/information-technology' + job.h2.a.get('href')
  titles_mjm.append(title)
  links_mjm.append(link)

# Create a Pandas dataframe from the lists
# df_mjm = pd.DataFrame({'Title': titles_mjm, 'Link': links_mjm})
# df_mjm


In [21]:
# 3. Extracting job titles: kenya_job
# ---

jobs_kj = soup_kj.find_all('div', 'col-lg-5 col-md-5 col-sm-5 col-xs-12 job-title')

titles_kj = []
links_kj = []

for job in jobs_kj:
  title = job.h5.a.get_text().strip()
  link = 'https://www.kenyajob.com/job-vacancies-search-kenya?f%5B0%5D=im_field_offre_secteur%3A133' + job.h5.a.get('href')
  titles_kj.append(title)
  links_kj.append(link)

# Create a Pandas dataframe from the lists
# df_kj = pd.DataFrame({'Title': titles_kj, 'Link': links_kj})
# df_kj

## <font color='#2F4F4F'>Step 4: Saving our Data</font>

In [22]:
# Saving the scraped contents in a dataframe and preview our data
# ---
#

df_All = pd.DataFrame({'Title': (titles_kj + titles_mjm + titles), 'Link': (links_kj + links_mjm + links)})
df_All

Unnamed: 0,Title,Link
0,JAVA EE / JAVA 8 Developer with SQL Skills,https://www.kenyajob.com/job-vacancies-search-...
1,Senior Freelance Web Designer,https://www.kenyajob.com/job-vacancies-search-...
2,CCTV and Fire Alarms Systems Technician,https://www.kenyajob.com/job-vacancies-search-...
3,Information Technology Sales Specialist,https://www.kenyajob.com/job-vacancies-search-...
4,AWS Cloud Architect (M/F),https://www.kenyajob.com/job-vacancies-search-...
5,AWS Solutions Architect (M/F),https://www.kenyajob.com/job-vacancies-search-...
6,AZURE Solutions Architect (M/F),https://www.kenyajob.com/job-vacancies-search-...
7,Cloud Architect (M/F),https://www.kenyajob.com/job-vacancies-search-...
8,Cloud Computing and Virtualization Engineer (M/F),https://www.kenyajob.com/job-vacancies-search-...
9,Cloud Engineer (M/F),https://www.kenyajob.com/job-vacancies-search-...
