In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
import math
def find_no_of_jobs(job):
    req  = requests.get('https://wuzzuf.net/search/jobs/?a=hpb&q=' + job.replace(' ', '%20'))
    soup = BeautifulSoup(req.content, 'lxml')
    jobs = int(soup.find({'strong'}).text.replace(',', ''))
    pages = math.ceil(jobs / 15)
    return jobs, pages

In [3]:
print(find_no_of_jobs('Machine learning'))
print(find_no_of_jobs('Data science'))
print(find_no_of_jobs('Data analysis'))
print(find_no_of_jobs('Business intelligence'))

(35, 3)
(557, 38)
(614, 41)
(73, 5)


In [4]:
def scrap_pages(query):
    num_jobs, num_pages = find_no_of_jobs(query)
    query = query.replace(' ', '%20')
    titles_lst, links_lst, occupations_lst, companies_lst, specs_lst = [], [], [], [], []
    for pageNo in range(num_pages):
        page = requests.get('https://wuzzuf.net/search/jobs/?a=hpb&q=' + query + '&start=' + str(pageNo))
        soup = BeautifulSoup(page.content, 'lxml')

        titles = soup.find_all("h2", {'class': 'css-m604qf'})
        titles_lst += [title.a.text for title in titles]
        links_lst += ['https://wuzzuf.net' + title.a['href'] for title in titles]

        occupations = soup.find_all("div", {'class': 'css-1lh32fc'})
        occupations_lst += [occupation.text for occupation in occupations]

        companies = soup.find_all("a", {'class': 'css-17s97q8'})
        companies_lst += [company.text for company in companies]

        specs = soup.find_all("div", {'class': 'css-y4udm8'})
        specs_lst += [spec.text for spec in specs]

    scraped_data = {}
    scraped_data['Title'] = titles_lst
    scraped_data['Link'] = links_lst
    scraped_data['Occupation'] = occupations_lst
    scraped_data['Company'] = companies_lst
    scraped_data['Specs'] = specs_lst

    df = pd.DataFrame(scraped_data)
    
    return scraped_data, df

In [5]:
ml_dict, ml_df = scrap_pages('Machine learning')

In [6]:
ml_df

Unnamed: 0,Title,Link,Occupation,Company,Specs
0,Senior Machine Learning Engineer,https://wuzzuf.net/jobs/p/YDxhBIJimaOR-Senior-...,Full TimeWork From Home,Media Sci -,Full TimeWork From HomeExperienced · 5+ Yrs of...
1,Machine Learning Engineer,https://wuzzuf.net/jobs/p/63GDVXe1mip9-Machine...,Full Time,Edentech -,Full TimeExperienced · 3 - 7 Yrs of Exp · IT/S...
2,Data Scientist,https://wuzzuf.net/jobs/p/MXunj0kftZ1D-Data-Sc...,Full Time,BlinkApp -,Full TimeExperienced · 3+ Yrs of Exp · IT/Soft...
3,Software Team Lead,https://wuzzuf.net/jobs/p/tDpJrj9GeBO6-Softwar...,Full Time,TMentors -,Full TimeManager · 8 - 12 Yrs of Exp · IT/Soft...
4,Python Developer - Internship,https://wuzzuf.net/internship/ks2Azi1kboPo-Pyt...,Internship,TMentors -,InternshipStudent · IT/Software Development · ...
5,Senior Research Scientist,https://wuzzuf.net/jobs/p/WjuL0LeHiwuo-Senior-...,Full Time,Atomica -,Full TimeExperienced · 4+ Yrs of Exp · IT/Soft...
6,Junior Production Engineer,https://wuzzuf.net/jobs/p/45lysRZRecXG-Junior-...,Full Time,Delta Egypt For Lighting -,Full TimeEntry Level · 0 - 1 Yrs of Exp · male...
7,Training Section Head,https://wuzzuf.net/jobs/p/KY4bSeU9W8aU-Trainin...,Full Time,Giza Cable Industries -,Full TimeManager · 10 - 20 Yrs of Exp · Traini...
8,Senior Java Developer,https://wuzzuf.net/jobs/p/1TQAeepNOzTT-Senior-...,Full TimeWork From Home,Media Sci -,Full TimeWork From HomeExperienced · 5+ Yrs of...
9,Junior Embedded Software Engineer,https://wuzzuf.net/jobs/p/etWvboHXv2KK-Junior-...,Full Time,Swift Act -,Full TimeEntry Level · 0 - 1 Yrs of Exp · IT/S...


In [9]:
da_dict, da_df = scrap_pages('Data analysis')

In [10]:
ds_dict, ds_df = scrap_pages('Data science')

In [11]:
bi_dict, bi_df = scrap_pages('Business intelligence')

In [12]:
def combine_dfs(dfs):
    df = pd.concat(dfs)
    df = df.drop_duplicates()
    return df

In [13]:
combine_dfs([ml_df, da_df, ds_df, bi_df])

Unnamed: 0,Title,Link,Occupation,Company,Specs
0,Senior Machine Learning Engineer,https://wuzzuf.net/jobs/p/YDxhBIJimaOR-Senior-...,Full TimeWork From Home,Media Sci -,Full TimeWork From HomeExperienced · 5+ Yrs of...
1,Machine Learning Engineer,https://wuzzuf.net/jobs/p/63GDVXe1mip9-Machine...,Full Time,Edentech -,Full TimeExperienced · 3 - 7 Yrs of Exp · IT/S...
2,Data Scientist,https://wuzzuf.net/jobs/p/MXunj0kftZ1D-Data-Sc...,Full Time,BlinkApp -,Full TimeExperienced · 3+ Yrs of Exp · IT/Soft...
3,Software Team Lead,https://wuzzuf.net/jobs/p/tDpJrj9GeBO6-Softwar...,Full Time,TMentors -,Full TimeManager · 8 - 12 Yrs of Exp · IT/Soft...
4,Python Developer - Internship,https://wuzzuf.net/internship/ks2Azi1kboPo-Pyt...,Internship,TMentors -,InternshipStudent · IT/Software Development · ...
...,...,...,...,...,...
68,Market Access Lead - GCC (Based in Hurghada ),https://wuzzuf.net/jobs/p/vpiUFGkbg2IF-Market-...,Full Time,Caduceus Lane -,Full TimeManager · 10 - 15 Yrs of Exp · Busine...
69,Graphic Designer,https://wuzzuf.net/jobs/p/gFySRDAhxW7h-Graphic...,Full Time,GEMS Education -,Full TimeExperienced · 2+ Yrs of Exp · Creativ...
70,Financial Analysis Manager,https://wuzzuf.net/jobs/p/K0yvFAdubfkt-Financi...,Full Time,Confidential -,Full TimeManager · 15 - 20 Yrs of Exp · Accoun...
71,Social Media Manager,https://wuzzuf.net/jobs/p/F5VKBZCZpqP9-Social-...,Full Time,Venu Apps -,Full TimeExperienced · 3 - 5 Yrs of Exp · Writ...


In [14]:
def combine_dicts(dicts):
    combined_dict = {}
    for key in dicts[0].keys():
        combined_dict[key] = []
        for dict in dicts:
            combined_dict[key] += dict[key]
    return combined_dict

In [16]:
all_dicts = combine_dicts([ml_dict, da_dict, ds_dict, bi_dict])

In [17]:
all_df = pd.DataFrame(all_dicts)
all_df

Unnamed: 0,Title,Link,Occupation,Company,Specs
0,Senior Machine Learning Engineer,https://wuzzuf.net/jobs/p/YDxhBIJimaOR-Senior-...,Full TimeWork From Home,Media Sci -,Full TimeWork From HomeExperienced · 5+ Yrs of...
1,Machine Learning Engineer,https://wuzzuf.net/jobs/p/63GDVXe1mip9-Machine...,Full Time,Edentech -,Full TimeExperienced · 3 - 7 Yrs of Exp · IT/S...
2,Data Scientist,https://wuzzuf.net/jobs/p/MXunj0kftZ1D-Data-Sc...,Full Time,BlinkApp -,Full TimeExperienced · 3+ Yrs of Exp · IT/Soft...
3,Software Team Lead,https://wuzzuf.net/jobs/p/tDpJrj9GeBO6-Softwar...,Full Time,TMentors -,Full TimeManager · 8 - 12 Yrs of Exp · IT/Soft...
4,Python Developer - Internship,https://wuzzuf.net/internship/ks2Azi1kboPo-Pyt...,Internship,TMentors -,InternshipStudent · IT/Software Development · ...
...,...,...,...,...,...
1274,Market Access Lead - GCC (Based in Hurghada ),https://wuzzuf.net/jobs/p/vpiUFGkbg2IF-Market-...,Full Time,Caduceus Lane -,Full TimeManager · 10 - 15 Yrs of Exp · Busine...
1275,Graphic Designer,https://wuzzuf.net/jobs/p/gFySRDAhxW7h-Graphic...,Full Time,GEMS Education -,Full TimeExperienced · 2+ Yrs of Exp · Creativ...
1276,Financial Analysis Manager,https://wuzzuf.net/jobs/p/K0yvFAdubfkt-Financi...,Full Time,Confidential -,Full TimeManager · 15 - 20 Yrs of Exp · Accoun...
1277,Social Media Manager,https://wuzzuf.net/jobs/p/F5VKBZCZpqP9-Social-...,Full Time,Venu Apps -,Full TimeExperienced · 3 - 5 Yrs of Exp · Writ...


In [18]:
all_df.to_csv('Wuzzuf.csv', index=False)