<a href="https://colab.research.google.com/github/cenkkiran/job-scrape/blob/main/JobListingScraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
import urllib.request
import time
from datetime import datetime
import os
import argparse
from bs4 import BeautifulSoup
import pandas as pd
import gspread
from google.colab import auth
from oauth2client.client import GoogleCredentials
import matplotlib.pyplot as plt

In [3]:
# Let long text cells (such as job descriptions) display up to 1000
# characters before pandas truncates them.
pd.options.display.max_colwidth = 1000

# Sign the Colab user in, then build the gspread client from the
# application-default Google credentials.
auth.authenticate_user()
credentials = GoogleCredentials.get_application_default()
gc = gspread.authorize(credentials)

In [24]:
# Feature set: column labels applied, in order, to the rows read from the sheet.
columns = ['Reference','Title','Salary','Location','Desc','Author','URL','Expired','Date']

# Google Drive Authentication and Google Sheets implementation:
# open the workbook through the authorised gspread client and pick the 'List' tab.
wb = gc.open_by_url('https://docs.google.com/spreadsheets/d/167_olmhOGFikR6tffGDHSvNR0zcaBq3wWfScZKI6KUg/edit#gid=0')
sheet = wb.worksheet('List')

# Get the data for our pandas DataFrame.
# The first row (presumably the header row) is dropped by slicing; unlike
# `del data[0]` this does not raise IndexError when the sheet has no rows at all.
data = sheet.get_all_values()[1:]

# Build the frame and label its columns in one step. With no data rows this
# yields an empty frame that still carries the expected columns, which replaces
# the `pd.np.empty(...)` fallback (the `pd.np` alias was removed in pandas 2.0).
df = pd.DataFrame(data, columns=columns)

  


In [12]:
# Preview the first five rows of the frame built from the sheet.
df.head(n=5)

Unnamed: 0,Reference,Title,Salary,Location,Desc,Author,URL,Expired,Date


In [25]:
# Scrape the Harnham job listing pages one by one and add every job whose
# reference is not yet known to `df`.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
# Seconds to wait for the server; without a timeout a stalled connection
# would hang the cell indefinitely.
REQUEST_TIMEOUT = 30


def parse_job(job, synced):
  """Turn one job card into a record keyed by the DataFrame column names.

  Args:
    job: BeautifulSoup tag of a single `div.job-block-wrapper` element.
    synced: Date string stored in the 'Date' column of the record.

  Returns:
    dict with the keys Reference, Title, Salary, Location, Desc, Author,
    URL, Expired and Date.

  Raises:
    AttributeError, TypeError, KeyError or IndexError: if the card does not
      contain the expected elements, attributes or text layout.
  """
  title = job.find('span', class_='inner-title').text.strip()
  salary = job.find('p', class_='salary-text').text.strip().split()
  location = job.find('p', class_='location-text').text.strip().split(',')
  desc = job.find('p', class_='description-text').text.strip().replace('\n',' ')
  reference = job.find('p', class_='job-block__reference-text').text.strip()
  author = job.find('div', class_='job-author-name').text.strip().replace('\n','').split('Author')
  job_url = 'https://www.harnham.com' + job.find('a', class_='job-block__learnmore-link')['href']
  exp = job.find('time-until')['datetime'].replace('.','')
  return {
      'Reference': reference,
      'Title': title,
      'Salary': salary[2],        # third whitespace-separated token of the salary text
      'Location': location[0],    # text before the first comma
      'Desc': desc,
      'Author': author[1].strip(),  # text following the 'Author' label
      'URL': job_url,
      'Expired': exp,
      'Date': synced,
  }


# Only the Reference column identifies a job; a set makes the lookup exact
# (no accidental match against other columns) and constant-time.
known_refs = set(df['Reference'])
scraped = []
synced = datetime.today().strftime('%d/%m/%Y')

condition = True
page = 1

while condition:
  page_url = 'https://www.harnham.com/jobs?options=1035,261&page=' + str(page) + '&size=24'
  response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
  print(response.status_code)
  if not response.ok:
    # An error page has neither job cards nor the "no results" marker, so
    # carrying on would request page after page forever.
    print( ' request failed ')
    break

  soup = BeautifulSoup(response.content, 'html.parser')
  mydivs = soup.find_all("div", class_="job-block-wrapper")
  if soup.find('p', class_='search-no-results') or not mydivs:
    print( ' no page ')
    break

  for job in mydivs:
    record = parse_job(job, synced)
    if record['Reference'] in known_refs:
      # Stop at the first job that is already known; everything after it is
      # treated as already synced.
      print( ' Duplicate ')
      condition = False
      break
    known_refs.add(record['Reference'])
    scraped.append(record)
  page += 1

# Add all new rows in one go: DataFrame.append no longer exists in pandas 2.0
# and growing a frame row by row is quadratic.
if scraped:
  scraped_df = pd.DataFrame(scraped, columns=df.columns)
  df = scraped_df if df.empty else pd.concat([df, scraped_df], ignore_index=True)


200




200
200
 no page 


Unnamed: 0,Reference,Title,Salary,Location,Desc,Author,URL,Expired,Date
0,9640/ZJ,User Researcher,£450,London,A User Researcher is required for the next 6 months to aid a global online marketplace in its expansion plans.,Zavina Johnson,https://www.harnham.com/job/user-researcher-in-london-jid-35112,13 September 2021,28/08/2021
1,106123/ES,Service Designer - Contract,£400,London,A Data and Analytics consultancy are seeking a Service Designer to work on an important project for one of their public sector clients.,Elizabeth Stone,https://www.harnham.com/job/service-designer-contract-in-london-jid-31796,15 October 2021,28/08/2021
2,2019/ZJ,Social Data Analyst,£300,London,A 3 month contract has come up for a Social Data Analyst to join a global media agency group to aid their social analysis objectives.,Zavina Johnson,https://www.harnham.com/job/social-data-analyst-in-london-jid-35113,13 September 2021,28/08/2021
3,118038/AD,Web Analyst,€720,Netherlands,"Do you want to work in a company where data leads the decisions, in a company with a flat structure so you are an integral part of the decision-making process?",Andrew Denham,https://www.harnham.com/job/web-analyst-in-netherlands-jid-35168,15 September 2021,28/08/2021
4,45612/TG,Digital Optimisation Analyst,€45000,Leiden,Digital Optimisation Analyst\r Leiden\r Competitive Salary + Benefits,Tom Gijsen,https://www.harnham.com/job/digital-optimisation-analyst-in-leiden-south-holland-jid-35635,08 October 2021,28/08/2021
5,10796118/ES,Digital Marketing Manager - Contract,£300,London,An exciting eCommerce retailer is seeking a Digital Marketing Manager to join them on an initial 3 month contract basis.,Elizabeth Stone,https://www.harnham.com/job/digital-marketing-manager-contract-in-london-jid-33933,28 September 2021,28/08/2021
6,26099/DL,Programmatic Campaigns Consultant,€550,Belgium,This is a great chance to join a fast paced and growing tech company in central Belgium.,Daniel Lewis,https://www.harnham.com/job/programmatic-campaigns-consultant-in-belgium-jid-6719,28 September 2021,28/08/2021
7,00008/LvE,CDP Team Lead,€1062,Amsterdam,A great freelance CDP Team Lead with both hands-on and leadership components,Laura Laura Van Eer,https://www.harnham.com/job/cdp-team-lead-in-amsterdam-north-holland-jid-35106,13 September 2021,28/08/2021
8,21696/ES1,User Researcher - Contract,£600,London,A tech company is recruiting for a User Researcher to join them to work on an exciting Web Based Experience project.,Elizabeth Stone,https://www.harnham.com/job/user-researcher-contract-in-london-jid-31673,01 September 2021,28/08/2021
9,00006/LvE,Technisch Web Analist,€680,Den Haag,An opportunity for all technical web analysts that want to turn marketing into a more data-driven craft.,Laura Laura Van Eer,https://www.harnham.com/job/technisch-web-analist-in-den-haag-south-holland-jid-34788,22 October 2021,28/08/2021


In [27]:
# Let's update our Google Sheets with the updated data.
#
# `df` contains the rows that were read from the sheet as well as the newly
# scraped ones, so appending every row would write the existing entries a
# second time on each run. Only rows whose Reference is not yet present in the
# sheet are appended; Reference is the first column, hence `col_values(1)`.
existing_refs = set(sheet.col_values(1))
rows_to_upload = df[~df['Reference'].isin(existing_refs)]

for index, row in rows_to_upload.iterrows():
  sheet.append_row(list(row))
