In [1]:
import time
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore')

In [2]:
def sleep(x):
    time.sleep(x)
    
def wait(x):
    driver.implicitly_wait(x)
    
def click_bann_byID(ID):
    actions = ActionChains(driver)
    akzeptieren = driver.find_element(By.ID, ID)
    actions.click(akzeptieren).perform()
    wait(10)
    sleep(0.5)

def find_elements_HPCO(H,P,C,O):
    if website_name == 'jobware':
        header = driver.find_elements(By.TAG_NAME, H)
    else:
        header = driver.find_elements(By.CLASS_NAME, H)
    publish = driver.find_elements(By.CLASS_NAME, P)
    company = driver.find_elements(By.CLASS_NAME, C)
    ort = driver.find_elements(By.CLASS_NAME, O) 

    list_header = [title.text for title in header]
    list_publish = [pub.text for pub in publish]
    list_company = [comp.text for comp in company]
    list_ort = [o.text for o in ort]
    return list_header, list_publish, list_company, list_ort
    
def scroll_down(x):
    n=0
    while n < x:
        n+=1
        driver.execute_script("window.scrollBy(0,document.body.scrollHeight)", "")
        wait(10)
        sleep(2)

# 01 - STEPSTONE

In [3]:
'''
Title : Web Scrapping by Selenium 
Project Purpose: From StepStone scrap data for some Job Titels
1 - Create Driver
2 - Go to Website
3 - Create ActionChain Object
    3.1 - Click Banned Accept
4 - Take Title and Infos from Page
    4.1 - Create Lists 
    4.2 - Create DataFrame
    4.3 - Repeat Process
    4.4 - Print and Save DataFrame
'''

print('---------------------- StepStone Job Searching Selenium Project ----------------------')
start=datetime.now()  
# 0 Link Descriptions
link_original_stepstone = 'https://www.stepstone.de/jobs/data-analyst/in-rietberg?radius=50&page=2'

website_name = 'stepstone'
job_name = 'Data Analyst'
ort_ = 'Rietberg'
radius = 50
page_number = 1

#  1 - Create Driver
Path = '/Users/macbook/Desktop/projects/Github_Repositories/Portfolio Projects/02 - Web_Scraping_Job_Search/chromedriver'
driver = webdriver.Chrome(Path)

#  2 - Go to Website
job_link = job_name.replace(' ', '-').lower()
ort_link = ort_.lower()
link = f'https://www.stepstone.de/jobs/{job_link}/in-{ort_link}?radius={radius}&page={page_number}'

driver.get(link)
wait(10)
sleep(2)

#  3 - ActionChain Object created
# 3.1 - Click Banned Accept
ID = 'ccmgt_explicit_accept'
click_bann_byID(ID)

# 4 -  Take Infos from Page
# Headers, Publish_Time ,Company, City
H, P, C, O = 'resultlist-12iu5pk', 'resultlist-3asi6i', 'resultlist-1v262t5', 'resultlist-dettfq'
list_header, list_publish, list_company, list_ort = find_elements_HPCO(H,P,C,O)

# Description and Page number of results
description = driver.find_elements(By.CLASS_NAME, 'resultlist-1pq4x2u')
result = driver.find_elements(By.CLASS_NAME, 'resultlist-xeyevn')

'''
# Get Links
header = driver.find_elements(By.CLASS_NAME, H)
list_link = [link.get_attribute('href') for link in header]
'''
# 4.1 - Get Texts for each finding
list_description = [des.text for des in description]
print('Header',len(list_header), 'Publish',len(list_publish), 'Company',len(list_company), 'Ort',len(list_ort), 'desc', len(list_description))

# Total Search Page Number
list_result = [res.text for res in result]
number_of_page = int(list_result[0].split(' ')[-1])
print(f'Number of Jobs Pages = {number_of_page}')

# 4.2 - DataFrame df
d = dict(job_title=np.array(list_header), publish=np.array(list_publish), company_name=np.array(list_company), city=np.array(list_ort) , description=np.array(list_description))
df = pd.DataFrame.from_dict(d, orient='index')
df = df.T


# 4.3 Repeat Process for every Web Page
while  page_number < number_of_page:
    page_number+=1
    
    # 4.1 - Go to another page
    link = f'https://www.stepstone.de/jobs/{job_link}/in-{ort_link}?radius={radius}&page={page_number}'
    driver.get(link)
    wait(10)
    sleep(1.5)
    
    # 4.2 - Find the elements and get the Texts
    list_header, list_publish, list_company, list_ort = find_elements_HPCO(H,P,C,O) 
    description = driver.find_elements(By.CLASS_NAME, 'resultlist-1pq4x2u')
    list_description = [des.text for des in description]
 
    # 4.3 - Create new page Dataframe
    d = dict(job_title=np.array(list_header), publish=np.array(list_publish), company_name=np.array(list_company), city=np.array(list_ort) , description=np.array(list_description))
    df2 = pd.DataFrame.from_dict(d, orient='index')
    df2 = df2.T
    
    # 4.4 - Concatenate the DataFrames
    df = pd.concat([df,df2], axis=0, ignore_index=True)
    print(f'Page Number : {page_number}, DataFrame Shape : {df2.shape}')
    

# 4.4 Save Data as csv and xlsx    
print(f'DataFrame End : {df.shape}')
df['website'] = website_name
# 4.3 - Save DataFrame
# 4.3.1 - to csv
path = '/Users/macbook/Desktop/projects/Github_Repositories/Portfolio Projects/02 - Web_Scraping_Job_Search/data'
job_name2 = job_name.replace(' ', '-')
time_ = datetime.today().strftime('%Y-%m-%d')
df.to_csv(f'{path}/{job_name2}-{time_}.csv', index=False)

end =datetime.now() 
print('Code Runned No Problem')
print(f'Time = {end - start}')
sleep(5)
driver.quit()

---------------------- StepStone Job Searching Selenium Project ----------------------
Header 25 Publish 25 Company 25 Ort 25 desc 25
Number of Jobs Pages = 22
Page Number : 2, DataFrame Shape : (25, 5)
Page Number : 3, DataFrame Shape : (25, 5)
Page Number : 4, DataFrame Shape : (25, 5)
Page Number : 5, DataFrame Shape : (25, 5)
Page Number : 6, DataFrame Shape : (25, 5)
Page Number : 7, DataFrame Shape : (25, 5)
Page Number : 8, DataFrame Shape : (25, 5)
Page Number : 9, DataFrame Shape : (25, 5)
Page Number : 10, DataFrame Shape : (25, 5)
Page Number : 11, DataFrame Shape : (25, 5)
Page Number : 12, DataFrame Shape : (25, 5)
Page Number : 13, DataFrame Shape : (25, 5)
Page Number : 14, DataFrame Shape : (25, 5)
Page Number : 15, DataFrame Shape : (25, 5)
Page Number : 16, DataFrame Shape : (25, 5)
Page Number : 17, DataFrame Shape : (25, 5)
Page Number : 18, DataFrame Shape : (25, 5)
Page Number : 19, DataFrame Shape : (25, 5)
Page Number : 20, DataFrame Shape : (25, 5)
Page Number 

# 02 - JOBWARE

In [4]:
print('---------------------- Jobware Job Searching Selenium Project ----------------------')

start=datetime.now()  
# 0 Link Descriptions
link_original = 'https://www.jobware.de/jobsuche?jw_jobname=data%20analyst&jw_jobort=333**%20Rietberg&jw_ort_distance=50'

website_name = 'jobware'
job_name = 'Data Analyst'
ort_ = 'Rietberg'
radius = 50
page_number = 0

#  1 - Create Driver
Path = '/Users/macbook/Desktop/projects/Github_Repositories/Portfolio Projects/02 - Web_Scraping_Job_Search/chromedriver'
driver = webdriver.Chrome(Path)

#  2 - Go to Website
job_link = job_name.replace(' ', '%20').lower()
ort_link = ort_.capitalize()
link = f'https://www.jobware.de/jobsuche?jw_jobname={job_link}&jw_jobort=333**%20{ort_}&jw_ort_distance={radius}'

driver.get(link)
wait(10)
sleep(2)

#  3 - ActionChain Object created
# 3.1 - Click Banned Accept
actions = ActionChains(driver)
akzeptieren = driver.find_element(By.XPATH, '/html/body/div[1]/div/div[3]/div[2]/button')
actions.click(akzeptieren).perform()
wait(10)
sleep(0.5)
#dsgvo-1B76C4DA4B-orange dsgvo-1B76C4DA4B-accept

# 4 -  Take Infos from Page
# Headers, Company, City, Description
H, P, C, O = 'h2', 'date', 'company', 'location'
list_header, list_publish, list_company, list_ort = find_elements_HPCO(H,P,C,O)
description = driver.find_elements(By.CLASS_NAME, 'task')
list_description = [des.text for des in description]

print('Header',len(list_header), 'Publish',len(list_publish), 'Company',len(list_company), 'Ort',len(list_ort), 'Desc', len(list_description))

# Total Search Page Number
result = driver.find_elements(By.CLASS_NAME, 'result-sort')
list_result = [res.text for res in result]
print(list_result)

# 4.2 - DataFrame df
d = dict(job_title=np.array(list_header), publish=np.array(list_publish), company_name=np.array(list_company), city=np.array(list_ort) , description=np.array(list_description))
df = pd.DataFrame.from_dict(d, orient='index')
df = df.T

# 4.4 Save Data as csv and xlsx    
print(f'DataFrame End : {df.shape}')
df['website'] = website_name
# 4.3 - Save DataFrame
# 4.3.1 - to csv
df.to_csv(f'{path}/{job_name2}-{time_}.csv', mode='a', index=False, header=False)

end =datetime.now() 
print('Code Runned No Problem')
print(f'Time = {end - start}')
sleep(5)
driver.quit()

---------------------- Jobware Job Searching Selenium Project ----------------------
Header 12 Publish 12 Company 12 Ort 12 Desc 11
['12 Treffer\nSortierung: Relevanz - Datum']
DataFrame End : (12, 5)
Code Runned No Problem
Time = 0:00:12.610382


# 03 - LINKEDIN (Chrome)

In [5]:
print('---------------------- Linkedin Job Searching Selenium Project ----------------------')
def sleep(x):
    time.sleep(x)
def wait(x):
    driver.implicitly_wait(x)
    

    #sleep(1.5)
    #more_option = driver.find_element(By.CLASS_NAME, 'infinite-scroller__show-more-button')
    #actions.click(more_option).perform()
    #wait(10)
    #sleep(0.5)    
    

start=datetime.now()  
# 0 Link Descriptions
link_original = 'https://www.linkedin.com/jobs/search/?currentJobId=3199974140&distance=25&keywords=data%20analyst&location=Rietberg' 

website_name =  'linkedin'
radius = 40
page_number = 1

#  1 - Create Driver
Path = '/Users/macbook/Desktop/projects/Github_Repositories/Portfolio Projects/02 - Web_Scraping_Job_Search/chromedriver'
driver = webdriver.Chrome(Path)

#  2 - Go to Website
job_link = job_name.replace(' ', '%20').lower()

link2 = f'https://www.linkedin.com/jobs/search/?distance=25&keywords={job_link}&location={ort_}'
driver.get(link2)
wait(10)
sleep(2)


#  3 - ActionChain Object created
# 3.1 - Click Banned Accept
actions = ActionChains(driver)
akzeptieren = driver.find_element(By.TAG_NAME, 'button')
actions.click(akzeptieren).perform()
wait(10)
sleep(0.5)

# 3.1 - 
scroller = driver.find_element(By.CLASS_NAME, 'infinite-scroller__show-more-button')

scroll_down(7)

# 4 -  Take Infos from Page
# Headers, Company, City, Description
header = driver.find_elements(By.CLASS_NAME, 'base-search-card__title')
publish = driver.find_elements(By.CLASS_NAME, 'job-search-card__listdate')
company = driver.find_elements(By.CLASS_NAME, 'hidden-nested-link')
ort = driver.find_elements(By.CLASS_NAME, 'job-search-card__location') 
#description = driver.find_elements(By.CLASS_NAME, 'resultlist-1pq4x2u')
result = driver.find_elements(By.CLASS_NAME, 'results-context-header__context')

# 4.1 -
list_header = [title.text for title in header]
list_publish = [pub.text for pub in publish]
list_company = [comp.text for comp in company]
list_ort = [o.text for o in ort]
#list_description = [des.text for des in description]

print('Header',len(list_header), 'Publish',len(list_publish), 'Company',len(list_company), 'Ort',len(list_ort))

# Total Search Page Number
list_result = [res.text for res in result]
print(f'Number of Jobs Pages = {list_result}')

# 4.2 - DataFrame df
d = dict(job_title=np.array(list_header), publish=np.array(list_publish), company_name=np.array(list_company), city=np.array(list_ort))
df = pd.DataFrame.from_dict(d, orient='index')
df = df.T
df['description'] = None
df['website'] = website_name

# 4.4 Save Data as csv and xlsx    
print(f'DataFrame End : {df.shape}')
# 4.3 - Save DataFrame
# 4.3.1 - to csv
df.to_csv(f'{path}/{job_name2}-{time_}.csv', mode='a', index=False, header=False)

# 4.3.2 - to excel
# install openpyxl
#df.to_excel(f'{path}/{job_name2}-{time_}.xlsx', sheet_name='Sheet3')

end =datetime.now() 
print('Code Runned No Problem')
print(f'Time = {end - start}')
sleep(5)
driver.quit()

df.head()

---------------------- Linkedin Job Searching Selenium Project ----------------------
Header 175 Publish 170 Company 162 Ort 175
Number of Jobs Pages = ['Rietberg, Kuzey Ren-Vestfalya, Almanya konumunda 847 Data Analyst iş ilanı (20 yeni)']
DataFrame End : (175, 6)
Code Runned No Problem
Time = 0:00:38.961719


Unnamed: 0,job_title,publish,company_name,city,description,website
0,Data Analyst (w/m/d) Power BI mit Remote-Anteil,1 hafta önce,ATLAS TITAN Mitte GmbH,"Gütersloh, Kuzey Ren-Vestfalya, Almanya",,linkedin
1,Data Analyst / Scientist (m/w/d),2 hafta önce,Dr. Wolff Group,"Bielefeld, Kuzey Ren-Vestfalya, Almanya",,linkedin
2,Data / Business Analyst (m/w/d),2 hafta önce,Dr. Wolff Group,"Bielefeld, Kuzey Ren-Vestfalya, Almanya",,linkedin
3,CRM Data Analyst (m/w/d),1 gün önce,hachmeister+partner,"Bielefeld, Kuzey Ren-Vestfalya, Almanya",,linkedin
4,Data Analyst für Datenmodellierung und Kundenb...,1 ay önce,Lurse,"Salzkotten, Kuzey Ren-Vestfalya, Almanya",,linkedin


# Read Data 

In [38]:
jobs = pd.read_csv(f'{path}/{job_name2}-{time_}.csv')
df = jobs.copy()
df.head()

Unnamed: 0,job_title,publish,company_name,city,description,website
0,Data Scientist / Analyst - Web (m/w/d),vor 5 Tagen,Neue Westfälische GmbH & Co. KG,Bielefeld,Wir suchen nach einem Data Analyst/Data Scient...,stepstone
1,ERP Specialist / Business Data Analyst (m/w/d),vor 1 Woche,DAYTON PROGRESS GmbH,"Hamburg, Köln, Bielefeld, Hannover, Berlin, Le...",Aktuell sucht Dayton Progress einen ERP Specia...,stepstone
2,Data Analyst / Scientist (m/w/d),vor 1 Woche,eWolff GmbH,Bielefeld,Finde Deine Rolle und verstärke unser Team Dat...,stepstone
3,Consultant Data Analyst (m/w/d),vor 2 Tagen,Arvato Systems GmbH,Gütersloh,Und darum suchen wir dich als Consultant Data ...,stepstone
4,Data / Business Analyst (m/w/d),vor 1 Woche,eWolff GmbH,Bielefeld,Finde Deine Rolle und verstärke unser Team Dat...,stepstone


In [104]:
df = jobs.copy()

In [22]:
df.shape

(717, 6)

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 717 entries, 0 to 716
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   job_title     717 non-null    object
 1   publish       712 non-null    object
 2   company_name  704 non-null    object
 3   city          717 non-null    object
 4   description   541 non-null    object
 5   website       717 non-null    object
dtypes: object(6)
memory usage: 33.7+ KB


## Dropn Null Values

In [105]:
df.isna().sum()

job_title         0
publish           5
company_name     13
city              0
description     176
website           0
dtype: int64

In [106]:
df.dropna(subset=['company_name'], inplace=True)
df.isna().sum()

job_title         0
publish           0
company_name      0
city              0
description     163
website           0
dtype: int64

## Drop Duplicated Values

In [125]:
df[df.duplicated(subset=['job_title', 'company_name', 'city', 'description'])].count()

job_title       5
publish         5
company_name    5
city            5
description     5
website         5
dtype: int64

In [128]:
df.drop_duplicates(subset=['job_title', 'company_name', 'city', 'description'], inplace=True)
df[df.duplicated(subset=['job_title', 'company_name', 'city', 'description'])].count()

job_title       0
publish         0
company_name    0
city            0
description     0
website         0
dtype: int64

In [130]:
df.shape

(699, 6)

In [302]:
df.loc[df.website =='linkedin', 'city'] = df.loc[df.website =='linkedin', 'city'].str.replace(', Kuzey Ren-Vestfalya, Almanya', '')

df.describe()

Unnamed: 0,job_title,publish,company_name,city,description,website
count,699,699,699,699,536,699
unique,651,24,297,256,404,3
top,Smart Banking Kundenberater (m/w/d),2022-10-31,NTT DATA Business Solutions AG,Bielefeld,NTT DATA Business Solutions AG - Als Teil der ...,stepstone
freq,5,226,97,109,67,525


In [287]:
# Recap 01
from datetime import timedelta

def replace_column(column_name, old, new):
    df[column_name] = df[column_name].str.replace(old, new)

def func1(value):
    """ Explain What makes func1 """
    if 'tag' in  value:
        number_tag = value.replace('tagen','').replace('tag', '').strip()
        return (number_tag)
    elif 'woc' in  value:
        number_tag = value.replace('wochen','').replace('woche', '').strip()
        return (f'{number_tag}w')
    elif 'mo' in  value:
        number_tag = value.replace('monaten','').replace('monat', '').strip()
        return (f'{number_tag}mm')
    else:
        return value    
    
def func_to_date(value):
    if len(value) == 1:
        new_value = int(value)
        date = (datetime.today() - timedelta(days=new_value)).strftime('%Y-%m-%d')
        return date
    if len(value) == 2:
        value = value.replace('w','')
        new_value = int(value)*7
        date = (datetime.today() - timedelta(days=new_value)).strftime('%Y-%m-%d')
        return date    
    if len(value) == 3:
        value = value.replace('mm','')        
        new_value = int(value)*30
        date = (datetime.today() - timedelta(days=new_value)).strftime('%Y-%m-%d')
        return date    
    else:
        return value

df = jobs.copy()

print(f'Shape first : {df.shape}')

df.dropna(subset=['company_name'], inplace=True)

df.drop_duplicates(subset=['job_title', 'company_name', 'city', 'description'], inplace=True)
print(f'Shape Last : {df.shape}')

# Recap 02
# Fill NAN to no_info
df.publish = df.publish.str.lower()
df.publish.fillna('no_info', inplace=True)

# Fill stunde to today
today = datetime.today().strftime('%Y-%m-%d')
df.publish.loc[df.publish.str.contains('stunden')] = time_

# clean vor and önce
replace_column('publish', 'vor', '')
replace_column('publish', 'önce', '')
replace_column('publish', 'gün', 'tagen')
replace_column('publish', 'hafta', 'wochen')
replace_column('publish', 'ay', 'monat')

df.publish = df.publish.apply(func1)
df.publish = df.publish.apply(func_to_date)

# Linkedin City Clean
df.loc[df.website =='linkedin', 'city'] = df.loc[df.website =='linkedin', 'city'].str.replace(', Kuzey Ren-Vestfalya, Almanya', '')

Shape first : (717, 6)
Shape Last : (699, 6)


array(['2022-11-02', '2022-10-31', '2022-11-05', '2022-11-04',
       '2022-10-17', '2022-11-03', '2022-10-24', '2022-11-06',
       '2022-10-10', '2022-11-07', '2022-11-01', '2022-10-08',
       '29.10.2022', '27.10.2022', '24.10.2022', '26.10.2022',
       '04.10.2022', '30.10.2022', '19.10.2022', '26.06.2022',
       '2022-08-09', '2022-09-08', '2022-07-10', '2022-05-11'],
      dtype=object)

In [314]:
df[df.job_title.str.contains('Data')].job_title.value_counts().reset_index().sort_values('job_title', ascending=False).head(10)

Unnamed: 0,index,job_title
0,Cloud Data Engineer (m/w/d),4
1,Data Scientist (m/w/d),3
2,Teamleiter (m/w/d) Data Services,2
7,Cloud Data Engineer (w/m/d) - in Bielefeld,2
3,Data Architect (gn),2
9,Data / Business Analyst (m/w/d),2
8,Data Platform Solution Architect EMEA,2
10,Data Analyst / Scientist (m/w/d),2
6,Enterprise Data Architect (m/f/d),2
5,Consultant für Data & Analytics im SAP Hana Pr...,2


In [315]:
df[df.job_title.str.contains('Data')].company_name.value_counts().reset_index().sort_values('company_name', ascending=False).head(10)

Unnamed: 0,index,company_name
0,PwC,8
1,Kienbaum Consultants International GmbH - Zent...,4
2,SoftwareONE Deutschland GmbH,3
3,Arvato Infoscore GmbH,3
4,HELLA,3
10,Goldbeck GmbH,2
15,eWolff GmbH,2
13,GOLDBECK GmbH,2
12,FORTIS IT-Services GmbH,2
11,4ALLPORTAL,2


In [316]:
df.company_name.value_counts().reset_index().sort_values('company_name', ascending=False).head(10)

Unnamed: 0,index,company_name
0,NTT DATA Business Solutions AG,97
1,FORTIS IT-Services GmbH,24
2,adesso SE,17
3,DATAGROUP,13
4,Reply,12
5,PwC,11
6,Tourlane GmbH,11
7,Arvato Systems,11
8,Takeaway.com,9
9,Arvato Systems GmbH,8


In [320]:
print('---------------------- StepStone Job Searching Selenium Project ----------------------')
start=datetime.now()  
# 0 Link Descriptions
link_original_stepstone = 'https://www.stepstone.de/jobs/data-analyst/in-rietberg?radius=50&page=2'

website_name = 'stepstone'
job_name = 'Data Analyst'
ort_ = 'Rietberg'
radius = 50
page_number = 1

#  1 - Create Driver
Path = '/Users/macbook/Desktop/projects/Github_Repositories/Portfolio Projects/02 - Web_Scraping_Job_Search/chromedriver'
driver = webdriver.Chrome(Path)

#  2 - Go to Website
job_link = job_name.replace(' ', '-').lower()
ort_link = ort_.lower()
link = f'https://www.stepstone.de/jobs/{job_link}/in-{ort_link}?radius={radius}&page={page_number}'

driver.get(link)
wait(10)
sleep(2)

#  3 - ActionChain Object created
# 3.1 - Click Banned Accept
ID = 'ccmgt_explicit_accept'
click_bann_byID(ID)

# 4 -  Take Infos from Page
# Headers, Publish_Time ,Company, City
H, P, C, O = 'resultlist-12iu5pk', 'resultlist-3asi6i', 'resultlist-1v262t5', 'resultlist-dettfq'
list_header, list_publish, list_company, list_ort = find_elements_HPCO(H,P,C,O)

header = driver.find_elements(By.CLASS_NAME, H)
list_link = [link.get_attribute('href') for link in header]

print(list_link)

end =datetime.now() 
print('Code Runned No Problem')
print(f'Time = {end - start}')
sleep(5)
driver.quit()

---------------------- StepStone Job Searching Selenium Project ----------------------
['https://www.stepstone.de/stellenangebote--Data-Scientist-Analyst-Web-m-w-d-Bielefeld-Neue-Westfaelische-GmbH-Co-KG--8866842-inline.html', 'https://www.stepstone.de/stellenangebote--ERP-Specialist-Business-Data-Analyst-m-w-d-Hamburg-Koeln-Bielefeld-Hannover-Berlin-Leipzig-Frankfurt-am-Main-Mannheim-Muenchen-DAYTON-PROGRESS-GmbH--8812571-inline.html', 'https://www.stepstone.de/stellenangebote--Data-Analyst-Scientist-m-w-d-Bielefeld-eWolff-GmbH--8228787-inline.html', 'https://www.stepstone.de/stellenangebote--Consultant-Data-Analyst-m-w-d-Guetersloh-Arvato-Systems-GmbH--8788053-inline.html', 'https://www.stepstone.de/stellenangebote--Data-Business-Analyst-m-w-d-Bielefeld-eWolff-GmbH--8227460-inline.html', 'https://www.stepstone.de/stellenangebote--Data-Scientist-m-w-d-Home-Office-oder-Bremen-Berlin-Paderborn-Duisburg-Koeln-Frankfurt-Karlsruhe-Augsburg-Muenchen-CONTACT-Software-GmbH--8869313-inline.htm

In [None]:
def find_elements_HPCO(H,P,C,O):
    if website_name == 'jobware':
        header = driver.find_elements(By.TAG_NAME, H)
    else:
        header = driver.find_elements(By.CLASS_NAME, H)
    publish = driver.find_elements(By.CLASS_NAME, P)
    company = driver.find_elements(By.CLASS_NAME, C)
    ort = driver.find_elements(By.CLASS_NAME, O) 

    list_header = [title.text for title in header]
    list_publish = [pub.text for pub in publish]
    list_company = [comp.text for comp in company]
    list_ort = [o.text for o in ort]
    return list_header, list_publish, list_company, list_ort