In [None]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
from urllib.parse import urlencode
import json
import logging
import threading
from datetime import datetime
import random

In [None]:


# Configure the logging settings
logging.basicConfig(
    filename='process_log.txt',  # Log file name
    level=logging.INFO,          # Default log level
    format='%(asctime)s - %(levelname)s - %(message)s'  # Log message format
)

def log_process(message, level='INFO'):
    # Get the current date and time
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Log the message with the appropriate level
    if level.upper() == 'INFO':
        logging.info(f"{current_time} - {message}")
    elif level.upper() == 'WARNING':
        logging.warning(f"{current_time} - {message}")
    elif level.upper() == 'ERROR':
        logging.error(f"{current_time} - {message}")
    else:
        raise ValueError("Invalid log level. Use 'INFO', 'WARNING', or 'ERROR'.")

    return f"Log entry created successfully with level {level}."

# Example usage
log_process("This is an informational message.", level='INFO')
log_process("This is a warning message.", level='WARNING')
log_process("This is an error message.", level='ERROR')



def open_url(url):
    user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36']
    session = requests.Session()
    headers = {'User-Agent': random.choice(user_agents)}
    response = session.get(url, headers=headers,timeout=10)
    response.raise_for_status()
    return response.text


In [None]:
def extract_data_remoteyeah(num):
    url = f'https://remoteyeah.com/?page={num}'
    response = open_url(url)
    soup = bs(response,'html.parser')
    big_box = soup.find_all('div',{'class':"gap-2 md:gap-4 p-4 md:p-8"})
    job_list = []
    for box in big_box:
        a = box.find('a')
        job_name = a.text.replace('\n',"")
        job_url = a.get('href')
        company = box.find('span',{"class":"font-semibold text-text-base leading-none mt-1"}).text
        dict_1 = {'Job_Name':job_name,
                'Job_url':job_url,
                'Company':company}
        job_list.append(dict_1)
    return job_list



In [None]:

def get_data_remotetrove(page):
  form_data = {
    "lang": "",
    "search_keywords": "",
    "search_location": "",
    "search_categories": [],
    "filter_job_type": [
      "freelance",
      "full-time",
      "internship",
      "part-time",
      "temporary",
      ""
    ],
    "per_page": 40,
    "orderby": "featured",
    "order": "DESC",
    "job_types": "",
    "page": f'{page}',
    "featured": "",
    "filled": "",
    "list_layout": "grid",
    "remote_position": "",
    "show_pagination": False,}
  headers = {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
      "Accept": "application/json, text/plain, */*"
  }
  encoded_data = urlencode(form_data, doseq=True)
  response = requests.post('https://remotetrove.com/jm-ajax/get_listings/',headers=headers,data=encoded_data)
  return response.json()

def extract_data_remotetrove(result):
  big_box = result['html']
  soup = bs(big_box,'html.parser')
  name_element = soup.find_all('li')
  job_list = []
  for i in name_element:
    verify = i.get('data-company')
    if verify:
      job_name = i.find('h4').text
      job_url = i.find('a').get('href')
      company = i.find('h3').text.replace('\n',"")
      dict_1 = {'Job_Name':job_name,
              'Job_url':job_url,
              'Company':company}
      job_list.append(dict_1)
  return job_list

def remote_yeah():
    jobs_list = []
    for i in range(1,10):
        data = extract_data_remoteyeah(i)
        jobs_list.extend(data)
        print(data)
    return jobs_list

def remotetrove():
    jobs_list = []
    for i in range(1,6):
        result = get_data_remotetrove(i)
        data = extract_data_remotetrove(result)
        jobs_list.extend(data)
    return jobs_list

def weworkremotely(search):
    url = f'https://weworkremotely.com/remote-jobs/search?term={search}'
    response = open_url(url)
    soup = bs(response,'html.parser')
    big_box = soup.find_all('li',{"class":"feature"})
    job_list = []
    for i in big_box:
        job_name = i.find('span',{"class":"title"}).text
        element_url = i.find_all('a')
        job_url = element_url[1].get('href')
        company = i.find('span',{'class':"company"}).text
        dict_1 = {'Job_Name':job_name,
                'Job_url':f"https://weworkremotely.com{job_url}",
                'Company':company}
        job_list.append(dict_1)
    return job_list


In [89]:
def fetch_all(search):
    shared_job = []
    lock = threading.Lock()
    def fetch_remote_yeah():
        result = remote_yeah()
        with lock:
            shared_job.extend(result)
    def fetch_remotetrove():
        result = remotetrove()
        with lock:
            shared_job.extend(result)
    def fetch_weworkremotely():
        result = weworkremotely(search)
        with lock:
            shared_job.extend(result)
    thread_1 = threading.Thread(target=fetch_remote_yeah)
    thread_2 = threading.Thread(target=fetch_remotetrove)
    thread_3 = threading.Thread(target=fetch_weworkremotely)

    thread_1.start()
    thread_2.start()
    thread_3.start()

    thread_1.join()
    thread_2.join()
    thread_3.join()

    return shared_job

def convert_to_df(data):
    df = pd.DataFrame(data)
    return df

def apply_filter(df,search):
    filtered_df = df[df['Job_Name'].str.contains(search,case=False,na=False)]
    return filtered_df


In [93]:
search = input(str('Enter your Kewyword: '))
data = fetch_all(search)
df = convert_to_df(data)
filtered_df = apply_filter(df, search)

[{'Job_Name': 'Back end Developer', 'Job_url': 'https://remoteyeah.com/remote-jobs/back-end-developer-8d2c5e76-8115-4634-94a5-49bd49080ff5', 'Company': 'Viseven'}, {'Job_Name': 'Senior Backend Software Developer', 'Job_url': 'https://remoteyeah.com/remote-jobs/senior-backend-software-developer-d4ff881f-3be8-49d7-9575-62dcfbc836a0', 'Company': 'Wealthsimple'}, {'Job_Name': 'Full-Stack Engineer - Senior Consultant', 'Job_url': 'https://remoteyeah.com/remote-jobs/full-stack-engineer-senior-consultant-33ec1827-617c-4a29-99e8-af5d9f4817c9', 'Company': 'Provectus'}, {'Job_Name': 'Senior Angular Developer - EG', 'Job_url': 'https://remoteyeah.com/remote-jobs/senior-angular-developer-eg-7aa1f8b7-4df2-4cac-a1db-d24a2675f08e', 'Company': 'Gorilla Logic'}, {'Job_Name': 'Azure Engineer II', 'Job_url': 'https://remoteyeah.com/remote-jobs/azure-engineer-ii-0872fe67-10cd-415e-96a9-99392d4053ba', 'Company': 'Rackspace'}, {'Job_Name': 'Staff Software Engineer, AI Products', 'Job_url': 'https://remoteye

In [None]:

def extract_remote(search,num):
    url = f'https://remote.com/jobs/all?query={search}&page={num}'
    response = open_url(url)
    soup = bs(response,'html.parser')

    big_box = soup.find_all('article',{"class":"sc-506be909-0 sc-31ccc88a-1 caZBsO fDTnCW"})
    job_list = []
    for box in big_box:
        job_name = box.find('span',{'class':"sc-a6d70f3d-0 fsvfbz"}).text
        job_url = box.find('a').get('href')
        company = box.find('span',{'class':"sc-a6d70f3d-0 cWvlWe"}).text
        dict_1 = {'Job_Name':job_name,
                    'Job_url':f"https://weworkremotely.com{job_url}",
                    'Company':company}
        job_list.append(dict_1)
    return job_list
    

In [None]:
def remote():
    num = 1
    jobs_list =[]
    while True:
        data = extract_remote('Data Engineer',num)
        if not data:
            log_process('No More data')
            break
        jobs_list.extend(data)
        print(data)
        num+=1
    return jobs_list

[{'Job_Name': 'Data Developer', 'Job_url': 'https://weworkremotely.com/jobs/ci-t-c1z36ylw/data-developer-j1znyawy', 'Company': 'CI&T'}, {'Job_Name': 'Data Engineer', 'Job_url': 'https://weworkremotely.com/jobs/bounteous-c1w63zo1/data-engineer-j1yq9rsa', 'Company': 'Bounteous'}, {'Job_Name': 'Data Engineer', 'Job_url': 'https://weworkremotely.com/jobs/olo-c1tfu90v/data-engineer-j1jrjj60', 'Company': 'Olo'}, {'Job_Name': 'Data Engineer', 'Job_url': 'https://weworkremotely.com/jobs/hcvt-c154ijmg/data-engineer-j13w05s6', 'Company': 'HCVT'}, {'Job_Name': 'Data Engineer', 'Job_url': 'https://weworkremotely.com/jobs/clearer-io-c1v9v5l4/data-engineer-j1k91lcz', 'Company': 'Clearer.io'}, {'Job_Name': 'Data Engineer', 'Job_url': 'https://weworkremotely.com/jobs/motive-c1cnheod/data-engineer-j1vc0gd2', 'Company': 'Motive'}, {'Job_Name': 'Data Engineer', 'Job_url': 'https://weworkremotely.com/jobs/viseven-c1drfab6/data-engineer-j1zr226y', 'Company': 'Viseven'}, {'Job_Name': 'Data Engineer', 'Job_u