### DEPENDENCIES

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from time import sleep
import os
from time import perf_counter
import inspect, itertools
import urllib
import json
########################################################################

#---

### MAIN

In [None]:
# ------------------------------------------------------
# Instantiating driver
#
# One Firefox WebDriver session is opened here and bound to the
# module-level name `driver`.
driver = webdriver.Firefox()

# ------------------------------------------------------
# Logging in
#
# Only the *names* of the environment variables that hold the credentials
# are kept here; login() reads their values itself.
login_url = "https://www.linkedin.com/login/en"
user_env, pass_env = "LINKEDIN_USER", "LINKEDIN_PASS"

# login() answers True when the page title contains "Feed" before its
# timeout expires, and False otherwise.
login_ret = login(driver, login_url, user_env, pass_env)

In [None]:
# ------------------------------------------------------
# Search configuration
search_root_url = "https://www.linkedin.com/jobs/search/?"

# ------------------------------------------------------
# Results are collected per keyword string: results[keywords] -> sweep output
results = {}

# Keyword candidates (mode 2 = job fields). The list is built but not yet
# iterated: the loop below is still disabled and one fixed query runs instead.
keywords_array = create_array_of_keywords_to_search_for(2)

# for keywords in keywords_array:

keywords = "machine learning"
result = recursive_search(keywords)
results.update({keywords: result})

print(json.dumps(results, indent=4))

In [None]:
def recursive_search(search_terms):
    """
    Run `search` once for every combination of experience level,
    work model and location listed in `all_filters`.

    Despite its name the function does not recurse; it walks the three
    option tables in nested order (experience level outermost,
    location innermost).

    input:      search terms, forwarded to `search` as `keywords`

    output:     nested dict indexed as
                results[xp_level][work_model][location], where each
                leaf is whatever `search` returned for that combination
    """
    #~ option tables of the three filters being swept
    xp_options = all_filters["xp_level"]["options"]
    model_options = all_filters["work_model"]["options"]
    place_options = all_filters["location"]["options"]

    def run_one(xp, model, place):
        # one search restricted to exactly this filter combination
        chosen = [
            ("xp_level", xp),
            ("work_model", model),
            ("location", place),
        ]
        return search(keywords=search_terms, filters=chosen)

    return {
        xp: {
            model: {
                place: run_one(xp, model, place)
                for place in place_options
            }
            for model in model_options
        }
        for xp in xp_options
    }

In [None]:
def create_array_of_keywords_to_search_for(mode):
    """
    Creates the list of keyword strings to be used as the
    'keywords' argument in the search url.

    modes:

        - 1:    programming languages (search_terms["prog_langs"])

        - 2:    job fields (search_terms["job_fields"])

        - 3:    every "<job field> <job title>" combination, job
                fields varying slowest

    Any other mode produces an empty list.

    The returned list is always a new object, so a caller may modify
    it without altering the shared `search_terms` table.
    """

    if mode == 1:
        # copy: never hand out the table's own list
        return list(search_terms["prog_langs"])

    if mode == 2:
        return list(search_terms["job_fields"])

    if mode == 3:
        job_fields = search_terms["job_fields"]
        job_titles = search_terms["job_titles"]

        return [
            f"{job_field} {job_title}"
            for job_field in job_fields
            for job_title in job_titles
        ]

    return []

### LOGIN

In [None]:
def login(driver, login_url, user_env, pass_env, timeout_s=10):
    """
    Log in through the given WebDriver session.

    input:      driver      WebDriver used to load and drive the page
                login_url   url of the login page
                user_env    name of the env var holding the username
                pass_env    name of the env var holding the password
                timeout_s   seconds to wait, after submitting the form,
                            for the page title to contain "Feed"

    output:     True if the title contains "Feed" before the timeout
                expires, False otherwise

    raises:     ValueError      a required credential env var is unset
                AssertionError  the loaded page title does not contain
                                "LinkedIn Login"
    """

    # login elements' xpath
    user_xpath = '//*[@id="username"]'
    pass_xpath = '//*[@id="password"]'

    #~ credentials
    user        = os.getenv(user_env)
    password    = os.getenv(pass_env)

    # The password is always typed, so fail before touching the browser
    # instead of passing None to send_keys later on.
    if password is None:
        raise ValueError(
            f"Environment variable '{pass_env}' is not set")

    driver.get(login_url)

    # Explicit raise instead of `assert`: asserts are removed when Python
    # runs with -O, which would silently skip this check.
    if "LinkedIn Login" not in driver.title:
        raise AssertionError(
            f"Unexpected page title for login page: '{driver.title}'")

    # When the page shows "Welcome back" the username field is skipped
    # (presumably the account is already remembered -- TODO confirm).
    if "Welcome back" not in driver.page_source:
        if user is None:
            raise ValueError(
                f"Environment variable '{user_env}' is not set")

        username_elem = driver.find_element(By.XPATH, user_xpath)
        username_elem.send_keys(user)

    pass_elem = driver.find_element(By.XPATH, pass_xpath)

    pass_elem.send_keys(password)
    pass_elem.send_keys(Keys.RETURN)

    start = perf_counter()

    # Poll the title once per second until the feed shows up or time runs out
    while "Feed" not in driver.title:
        sleep(1)

        timer_has_expired = (perf_counter() - start) > timeout_s

        if timer_has_expired:
            return False

    return True
        
#---

### SEARCH

In [None]:
def search( keywords,
    filters = None,
    base_url = "https://www.linkedin.com/jobs/search/?"
):
    """
    Load the jobs-search page for `keywords` and read the number of
    results it reports.

    Relies on the module-level `driver` (WebDriver session) and
    `all_filters` (filter table).

    input:      keywords    search text; url-quoted before use
                filters     either a dict {filter name: option name} or
                            an iterable of (filter name, option name)
                            pairs; defaults to {"location": "Worldwide"}.
                            Unknown names/options are reported with
                            print() and skipped.
                base_url    url prefix the query string is appended to

    output:     0 (int) when the page contains "No matching jobs found.",
                otherwise the first whitespace-separated token of the
                page's first <small> element (a str)

    raises:     AssertionError  the page title does not contain
                                "<keywords> Jobs" (case-insensitive)
    """
    # None instead of a dict literal in the signature: a default object
    # would be shared between all calls.
    if filters is None:
        filters = {"location": "Worldwide"}

    search_url = _build_search_url(keywords, filters, base_url)
    #print(search_url)
    driver.get(search_url)

    # Plain membership test: an `assert` here would vanish under -O and
    # make every search report 0.
    if "No matching jobs found." in driver.page_source:
        return 0

    search_query = f"{keywords} Jobs".upper()
    if search_query not in driver.title.upper():
        raise AssertionError(
            f"Unexpected page title for search '{keywords}': '{driver.title}'")

    no_of_results_elem = driver.find_element(By.TAG_NAME, "small")
    return no_of_results_elem.text.split()[0]


def _build_search_url(keywords, filters, base_url):
    """Compose the search url: base, quoted keywords, every valid filter."""
    # Local import: the file only does `import urllib`, which does not
    # guarantee that the `urllib.parse` submodule is loaded.
    from urllib.parse import quote

    keywords_quoted = quote(keywords, safe='/')
    search_url = f"{base_url}keywords={keywords_quoted}"

    # Accept both a mapping and an iterable of (name, option) pairs
    filter_pairs = filters.items() if hasattr(filters, "items") else filters

    for filter_name, filter_option in filter_pairs:

        if filter_name not in all_filters:
            print(f"{20*'-'}\nFilter is not valid: '{filter_name}'\n{20*'-'}")
            continue

        filter_options = all_filters[filter_name]["options"]

        if filter_option not in filter_options:
            print(f"{20*'-'}\nFilter option is not valid: '{filter_option}'\n{20*'-'}")
            continue

        filter_prop  = all_filters[filter_name]["prop"]
        filter_value = filter_options[filter_option]

        search_url += f"&{filter_prop}={filter_value}"

    return search_url + "&refresh=true"

#### search testing

In [None]:
# Manual smoke test for search(): one query with one option per filter.
search_root_url = "https://www.linkedin.com/jobs/search/?"
keywords = "machine learning engineer"
filters = [
    ("location", "Worldwide"),
    ("xp_level", "entry_level"),
    ("work_model", "remote"),
]

# search() takes (keywords, filters, base_url). Passing the url first would
# bind it to `keywords` and the keyword string to `filters`.
ret = search(keywords, filters, base_url=search_root_url)
print(ret)

### FILTERS

In [None]:
# Table of the job-search filters known to this notebook.
# Every entry is keyed by the filter name and holds:
#   name    -> the filter's own key, repeated
#   desc    -> human-readable label
#   prop    -> query-string parameter the option value is sent under
#   options -> option name -> value written into the url
all_filters = {
    "xp_level": {
        "name": "xp_level",
        "desc": "Experience Level",
        "prop": "f_E",
        "options": {
            "internship": 1,
            "entry_level": 2,
            "associate": 3,
            "mid_senior": 4,
            "director": 5,
            "executive": 6,
        },
    },
    "time_query": {
        "name": "time_query",
        "desc": "Date Posted",
        "prop": "f_TPR",
        # the numbers equal 1, 7 and 30 days expressed in seconds
        "options": {
            "past_24H": "r86400",
            "past_week": "r604800",
            "past_month": "r2592000",
        },
    },
    "hiring_type": {
        "name": "hiring_type",
        "desc": "Job Type",
        "prop": "f_JT",
        "options": {
            "fulltime": "F",
            "part_time": "P",
            "contract": "C",
            "temporary": "T",
            "volunteer": "V",
            "internship": "I",
            "other": "O",
        },
    },
    "work_model": {
        "name": "work_model",
        "desc": "On-site/Remote",
        "prop": "f_WT",
        "options": {
            "on_site": 1,
            "remote": 2,
            "hybrid": 3,
        },
    },
    "location": {
        "name": "location",
        "desc": "Location",
        "prop": "geoId",
        "options": {
            "Worldwide": 92000000,
            "Brazil": 106057199,
        },
    },
    # @todo: map companies code
}

### SEARCH TERMS

In [None]:
# Keyword vocabularies used to build search queries, grouped by kind.
search_terms = {
    # broad areas of work
    "job_fields": [
        "data", "data science", "machine learning", "deep learning",
        "AI", "cloud", "analytics",
    ],
    # programming languages
    "prog_langs": [
        "python", "java", "javascript", "ruby",
        "go", "c", "c++", "c#",
    ],
    # role names
    "job_titles": ["engineer", "specialist", "developer", "analyst"],
}