In [67]:
import getpass
import os
import re
from pathlib import Path

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

def __prompt_email_password():
    """
    Ask for LinkedIn credentials on the console.

    The email is read with ``input`` and the password with ``getpass.getpass``
    so that the password is not echoed. Intended for internal use by ``login``.

    Returns
    -------
    tuple
        ``(email, password)`` exactly as typed by the user.
    """
    entered_email = input("Email: ")
    entered_password = getpass.getpass(prompt="Password: ")
    return entered_email, entered_password

def login(driver=None, email=None, password=None, timeout=10):
    """
    Log in to LinkedIn with the given credentials.

    Parameters
    ----------
    driver : selenium WebDriver, optional
        Browser session to use. A new Chrome session is started when omitted.
    email, password : str, optional
        LinkedIn credentials. If either one is missing, the user is prompted
        for both on the console.
    timeout : int or float
        Seconds to wait for the login form to appear and, after submitting,
        for the element that confirms the login succeeded.

    Returns
    -------
    The driver that was used. This lets a caller that did not pass a driver
    keep hold of the session created here.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If the login form or the confirming element does not appear within
        ``timeout`` seconds.
    """

    # Constants
    LOGIN_URL = 'https://www.linkedin.com/login'
    CHECKPOINT_URL = 'https://www.linkedin.com/checkpoint/lg/login-submit'
    VERIFY_LOGIN_CLASS = "global-nav__primary-link"
    REMEMBER_PROMPT_ID = 'remember-me-prompt__form-primary'

    if not driver:
        driver = webdriver.Chrome()

    if not email or not password:
        email, password = __prompt_email_password()

    driver.get(LOGIN_URL)
    wait = WebDriverWait(driver, timeout)

    # until() returns the located element, so there is no need to look it up twice.
    email_field = wait.until(EC.presence_of_element_located((By.ID, "username")))
    email_field.send_keys(email)

    password_field = driver.find_element(By.ID, 'password')
    password_field.send_keys(password)
    password_field.submit()

    # The checkpoint URL may carry a query string, so match on the prefix.
    if driver.current_url.startswith(CHECKPOINT_URL):
        try:
            driver.find_element(By.ID, REMEMBER_PROMPT_ID).submit()
        except NoSuchElementException:
            # find_element raises rather than returning None; a checkpoint
            # page without the remember-me form is simply skipped.
            pass

    # Block until the element that marks a logged-in page is present.
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, VERIFY_LOGIN_CLASS)))
    return driver


# login()  # example call: prompts for email and password; never commit real credentials to the notebook

In [70]:
import os
""" 
Create Person class from given LinkedIn profile URL which will have the following attributes:
    - Name
    - About
    - Experience
    - Education
    - Volunteering
    - Skills
    - Languages
"""

class Person:
    """
    A LinkedIn profile read through a Selenium driver.

    The constructor stores the supplied attribute values (lists default to
    empty), optionally opens ``linkedin_url`` in the driver, and optionally
    scrapes the profile. Scraping currently fills ``name`` and ``about``; the
    remaining list attributes are only changed through the ``add_*`` methods.

    Parameters
    ----------
    linkedin_url : str
        URL of the profile page.
    name, about, experience, educations, volunteering, skills, languages
        Optional initial values for the corresponding attributes.
    driver : selenium WebDriver, optional
        Session to use. A Chrome session is created when omitted.
    get : bool
        Navigate the driver to ``linkedin_url`` during construction.
    scrape : bool
        Call ``scrape_profile`` during construction.
    close_on_complete : bool
        Passed to ``scrape_profile``; closes the driver window after scraping.
    """

    def __init__(
            self,
            linkedin_url=None,
            name=None,
            about=None,
            experience=None,
            educations=None,
            volunteering=None,
            skills=None,
            languages=None,
            driver=None,
            get=True,
            scrape=True,
            close_on_complete=True,
    ):
        self.linkedin_url = linkedin_url
        self.name = name
        # `or []` gives every instance its own list instead of a shared default.
        self.about = about or []
        self.experience = experience or []
        self.educations = educations or []
        self.volunteering = volunteering or []
        self.skills = skills or []
        self.languages = languages or []

        if driver is None:
            driver = self._create_driver()

        if get:
            driver.get(linkedin_url)

        self.driver = driver

        if scrape:
            self.scrape_profile(close_on_complete)

    @staticmethod
    def _create_driver():
        """Start Chrome, preferring an explicitly located chromedriver binary."""
        try:
            driver_path = os.getenv("CHROMEDRIVER")
            if driver_path is None:
                # __file__ does not exist inside a notebook kernel; the
                # resulting NameError is handled by the fallback below.
                driver_path = os.path.join(
                    os.path.dirname(__file__), "drivers/chromedriver"
                )
            # Selenium 4 takes the binary location through a Service object.
            return webdriver.Chrome(service=Service(executable_path=driver_path))
        except Exception:
            # Best effort: let Selenium locate chromedriver on its own.
            # `Exception` (not a bare except) keeps Ctrl-C working.
            return webdriver.Chrome()

    @staticmethod
    def _name_from_title(title):
        """
        Return the profile name from a page title such as
        "(1) Daniel Petrov | LinkedIn" or "Daniel Petrov | LinkedIn".
        """
        name_part = title.split(' | ')[0]
        # Strip a leading counter like "(1) " or "(99+) " only when present,
        # so titles without one keep their first word.
        return re.sub(r'^\(?\d+\+?\)\s*', '', name_part).strip()

    def add_about(self, about):
        """Append one entry to ``about``."""
        self.about.append(about)

    def add_experience(self, experience):
        """Append one entry to ``experience``."""
        self.experience.append(experience)

    def add_education(self, education):
        """Append one entry to ``educations``."""
        self.educations.append(education)

    def add_volunteering(self, volunteering):
        """Append one entry to ``volunteering``."""
        self.volunteering.append(volunteering)

    def add_skills(self, skills):
        """Append one entry to ``skills``."""
        self.skills.append(skills)

    def add_languages(self, languages):
        """Append one entry to ``languages``."""
        self.languages.append(languages)

    def get_name(self):
        """Open the "about this profile" overlay and set ``name`` from the page title."""
        # URLs always use "/", so build the address with string operations;
        # os.path.join would insert a backslash on Windows.
        url = self.linkedin_url.rstrip('/') + '/overlay/about-this-profile/'
        self.driver.get(url)
        self.name = self._name_from_title(self.driver.title)

    def get_about(self):
        """Open the profile page and append the first matching about text, if any, to ``about``."""
        self.driver.get(self.linkedin_url)
        elements = self.driver.find_elements(By.CLASS_NAME, 'pv-shared-text-with-see-more')
        if elements:
            self.about.append(elements[0].text)

    def scrape_profile(self, close_on_complete=True):
        """
        Scrape the profile and populate the attributes.

        Runs ``get_name`` then ``get_about``; when ``close_on_complete`` is
        true the driver's current window is closed afterwards.
        """
        self.get_name()
        self.get_about()

        if close_on_complete:
            self.driver.close()

    def __str__(self):
        return f"Person(name={self.name},linkedin_url={self.linkedin_url},about={self.about},)"
    


In [82]:
# Local testing
# Loads a saved profile page from disk and prints its about text, so the
# selector can be checked without logging in to LinkedIn.
html_file_path = r"C:\Users\William Purvis\OneDrive - University of Cambridge\EF_Hackathon_2023\CV_reader\testing\sample.html"
# as_uri() produces a well-formed file:/// URL (forward slashes, escaped spaces),
# which plain 'file://' + path concatenation does not on Windows.
abs_file_path = Path(html_file_path).resolve().as_uri()

driver = webdriver.Chrome()

driver.get(abs_file_path)

elements = driver.find_elements(By.CLASS_NAME, 'pv-shared-text-with-see-more')
# Always bind `about`: with no match this prints None instead of raising
# NameError or showing a stale value left over from an earlier run.
about = elements[0].text if elements else None
print(about)

Motivated student currently undertaking an MPhil in Data Intensive Science at the University of Cambridge, graduating in 2024. Keen to acquire experience in Data Analytics, Machine Learning and Artificial Intelligence while working in versatile and dynamic teams within global corporate companies or marketing disrupting start-ups with the goal to explore demanding environments, learn how to work under pressure meeting tight deadlines. Interested in observing agile practices in teamwork, witness sparks of wisdom and creativity in processes of solution design and earn a reputation of a valuable contributor.


In [72]:
# Testing
# End-to-end check: log in, scrape one profile, print the result.

driver = webdriver.Chrome()

dp = 'https://www.linkedin.com/in/danielspetrov/'
wp = 'https://www.linkedin.com/in/williampurvis00/'

# Credentials are read from the environment so they never live in the notebook.
# When either variable is unset, login() prompts for both interactively.
email = os.environ.get('LINKEDIN_EMAIL')
password = os.environ.get('LINKEDIN_PASSWORD')
login(driver, email, password)
person = Person(linkedin_url=wp, driver=driver)


print(person)

TimeoutException: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7146E82B2+55298]
	(No symbol) [0x00007FF714655E02]
	(No symbol) [0x00007FF7145105AB]
	(No symbol) [0x00007FF71455175C]
	(No symbol) [0x00007FF7145518DC]
	(No symbol) [0x00007FF71458CBC7]
	(No symbol) [0x00007FF7145720EF]
	(No symbol) [0x00007FF71458AAA4]
	(No symbol) [0x00007FF714571E83]
	(No symbol) [0x00007FF71454670A]
	(No symbol) [0x00007FF714547964]
	GetHandleVerifier [0x00007FF714A60AAB+3694587]
	GetHandleVerifier [0x00007FF714AB728E+4048862]
	GetHandleVerifier [0x00007FF714AAF173+4015811]
	GetHandleVerifier [0x00007FF7147847D6+695590]
	(No symbol) [0x00007FF714660CE8]
	(No symbol) [0x00007FF71465CF34]
	(No symbol) [0x00007FF71465D062]
	(No symbol) [0x00007FF71464D3A3]
	BaseThreadInitThunk [0x00007FFB1AC3257D+29]
	RtlUserThreadStart [0x00007FFB1C46AA58+40]


In [40]:
def split_title(title):
    """
    Get the profile name from a LinkedIn page title.

    Handles titles with a leading counter, e.g. "(1) Daniel Petrov | LinkedIn",
    as well as titles without one, e.g. "Daniel Petrov | LinkedIn".

    Parameters
    ----------
    title : str
        The page title.

    Returns
    -------
    str
        The text before the first " | ", with any leading counter removed.
    """
    name_part = title.split(' | ')[0]
    # Remove a prefix like "(1) ", "1) " or "(99+) " only when it is there;
    # dropping the first word unconditionally would lose the first name.
    return re.sub(r'^\(?\d+\+?\)\s*', '', name_part).strip()

split_title('1) Daniel Petrov | LinkedIn')

'Daniel Petrov'