In [81]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from tqdm import tqdm
import time
import pandas as pd

In [7]:
# Create the single, module-level Chrome WebDriver that every helper in this
# notebook uses through the global name `driver`.
options = Options()
# Headless: Chrome runs without opening a visible browser window.
options.headless = True
driver = webdriver.Chrome(options=options)
# Request a desktop-sized viewport.
driver.set_window_size(1920, 1080)
# NOTE(review): maximize_window() runs right after set_window_size(); which of
# the two determines the final size in headless mode is not visible here -- confirm.
driver.maximize_window()

In [99]:
# click helper, wait until page source changed
def click(button, timeout=3, polls=10):
    """Click ``button`` and wait until the page source changes.

    The page source of the global ``driver`` is captured before the click and
    then compared against the live page source up to ``polls`` times, sleeping
    ``timeout / polls`` seconds between checks.

    The previous implementation decremented ``timeout`` and incremented the
    elapsed time on every iteration, so it gave up after roughly half of the
    requested timeout. Counting polls makes the total wait match ``timeout``.

    Args:
        button: element to click; only its ``click()`` method is used.
        timeout: maximum number of seconds to wait for the page to change.
            Values <= 0 are treated as 0 (no sleeping between checks).
        polls: how many times the page source is checked while waiting
            (at least 1).

    Returns:
        True if the page source differed from the pre-click source by the end
        of the wait, False if it was still unchanged.
    """
    prev_src = driver.page_source
    button.click()
    polls = max(1, int(polls))
    unit_time = max(timeout, 0) / polls
    for _ in range(polls):
        if driver.page_source != prev_src:
            return True
        time.sleep(unit_time)
    # One last look so a change that landed during the final sleep is reported.
    return driver.page_source != prev_src
            
# login to leetcode
def login(username, password):
    """Open the LeetCode login page and submit the given credentials.

    Uses the module-level ``driver`` and the ``click`` helper. Elements are
    located with ``driver.find_element('xpath', ...)`` because the
    ``find_element_by_xpath`` shortcut was removed in Selenium 4.3; the
    two-argument form is available in Selenium 3 and 4 alike ('xpath' is the
    value of ``By.XPATH``).

    Args:
        username: value typed into the ``id_login`` input.
        password: value typed into the ``id_password`` input. Pass it in from
            the environment or ``getpass`` rather than hardcoding it in a cell.

    Raises:
        NoSuchElementException: if any of the three form elements is missing.

    Note:
        The success message is printed unconditionally once the sign-in button
        has been clicked; this function does not verify that the login
        actually succeeded.
    """
    login_url = 'https://leetcode.com/accounts/login/'
    driver.get(login_url)
    time.sleep(2)  # fixed pause before looking up the form elements
    username_field = driver.find_element('xpath', '//input[@id="id_login"]')
    password_field = driver.find_element('xpath', '//input[@id="id_password"]')
    signin_button = driver.find_element('xpath', '//button[@id="signin_btn"]')
    username_field.send_keys(username)
    password_field.send_keys(password)
    click(signin_button)
    print('successfully logged in!')

# parse the text of the company-tag wrapper into {company name: count}
def _parse_company_tags(raw_text):
    """Parse the wrapper text into a ``{company_name: num_occur}`` dict.

    Each tag is expected to render as ``name``, ``|`` and ``count`` on three
    consecutive lines. Blank entries are skipped, and a tag without a count is
    recorded with ``None`` instead of raising ``IndexError``.
    """
    companies_info = dict()
    for entry in raw_text.replace('\n|\n', '|').split('\n'):
        fields = entry.split('|')
        company_name = fields[0].strip()
        if not company_name:
            continue  # empty wrapper text or stray blank line
        companies_info[company_name] = fields[1].strip() if len(fields) > 1 else None
    return companies_info


# get company info from a problem URL
def get_problem_companies_info(problem_url):
    """Collect the company tags shown on a problem page.

    Loads ``problem_url`` in the module-level ``driver``, clicks the
    'Companies' <div>, clicks the 'More' <span> when there is one, and parses
    the text of the <div> whose class starts with 'company-tag-wrapper'.

    Elements are located with ``driver.find_element('xpath', ...)`` because
    the ``find_element_by_xpath`` shortcut was removed in Selenium 4.3; the
    two-argument form works on Selenium 3 and 4 ('xpath' is the value of
    ``By.XPATH``).

    Args:
        problem_url: URL of the problem page to open.

    Returns:
        dict mapping company name to its number of occurrences, kept as the
        string shown on the page (``None`` when a tag shows no count). The
        dict is empty when the page has no 'Companies' <div> or the wrapper
        has no text.

    Raises:
        NoSuchElementException: if the 'Companies' <div> exists but the
            company-tag wrapper cannot be found afterwards.
    """
    driver.get(problem_url)
    time.sleep(2)  # fixed pause before querying the page
    # locate and click on the 'Companies' <div>
    try:
        companies_div = driver.find_element('xpath', "//div[text()='Companies']")
    except NoSuchElementException:
        return dict()
    click(companies_div)
    # if there exists a 'More' <span>, click on it to show all companies
    try:
        show_more_span = driver.find_element('xpath', "//span[text()='More']")
    except NoSuchElementException:
        show_more_span = None
    if show_more_span is not None:
        click(show_more_span)
    # locate the <div> that leetcode places all its little "company tags"
    company_tag_wrapper = driver.find_element(
        'xpath', "//div[starts-with(@class, 'company-tag-wrapper')]")
    return _parse_company_tags(company_tag_wrapper.text)

# iterate through leetcode_problems to collect their company info
def get_problems_companies_info(leetcode_problems):
    """Collect company tags for every problem and return them in long format.

    For each row of ``leetcode_problems`` the problem page is scraped with
    ``get_problem_companies_info`` and one output row is produced per
    (problem, company) pair. Each collected row is printed as it is added.

    A failure on one problem does not stop the run: the error is printed
    together with the link of the problem that caused it (the previous log
    printed only the exception, so failures could not be traced back to a
    problem), and processing continues with the next row.

    Args:
        leetcode_problems: DataFrame with a 'LINK' and a 'NAME' column.

    Returns:
        DataFrame with columns 'problem_link', 'problem_name', 'company_name'
        and 'num_occur'; empty when nothing could be collected.
    """
    problems_companies_info = []  # list of tuples
    for _, problem in leetcode_problems.iterrows():
        problem_link = problem['LINK']
        problem_name = problem['NAME']
        try:
            companies_info = get_problem_companies_info(problem_link)
        except Exception as e:
            # log the error with enough context to retry this problem later
            print(f'failed to collect companies for {problem_link}: {e!r}')
            continue
        # pivot longer
        for company_name, num_occur in companies_info.items():
            new_row = (problem_link, problem_name, company_name, num_occur,)
            problems_companies_info.append(new_row)
            print(new_row)
    return pd.DataFrame(problems_companies_info,
                        columns=['problem_link', 'problem_name', 'company_name', 'num_occur'])

In [101]:
# Go through questions in the leetcode question list and save their company
# tags as a long-format CSV.
# NOTE(review): the `[2:3]` slice keeps only the single row at position 2, so
# just one problem is scraped -- presumably left over from a trial run; drop
# the slice to process the whole list (confirm before a full run).
leetcode_problems = pd.read_csv('../data/leetcode_problems.csv')[2:3]
problems_companies_info = get_problems_companies_info(leetcode_problems)
# index=False: the DataFrame index is not written to the CSV.
problems_companies_info.to_csv('../data/leetcode_problems_and_company.csv', index=False)

('https://leetcode.com/problems/satisfiability-of-equality-equations', 'Satisfiability of equality equations', 'Google', '5')


In [140]:
import re

# Split the markdown links "[name](link)" stored in the 'Title' column of the
# README table into separate name / link columns.
parsed_list = []
skipped_titles = []  # 'Title' cells that do not contain a markdown link

df = pd.read_csv('../data/leetcode_repo_readme.csv')
for name_and_link in df['Title']:
    # Missing cells are read as NaN (a float), which re.search cannot handle,
    # and re.search returns None when the text is not a markdown link.
    # Either case used to crash the whole cell; such rows are now skipped.
    m = re.search(r'\[(.*)\]\((.*)\)', name_and_link) if isinstance(name_and_link, str) else None
    if m is None:
        skipped_titles.append(name_and_link)
        continue
    name = m[1]
    link = m[2]
    parsed_list.append((name, link, ))

if skipped_titles:
    print(f'skipped {len(skipped_titles)} row(s) whose Title is not a markdown link')

parsed_df = pd.DataFrame(parsed_list, columns=['name', 'link'])
# The original (misspelled) name is kept so cells that refer to it still work.
pased_df = parsed_df


Unnamed: 0,name,link
0,Largest 3-Same-Digit Number in String,https://leetcode.com/problems/largest-3-same-d...
1,Minimum Consecutive Cards to Pick Up,https://leetcode.com/problems/minimum-consecut...
2,Remove Digit From Number to Maximize Result,https://leetcode.com/problems/remove-digit-fro...
3,Minimum Average Difference,https://leetcode.com/problems/minimum-average-...
4,Count Prefixes of a Given String,https://leetcode.com/problems/count-prefixes-o...
...,...,...
1333,Longest Palindromic Substring,https://leetcode.com/problems/longest-palindro...
1334,Median of Two Sorted Arrays,https://leetcode.com/problems/median-of-two-so...
1335,Longest Substring Without Repeating Characters,https://leetcode.com/problems/longest-substrin...
1336,Add Two Numbers,https://leetcode.com/problems/add-two-numbers/
