# Imports, constants & common functions

In [1]:
# ! pip install undetected_chromedriver

In [2]:
# ! pip install git+ssh://git@github.com/forcat2/forcommon.git

In [3]:
import requests
import bs4 as bs
from sklearn.model_selection import train_test_split
import time
import random
from pathlib import Path
import pandas as pd
import undetected_chromedriver as uc

from forcommon.time.time_utils import convert_to_seconds

In [4]:
# Configurable constants
URL_FORMAT = 'https://fragment.com/username/{name}'
STATUS_SELECTOR = '#aj_content > main > section.tm-section.tm-auction-section > div.tm-section-header > h2 > span.tm-section-header-status'
PRICE_SELECTOR = '#aj_content > main > section.tm-section.clearfix > div.tm-table-wrap > table > tbody > tr > td:nth-child(1) > div > div'

DATA_DIR = Path('./data')

# Inconsistent variable type demo 1

In [5]:
name = 'angela'
response = requests.get(URL_FORMAT.format(name=name))
soup = bs.BeautifulSoup(response.text, 'html')
status = soup.select_one(STATUS_SELECTOR)
status_text = status.text
print(f'Status of "{name}":', status_text)

Status of "angela": Sold


In [6]:
name = 'global'
response = requests.get(URL_FORMAT.format(name=name))
soup = bs.BeautifulSoup(response.text, 'html')
status = soup.select_one(STATUS_SELECTOR).text
print(f'Status of "{name}":', status)

Status of "global": Taken


# Inconsistent variable type demo 2

In [7]:
name = 'angela'
response = requests.get(URL_FORMAT.format(name=name))
soup = bs.BeautifulSoup(response.text, 'html')
status_element = soup.select_one(STATUS_SELECTOR)
status_text = status_element.text
print(f'Status of "{name}":', status_text)

Status of "angela": Sold


In [8]:
name = 'global'
response = requests.get(URL_FORMAT.format(name=name))
soup = bs.BeautifulSoup(response.text, 'html')
price = soup.select_one(PRICE_SELECTOR)
price_text = price.text

print(f'Price of "{name}":', price_text)

Price of "global": 7


# Inconsistent variable sense demo

In [9]:
X_test = 15

In [10]:
# ...

In [11]:
df = pd.DataFrame({'value': range(100)})

In [12]:
X_train, X_test = train_test_split(df, test_size=0.3)

# Self descriptive variables

In [13]:
# used in a sense -- value to test
X_test = 15
# -> 
value_to_test = 15

In [14]:
# Acceptable use cases of shorn names

In [15]:
#1 within relatively loops

users = ['Adam', 'Bob', 'Coty']
for u in users:
    response = requests.get(URL_FORMAT.format(name=u))
    soup = bs.BeautifulSoup(response.text, 'html')
    status_element = soup.select_one(STATUS_SELECTOR)
    if status_element is None:
        print(f'No status for {u}')
        continue
    status_text = status_element.text
    print(f'Status of "{u}":', status_text)
    time.sleep(0.5)

Status of "Adam": Sold
No status for Bob
Status of "Coty": Available


In [16]:
#2 within short & clear functions

def arithmetic_mean(a, b):
    return (a + b) / 2

In [17]:
#3 -- when short name is defined by the domain formula

def binomial_distribution(n, p, size=1):
    return [sum(random.random() < p for _ in range(n)) for _ in range(size)]


n = 10
p = 0.5
print(binomial_distribution(n, p, size=5)) 

[5, 4, 7, 4, 3]


# pathlib.Path demo

In [18]:
DATA_DIR.mkdir(exist_ok=True, parents=True)

In [19]:
DATA_DIR.resolve()

PosixPath('/Users/avorobyov/_Repos/deepstudy5/jupyter_best_practices/data')

In [20]:
DATA_DIR.resolve().parent.parent

PosixPath('/Users/avorobyov/_Repos/deepstudy5')

In [21]:
df.to_parquet(DATA_DIR / 'dataset.parquet')


# Logical blocks separation

In [22]:
import pandas as pd
from tqdm.auto import tqdm
import time
from IPython.core.display import display, HTML

  from IPython.core.display import display, HTML


In [23]:
def create_driver(use_subprocess: bool):
    options = uc.ChromeOptions()
    if True:
        options.add_argument("--disable-extensions")
        options.add_argument(
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.5672.126 Safari/537.36")
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_argument('--no-sandbox')
        options.add_argument('--blink-settings=imagesEnabled=false')

    if True:
        prefs = {
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'disable-popup-blocking': True,
            'safebrowsing.enabled': True,
            'block_third_party_cookies': True,

            # Disable downloading images
            'profile.managed_default_content_settings.images': 2,
        }
        options.add_experimental_option('prefs', prefs)


    print("Creating the driver")
    result = uc.Chrome(use_subprocess=use_subprocess, options=options, desired_capabilities=None)
    return result


    print("Creating the driver")
    result = uc.Chrome(use_subprocess=use_subprocess, options=options, desired_capabilities=None)
    return result