In [None]:
import pandas as pd
from tqdm.auto import tqdm
import time
from IPython.core.display import display, HTML

# Use requests.get

In [None]:
import requests

In [None]:
# Fetch the Fragment page for a single username.
# A timeout is passed because requests otherwise waits indefinitely when the
# server accepts the connection but never answers, which would hang the kernel.
response = requests.get('https://fragment.com/username/angela', timeout=10)

In [None]:
# Render the body of the fetched response as HTML in the cell output.
page_markup = HTML(response.text)
display(page_markup)

# Extracting data from HTML

In [None]:
import bs4 as bs

# Fetch the page again so this cell can run on its own; the timeout keeps a
# stalled connection from blocking the kernel forever.
response = requests.get('https://fragment.com/username/angela', timeout=10)

# NOTE(review): 'html' names a markup type rather than a concrete parser, so
# BeautifulSoup presumably picks whichever HTML parser is installed and the
# parsed tree may differ between machines — confirm.
soup = bs.BeautifulSoup(response.text, 'html')

# This selector ends in `.tm-status-unavail`, so it only matches a status span
# that carries that class. select_one returns None when nothing matches, in
# which case the `.text` access below raises AttributeError.
status_element = soup.select_one('#aj_content > main > section.tm-section.tm-auction-section > div.tm-section-header > h2 > span.tm-section-header-status.tm-status-unavail')
status_element.text


In [None]:
# Pull the price out of the page parsed in the previous cell: the selector
# targets the first cell of the first table inside `div.tm-table-wrap`.
PRICE_SELECTOR = '#aj_content > main > section.tm-section.clearfix > div.tm-table-wrap > table > tbody > tr > td:nth-child(1) > div > div'
price_element = soup.select_one(PRICE_SELECTOR)
price_element.text

# Scraping in a loop

In [None]:
# Scrape status and price for a handful of usernames.
#
# Note: the status selector used earlier ended in
# `span.tm-section-header-status.tm-status-unavail`, which was too specific.
# Dropping the `.tm-status-unavail` class lets it match the status span
# whatever status class it carries.
STATUS_SELECTOR = '#aj_content > main > section.tm-section.tm-auction-section > div.tm-section-header > h2 > span.tm-section-header-status'
PRICE_SELECTOR = '#aj_content > main > section.tm-section.clearfix > div.tm-table-wrap > table > tbody > tr > td:nth-child(1) > div > div'

target_names = ['angela', 'mike', 'steve', 'marijose']
accumulator = []
for n in target_names:
    try:
        # Timeout so that one stalled request cannot hang the whole loop.
        response = requests.get(f'https://fragment.com/username/{n}', timeout=10)
        soup = bs.BeautifulSoup(response.text, 'html')
        status_element = soup.select_one(STATUS_SELECTOR)
        price_element = soup.select_one(PRICE_SELECTOR)
        # select_one returns None when nothing matches; store None in that
        # case so the name still appears in the results.
        accumulator.append({
            'name': n,
            'status': None if status_element is None else status_element.text,
            'price': None if price_element is None else price_element.text,
        })
    except Exception as ex:
        # Best effort: report the failure and carry on with the next name.
        print(f'Got exception scraping "{n}". Exception: {ex}')

In [None]:
# Tabulate the scraped records (one row per name) and show them.
sample_results = pd.DataFrame(data=accumulator)
sample_results

# Scraping at a larger scale

In [None]:
# Load the forenames dataset and preview its first two rows.
# Later cells read its 'Romanized Name' and 'Country' columns.
names_df = pd.read_csv('./data/common-forenames-by-country.csv')
names_df.head(2)

In [None]:
# Build a lower-case copy of each romanised name with every character that is
# neither a word character nor whitespace removed.
#
# `regex=True` is required: since pandas 2.0 `Series.str.replace` treats the
# pattern as a literal string by default, so without it nothing is stripped.
# The raw string avoids the invalid `\w` / `\s` escape sequences of a plain
# string literal.
names_df['name_normalised'] = (
    names_df['Romanized Name']
    .str.replace(r'[^\w\s]', '', regex=True)
    .str.lower()
)

In [None]:
# Keep only the normalised names of the rows whose Country is 'US'.
is_us_row = names_df['Country'] == 'US'
target_names = names_df.loc[is_us_row, 'name_normalised'].values
target_names

In [None]:
# Scrape status and price for every name in `target_names`, with a progress bar.
STATUS_SELECTOR = '#aj_content > main > section.tm-section.tm-auction-section > div.tm-section-header > h2 > span.tm-section-header-status'
PRICE_SELECTOR = '#aj_content > main > section.tm-section.clearfix > div.tm-table-wrap > table > tbody > tr > td:nth-child(1) > div > div'

accumulator = []
for n in tqdm(target_names):
    try:
        # Timeout so that one stalled request cannot hang the whole run.
        response = requests.get(f'https://fragment.com/username/{n}', timeout=10)
        soup = bs.BeautifulSoup(response.text, 'html')
        status_element = soup.select_one(STATUS_SELECTOR)
        price_element = soup.select_one(PRICE_SELECTOR)
        # select_one returns None when nothing matches. Guard both lookups so
        # a page without a status span still yields a row instead of raising
        # AttributeError and being dropped.
        accumulator.append({
            'name': n,
            'status': None if status_element is None else status_element.text,
            'price': None if price_element is None else price_element.text.strip(),
        })
    except Exception as ex:
        # Best effort: report the failure and carry on with the next name.
        print(f'Got exception scraping "{n}". Exception: {ex}')
    # Pause after every attempt, failed ones included, so that errors do not
    # turn the loop into a burst of back-to-back requests.
    time.sleep(0.5)

In [None]:
# Tabulate the scraped records (one row per name) and show them.
us_results = pd.DataFrame(data=accumulator)
us_results

In [None]:
# Improving price scraping: look for the price in two places on the page.
#
# PRICE1 is the table inside `div.tm-table-wrap`; PRICE2 is the bid-info box of
# the auction section. Per the original note, the PRICE2 selector copied from
# Chrome contained a `tr` step (`tbody > tr > td:nth-child(1)`) that Chrome had
# added, so the selector below goes straight from `tbody` to `td`.
STATUS_SELECTOR = '#aj_content > main > section.tm-section.tm-auction-section > div.tm-section-header > h2 > span.tm-section-header-status'
PRICE1_SELECTOR = '#aj_content > main > section.tm-section.clearfix > div.tm-table-wrap > table > tbody > tr > td:nth-child(1) > div > div'
PRICE2_SELECTOR = '#aj_content > main > section.tm-section.tm-auction-section > div.tm-section-box.tm-section-bid-info > table > tbody > td:nth-child(1) > div > div'

accumulator = []
for n in tqdm(target_names):
    try:
        # Timeout so that one stalled request cannot hang the whole run.
        response = requests.get(f'https://fragment.com/username/{n}', timeout=10)
        soup = bs.BeautifulSoup(response.text, 'html')
        status_element = soup.select_one(STATUS_SELECTOR)
        price1_element = soup.select_one(PRICE1_SELECTOR)
        price2_element = soup.select_one(PRICE2_SELECTOR)
        # select_one returns None when nothing matches. Guard every lookup so
        # a page without a status span still yields a row instead of raising
        # AttributeError and being dropped.
        accumulator.append({
            'name': n,
            'status': None if status_element is None else status_element.text,
            'price1': None if price1_element is None else price1_element.text.strip(),
            'price2': None if price2_element is None else price2_element.text.strip(),
        })
    except Exception as ex:
        # Best effort: report the failure and carry on with the next name.
        print(f'Got exception scraping "{n}". Exception: {ex}')
    # Pause after every attempt, failed ones included, so that errors do not
    # turn the loop into a burst of back-to-back requests.
    time.sleep(0.5)



In [None]:
# Tabulate the scraped records (one row per name) and show them.
us_price_results = pd.DataFrame(data=accumulator)
us_price_results

# A case of requests.get not working

In [None]:
# Request a page where a plain HTTP fetch does not return the data of interest.
# The `#angela` part is a URL fragment, which is not sent to the server as part
# of the HTTP request, so the response is the same document for any name.
# NOTE(review): presumably the site does the name lookup in browser-side
# JavaScript, which requests does not execute — confirm.
# The timeout keeps a stalled connection from blocking the kernel forever.
response = requests.get('https://dns.ton.org/#angela', timeout=10)

In [None]:
# Render the body of the fetched response as HTML in the cell output.
page_markup = HTML(response.text)
display(page_markup)