In [163]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import regex as re
from tqdm import tqdm
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=(FutureWarning, DeprecationWarning))

In [146]:
def init():
    options = Options()
    options.add_argument('--headless=new')
    options.add_argument(
        '--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/80.0.3987.163 Chrome/80.0.3987.163 Safari/537.36')
    driver = webdriver.Chrome(options=options)

    return driver

driver = init()

In [147]:
def filter(links):
    pattern = re.compile(r'(WeightExercises).*')

    filtered_links = []
    for link in links:
        if re.search(pattern, str(link)):
            filtered_links.append(str(link).replace(
                "../../", "https://exrx.net/"))

    return filtered_links


def scrape(link, driver=init()):
    driver.get(link)

    page_source = driver.page_source

    soup = BeautifulSoup(page_source, "html.parser")

    links = []
    for link in soup.find_all('a'):
        links.append(link.get('href'))

    links = filter(links)

    return links

In [148]:
links = ['https://exrx.net/Lists/ExList/NeckWt', 'https://exrx.net/Lists/ExList/NeckWt#Sternocleidomastoid', 'https://exrx.net/Lists/ExList/NeckWt#Splenius', 'https://exrx.net/Lists/ExList/ShouldWt', 'https://exrx.net/Lists/ExList/ShouldWt#Anterior', 'https://exrx.net/Lists/ExList/ShouldWt#Lateral', 'https://exrx.net/Lists/ExList/ShouldWt#Posterior', 'https://exrx.net/Lists/ExList/ShouldWt#Supraspinatus', 'https://exrx.net/Lists/ExList/ArmWt', 'https://exrx.net/Lists/ExList/ArmWt#Triceps', 'https://exrx.net/Lists/ExList/ArmWt#Biceps', 'https://exrx.net/Lists/ExList/ArmWt#Brachialis', 'https://exrx.net/Lists/ExList/ForeArmWt', 'https://exrx.net/Lists/ExList/ForeArmWt#Brachioradialis', 'https://exrx.net/Lists/ExList/ForeArmWt#WristFlexors', 'https://exrx.net/Lists/ExList/ForeArmWt#Extensors', 'https://exrx.net/Lists/ExList/ForeArmWt#Pronators', 'https://exrx.net/Lists/ExList/ForeArmWt#Supinators', 'https://exrx.net/Lists/ExList/BackWt', 'https://exrx.net/Lists/ExList/BackWt#General', 'https://exrx.net/Lists/ExList/BackWt#Latissimus', 'https://exrx.net/Lists/ExList/BackWt#UpperTrap', 'https://exrx.net/Lists/ExList/BackWt#MiddleTrap', 'https://exrx.net/Lists/ExList/BackWt#Trapezius', 'https://exrx.net/Lists/ExList/BackWt#UpperTrap', 'https://exrx.net/Lists/ExList/BackWt#Rhomboids', 'https://exrx.net/Lists/ExList/BackWt#Infraspinatus',
         'https://exrx.net/Lists/ExList/BackWt#Subscapularis', 'https://exrx.net/Lists/ExList/ChestWt', 'https://exrx.net/Lists/ExList/ChestWt#General', 'https://exrx.net/Lists/ExList/ChestWt#General', 'https://exrx.net/Lists/ExList/ChestWt#Clavicular', 'https://exrx.net/Lists/ExList/ChestWt#Pectoralis', 'https://exrx.net/Lists/ExList/ChestWt#Anterior', 'https://exrx.net/Lists/ExList/WaistWt', 'https://exrx.net/Lists/ExList/WaistWt#Rectus', 'https://exrx.net/Lists/ExList/WaistWt#Transverse', 'https://exrx.net/Lists/ExList/WaistWt#Obliques', 'https://exrx.net/Lists/ExList/WaistWt#Quadratus', 'https://exrx.net/Lists/ExList/WaistWt#Erector', 'https://exrx.net/Lists/ExList/HipsWt', 'https://exrx.net/Lists/ExList/HipsWt#Gluteus', 'https://exrx.net/Lists/ExList/HipsWt#Abductors', 'https://exrx.net/Lists/ExList/HipsWt#Hip', 'https://exrx.net/Lists/ExList/HipsWt#DeepHip', 'https://exrx.net/Lists/ExList/ThighWt', 'https://exrx.net/Lists/ExList/ThighWt#Quadriceps', 'https://exrx.net/Lists/ExList/ThighWt#Hamstrings', 'https://exrx.net/Lists/ExList/ThighWt#HipAdductors', 'https://exrx.net/Lists/ExList/CalfWt', 'https://exrx.net/Lists/ExList/CalfWt#Gastrocnemius', 'https://exrx.net/Lists/ExList/CalfWt#Gastrocnemius', 'https://exrx.net/Lists/ExList/CalfWt#Soleus', 'https://exrx.net/Lists/ExList/CalfWt#Tibialis', 'https://exrx.net/Lists/ExList/CalfWt#Popliteus']

In [149]:
results = []
with tqdm(total=len(links)) as pbar:
    for link in links:
        results.extend(scrape(link, driver))
        pbar.update(1)

100%|██████████| 55/55 [01:39<00:00,  1.81s/it] 


In [150]:
results = list(set(results))
print(len(results))

1465


In [188]:
df = pd.DataFrame(columns=["exercise", "utility", "mechanics", "force", "target_muscles", "synergist_muscles", "stabilizer_muscles"])

def scrape_info(link, df, driver=init()):
    driver.get(link)

    page_source = driver.page_source

    soup = BeautifulSoup(page_source, "html.parser")

    title = soup.find("h1").text

    table = soup.find_all("table")

    classifications = []
    for row in table:
        row = row.find_all("tr")
        for td in row:
            td = td.find_all("td")
            for i in td:
                classifications.append(i.text.replace(":",""))

    if not classifications:
        classifications = ["", "", "", "", "", ""]

    target_ul = soup.find("strong", text="Target")
    target_muscles = []
    try:
        target_ul = target_ul.next_element.next_element
        for i in target_ul.find_all("a"):
            target_muscles.append(i.text)
    except (AttributeError, TypeError):
        target_muscles.append(target_ul.text if target_ul else '')

    synergists_ul = soup.find("strong", text="Synergists")
    synergist_muscles = []
    try:
        synergists_ul = synergists_ul.next_element.next_element
        for i in synergists_ul.find_all("a"):
            synergist_muscles.append(i.text)
    except (AttributeError, TypeError):
        synergist_muscles.append(synergists_ul.text if synergists_ul else '')

    stabilizers_ul = soup.find("strong", text="Stabilizers")
    stabilizer_muscles = []
    try:
        stabilizers_ul = stabilizers_ul.next_element.next_element
        for i in stabilizers_ul.find_all("a"):
            stabilizer_muscles.append(i.text)
    except (AttributeError, TypeError):
        stabilizer_muscles.append(stabilizers_ul.text if stabilizers_ul else '')

    data = pd.DataFrame({"exercise": [title], "utility": [classifications[1]], "mechanics": [classifications[3]], "force": classifications[5], "target_muscles": [target_muscles], "synergist_muscles": [synergist_muscles], "stabilizer_muscles": [stabilizer_muscles]})

    df = df.append(data, ignore_index=True)

    return df

In [189]:
with tqdm(total=len(results)) as pbar:
    for link in results:
        df = scrape_info(link, df)
        pbar.update(1)

df.to_csv("exrx.csv", index=False)

100%|██████████| 1465/1465 [1:28:41<00:00,  3.63s/it]


In [190]:
df.head(n=10)

Unnamed: 0,exercise,utility,mechanics,force,target_muscles,synergist_muscles,stabilizer_muscles
0,Cable Neck Extension (with belt),Basic or Auxiliary,Isolated,Pull,[Splenius],"[Trapezius, Upper, Levator Scapulae, Erector S...","[Pectoralis Major, Sternal, Pectoralis Major, ..."
1,Cable Standing Overhead Crunch (multi-exercise...,Auxiliary,Isolated,Pull,[Rectus Abdominis],[Obliques],"[Iliopsoas, Tensor Fasciae Latae, Rectus Femor..."
2,Cable Standing Preacher Curl,Auxiliary,Isolated,Pull,[Brachialis],"[Biceps Brachii, Brachioradialis]",[Wrist Flexors]
3,Dumbbell Lying One Arm Rear Lateral Raise,Auxiliary,Isolated,Pull,"[Deltoid, Posterior]","[Infraspinatus, Teres Minor, Supraspinatus, De...","[Flexor Carpi Radialis, Extensor Carpi Radialis]"
4,Single Leg Squat (with leg back),Basic or Auxiliary,Compound,Push,[Gluteus Maximus],"[Quadriceps, Adductor Magnus, Soleus]","[Erector Spinae, Gluteus Medius, Gluteus Minim..."
5,Suspended Front Raise,Auxiliary,Isolated,Push,"[Deltoid, Anterior]","[Pectoralis Major, Clavicular, Deltoid, Latera...","[Erector Spinae, Gluteus Maximus, Hamstrings, ..."
6,Single Leg Reclining Squat (bar),Basic or Auxiliary,Compound,Push,[Quadriceps],"[Gluteus Maximus, Adductor Magnus, Soleus, Lat...","[Gluteus Medius, Gluteus Minimus, Quadratus Lu..."
7,Machine-assisted Triceps Dip,Basic,Compound,Push,[Triceps Brachii],"[Deltoid, Anterior, Pectoralis Major, Sternal,...","[Trapezius, Lower]"
8,Vertical Leg-Hip Raise (parallel bars),Basic,Compound,Pull,[Rectus Abdominis],"[Iliopsoas, Tensor Fasciae Latae, Pectineus, S...","[Latissimus Dorsi, Deltoid, Posterior, Pectora..."
9,Smith Standing Leg Calf Raise,Basic,Isolated,Push,[Gastrocnemius],[Soleus],[]
