# Imports

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd
import json

# Configuración Driver

In [3]:
options = Options()
options.add_argument("--headless")  # Ejecutar en modo sin ventana (opcional)
options.add_argument("--disable-blink-features=AutomationControlled")  # Evitar detección de Selenium
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

driver = webdriver.Chrome(options=options)

# Eliminar la propiedad 'navigator.webdriver'
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": """
        Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
        })
    """
})

{'identifier': '2'}

# Petición de la página

In [3]:
driver.get("https://www.muscleandstrength.com/workout-routines")
time.sleep(2)

# Aceptar Cookies

In [4]:
try:
    cookie_button = driver.find_element(By.XPATH, '//*[@id="iubenda-cs-banner"]/div/div/div/div[3]/div[2]/button[2]')
    cookie_button.click()
    print("Cookies aceptadas.")
except:
    print("No se encontró el banner de cookies o ya está aceptado.")

Cookies aceptadas.


# Localizar las categorías de rutinas de entrenamiento

In [5]:
categories = driver.find_elements(By.XPATH, '//*[@id="block-system-main"]/div/div[1]/div//a')
category_links = [category.get_attribute("href") for category in categories]


# Creamos una lista para guardar los datos de cada rutina

In [6]:
workout_data = []

# Obtenemos los enlaces de las rutinas

In [7]:
try:
    category_section = driver.find_element(By.XPATH, '//*[@id="block-system-main"]/div/div[1]/div')
    category_links = [category.get_attribute("href") for category in category_section.find_elements(By.TAG_NAME, "a")]
    print(f"Encontradas {len(category_links)} categorías.")
    category_links = list(set(category_links))
    print(f"Categorías únicas: {len(category_links)}")
except Exception as e:
    print(f"Error al obtener categorías: {e}")
    driver.quit()
    exit()

Encontradas 40 categorías.
Categorías únicas: 20


# Los enlaces de las categorías de rutinas se repiten en la página

In [8]:
print(category_links)
category_links = list(set(category_links))
print(f"Categorías únicas: {len(category_links)}")

['https://www.muscleandstrength.com/workouts/fat-loss', 'https://www.muscleandstrength.com/workouts/abs', 'https://www.muscleandstrength.com/workouts/celebrity', 'https://www.muscleandstrength.com/workouts/other', 'https://www.muscleandstrength.com/workouts/beginner', 'https://www.muscleandstrength.com/workouts/men', 'https://www.muscleandstrength.com/workouts/biceps', 'https://www.muscleandstrength.com/workouts/sports', 'https://www.muscleandstrength.com/workouts/bodyweight', 'https://www.muscleandstrength.com/workouts/chest', 'https://www.muscleandstrength.com/workouts/home', 'https://www.muscleandstrength.com/workouts/shoulders', 'https://www.muscleandstrength.com/workouts/strength', 'https://www.muscleandstrength.com/workouts/muscle-building', 'https://www.muscleandstrength.com/workouts/back', 'https://www.muscleandstrength.com/workouts/women', 'https://www.muscleandstrength.com/workouts/triceps', 'https://www.muscleandstrength.com/workouts/cardio', 'https://www.muscleandstrength.c

# Accedemos a cada rutina y obtenemos los datos de repeticiones y resumenes

In [9]:
# Iterar sobre cada categoría
for category_link in category_links:
    driver.get(category_link)
    time.sleep(3)

    print(f"Procesando categoría: {category_link}")

# Extraer enlaces de todas las rutinas en la categoría
    try:
        workout_section = driver.find_element(By.XPATH, '//*[@id="mnsview-list"]/div[2]/div[1]')
        routine_links = [
            routine.find_element(By.TAG_NAME, "a").get_attribute("href")
            for routine in workout_section.find_elements(By.CLASS_NAME, "cell.small-12.bp600-6")
        ]
        print(f"  Encontradas {len(routine_links)} rutinas en esta categoría.")
    except Exception as e:
        print(f"  Error al procesar la categoría {category_link}: {e}")
        routine_links = []

    # Iterar sobre cada rutina
    for routine_link in routine_links:
        try:
            driver.get(routine_link)
            time.sleep(3)

            # Extraer título de la rutina
            title = driver.find_element(By.TAG_NAME, "h1").text
            print(f"    Procesando rutina: {title}")

            # Localiza la tabla resumen
            try:
                table = driver.find_element(By.CLASS_NAME, "node-stats-block")
                rows = table.find_elements(By.TAG_NAME, "li")  # Encuentra todas las filas
            except NoSuchElementException:
                print("No se pudo encontrar la tabla. Verifica la estructura del HTML.")
                driver.quit()
                exit()
            
            # Extrae los datos
            workout_summary = {}
            for row in rows:
                try:
                    key_element = row.find_element(By.TAG_NAME, "span")
                    key = key_element.text.strip() if key_element else 'N/A'
                    
                    try:
                        value_element = row.find_element(By.TAG_NAME, "div")
                        value = value_element.text.strip() if value_element else row.text.strip().replace(key, '').strip()
                    except NoSuchElementException:
                        value = row.text.strip().replace(key, '').strip()
                except Exception as e:
                    print(f"    Error al procesar rutina {routine_link}: {e}")
                    continue
                
                workout_summary[key] = value

            # Extraer tablas bajo encabezados h4
            try:
                tables = driver.find_elements(By.XPATH, '//h4/following-sibling::table')
                table_data = []
                for table in tables:
                    h4_element = table.find_element(By.XPATH, 'preceding-sibling::h4[1]')
                    table_title = h4_element.text if h4_element else "Sin título"
                    rows = table.find_elements(By.TAG_NAME, "tr")
                    table_content = []
                    for row in rows:
                        cols = row.find_elements(By.TAG_NAME, "td")
                        cols_text = [col.text.strip() for col in cols]
                        table_content.append(cols_text)
                    table_data.append({"title": table_title, "content": table_content})
            except Exception as e:
                print(f"      Error al extraer tablas: {e}")
                table_data = []

            # Guardar datos de la rutina
            workout_data.append({
                "Workout Title": title,
                "Category": category_link,
                "Routine Link": routine_link,
                "Tables": table_data,
                "Summary": workout_summary
            })

        except Exception as e:
            print(f"    Error al procesar rutina {routine_link}: {e}")
            continue
        # Cerrar el navegador
driver.quit()

# Guardar los datos en un archivo JSON
with open("workout_con_resumen.json", "w") as file:
    json.dump(workout_data, file, indent=4)

print("Proceso finalizado. Datos guardados en 'workout_con_resumen.json'.")

Procesando categoría: https://www.muscleandstrength.com/workouts/fat-loss
  Encontradas 50 rutinas en esta categoría.
    Procesando rutina: 12 Week Fat Destroyer: Complete Fat Loss Workout & Diet Program
    Procesando rutina: 8 Week Fat Loss Workout for Beginners
    Procesando rutina: 8 Week Beginner Fat Loss Workout for Women
    Procesando rutina: Spring Shred: 8 Week Fat Loss Workout Plan
    Procesando rutina: Muscle & Strength’s 10 Week Women’s Fat Loss Workout
    Procesando rutina: Doug's 6 Day Cutting Routine
    Procesando rutina: Fat Blaster: 6 Day Weight/Cardio Cutting Workout
    Procesando rutina: 4 Day Dumbbell and Bodyweight Fat Loss Workout
    Procesando rutina: Fast & Furious: 21 Day Shredding Workout Cycle And Diet
    Procesando rutina: The Fat Incinerator: 8 Week Fat Burning Workout
    Procesando rutina: 3 Day Full Body Toning Workout for Women
    Procesando rutina: Doug's 5 Day High Definition Routine
    Procesando rutina: 10 Weeks to Shredded: Maximize Your

# Eliminamos las rutinas en las que no se han podido obtener la información debido a que tienen un formato diferente

In [2]:
df = pd.read_json("workout_con_resumen.json")
print(len(df))
# Contar las veces que la columna 'Tables' tiene []
empty_tables_count = df['Tables'].apply(lambda x: x == []).sum()
print(f"El número de veces que la columna 'Tables' tiene [] es: {empty_tables_count}")

684
El número de veces que la columna 'Tables' tiene [] es: 339


In [3]:
df_general = df[df['Tables'].apply(lambda x: x != [])]
print(df_general)


                                         Workout Title  \
0    12 Week Fat Destroyer: Complete Fat Loss Worko...   
3           Spring Shred: 8 Week Fat Loss Workout Plan   
4    Muscle & Strength’s 10 Week Women’s Fat Loss W...   
5                         Doug's 6 Day Cutting Routine   
6     Fat Blaster: 6 Day Weight/Cardio Cutting Workout   
..                                                 ...   
676  Scarlett Johansson Inspired Workout Routine: T...   
677  The Calorie Destroyer: 12 Week Full Body HIIT ...   
680  Jai Courtney Workout: How Jai Got Swole for Te...   
682  Chadwick Boseman Inspired Workout: Train Like ...   
683       2 Day Intense Fat Loss & Muscle Tone Workout   

                                              Category  \
0    https://www.muscleandstrength.com/workouts/fat...   
3    https://www.muscleandstrength.com/workouts/fat...   
4    https://www.muscleandstrength.com/workouts/fat...   
5    https://www.muscleandstrength.com/workouts/fat...   
6    https://

# Creamos un DataFrame con los datos obtenidos

In [4]:
category_dfs = {}

# Iterar sobre cada fila del DataFrame general
for index, row in df_general.iterrows():
    category = row['Category'].split('/')[-1]  # Obtener la categoría de la URL
    if category not in category_dfs:
        category_dfs[category] = []
    category_dfs[category].append(row)


# Mostrar cantidad de rutinas de cada categoría

In [5]:
for category, rows in category_dfs.items():
    print(f"{category}: {len(rows)}")

fat-loss: 37
abs: 7
celebrity: 40
other: 2
beginner: 18
men: 36
biceps: 8
sports: 5
bodyweight: 7
chest: 8
home: 22
shoulders: 10
strength: 20
muscle-building: 36
back: 9
women: 29
triceps: 8
cardio: 4
legs: 10
full-body: 29


# Ejecutar si queremos guardar cada categoría en un archivo csv 

In [6]:
#df_general.to_csv("rutinas_sin_nulas.csv", index=False)

# Filtrar las filas que no contienen las URLs específicas en la columna 'Category'

In [7]:
df_general = df_general[~df_general['Category'].str.contains('https://www.muscleandstrength.com/workouts/women|https://www.muscleandstrength.com/workouts/men')]


In [8]:
print(df_general)

                                         Workout Title  \
0    12 Week Fat Destroyer: Complete Fat Loss Worko...   
3           Spring Shred: 8 Week Fat Loss Workout Plan   
4    Muscle & Strength’s 10 Week Women’s Fat Loss W...   
5                         Doug's 6 Day Cutting Routine   
6     Fat Blaster: 6 Day Weight/Cardio Cutting Workout   
..                                                 ...   
676  Scarlett Johansson Inspired Workout Routine: T...   
677  The Calorie Destroyer: 12 Week Full Body HIIT ...   
680  Jai Courtney Workout: How Jai Got Swole for Te...   
682  Chadwick Boseman Inspired Workout: Train Like ...   
683       2 Day Intense Fat Loss & Muscle Tone Workout   

                                              Category  \
0    https://www.muscleandstrength.com/workouts/fat...   
3    https://www.muscleandstrength.com/workouts/fat...   
4    https://www.muscleandstrength.com/workouts/fat...   
5    https://www.muscleandstrength.com/workouts/fat...   
6    https://

# Verificamos los datos que obtenemos de la tabla resumen

In [9]:
df_fat_loss = df_general[df_general['Category'].str.contains('fat-loss')]

print(df_fat_loss.iloc[1]['Summary'])


{'Main Goal': 'Lose Fat', 'Workout Type': 'Split', 'Training Level': 'Advanced', 'Program Duration': '8 weeks', 'Days Per Week': '5', 'Time Per Workout': '45-60 minutes', 'Equipment Required': 'Barbell, Bodyweight, Cables, Dumbbells, EZ Bar, Machines', 'Target Gender': 'Male & Female', 'Recommended Supps': 'Protein Powder\nFish Oil\nMultivitamin\nFat Burner (Optional)\nPre-Workout (Optional)', 'Workout PDF': 'Download Workout'}


# Separamos los datos de la tabla resumen en distintas columnas

In [10]:

summary_expanded = pd.json_normalize(df_general['Summary'])

df_general_expanded = pd.concat([df_general, summary_expanded], axis=1)

df_general_expanded.drop(columns=['Summary'], inplace=True)

df_general_expanded['Recommended Supps'] = df_general_expanded['Recommended Supps'].str.replace('\n', ', ')

df_general_expanded.drop(columns=['Workout PDF'], inplace=True)


In [11]:
# Mostrar el dataframe resultante
print(df_general_expanded)

                                         Workout Title  \
0    12 Week Fat Destroyer: Complete Fat Loss Worko...   
3           Spring Shred: 8 Week Fat Loss Workout Plan   
4    Muscle & Strength’s 10 Week Women’s Fat Loss W...   
5                         Doug's 6 Day Cutting Routine   
6     Fat Blaster: 6 Day Weight/Cardio Cutting Workout   
..                                                 ...   
270                                                NaN   
274                                                NaN   
277                                                NaN   
278                                                NaN   
279                                                NaN   

                                              Category  \
0    https://www.muscleandstrength.com/workouts/fat...   
3    https://www.muscleandstrength.com/workouts/fat...   
4    https://www.muscleandstrength.com/workouts/fat...   
5    https://www.muscleandstrength.com/workouts/fat...   
6    https://

# Separamos las divisiones de las rutinas en distintas filas

In [12]:
# Expansión del DataFrame
data_expanded = []
df = df_general_expanded
# Iteramos por cada fila del DataFrame
for idx, row in df.iterrows():
    tables = row.get('Tables', [])
    if not isinstance(tables, list):
        continue  # Saltar si no es una lista

    for dic in tables:
        if not isinstance(dic, dict):
            continue  # Saltar si no es un diccionario

        # Extraer datos con manejo seguro de claves
        title = dic.get('title')
        content = dic.get('content', [])

        if title is not None and isinstance(content, list) and len(content) > 1:
            # Crear una nueva fila con la información expandida
            data_expanded.append({
                'title': title,
                'content': content[1:],  # Saltar el primer elemento si existe
                **{col: row[col] for col in df.columns if col != 'Tables'}
            })

# Crear un nuevo DataFrame con los datos expandidos
df_expanded = pd.DataFrame(data_expanded)

# Eliminar la columna 'Tables' si existiera (en el DataFrame original)
df_expanded.drop(columns=['Tables'], errors='ignore', inplace=True)

print("DataFrame expandido:")
df_expanded.head()

DataFrame expandido:


Unnamed: 0,title,content,Workout Title,Category,Routine Link,Main Goal,Workout Type,Training Level,Program Duration,Days Per Week,Time Per Workout,Equipment Required,Target Gender,Recommended Supps
0,Upper A,"[[Incline Bench Press, 3, 8-10], [One Arm Dumb...",12 Week Fat Destroyer: Complete Fat Loss Worko...,https://www.muscleandstrength.com/workouts/fat...,https://www.muscleandstrength.com/workouts/12-...,Lose Fat,Full Body,Beginner,12 weeks,4,30-45 minutes,"Barbell, Bodyweight, Cables, Dumbbells, Kettle...",Male & Female,"Fat Burner, Whey Protein, BCAA Intra-Workout, ..."
1,Lower A,"[[Squats, 3, 8-10], [Leg Curl, 3, 12-15], [Leg...",12 Week Fat Destroyer: Complete Fat Loss Worko...,https://www.muscleandstrength.com/workouts/fat...,https://www.muscleandstrength.com/workouts/12-...,Lose Fat,Full Body,Beginner,12 weeks,4,30-45 minutes,"Barbell, Bodyweight, Cables, Dumbbells, Kettle...",Male & Female,"Fat Burner, Whey Protein, BCAA Intra-Workout, ..."
2,Upper B,"[[Dumbbell Bench Press, 3, 10], [Barbell Row, ...",12 Week Fat Destroyer: Complete Fat Loss Worko...,https://www.muscleandstrength.com/workouts/fat...,https://www.muscleandstrength.com/workouts/12-...,Lose Fat,Full Body,Beginner,12 weeks,4,30-45 minutes,"Barbell, Bodyweight, Cables, Dumbbells, Kettle...",Male & Female,"Fat Burner, Whey Protein, BCAA Intra-Workout, ..."
3,Lower B,"[[Leg Press, 3, 15-20], [Stiff Leg Deadlift, 3...",12 Week Fat Destroyer: Complete Fat Loss Worko...,https://www.muscleandstrength.com/workouts/fat...,https://www.muscleandstrength.com/workouts/12-...,Lose Fat,Full Body,Beginner,12 weeks,4,30-45 minutes,"Barbell, Bodyweight, Cables, Dumbbells, Kettle...",Male & Female,"Fat Burner, Whey Protein, BCAA Intra-Workout, ..."
4,Day 1: Chest & Abs,"[[1. Incline Barbell Bench Press, 2x8-10, 3x10...",Spring Shred: 8 Week Fat Loss Workout Plan,https://www.muscleandstrength.com/workouts/fat...,https://www.muscleandstrength.com/workouts/8-w...,Lose Fat,Split,Advanced,6 weeks,6,60-75 minutes,"Barbell, Bodyweight, Cables, Dumbbells, EZ Bar...",Male & Female,"Protein Powder, Meal Replacement, Multivitamin..."


# Eliminamos las filas que no contienen datos en la columna 'Workout Type'

In [13]:
df_expanded = df_expanded.dropna(subset=['Workout Type'])
# Mostrar el DataFrame resultante
print(df_expanded)

                                                 title  \
0                                              Upper A   
1                                              Lower A   
2                                              Upper B   
3                                              Lower B   
4                                   Day 1: Chest & Abs   
..                                                 ...   
534      Workout 2 (Turbo Tuesday) Cardio Conditioning   
535         Workout 3 (Wild Out Wednesdays) Legs & Abs   
536  Workout 4: (Turned Up Thursday) Upper Body Wor...   
537               Workout 5 (Fired Up Friday) Cardio 2   
538  Workout 6 (Super Saturdays) Strength Workout w...   

                                               content  \
0    [[Incline Bench Press, 3, 8-10], [One Arm Dumb...   
1    [[Squats, 3, 8-10], [Leg Curl, 3, 12-15], [Leg...   
2    [[Dumbbell Bench Press, 3, 10], [Barbell Row, ...   
3    [[Leg Press, 3, 15-20], [Stiff Leg Deadlift, 3...   
4    [[1. Inc

# Eliminamos las filas que contienen la palabrea 'Phase'

In [15]:
df_expanded = df_expanded[~df_expanded['title'].str.contains('Phase', case=False)]

# Mostrar el DataFrame resultante
print(df_expanded)

                                                 title  \
0                                              Upper A   
1                                              Lower A   
2                                              Upper B   
3                                              Lower B   
4                                   Day 1: Chest & Abs   
..                                                 ...   
534      Workout 2 (Turbo Tuesday) Cardio Conditioning   
535         Workout 3 (Wild Out Wednesdays) Legs & Abs   
536  Workout 4: (Turned Up Thursday) Upper Body Wor...   
537               Workout 5 (Fired Up Friday) Cardio 2   
538  Workout 6 (Super Saturdays) Strength Workout w...   

                                               content  \
0    [[Incline Bench Press, 3, 8-10], [One Arm Dumb...   
1    [[Squats, 3, 8-10], [Leg Curl, 3, 12-15], [Leg...   
2    [[Dumbbell Bench Press, 3, 10], [Barbell Row, ...   
3    [[Leg Press, 3, 15-20], [Stiff Leg Deadlift, 3...   
4    [[1. Inc

# Buscamos las distintos objetivos de las rutinas

In [16]:
unique_main_goals = df_expanded['Main Goal'].unique()
print(unique_main_goals)

['Lose Fat' 'Build Muscle' 'General Fitness' 'Increase Strength'
 'Sports Performance' 'Increase Endurance']


# Buscamos diferentes tipos de dificultad

In [17]:
unique_training_levels = df_expanded['Training Level'].unique()
print(unique_training_levels)

['Beginner' 'Advanced' 'Intermediate']


# Creamos el CSV con los datos finales

In [18]:
df_expanded.to_csv('Definitivo.csv')

In [19]:
print(df_expanded)

                                                 title  \
0                                              Upper A   
1                                              Lower A   
2                                              Upper B   
3                                              Lower B   
4                                   Day 1: Chest & Abs   
..                                                 ...   
534      Workout 2 (Turbo Tuesday) Cardio Conditioning   
535         Workout 3 (Wild Out Wednesdays) Legs & Abs   
536  Workout 4: (Turned Up Thursday) Upper Body Wor...   
537               Workout 5 (Fired Up Friday) Cardio 2   
538  Workout 6 (Super Saturdays) Strength Workout w...   

                                               content  \
0    [[Incline Bench Press, 3, 8-10], [One Arm Dumb...   
1    [[Squats, 3, 8-10], [Leg Curl, 3, 12-15], [Leg...   
2    [[Dumbbell Bench Press, 3, 10], [Barbell Row, ...   
3    [[Leg Press, 3, 15-20], [Stiff Leg Deadlift, 3...   
4    [[1. Inc