# In [1]:
import pandas as pd
from datetime import date
import requests
from bs4 import BeautifulSoup

# Create a function to extract data from a URL
def extract_data(url, timeout=10):
    """Download *url* and return the text of its ``.entry-content`` element.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block the scrape forever.

    Returns:
        The element's text fragments joined by newlines, ``"No Data"`` when
        the page has no ``.entry-content`` element, or ``"No Workout Today"``
        when the request fails or the response status is not 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are treated like any other failed
        # fetch so one bad URL does not abort a long batch of requests.
        print(f"Failed to retrieve data from {url}: {exc}")
        return "No Workout Today"

    if response.status_code != 200:
        print(f"Failed to retrieve data from {url}")
        return "No Workout Today"

    soup = BeautifulSoup(response.content, "html.parser")
    extracted_data = soup.select_one(".entry-content")
    if extracted_data is None:
        print(f"No data found in {url}")
        return "No Data"
    return extracted_data.get_text(separator="\n", strip=True)

# Create a function to extract sections from text
def extract_section(text, start_marker, end_marker):
    """Return the stripped text found between two markers.

    The section begins immediately after the first occurrence of
    ``start_marker``. It ends just before the next occurrence of
    ``end_marker``; when ``end_marker`` is empty or does not appear after the
    start, the section runs to the end of ``text``. An empty string is
    returned when ``start_marker`` is not present at all.
    """
    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return ''

    # Skip past the marker itself so it is not part of the result.
    body_start = marker_pos + len(start_marker)

    # -1 doubles as "no usable end marker": either none was requested or it
    # was not found after the start of the section.
    body_end = text.find(end_marker, body_start) if end_marker else -1
    if body_end == -1:
        return text[body_start:].strip()
    return text[body_start:body_end].strip()


# Build one row per calendar day of 2022, plus the pieces needed for each URL
start = date(2022, 1, 1)
end = date(2022, 12, 31)
dates = pd.date_range(start, end, freq="D", name='Date').to_frame()
# month_name()/day_name() default to English regardless of the system locale,
# which is what both the URL pattern and the 'Sunday' filter below rely on.
dates["Month"] = dates['Date'].dt.month_name()
# Unpadded day of month; strftime('%-d') is a platform extension that fails on Windows.
dates["Day"] = dates['Date'].dt.day.astype(str)
dates["Year"] = dates['Date'].dt.strftime('%Y')
dates["Day_name"] = dates['Date'].dt.day_name()
# no workouts posted on Sundays; .copy() makes the filtered frame independent
# so the column assignments below do not raise SettingWithCopyWarning
dates = dates.loc[dates["Day_name"] != 'Sunday'].copy()
dates['url'] = 'https://www.crossfitinvictus.com/wod/' + dates.Month + '-' + dates.Day + '-' + dates.Year + '-performance/'

# Fetch the page text for every URL (one HTTP request per row)
dates['ExtractedData'] = dates['url'].apply(extract_data)

# Split the page text into its sections using the headings as markers
dates["Warm_Up"] = dates['ExtractedData'].apply(lambda x: extract_section(x, "Warm-Up", "A."))
dates["A"] = dates['ExtractedData'].apply(lambda x: extract_section(x, "A.", "B."))
dates["B"] = dates['ExtractedData'].apply(lambda x: extract_section(x, "B.", ""))

# Drop the raw page text now that the sections have been extracted
dates.drop(columns=['ExtractedData'], inplace=True)

# Export the result to CSV without the date index (the 'Date' column is kept)
csv_filename = '/Python/data/cf_invictus_WOD_data_2022.csv'
dates.to_csv(csv_filename, index=False)

# :

# :