# Setup

In [3]:
import json
import re
import time
from datetime import datetime

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.expected_conditions import element_to_be_clickable
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.support.ui import WebDriverWait

In [4]:
# Read the gym identifier and the account credentials from the local
# config.json so that none of them are hard-coded in the notebook.
with open("config.json", "r") as config_file:
    config = json.load(config_file)

GYM_ID = config["GYM_ID"]  # One Playground Surry Hills
USERNAME, PASSWORD = config["USERNAME"], config["PASSWORD"]

## Helpers

In [5]:
def get_url(date, gym_id):
    """Build the mindbodyonline.com classic schedule URL for a studio and date.

    Args:
        date: Date string laid out as ``YYYYMMDD``; it is sliced positionally.
        gym_id: Value inserted as the ``studioid`` query parameter.

    Returns:
        The URL string, with the date encoded as ``MM%2fDD%2fYYYY``.
    """
    year, month, day = date[:4], date[4:6], date[6:8]
    return (
        "https://clients.mindbodyonline.com/classic/mainclass"
        f"?studioid={gym_id}"
        "&tg=&vt=&lvl=&stype=&view=&trn=0&page=&catid=&prodid="
        f"&date={month}%2f{day}%2f{year}"
        "&classid=0&prodGroupId=&sSU=&optForwardingLink=&qParam="
        "&justloggedin=&nLgIn=&pMode=0&loc=1"
    )

In [6]:
# Smoke-test get_url with today's local date formatted as YYYYMMDD.
# `today` is also the date passed to get_url in the Run section.
today = time.strftime("%Y%m%d", time.localtime())
get_url(date=today, gym_id=GYM_ID)

'https://clients.mindbodyonline.com/classic/mainclass?studioid=152065&tg=&vt=&lvl=&stype=&view=&trn=0&page=&catid=&prodid=&date=10%2f29%2f2023&classid=0&prodGroupId=&sSU=&optForwardingLink=&qParam=&justloggedin=&nLgIn=&pMode=0&loc=1'

In [7]:
def rowgetDataText(tr, coltag="td"):  # td (data) or th (header)
    """Return the stripped cell texts of one table row plus its signup button name.

    Args:
        tr: Row element exposing ``find_all`` and ``find`` (a BeautifulSoup tag).
        coltag: Cell tag to collect, ``"td"`` or ``"th"``.

    Returns:
        A list with one stripped text per matching cell, followed by the
        ``name`` attribute of the element with class ``SignupButton``, or
        ``"N/A"`` when there is no such element or it has no ``name``.
    """
    cells = tr.find_all(coltag)
    row_data = [cell.get_text(strip=True) for cell in cells]

    # The button's name is always appended, so every row gains one extra entry.
    button = tr.find(class_="SignupButton")
    row_data.append(button.get("name", "N/A") if button else "N/A")

    return row_data

In [8]:
def tableDataText(table):
    """Convert a parsed ``<table>`` element into a list of row lists.

    The first ``<tr>`` is treated as a header only when it actually contains
    ``<th>`` cells. In that case its stripped cell texts are emitted followed
    by a ``"SignupButton Name"`` column title, which lines up with the extra
    entry that ``rowgetDataText`` appends to every data row. All remaining
    rows are converted with ``rowgetDataText(tr, "td")``.

    Header detection looks at the ``<th>`` cells directly: the list returned
    by ``rowgetDataText`` is never empty (it always carries a signup-button
    entry), so testing that list would classify every first row as a header
    and drop its data.

    Args:
        table: Table element exposing ``find_all`` (a BeautifulSoup tag).

    Returns:
        A list of rows, each a list of strings; ``[]`` when the table has no
        ``<tr>`` at all.
    """
    trs = table.find_all("tr")
    if not trs:
        return []

    rows = []
    header_cells = trs[0].find_all("th")
    if header_cells:  # genuine header row: emit it once and skip it below
        header = [th.get_text(strip=True) for th in header_cells]
        rows.append(header + ["SignupButton Name"])
        trs = trs[1:]

    for tr in trs:  # every remaining row is a data row
        rows.append(rowgetDataText(tr, "td"))
    return rows

In [9]:
def clean_html(html):
    """Normalise non-breaking spaces and a few HTML entities in an HTML string.

    Replacements are applied in this order: ``\\xa0`` and ``&nbsp;`` become a
    plain space, ``&amp;`` becomes ``&`` and ``&quot;`` becomes ``"``.

    Args:
        html: Raw HTML text.

    Returns:
        The text with the substitutions applied.
    """
    # Order matters: "&amp;" is decoded after "&nbsp;", so "&amp;nbsp;" ends
    # up as the literal text "&nbsp;" rather than a space.
    substitutions = (
        ("\xa0", " "),
        ("&nbsp;", " "),
        ("&amp;", "&"),
        ("&quot;", '"'),
    )
    for old, new in substitutions:
        html = html.replace(old, new)
    return html

In [10]:
def find_availability(input_string):
    """Extract the reserved and open counts from raw (uncleaned) HTML.

    Looks for the first ``(<n>&nbsp;Reserved,&nbsp;<m>&nbsp;Open)`` fragment;
    the ``&nbsp;`` entities are matched literally.

    Args:
        input_string: HTML text to search.

    Returns:
        ``(reserved, open)`` as integers, or ``(None, None)`` when the
        fragment is not present.
    """
    found = re.search(
        r"\((\d+)&nbsp;Reserved,&nbsp;(\d+)&nbsp;Open\)", input_string
    )
    if not found:
        return None, None

    reserved_text, open_text = found.groups()
    return int(reserved_text), int(open_text)

In [11]:
def convert_date(date_str):
    """Parse a schedule date heading such as ``"Sun29 October 2023"``.

    The text must contain three letters immediately followed by a one- or
    two-digit day, then a space, a full month name, a space and a four-digit
    year.

    Args:
        date_str: Text to search for the date.

    Returns:
        A ``datetime`` at midnight of that day, or ``None`` when the pattern
        is not found.

    Raises:
        ValueError: If the month word is not a full month name or the day is
            not valid for that month.
    """
    found = re.search(r"([a-zA-Z]{3})(\d{1,2}) ([a-zA-Z]+) (\d{4})", date_str)
    if found is None:
        return None

    # The leading three-letter weekday is matched but not needed.
    _weekday, day_text, month_name, year_text = found.groups()

    # strptime with %B maps the month name to its number.
    month_number = datetime.strptime(month_name, "%B").month
    return datetime(int(year_text), month_number, int(day_text))

In [12]:
def parse_time_and_timezone(time_str):
    """Split text like ``"8:15 am  AEDT"`` into a 24-hour time and a zone code.

    Args:
        time_str: Text containing ``H:MM am/pm`` followed by whitespace and a
            three- or four-letter upper-case code.

    Returns:
        ``(time_24hr, timezone)`` where ``time_24hr`` is formatted
        ``HH:MM:SS``, or ``(None, None)`` when the pattern is not found.
    """
    found = re.search(r"(\d{1,2}:\d{2} [APMapm]{2})\s+([A-Z]{3,4})", time_str)
    if not found:
        return None, None

    clock_text, zone = found.groups()

    # Parse the 12-hour clock reading and re-emit it in 24-hour form.
    parsed = datetime.strptime(clock_text, "%I:%M %p")
    return parsed.time().strftime("%H:%M:%S"), zone

In [13]:
def parse_instructor(instructor_str):
    """Split an instructor cell into a name and a replacement flag.

    The cell is expected to hold a name optionally followed by a
    parenthesised number such as ``"(2)"``; the presence of that marker is
    what sets the replacement flag. Everything before the marker is kept as
    the name, so hyphens, apostrophes and accented letters survive instead of
    cutting the name short at the first non-ASCII-letter character.

    Args:
        instructor_str: Cell text, or ``None``/empty when the cell is blank.

    Returns:
        ``(instructor_name, is_replacement)``; ``(None, False)`` when there is
        no name to report.
    """
    if not instructor_str:
        return None, False

    # The first "(digits)" group, wherever it sits, is the marker.
    marker = re.search(r"\((\d+)\)", instructor_str)
    name_part = instructor_str[: marker.start()] if marker else instructor_str

    instructor_name = name_part.strip()
    if not instructor_name:
        return None, False
    return instructor_name, marker is not None

In [14]:
def parse_duration(duration_str):
    """Convert text such as ``"1 hour & 15 minutes"`` into total minutes.

    Args:
        duration_str: Text that may contain ``"<n> hour"`` and/or
            ``"<n> minute"`` fragments.

    Returns:
        The total number of minutes as an integer; ``0`` when neither
        fragment is present.
    """
    # Each pattern is paired with how many minutes one of its units is worth.
    unit_patterns = ((r"(\d+) hour", 60), (r"(\d+) minute", 1))

    total_minutes = 0
    for pattern, minutes_per_unit in unit_patterns:
        found = re.search(pattern, duration_str)
        if found:
            total_minutes += int(found.group(1)) * minutes_per_unit
    return total_minutes

In [15]:
def parse_availability(availability_str):
    """Extract the reserved and open counts from cleaned cell text.

    Args:
        availability_str: Text containing ``"<n> Reserved, <m> Open"`` with
            plain spaces.

    Returns:
        ``(reserved_spaces, open_spaces)`` as integers, or ``(None, None)``
        when the fragment is not present.
    """
    found = re.search(r"(\d+) Reserved, (\d+) Open", availability_str)
    if not found:
        return None, None

    reserved_text, open_text = found.groups()
    return int(reserved_text), int(open_text)

In [16]:
def print_classes(gym_classes):
    """Print the classes grouped under one heading per day.

    A separator line and a heading (weekday, day and month, then the time
    zone and location of the first class) are printed before the first class
    and again whenever the date changes from one class to the next.

    Args:
        gym_classes: Non-empty sequence of objects with ``date``,
            ``time_zone`` and ``location`` attributes, in display order.

    Raises:
        IndexError: If ``gym_classes`` is empty.
    """
    first = gym_classes[0]
    separator = "=" * 50

    def show_day_heading(day):
        # Time zone and location are always taken from the first class.
        print(separator)
        print(
            day.strftime("%A %d of %B")
            + f" ({first.time_zone}) at {first.location}"
        )

    current_day = first.date  # first day of the week
    show_day_heading(current_day)

    for gym_class in gym_classes:
        if gym_class.date != current_day:
            current_day = gym_class.date
            show_day_heading(current_day)
        print(gym_class)

In [17]:
def strikethrough(mytext):
    """Return ``mytext`` rendered as struck-through using combining characters.

    Spaces are replaced with non-breaking spaces, then U+0336 is placed after
    every character. An empty input yields a lone U+0336.
    """
    combining_stroke = "\u0336"
    non_breaking = mytext.replace(" ", "\u00a0")
    return combining_stroke.join(non_breaking) + combining_stroke

## Custom Class

In [18]:
class GymClass:
    """One scheduled class taken from the gym timetable.

    Besides the constructor arguments, which are stored unchanged as
    attributes of the same name, two derived attributes are set:

    * ``spaces_total``: ``spaces_booked + spaces_available``, or ``None``
      when the two cannot be added (for example when either is ``None``).
    * ``is_full``: ``spaces_booked == spaces_total``. When availability is
      unknown both sides are ``None``, so this evaluates to ``True``.
    """

    def __init__(
        self,
        id,
        date,
        start_time,
        time_zone,
        spaces_booked,
        spaces_available,
        class_name,
        instructor,
        is_replacement,
        assistant1,
        assistant2,
        location,
        sub_location,
        duration,
        signup_button_name,
    ):
        """Store the given fields and derive ``spaces_total`` and ``is_full``."""
        self.date = date
        self.id = id
        self.start_time = start_time
        self.time_zone = time_zone
        self.spaces_booked = spaces_booked
        self.spaces_available = spaces_available
        self.class_name = class_name
        self.instructor = instructor
        self.is_replacement = is_replacement
        self.assistant1 = assistant1
        self.assistant2 = assistant2
        self.location = location
        self.sub_location = sub_location
        self.duration = duration
        self.signup_button_name = signup_button_name

        # Only the "cannot add these" case is expected here (None counts);
        # anything else should surface instead of being silently swallowed.
        try:
            self.spaces_total = self.spaces_booked + self.spaces_available
        except TypeError:
            self.spaces_total = None
        self.is_full = self.spaces_booked == self.spaces_total

    def __str__(self):
        """Return a one-line summary of the class.

        The status flag is ``-`` when there is no signup button, ``X`` when
        the class is full and ``O`` otherwise. When ``spaces_booked`` is
        ``None`` the availability reads ``Finished`` and the whole line is
        struck through.
        """
        if self.signup_button_name == "N/A":
            full = "-"
        elif self.is_full:
            full = "X"
        else:
            full = "O"

        finished = self.spaces_booked is None  # class has happened already
        if finished:
            availability = "Finished"
        else:
            availability = (
                f"{self.spaces_booked}/{self.spaces_available + self.spaces_booked}"
            )

        if self.is_replacement:
            instructor = f"{self.instructor} (R)"
        else:
            instructor = self.instructor

        return_string = f"({self.id}) {self.start_time[0:5]} ({full}): {self.class_name} ({availability}) by {instructor} for {self.duration} minutes."

        if finished:
            return strikethrough(return_string)

        return return_string

    def to_dict(self):
        """Return the constructor fields as a plain dict (derived fields excluded)."""
        return {
            "id": self.id,
            "date": self.date,
            "start_time": self.start_time,
            "time_zone": self.time_zone,
            "spaces_booked": self.spaces_booked,
            "spaces_available": self.spaces_available,
            "class_name": self.class_name,
            "instructor": self.instructor,
            "is_replacement": self.is_replacement,
            "assistant1": self.assistant1,
            "assistant2": self.assistant2,
            "location": self.location,
            "sub_location": self.sub_location,
            "duration": self.duration,
            "signup_button_name": self.signup_button_name,
        }

## Scrape Data

In [19]:
def setup_driver(headless=True):
    """Create a Firefox WebDriver, optionally headless.

    Args:
        headless: When truthy, ``--headless`` is added to the Firefox options.

    Returns:
        A new ``webdriver.Firefox`` instance.
    """
    options = Options()
    if headless:
        options.add_argument("--headless")
    return webdriver.Firefox(options=options)

In [20]:
def wait_for_element_presence(driver, element_id, timeout=10):
    """Wait until an element with the given id is present, or the wait times out.

    This is best-effort: a timeout is reported on stdout and not raised, so
    the caller continues either way. Only the timeout is handled here; a
    kernel interrupt or any other driver error propagates instead of being
    reported as a timeout.

    Args:
        driver: WebDriver to poll.
        element_id: ``id`` attribute of the element to wait for.
        timeout: Maximum number of seconds to wait.
    """
    wait = WebDriverWait(driver, timeout)
    try:
        wait.until(presence_of_element_located((By.ID, element_id)))
    except TimeoutException:
        print(f"Element {element_id} not found, timeout reached.")

In [21]:
def wait_for_element_clickable(driver, element_id, timeout=10):
    """Wait until an element with the given id is clickable, or the wait times out.

    This is best-effort: a timeout is reported on stdout and not raised, so
    the caller continues either way. Only the timeout is handled here; a
    kernel interrupt or any other driver error propagates instead of being
    reported as a timeout.

    Args:
        driver: WebDriver to poll.
        element_id: ``id`` attribute of the element to wait for.
        timeout: Maximum number of seconds to wait.
    """
    wait = WebDriverWait(driver, timeout)
    try:
        wait.until(element_to_be_clickable((By.ID, element_id)))
    except TimeoutException:
        print(f"Element {element_id} not clickable or not found, timeout reached.")

In [22]:
def get_element_html(driver, element_id):
    """Return the ``outerHTML`` of the element with the given id.

    Args:
        driver: WebDriver holding the loaded page.
        element_id: ``id`` attribute of the element to look up.
    """
    return driver.find_element(By.ID, element_id).get_attribute("outerHTML")

In [23]:
def get_table(url, headless):
    """Scrape the schedule table HTML for the current and the following week.

    Opens a Firefox session via ``setup_driver``, loads ``url``, clicks the
    element with id ``tabA7`` and captures the outer HTML of the element with
    id ``classSchedule-mainTable``. It then clicks ``week-arrow-r`` and
    captures the same element again. Both snippets are passed through
    ``clean_html`` before being returned. Progress messages are printed
    along the way.

    Args:
        url: Address of the schedule page to load (e.g. from ``get_url``).
        headless: Forwarded to ``setup_driver``.

    Returns:
        Tuple ``(table_current_html, table_next_html)`` of cleaned HTML strings.
    """
    with setup_driver(headless) as driver:
        driver.get(url)

        # Wait and click on Group Training tab
        wait_for_element_presence(driver, "tabA7")
        driver.find_element(By.ID, "tabA7").click()

        # Get table for this week
        wait_for_element_presence(driver, "classSchedule-mainTable")
        table_current_html = get_element_html(driver, "classSchedule-mainTable")
        print("Succesfully retrieved table for this week")

        # Get table for next week
        # NOTE(review): an element with this id already exists before the
        # arrow is clicked, so the wait below may return before the next
        # week's table has replaced it — confirm how the page reloads.
        driver.find_element(By.ID, "week-arrow-r").click()
        wait_for_element_presence(driver, "classSchedule-mainTable")
        table_next_html = get_element_html(driver, "classSchedule-mainTable")
        print("Succesfully retrieved table for next week")

        # clean the html
        table_current_html, table_next_html = map(
            clean_html, [table_current_html, table_next_html]
        )
        print("Tables cleaned")

        # close the browser
        # NOTE(review): the surrounding `with` presumably also shuts the
        # driver down on exit, which would make this call redundant — verify.
        driver.close()
        print("Driver closed")

    return table_current_html, table_next_html

In [24]:
# table_current_html, table_next_html = get_table(get_url(today, GYM_ID), headless=True)

## Parse HTML

In [25]:
def replace_empty_with_none(row):
    """Return a copy of ``row`` in which every ``""`` entry is replaced by ``None``."""
    cleaned = []
    for cell in row:
        cleaned.append(None if cell == "" else cell)
    return cleaned

In [26]:
def generate_class_id(date, counter):
    """Build a class id from the date (``YYYYMMDD``) and a zero-padded counter.

    Args:
        date: Object with ``strftime`` (a ``datetime``).
        counter: Position of the class within its day; padded to at least
            two characters.

    Returns:
        The id as a string, e.g. ``"2023103107"``.
    """
    day_part = date.strftime("%Y%m%d")
    sequence_part = str(counter).zfill(2)
    return f"{day_part}{sequence_part}"

In [27]:
def parse_row(row, date, counter):
    """Turn one timetable row into a ``GymClass``.

    Column positions read from ``row``: 0 time and time zone, 1 availability,
    2 class name, 3 instructor, 4 and 5 assistants, 6 location,
    7 sub-location, 8 duration, 9 signup button name.

    Blank cells reach this function as ``None``. The availability,
    instructor and duration cells are each checked for ``None`` before being
    handed to their parser, since the parsers run regular expressions and
    would raise ``TypeError`` on ``None``.

    Args:
        row: List of cell values for one class (at least ten entries).
        date: Date of the class, stored on the result and used for its id.
        counter: Position of the class within its day, used for its id.

    Returns:
        The populated ``GymClass``.
    """
    start_time, time_zone = parse_time_and_timezone(row[0])

    if row[1] is None:
        spaces_booked, spaces_available = None, None
    else:
        spaces_booked, spaces_available = parse_availability(row[1])

    # A blank instructor cell means "nobody listed", not a parse failure.
    if row[3] is None:
        instructor, is_replacement = None, False
    else:
        instructor, is_replacement = parse_instructor(row[3])

    # parse_duration yields 0 for text without a duration; use the same
    # value when the cell is blank.
    duration = 0 if row[8] is None else parse_duration(row[8])

    return GymClass(
        id=generate_class_id(date, counter),
        date=date,
        start_time=start_time,
        time_zone=time_zone,
        spaces_booked=spaces_booked,
        spaces_available=spaces_available,
        class_name=row[2],
        instructor=instructor,
        is_replacement=is_replacement,
        assistant1=row[4],
        assistant2=row[5],
        location=row[6],
        sub_location=row[7],
        duration=duration,
        signup_button_name=row[9],
    )

In [28]:
def parse_html_table(table):
    """Parse the schedule table HTML into a list of ``GymClass`` objects.

    The HTML is parsed with BeautifulSoup, flattened by ``tableDataText`` and
    blank cells are turned into ``None``. The first cell of the first row
    seeds the current date. Every later row with exactly two entries is
    treated as a date row: it updates the current date and restarts the
    per-day counter. All other rows are converted with ``parse_row``.

    Args:
        table: HTML string containing a ``<table>`` element.

    Returns:
        List of ``GymClass`` objects in table order.
    """
    soup = BeautifulSoup(table, "html.parser")
    raw_rows = tableDataText(soup.find("table"))

    # replace all empty strings with None
    list_table = [replace_empty_with_none(raw_row) for raw_row in raw_rows]

    current_date = convert_date(list_table[0][0])
    position_in_day = 0
    gym_classes = []

    for row in list_table[1:]:
        is_date_row = len(row) == 2
        if is_date_row:
            current_date = convert_date(row[0])
            position_in_day = 0
            continue

        gym_classes.append(parse_row(row, current_date, position_in_day))
        position_in_day += 1

    return gym_classes

In [29]:
# gym_classes_current = parse_html_table(table_current_html)
# gym_classes_next = parse_html_table(table_next_html)

# print_classes(gym_classes_current)
# print_classes(gym_classes_next)

## Display Data

## Book a Class

In [30]:
def find_class_by_id(classes, class_id):
    """Return the first class whose ``id`` equals ``str(class_id)``.

    Args:
        classes: Iterable of objects with an ``id`` attribute.
        class_id: Id to look for; converted to ``str`` before comparing.

    Raises:
        ValueError: If no class has that id.
    """
    wanted = str(class_id)
    for candidate in classes:
        if candidate.id == wanted:
            return candidate
    raise ValueError(
        "Class ID not found in classes. Please check the class ID is correct."
    )

In [31]:
def print_class_info(class_object):
    """Print the date, time, name, instructor and location of a class.

    Args:
        class_object: Object with ``date``, ``start_time``, ``time_zone``,
            ``class_name``, ``instructor`` and ``location`` attributes.
    """
    date_string = class_object.date.strftime("%A %d of %B %Y")
    lines = (
        f"Date:       {date_string}",
        f"Time:       {class_object.start_time} ({class_object.time_zone})",
        f"Class:      {class_object.class_name}",
        f"Instructor: {class_object.instructor}",
        f"Location:   {class_object.location}",
        "...",
    )
    for line in lines:
        print(line)

In [32]:
def book_class(classes, class_id, headless):
    """Book the class with the given id through the gym's web page.

    Steps: look the class up, refuse if it is full or has no signup button,
    open the schedule page for its date, re-read the live availability from
    the row containing the signup button, click the button, sign in with
    ``USERNAME``/``PASSWORD``, submit the enrolment and finally check that the
    ``notifyBooking`` element is on the page before reporting success.

    Args:
        classes: Iterable of ``GymClass`` objects to search.
        class_id: Id of the class to book (compared as a string).
        headless: Forwarded to ``setup_driver``.

    Raises:
        ValueError: If no class has the given id.
        AssertionError: If the class is full, is not bookable yet, or its
            live availability cannot be read or shows no open spaces.
        RuntimeError: If the booking confirmation element never appears.
    """
    class_object = find_class_by_id(classes, class_id)

    assert not class_object.is_full, "Class is already full."
    assert class_object.signup_button_name != "N/A", "Class is not yet bookable."

    # print information about the class
    print("Attempting to book the following class:")
    print("=" * 50)
    print_class_info(class_object)

    # The date is typed into the page as DD/MM/YYYY; get_url wants YYYYMMDD.
    date = class_object.date.strftime("%d/%m/%Y")
    url = get_url(class_object.date.strftime("%Y%m%d"), GYM_ID)

    # Leaving the `with` block quits the browser, also when a step raises,
    # so no explicit close is needed.
    with setup_driver(headless) as driver:
        driver.get(url)

        wait_for_element_presence(driver, "btnSignIn")

        # Set the date: clear the field with backspaces, then type and submit.
        wait_for_element_presence(driver, "txtDate")
        date_input = driver.find_element(By.ID, "txtDate")
        date_input.send_keys(Keys.BACKSPACE * 11)
        date_input.send_keys(date)
        date_input.send_keys(Keys.ENTER)

        # NOTE(review): fixed pause, presumably to let the schedule reload
        # after the date change — confirm whether an explicit wait fits.
        time.sleep(2)

        # Find the signup button
        signup_button = driver.find_element(By.NAME, class_object.signup_button_name)

        # Check availability one last time, using the button's grandparent
        # element as the source of the availability text.
        row_html = signup_button.find_element(By.XPATH, "./../..").get_attribute(
            "outerHTML"
        )
        _reserved, open_spaces = find_availability(row_html)
        assert (
            open_spaces is not None
        ), "Could not read availability for this class, consider running get_table() again."
        assert open_spaces > 0, "Class is full, consider running get_table() again."

        signup_button.click()

        # Fill in details
        wait_for_element_clickable(driver, "su1UserName")
        driver.find_element(By.ID, "su1UserName").send_keys(USERNAME)
        driver.find_element(By.ID, "su1Password").send_keys(PASSWORD)
        driver.find_element(By.ID, "su1Password").send_keys(Keys.ENTER)

        # Confirm booking
        wait_for_element_presence(driver, "SubmitEnroll2")
        driver.find_element(By.ID, "SubmitEnroll2").click()

        # Verify booking: the wait helper only prints on timeout, so check
        # for the element explicitly instead of assuming it appeared.
        wait_for_element_presence(driver, "notifyBooking")
        if not driver.find_elements(By.ID, "notifyBooking"):
            raise RuntimeError(
                "Booking confirmation not found; the class may not have been booked."
            )
        print("Class booked successfully!")

    return

# Run

In [33]:
# Scrape the schedule tables for this week and next week, parse each into a
# list of classes, and combine them into one list.
current_week_raw, next_week_raw = get_table(get_url(today, GYM_ID), headless=True)
current_week_classes = parse_html_table(current_week_raw)
next_week_classes = parse_html_table(next_week_raw)
all_classes = current_week_classes + next_week_classes

Succesfully retrieved table for this week
Succesfully retrieved table for next week
Tables cleaned
Driver closed


In [34]:
# Book one class by its id (as shown in parentheses by print_classes).
book_class(classes=all_classes, class_id=2023103107, headless=True)

Attempting to book the following class:
Date:       Tuesday 31 of October 2023
Time:       08:15:00 (AEDT)
Class:      Strong
Instructor: Angela Aho
Location:   Surry Hills One Playground
...
Class booked successfully!
