In [1]:
from selenium import webdriver as wd

import chromedriver_binary
import random
import time
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
driver = wd.Chrome()
driver.implicitly_wait(10)



# Login

In [3]:
credentials = dict(line.strip().split('=') for line in open('docker/leisure_centre.properties'))

In [4]:
email_address = driver.find_element_by_xpath('//*[@id="account-login-email"]')
email_address.send_keys(credentials["username"])

WebDriverException: Message: chrome not reachable
  (Session info: chrome=106.0.5249.119)


In [None]:
password = driver.find_element_by_xpath('//*[@id="account-login-password"]')
password.send_keys(credentials["password"])

In [7]:
#submitLoginSpan = wd.find_element_by_id("account-login-submit-message")
#submitLoginButton = submitLoginSpan.find_element_by_xpath("..")
#submitLoginButton.click()

In [8]:
#submitLoginSpan = wd.find_element_by_id("account-login-submit-message")
#submitLoginButton = submitLoginSpan.parent
#submitLoginButton.click()

In [9]:
submitLoginButton = driver.find_element_by_xpath('//span[@id="account-login-submit-message"]/ancestor::button')
submitLoginButton.click()

# Make A Booking

First I tried to retrieve the Book Now button using a simple XPATH expression:

In [10]:


bookNowButton = driver.find_element_by_xpath("//a[@data-test-id='account-bookings-dropins']")
#bookNowButton = driver.find_element(By.XPATH, "//a[@data-test-id='account-bookings-dropins']")
bookNowButton.click()

ElementNotInteractableException: Message: element not interactable
  (Session info: chrome=106.0.5249.119)


However I was getting an error, saying that the button was not interactible and clickable. 

That was strange since I could see that the button was visible and I can click on it.

Then, just to confirm I wasn't going insane, I used the browser to copy the full XPATH for the node.


In [None]:
bookNowButton = driver.find_element_by_xpath("/html/body/div/div[2]/div/landing-page-base/div/div/div[2]/landing-page-bookings-base/div/div[2]/account-booking-actions/div/div/div/a")
bookNowButton.click()

I don't like doing this hack, but it does work. But why?

It turns out that there is more than one link in the page for "Book Now". One is visible, and the other one is invisible. This is because the website is responsive. Who-ever
developed this website decided that they needed two separate links for booking the courts. One for Mobile and the other for desktop screens.

Knowing that there are more than one button for "Book now" in the page, but only one is visible, we can do a check to see if the HTML element is visible before clicking on it:

In [None]:
bookNowButtons = driver.find_elements_by_xpath("//a[@data-test-id='account-bookings-dropins']")
#/html/body/div/div[2]/div/landing-page-base/div/div/div[2]/landing-page-bookings-base/div/div[2]/account-booking-actions/div/div/div/a
type(bookNowButtons)

for bookNowButton in bookNowButtons:
    if bookNowButton.is_displayed():
        bookNowButton.click()


It works! But, are we not complicating things?
The <a> tag we are clicking on, points to a URL, which we can call directly after login, so perhaps we should just do this:

In [11]:
driver.get("https://legacyleisure.legendonlineservices.co.uk/erith/bookingscentre/index")

## Remove Pre-selection of club - as we want to be searching for more than one club - inside a method name

In [12]:
driver.find_element_by_class_name("select2-selection__choice__remove").click()

## Select Search field if the droddown not already expanded

In [13]:

select_site = driver.find_element_by_xpath("//span[contains(@class, 'select2-selection')]")
is_expanded = select_site.get_attribute("aria-expanded") == 'true'
print(is_expanded)
if not is_expanded:
    print("About to open dropdown")
    wd.find_element_by_xpath("//span[@class='selection']").click()

True


We select the club where we want to do our search. More than one way of doing it. We could do a search query, or 
like I show below, I found the <option> element inside the <select> dropdown that I wanted and I clicked on it:

In [14]:
driver.find_element_by_xpath('//option[text()="Sidcup"]').click()

Oh, but this doesn't seem to work the same as the UI. So I must find another way to select the club, so that the remainder of the Javascript works properly.

In [19]:
driver.find_element_by_xpath("//li[text()='Sidcup']").click()

That seems to work. Now I can select the category.

## Selecting the Category

We want to look for availability in the Sports Hall


In [20]:
driver.find_element_by_xpath('//input[@type="radio" and @data-test-id="bookings-category-categories-racket-sports"]').click()

## Selecting the activities

We want to book in this case Badminton 60 minutes

In [21]:
driver.find_element_by_xpath('//input[@type="checkbox" and @data-test-id="bookings-activities-activity-badminton-60-mins"]').click()

We could select more than one activity. But in this case we only care about badminton!

## Viewing the Timetable

In [22]:
driver.find_element_by_xpath('//button[@data-test-id="bookings-viewtimetable"]').click()

# Displaying the Results

Now the real scraping action begins. We want to store the results in a Panda Dataframe, as this will make it much more easy to make complicated queries on our data.

In this website in particular, it shows us the slots available starting, from today up to 5 days in the future. What we want to do is to collect this data and add it to a
table for our own use.


## Let's create an empty Panda Data Frame with the columns that we want

In [23]:
import pandas as pd

df = pd.DataFrame([], columns=["Location", "Date", "Time", "Availability"])
df

Unnamed: 0,Location,Date,Time,Availability


## Now that we know roughly what information we want, let's parse the HTML and fill in our table

In [24]:
soup = BeautifulSoup(driver.page_source)

In [25]:
soup.findAll("div", {"class": "activityBox"})

[]

In [26]:
rows = soup.findAll("div", {"class": "activityBox"})
for row in rows:
    print("row")
    timeOfDay= row.find("div", {"class": "timeOfDay"})
    available = row.find("div", {"class": "spaceDetailsText"})
    print(timeOfDay.text)
    print(available.text)
    
# find what the date is using the date picker

datePicker = driver.find_element_by_xpath("//span[@class='Zebra_DatePicker_Icon_Wrapper']/input")
# date
print(datePicker.get_attribute('value'))
#soup.find("span", {"class", "Zebra_DatePicker_Icon_Wrapper"})


16 Oct 2022


## Let's try to put this into a neat Panda DataFrame

In [27]:
import pandas as pd

datePicker = driver.find_element_by_xpath("//span[@class='Zebra_DatePicker_Icon_Wrapper']/input")
current_date = datePicker.get_attribute('value')

activity_rows = soup.findAll("div", {"class": "activityBox"})
df_rows = []
for activity_row in activity_rows:
    location = activity_row.find("div", {"class": "activityDetailsMajor"})
    timeOfDay= activity_row.find("div", {"class": "timeOfDay"})
    available = activity_row.find("div", {"class": "spaceDetailsText"})
    df_rows.append([current_date,location.text, timeOfDay.text, available.text])
    
df = pd.DataFrame(df_rows, columns=["Date", "Location", "Time", "Availability"])


In [28]:
df

Unnamed: 0,Date,Location,Time,Availability


## Before we proceed further we should start creating functions, so we don't have to copy and past the same code multiple times


In [29]:
from selenium.webdriver.common.keys import Keys
import datetime
from datetime import date
import time

def getAvailabilityForDate(queryDate):
    queryDateStr = queryDate.strftime("%d %b %Y")
    # find date picker
    datePicker = driver.find_element_by_xpath("//span[@class='Zebra_DatePicker_Icon_Wrapper']/input")
    datePicker.clear()
    # set date
    datePicker.send_keys(queryDateStr)
    datePicker.send_keys(Keys.TAB);
    # we need to wait because otherwise we pick an empty list of results
    # there are different techniques to ensure that we have a fresh list of results
    # for instance we could wait for a cer
    current_date = datePicker.get_attribute('value')
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source)
    activity_rows = soup.findAll("div", {"class": "activityBox"})
    df_rows = []
    for activity_row in activity_rows:
        location = activity_row.find("div", {"class": "activityDetailsMajor"})
        timeOfDay= activity_row.find("div", {"class": "timeOfDay"})
        available = activity_row.find("div", {"class": "spaceDetailsText"})
        df_rows.append([current_date,location.text, timeOfDay.text, available.text])

    df = pd.DataFrame(df_rows, columns=["Date", "Location", "Time", "Availability"])

    return df


In [30]:
getAvailabilityForDate(date.today())

Unnamed: 0,Date,Location,Time,Availability


## The date picker accepts dates in the format of DAY MONTH YEAR. Let's find a way to construct this automatically based on the current date.

In [31]:
import datetime
from datetime import date

In [32]:
date.today()

datetime.date(2022, 10, 16)

In [33]:
date.today().strftime("%d %b %Y")

'16 Oct 2022'

In [34]:
availability = getAvailabilityForDate(date.today() + datetime.timedelta(days=5))

#  Now Let's Filter based on Availability

An hour is available if 1 or more slots are available. Or put in another way, if the availability is not set to Full.

In [35]:
availability

Unnamed: 0,Date,Location,Time,Availability
0,21 Oct 2022,Sidcup,06:00,Full
1,21 Oct 2022,Sidcup,07:00,4 Slots
2,21 Oct 2022,Sidcup,08:00,4 Slots
3,21 Oct 2022,Sidcup,09:00,Full
4,21 Oct 2022,Sidcup,10:00,Full
5,21 Oct 2022,Sidcup,11:00,Full
6,21 Oct 2022,Sidcup,12:00,Full
7,21 Oct 2022,Sidcup,13:00,Full
8,21 Oct 2022,Sidcup,14:00,Full
9,21 Oct 2022,Sidcup,15:00,Full


In [36]:
availability[availability["Availability"] != "Full"]

Unnamed: 0,Date,Location,Time,Availability
1,21 Oct 2022,Sidcup,07:00,4 Slots
2,21 Oct 2022,Sidcup,08:00,4 Slots
11,21 Oct 2022,Sidcup,17:00,4 Slots
12,21 Oct 2022,Sidcup,18:00,4 Slots
13,21 Oct 2022,Sidcup,19:00,3 Slots
14,21 Oct 2022,Sidcup,20:00,2 Slots
15,21 Oct 2022,Sidcup,21:00,3 Slots


# Converting a column to a another data type with Pandas
First we replace "N Slots" with N using a regular expression

In [37]:
# complicated version with regex
availability["Availability"].str.replace(r"(\d+) Slots", lambda m: m.group(1))
# simple version
availability["Availability"] = availability["Availability"].str.replace(" Slots", "").replace("Full", "0")

  availability["Availability"].str.replace(r"(\d+) Slots", lambda m: m.group(1))


In [38]:
availability

Unnamed: 0,Date,Location,Time,Availability
0,21 Oct 2022,Sidcup,06:00,0
1,21 Oct 2022,Sidcup,07:00,4
2,21 Oct 2022,Sidcup,08:00,4
3,21 Oct 2022,Sidcup,09:00,0
4,21 Oct 2022,Sidcup,10:00,0
5,21 Oct 2022,Sidcup,11:00,0
6,21 Oct 2022,Sidcup,12:00,0
7,21 Oct 2022,Sidcup,13:00,0
8,21 Oct 2022,Sidcup,14:00,0
9,21 Oct 2022,Sidcup,15:00,0


In [39]:
availability.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Date          16 non-null     object
 1   Location      16 non-null     object
 2   Time          16 non-null     object
 3   Availability  16 non-null     object
dtypes: object(4)
memory usage: 640.0+ bytes


As we can see all the columns have the data type object. This type is used when the types in a column are mixed. If we want to use for instance the time to filter based on a query of the hour of the day, or the availability based on the number of slots, we ought to convert the data types!

Let's start with converting our availability to an integer, so we can filter availability based on the number of slots.

In [40]:
# we can do
availability['Availability'].astype(int)

# or 
availability['Availability'] = pd.to_numeric(availability['Availability'])

In [41]:
availability.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Date          16 non-null     object
 1   Location      16 non-null     object
 2   Time          16 non-null     object
 3   Availability  16 non-null     int64 
dtypes: int64(1), object(3)
memory usage: 640.0+ bytes


Now we can filter based on the number of slots. This could be useful if we were trying to book for a larger group.

In [42]:
availability[availability["Availability"] > 3]

Unnamed: 0,Date,Location,Time,Availability
1,21 Oct 2022,Sidcup,07:00,4
2,21 Oct 2022,Sidcup,08:00,4
11,21 Oct 2022,Sidcup,17:00,4
12,21 Oct 2022,Sidcup,18:00,4


# Filtering by time
Time can also be a constraint. So let's try and convert our time to a timestamp, and see if we can do queries based on time of the day.
In this case the time column seems fairly inocuous. Date + Time could b econverted to a timestamp.

In [43]:
pd.to_datetime(availability["Date"] + " " + availability["Time"], format="%d %b %Y %H:%M")

0    2022-10-21 06:00:00
1    2022-10-21 07:00:00
2    2022-10-21 08:00:00
3    2022-10-21 09:00:00
4    2022-10-21 10:00:00
5    2022-10-21 11:00:00
6    2022-10-21 12:00:00
7    2022-10-21 13:00:00
8    2022-10-21 14:00:00
9    2022-10-21 15:00:00
10   2022-10-21 16:00:00
11   2022-10-21 17:00:00
12   2022-10-21 18:00:00
13   2022-10-21 19:00:00
14   2022-10-21 20:00:00
15   2022-10-21 21:00:00
dtype: datetime64[ns]

Now we know how to convert the date and time to a datime object, so we can do queries based on the date plus time.

In [35]:
from selenium.webdriver.common.keys import Keys
import datetime
from datetime import date
import time
import json

def startBooking(driver):
    bookNowButton = driver.find_element_by_xpath("/html/body/div/div[2]/div/landing-page-base/div/div/div[2]/landing-page-bookings-base/div/div[2]/account-booking-actions/div/div/div/a")
    bookNowButton.click()
    
def selectClub(club, driver):
    driver.find_element_by_class_name("select2-selection__choice__remove").click()
    
    select_site = driver.find_element_by_xpath("//span[contains(@class, 'select2-selection')]")
    is_expanded = select_site.get_attribute("aria-expanded") == 'true'
    print(is_expanded)
    driver.find_element_by_xpath(f"//li[text()='{club}']").click()
    
    if not is_expanded:
        print("About to open dropdown")
        wd.find_element_by_xpath("//span[@class='selection']").click()

def selectHome(driver):
    driver.find_element_by_xpath("//*[@id='lgd-navigation-menu-options']/li[1]/a/span[2]")

def selectBadminton(driver):
    driver.find_element_by_xpath('//input[@type="radio" and @data-test-id="bookings-category-categories-racket-sports"]').click()
    driver.find_element_by_xpath('//input[@type="checkbox" and @data-test-id="bookings-activities-activity-badminton-60-mins"]').click()
    driver.find_element_by_xpath('//button[@data-test-id="bookings-viewtimetable"]').click()
    
def loginToSportsCenterWebsite():
    driver = wd.Chrome()
    driver.implicitly_wait(10)
    
    driver.get("https://legacyleisure.legendonlineservices.co.uk/crook_log/account/login")
    credentials = dict(line.strip().split('=') for line in open('docker/leisure_centre.properties'))
    email_address = driver.find_element_by_xpath('//*[@id="account-login-email"]')
    email_address.send_keys(credentials["username"])
    
    password = driver.find_element_by_xpath('//*[@id="account-login-password"]')
    password.send_keys(credentials["password"])
    
    submitLoginButton = driver.find_element_by_xpath('//span[@id="account-login-submit-message"]/ancestor::button')
    submitLoginButton.click()
    return driver

# Using Dataframe.apply() to apply function to every row
def calculate_max_duration(row, all_available_times):
    startTime = row["Time"]
    duration = 1
    previousTime = startTime
    while True:
        nextTime = previousTime + datetime.timedelta(hours=1)         
        indices = all_available_times["Time"] == nextTime
        if not all_available_times.loc[indices].empty:
           duration+=1
           previousTime = nextTime
        else:
           break
        
    return duration
        

def getAvailabilityForDate(queryDate, driver):
    queryDateStr = queryDate.strftime("%d %b %Y")
    # find date picker
    datePicker = driver.find_element_by_xpath("//span[@class='Zebra_DatePicker_Icon_Wrapper']/input")
    datePicker.clear()
    # set date
    datePicker.send_keys(queryDateStr)
    datePicker.send_keys(Keys.TAB);
    # we need to wait because otherwise we pick an empty list of results
    # there are different techniques to ensure that we have a fresh list of results
    # for instance we could wait for a cer
    current_date = datePicker.get_attribute('value')
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source)
    activity_rows = soup.findAll("div", {"class": "activityBox"})
    df_rows = []
    for activity_row in activity_rows:
        location = activity_row.find("div", {"class": "activityDetailsMajor"})
        timeOfDay= activity_row.find("div", {"class": "timeOfDay"})
        available = activity_row.find("div", {"class": "spaceDetailsText"})
        df_rows.append([current_date,location.text, timeOfDay.text, available.text])

    df = pd.DataFrame(df_rows, columns=["Date", "Location", "Time", "Availability"])
    # complicated version with regex
    df["Availability"].str.replace(r"(\d+) Slots", lambda m: m.group(1))
    # simple version
    df["Availability"] = df["Availability"].str.replace(" Slots", "").replace("Full", "0")
    df['Availability'] = pd.to_numeric(df['Availability'])
    df['Time'] = pd.to_datetime(df["Date"] + " " + df["Time"], format="%d %b %Y %H:%M")

    return df



def findAvailableSlots(clubs, earliestDate=date.today(), numberOfDaysInFuture=1, earliestTime="8:00", latestTime="21:00", numberOfHours=1, slots=1):
    ''' Finds slots available using the given search criteria:
    club: the name of the club for which we want to search for availability
    earliestDate: the earliest date for which we want to look for available slots. If not specified, it defaults to today.
    numberOfDaysInTheFuture: 
    earliestTime: the earliest time for which we are searching for slots
    latestTime: the latest time for which we are searching for slots
    numberOfHours: How long we want to play for
    slots: How many Slots?
    '''
    # login
    driver = loginToSportsCenterWebsite()
    
    for club in clubs:
    
        startBooking(driver)
        selectClub(club, driver)
        selectBadminton(driver)
        startDate = earliestDate

        dframes= []
        for n in range(numberOfDaysInFuture+1):
            df = getAvailabilityForDate(startDate + datetime.timedelta(days=n), driver)
            # filter by number of slots
            df = df[df["Availability"] > slots]
            # filter by time range
            index = pd.DatetimeIndex(df['Time'])
            df = df.iloc[index.indexer_between_time(earliestTime, latestTime)]
            # # filter by desired duration
            dframes.append(df)
            # go back to home so we can search for availability in next club
            selectHome(driver)



        df = pd.concat(dframes)
        df["Max Duration"] = df.apply(lambda x:calculate_max_duration(x,df), axis=1)
        df = df[df["Max Duration"] >= numberOfHours]


        json_result = df.to_json(orient="records", date_format="iso")
        print(json_result)
       
    driver.close()
    return {}

In [36]:
range(2)

range(0, 2)

In [37]:
json_result = findAvailableSlots(["Erith","Sidcup"], numberOfDaysInFuture=5, slots=1, earliestTime="19:00", latestTime="21:00", numberOfHours=2 )  


json_formatted_str = json.dumps(json.loads(json_result), indent=4)
print(json_formatted_str)

True


  df["Availability"].str.replace(r"(\d+) Slots", lambda m: m.group(1))


[{"Date":"26 Oct 2022","Location":"Erith","Time":"2022-10-26T20:00:00.000Z","Availability":4,"Max Duration":2}]


NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"/html/body/div/div[2]/div/landing-page-base/div/div/div[2]/landing-page-bookings-base/div/div[2]/account-booking-actions/div/div/div/a"}
  (Session info: chrome=106.0.5249.119)


In [109]:
import json
display(df)
json_result = df.to_json(orient="records", date_format="iso")
print(type(json_result))
json_formatted_str = json.dumps(json.loads(json_result), indent=4)
print(json_formatted_str)

'[{"Date":"19 Oct 2022","Location":"Erith","Time":"2022-10-19T20:00:00.000Z","Availability":4,"Max Duration":2}]'

AttributeError: 'str' object has no attribute 'to_json'

In [55]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 11 to 13
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          4 non-null      object        
 1   Location      4 non-null      object        
 2   Time          4 non-null      datetime64[ns]
 3   Availability  4 non-null      int64         
 4   Max Duration  4 non-null      int64         
dtypes: datetime64[ns](1), int64(2), object(2)
memory usage: 192.0+ bytes


In [90]:
df

Unnamed: 0,Date,Location,Time,Availability,Max Duration
12,19 Oct 2022,Erith,2022-10-19 20:00:00,4,2


In [53]:
# Using Dataframe.apply() to apply function to every row
def calculate_max_duration(row):
    startTime = row["Time"]
    duration = 1
    previousTime = startTime
    while True:
        nextTime = previousTime + datetime.timedelta(hours=1)         
        indices = df["Time"] == nextTime
        if not df.loc[indices].empty:
           duration+=1
           previousTime = nextTime
        else:
           break
        
    return duration
        

df['Max Duration'] = df.apply(calculate_max_duration, axis=1)
df

Unnamed: 0,Date,Location,Time,Availability,Max Duration
11,18 Oct 2022,Sidcup,2022-10-18 17:00:00,3,1
11,20 Oct 2022,Sidcup,2022-10-20 17:00:00,3,2
12,20 Oct 2022,Sidcup,2022-10-20 18:00:00,3,1
11,21 Oct 2022,Sidcup,2022-10-21 17:00:00,4,4
12,21 Oct 2022,Sidcup,2022-10-21 18:00:00,4,3
13,21 Oct 2022,Sidcup,2022-10-21 19:00:00,3,2
14,21 Oct 2022,Sidcup,2022-10-21 20:00:00,2,1


In [13]:
!pip install discord.py

Collecting discord.py
  Downloading discord.py-2.0.1-py3-none-any.whl (1.1 MB)
Collecting aiohttp<4,>=3.7.4
  Downloading aiohttp-3.8.3-cp38-cp38-win_amd64.whl (324 kB)
Collecting yarl<2.0,>=1.0
  Downloading yarl-1.8.1-cp38-cp38-win_amd64.whl (56 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-6.0.2-cp38-cp38-win_amd64.whl (28 kB)
Collecting aiosignal>=1.1.2
  Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)
Collecting frozenlist>=1.1.1
  Downloading frozenlist-1.3.1-cp38-cp38-win_amd64.whl (34 kB)
Collecting async-timeout<5.0,>=4.0.0a3
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Installing collected packages: multidict, frozenlist, yarl, async-timeout, aiosignal, aiohttp, discord.py
Successfully installed aiohttp-3.8.3 aiosignal-1.2.0 async-timeout-4.0.2 discord.py-2.0.1 frozenlist-1.3.1 multidict-6.0.2 yarl-1.8.1


In [23]:
!python docker/src/bot.py

^C


In [32]:
import json
from dateutil import parser

json_response = '[{"Date":"26 Oct 2022","Location":"Erith","Time":"2022-10-26T20:00:00.000Z","Availability":4,"Max Duration":2}]'
json_response_obj = json.loads(json_response)

for available_time in json_response_obj:
    date = available_time["Date"]
    time = available_time["Time"]
    time_obj = parser.parse(time)
    date_to_str = time_obj.strftime("%H:%M")
   
    number_of_courts= available_time["Availability"]
    max_duration=available_time["Max Duration"]
    location = available_time["Location"]
    response_item = f"There is availability in {location} on the {date} at {date_to_str} for {number_of_courts} court(s) for {max_duration}h"
    print(response_item)
 

There is availability in Erith on the 26 Oct 2022 at 20:00 for 4 court(s) for 2h


datetime.datetime(2022, 10, 26, 20, 0, tzinfo=tzutc())