### Chrome Selenium

In [18]:
import time
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup as bs
import re
import math

# Chrome launch options shared by every driver created in this notebook.
options = Options()
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-gpu")
options.add_argument("--disable-features=NetworkService")
# Chrome's documented form for this switch is "width,height"; the previous
# "1920x1080" spelling is not a valid value for a regular (non-headless) window.
options.add_argument("--window-size=1920,1080")
# NOTE(review): this is the second --disable-features switch; Chrome may keep
# only one of them. If both features must be disabled, confirm and merge them
# into a single comma-separated value.
options.add_argument("--disable-features=VizDisplayCompositor")

In [8]:
def log_in_nlb(browser, account_name: str, password: str):
    """Sign in to the NLB CAS login page and return the same browser object.

    The function loads the login URL, types the account name and the password
    into the elements with ids ``username`` and ``password`` (pausing one
    second after the page load and after each field), then clicks the fourth
    ``<input>`` inside the ``fm1`` form's ``<section>``.
    """
    login_url = 'https://cassamv2.nlb.gov.sg/cas/login'

    # XPath of each form field mapped to the value typed into it; dict order
    # guarantees the username is filled in before the password.
    field_values = {
        """//*[@id="username"]""": account_name,
        """//*[@id="password"]""": password,
    }

    browser.get(login_url)
    time.sleep(1)

    for field_xpath, value in field_values.items():
        field = browser.find_element("xpath", field_xpath)
        field.send_keys(f"{value}")
        time.sleep(1)

    # Submit the form.
    submit_input = browser.find_element("xpath", """//*[@id="fm1"]/section/input[4]""")
    submit_input.click()
    return browser

In [10]:
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Selenium 4 expects the chromedriver path to be wrapped in a Service object.
# Passing the path as the first positional argument is deprecated and is
# rejected by later Selenium 4 releases.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Credentials come from environment variables so they never appear in the notebook.
log_in_nlb(driver, os.environ['nlb_login_account'], os.environ['nlb_login_pw'])



Current google-chrome version is 104.0.5112
Get LATEST chromedriver version for 104.0.5112 google-chrome
Driver [/Users/cliff/.wdm/drivers/chromedriver/mac64/104.0.5112.79/chromedriver] found in cache


<selenium.webdriver.chrome.webdriver.WebDriver (session="4021333bf83685206a6b6fbffacc0e51")>

In [19]:
url_link = "https://www.nlb.gov.sg/mylibrary/Bookmarks"
driver.get(url_link)
time.sleep(5)  # fixed pause before reading the page source
soup = bs(driver.page_source, 'html5lib')

# The total record count is read from the first <div> whose text contains
# "Showing": it is the second-to-last space-separated token of that text.
showing_divs = soup.find_all("div", text=re.compile("Showing"))
if not showing_divs:
    raise RuntimeError(
        "No 'Showing' summary found on the bookmarks page; "
        "the login may have failed or the page may not have finished loading."
    )
max_records = float(showing_divs[0].text.split(" ")[-2])

records_per_page = 20  # assumes the site lists 20 bookmarks per page -- TODO confirm
page_count = int(math.ceil(max_records / records_per_page))
range_list = range(1, page_count + 1)

# To indicate where the NEXT button is: two positions past the last page number.
# Computed from page_count so that zero records no longer raises IndexError
# (range_list[-1] fails on an empty range).
counter = page_count + 2
print(counter)
range_list

5


range(1, 4)

### Old code 

In [None]:
%load_ext autoreload
%autoreload 2

from bs4 import BeautifulSoup as bs
from zeep import Client, helpers
from tqdm import tqdm
from glob import glob
import pandas as pd
import numpy as np
import rpa as r

import warnings
import math
import time
import re
import os

# Some notebook configs
# Silence every Python warning for the rest of the session; note that this also
# hides deprecation warnings raised by pandas and the other libraries used here.
warnings.filterwarnings('ignore')
# Allow up to 1000 characters per cell when DataFrames are displayed.
pd.set_option('display.max_colwidth', 1000)

In [None]:
from nlb_fun import *
from nlb_api_fun import *

### NLB section
#### Login 
- My username and password are stored locally on my machine (as environment variables) and are needed to fill in the login page.
- I am using RPA (the `rpa` package), executed with `headless_mode=True`.

In [None]:
def rpa_nlb_login(account_name, pw):
    """Start an RPA session, open the NLB login page and submit the credentials.

    Parameters
    ----------
    account_name : str
        Value typed into the element with id ``username``.
    pw : str
        Value typed into the element with id ``password``.

    Returns
    -------
    module
        The ``rpa`` module (``r``) whose session was initialised here.
    """
    r.init()

    r.url("https://cassamv2.nlb.gov.sg/cas/login")
    r.type('//*[@id="username"]', f'{account_name}')
    # Type the password that was passed in rather than a notebook-level
    # `password` variable, so the function works with any caller-supplied value.
    r.type('//*[@id="password"]', f'{pw}')

    # Submit the form.
    r.click("""//*[@id="fm1"]/section/input[4]""")

    return r

# Credentials are read from environment variables; a missing variable raises KeyError.
account_name = os.environ['nlb_login_account']
password = os.environ['nlb_login_pw']

In [None]:
r = rpa_nlb_login(account_name, password)

#### Login tests 

In [None]:
# Login test: use a blank account_name and password.
# NOTE: this rebinds the notebook-level `account_name` / `password` variables,
# so the values previously read from os.environ are lost until that cell is re-run.
account_name = ""
password = ""

# Start an RPA session and open the NLB login page.
r.init()    
r.url("https://cassamv2.nlb.gov.sg/cas/login")

In [None]:
# Type the current values of `account_name` and `password` into the login fields.
r.type('//*[@id="username"]', f'{account_name}')
r.type('//*[@id="password"]', f'{password}')

# Click the fourth <input> inside the fm1 form's <section> (presumably the submit button).
r.click("""//*[@id="fm1"]/section/input[4]""")

In [None]:
r.close()

#### Finding pagination 

In [None]:
r.url("https://www.nlb.gov.sg/mylibrary/Bookmarks")
time.sleep(5)  # fixed pause before reading the page

soup = bs(r.read('page'), 'html5')

# The total record count is read from the first <div> whose text contains
# "Showing": it is the second-to-last space-separated token of that text.
showing_divs = soup.find_all("div", text=re.compile("Showing"))
if not showing_divs:
    raise RuntimeError(
        "No 'Showing' summary found on the bookmarks page; "
        "the login may have failed or the page may not have finished loading."
    )
max_records = float(showing_divs[0].text.split(" ")[-2])

records_per_page = 20  # assumes the site lists 20 bookmarks per page -- TODO confirm
page_count = int(math.ceil(max_records / records_per_page))
range_list = range(1, page_count + 1)

# To indicate where the NEXT button is: two positions past the last page number.
# Computed from page_count so that zero records no longer raises IndexError
# (range_list[-1] fails on an empty range).
counter = page_count + 2
print(counter)

#### Calculating number of bookmarked books

In [None]:
# Page 0 is whatever the browser is currently showing.
soup = bs(r.read('page'), 'html5')
book_urls_dict = {0: list(set(get_book_urls_on_page(soup)))}

# The same pagination item (position `counter`, i.e. the NEXT button) is
# clicked on every iteration, so its XPath is built once up front.
click_thru_pages = f'//*[@id="bookmark-folder-content"]/nav/ul/li[{counter}]/a'

# NOTE(review): this loop runs `counter` times, which looks like more clicks
# than there are remaining pages; the extra iterations appear to re-read the
# last page. Confirm before tightening the bound.
for i in range(1, counter + 1):
    print(i)
    time.sleep(2)
    r.click(click_thru_pages)
    time.sleep(2)
    soup = bs(r.read('page'), 'html5')
    book_urls_dict[i] = list(set(get_book_urls_on_page(soup)))

r.close()

#### Getting the collection of bookmarked books 

In [None]:
# Flatten the per-page URL lists (keys 0 .. len-1) into one list, in page order.
all_book_url_lists = [
    book_url
    for page_no in range(len(book_urls_dict))
    for book_url in book_urls_dict[page_no]
]

# The same book can be collected from more than one page read; keep each URL once.
unique_books = set(all_book_url_lists)

# The id kept for each book is the last run of digits found in its URL.
list_of_book_bids = [re.findall(r'\d+', book_url)[-1] for book_url in unique_books]
print(f"No of unique books: {len(list_of_book_bids)}")

#### Making NLB API calls

In [None]:
# Collect one frame per book and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0; combined with a blanket except it
# would silently send every book to `bid_w_issues` and leave `df` empty.
book_frames = []
bid_w_issues = list()
for bid_no in tqdm(list_of_book_bids):
    try:
        avail_book_obj = make_get_avail_api_call(bid_no)
        avail_book_df = df_get_avail_data(bid_no, avail_book_obj)

        title_detail_obj = make_get_title_details_api_call(bid_no)
        title_detail_df = df_get_title_data(title_detail_obj)

        final_book_df = final_book_avail_df(avail_book_df, title_detail_df)
        final_book_df['url'] = return_needed_url(bid_no)

        book_frames.append(final_book_df)
    except Exception:
        # Best effort: remember the id that failed and carry on with the rest.
        # Catching Exception (not a bare except) lets Ctrl-C / kernel interrupt through.
        bid_w_issues.append(bid_no)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(book_frames) if book_frames else pd.DataFrame()

In [None]:
df.shape

In [None]:
bid_w_issues

#### Processing data for loading into Google Sheets

In [None]:
# Keep the five columns of interest, give them short names, and put the
# library column first.
column_names = {
    'TitleName': 'title',
    'BranchName': 'library',
    'CallNumber': 'number',
    'StatusDesc': 'availability',
}
final_table = (
    df[['TitleName', 'BranchName', 'CallNumber', 'StatusDesc', 'url']]
    .rename(columns=column_names)
    .loc[:, ['library', 'title', 'number', 'availability', 'url']]
)

# Number of distinct titles in the table.
unique_book_count = len(final_table['title'].drop_duplicates())
unique_book_count

#### Processing

In [None]:
# NOTE: this cell is not idempotent. The repository rows are relabelled
# "For Reference Only" below, so running the cell a second time would drop them
# in the first filter. Re-run the previous cell before re-running this one.

# Drop rows without an availability value and rows that are reference-only.
keep_rows = final_table['availability'].notnull() & (
    final_table['availability'] != "For Reference Only"
)
# Work on an explicit copy so the assignments below do not write into a
# filtered slice of the previous frame (SettingWithCopyWarning).
final_table = final_table[keep_rows].copy()

# Keep only the part of the title before the first " | " and before the first "/".
final_table['title'] = [
    raw_title.split(" | ")[0].split(r"/")[0].strip()
    for raw_title in final_table['title']
]

# Copies held in the repository collection are relabelled as reference-only.
final_table.loc[
    final_table['library'] == "Repository Used Book Collection", 'availability'
] = "For Reference Only"

final_table['availability'] = [
    status.replace("Not on Loan", "Available")
    for status in final_table['availability']
]

# Reassign instead of sorting in place.
final_table = final_table.sort_values(['library', 'title'])