In [1]:
%load_ext autoreload
%autoreload 2

from bs4 import BeautifulSoup as bs
from zeep import Client, helpers
from tqdm import tqdm
from glob import glob
import pandas as pd
import numpy as np
import rpa as r

import warnings
import pygsheets
import math
import time
import re
import os

# Notebook-wide settings.
# Silence every Python warning for the rest of the session; note that this
# also hides deprecation notices raised by the libraries used in later cells.
warnings.filterwarnings('ignore')
# Allow pandas to display up to 1000 characters per cell before truncating.
pd.set_option('display.max_colwidth', 1000)

In [2]:
from nlb_fun import *
from nlb_api_fun import *

### NLB section
#### Login 
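- My username and password are stored in a CSV file on my machine (its path is read from the `nlb_login` environment variable) and are used to fill in the login page.
- I am using RPA (the `rpa` package) to drive the browser. Pass `headless_mode=True` to `r.init()` to run it without a visible browser window.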

In [3]:
# Start the RPA browser session. The commented-out call is the headless
# variant; the active call uses the library defaults.
# r.init(headless_mode=True)
r.init()

# Open the NLB login page.
url_link = "https://cassamv2.nlb.gov.sg/cas/login"
r.url(url_link)

# Path of the credentials CSV; raises KeyError if `nlb_login` is not set.
auth_csv_file: str = os.environ['nlb_login']

# The CSV must have a `values` column: row 0 is read as the account name and
# row 1 as the password.
# NOTE(review): the `str` annotations are not enforced; pandas may parse an
# all-digit value as a number. The f-strings below convert it back to text.
info = pd.read_csv(auth_csv_file)
account_name: str = info['values'][0]
password: str = info['values'][1]

# Fill in the login form fields located by their XPath ids.
r.type('//*[@id="username"]', f'{account_name}')
r.type('//*[@id="password"]', f'{password}')

# Submit the form; the button is addressed positionally (4th <input> of the
# form section), so this XPath depends on the current page layout.
login_button_2 = """//*[@id="fm1"]/section/input[4]"""
r.click(login_button_2)

True

#### Finding pagination 

In [5]:
# Open the bookmarks page and wait a fixed 5 seconds before reading it.
r.url("https://www.nlb.gov.sg/mylibrary/Bookmarks")
time.sleep(5)

soup = bs(r.read('page'), 'html5')

# The page contains a <div> whose text includes "Showing"; the second-to-last
# space-separated token of that text is read as the total record count.
# The lookup is done once (the original repeated it and discarded one result).
showing_divs = soup.find_all("div", text=re.compile("Showing"))
if not showing_divs:
    raise RuntimeError(
        "No 'Showing ...' summary found on the bookmarks page; "
        "check that the login succeeded and the page finished loading."
    )
max_records = float(showing_divs[0].text.split(" ")[-2])

# Number of bookmarks assumed per page when computing the page count.
RECORDS_PER_PAGE = 20
range_list = range(1, int(math.ceil(max_records / RECORDS_PER_PAGE)) + 1)

# To indicate where the NEXT button is: page count + 2 is used later as the
# index of the pagination <li> to click.
# len(range_list) equals range_list[-1] for a non-empty range and, unlike
# indexing, does not fail when there are zero records.
counter = len(range_list) + 2
print(counter)

5


#### Calculating number of bookmarked books

In [6]:
# Maps a step index to the de-duplicated book URLs scraped at that step.
# Key 0 is the page shown before any pagination click.
book_urls_dict = dict()
soup = bs(r.read('page'), 'html5')
# get_book_urls_on_page is not defined in this notebook; presumably it comes
# from one of the star-imported helper modules.
book_urls_dict[0] = list(set(get_book_urls_on_page(soup)))

# NOTE(review): this loop runs `counter` times (page count + 2), which is more
# clicks than there are remaining pages. Confirm that the extra clicks are
# harmless on the last page; repeated URLs are de-duplicated in a later cell.
for i in range(1,counter+1):
    print(i)
    time.sleep(2)
    # The XPath always targets li[counter] (not li[i]), i.e. the same
    # pagination item is clicked on every iteration.
    click_thru_pages = f'//*[@id="bookmark-folder-content"]/nav/ul/li[{counter}]/a'
    r.click(click_thru_pages)
    # Fixed pause before re-reading the page after the click.
    time.sleep(2)
    soup = bs(r.read('page'), 'html5')
    book_urls_dict[i] = list(set(get_book_urls_on_page(soup)))

# End the RPA session; this is skipped if any statement above raises.
r.close()

1
2
3
4
5


True

#### Getting the collection of bookmarked books 

In [None]:
# Flatten the per-step URL lists (keys 0 .. len-1) into one list, in key order.
all_book_url_lists = [
    book_url
    for step_no in range(len(book_urls_dict))
    for book_url in book_urls_dict[step_no]
]

# Drop URLs seen on more than one step, then take the last run of digits in
# each URL as the book's BID.
unique_books = set(all_book_url_lists)
list_of_book_bids = [
    re.findall(r'\d+', book_url)[-1]
    for book_url in unique_books
]
print(f"No of unique books: {len(list_of_book_bids)}")

#### Making NLB API calls

In [None]:
# Query the NLB API for every BID and build one combined DataFrame.
#
# Frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and inside a catch-all handler
# that failure would silently send every book to `bid_w_issues`.
book_frames = list()
bid_w_issues = list()   # BIDs whose lookup or processing failed
bid_errors = dict()     # BID -> repr of the exception, for diagnosis
for bid_no in tqdm(list_of_book_bids):
    try:
        avail_book_obj = make_get_avail_api_call(bid_no)
        avail_book_df = df_get_avail_data(bid_no, avail_book_obj)

        title_detail_obj = make_get_title_details_api_call(bid_no)
        title_detail_df = df_get_title_data(title_detail_obj)

        final_book_df = final_book_avail_df(avail_book_df, title_detail_df)
        final_book_df['url'] = return_needed_url(bid_no)

        book_frames.append(final_book_df)
    except Exception as err:
        # Best effort: record the failing BID and carry on with the rest.
        # `Exception` (not a bare except) lets KeyboardInterrupt stop the loop.
        bid_w_issues.append(bid_no)
        bid_errors[bid_no] = repr(err)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(book_frames) if book_frames else pd.DataFrame()

In [None]:
# (rows, columns) of the combined API result.
df.shape

In [None]:
# BIDs that raised an error during the API loop.
bid_w_issues

#### Processing the data for loading into Google Sheets

In [None]:
# Pick the five columns needed for the sheet, already in their final order,
# then give them the short names used from here on.
source_columns = ['BranchName', 'TitleName', 'CallNumber', 'StatusDesc', 'url']
final_table = df[source_columns]
final_table.columns = ['library', 'title', 'number', 'availability', 'url']

# Number of distinct titles before any cleaning is applied.
unique_book_count = final_table['title'].drop_duplicates().size
unique_book_count

#### Processing

In [None]:
# Keep rows that have an availability value other than "For Reference Only".
availability = final_table['availability']
keep_row = availability.notnull() & (availability != "For Reference Only")
final_table = final_table[keep_row]

# Rows from the repository collection are relabelled as reference-only
# (after the filter above, so they stay in the table).
final_table.loc[final_table['library'] == "Repository Used Book Collection", 'availability'] = "For Reference Only"

# Title clean-up: keep the text before the first " | ", then the text before
# the first "/", and trim surrounding whitespace.
final_table['title'] = [
    raw_title.split(" | ")[0].split(r"/")[0].strip()
    for raw_title in final_table['title']
]

# Show "Not on Loan" as "Available".
final_table['availability'] = [
    status.replace("Not on Loan", "Available")
    for status in final_table['availability']
]

final_table = final_table.sort_values(['library', 'title'])

### Pushing the data into Google Sheets
#### Authentication

In [None]:
# Path of the Google service-account credentials file, read from the
# `gsheet_cred` environment variable (KeyError if it is not set).
google_auth = os.environ['gsheet_cred']
# Authorise pygsheets with that service file and open the spreadsheet by title.
gc = pygsheets.authorize(service_file=google_auth)
sh = gc.open('NLB Project')

#### Checking in all libraries

In [None]:
# Select the worksheet named "All" and clear the previous data before writing.
all_ = sh.worksheet_by_title("All")
# NOTE(review): the whole range string is passed as the first positional
# argument of `clear`; confirm pygsheets interprets it as the A2:F1000 range.
# Rows beyond 1000 would not be cleared by this call.
all_.clear('A2:F1000') 

# Write final_table to the worksheet starting at cell (1, 1).
all_.set_dataframe(final_table,(1,1))

#### [Link](https://docs.google.com/spreadsheets/d/1s5oYU59jyU_QO3IIhCClyWGoC_MpW9L_h4l4djDUKO0/edit#gid=1021888748) to my Google Sheet