### Scraping the NLB (National Library Board, Singapore) website to list the books I have borrowed

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local helper modules take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import re
import os
import time

import warnings
import pygsheets
import numpy as np
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup as bs

# Notebook-wide display settings.
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')
# Let long text columns (e.g. book titles) show up to 1000 characters untruncated.
pd.options.display.max_colwidth = 1000

#### Load self-created helper functions

In [3]:
# Import only the helpers this notebook actually calls instead of `import *`,
# so it is obvious where each name comes from and nothing else from nlb_fun
# is pulled into (or silently shadows names in) the notebook namespace.
from nlb_fun import activate_chrome_selenium_latest, log_in_nlb

In [4]:
# Create the browser object that the following cells log in with and scrape from.
# NOTE(review): is_headless=False presumably opens a visible Chrome window
# rather than a headless one — confirm against the helper's implementation.
browser = activate_chrome_selenium_latest(is_headless=False)



Current google-chrome version is 98.0.4758
Get LATEST chromedriver version for 98.0.4758 google-chrome
Driver [/Users/cliff/.wdm/drivers/chromedriver/mac64/98.0.4758.102/chromedriver] found in cache


### Log in first! 

In [5]:
# The path of the credentials CSV is taken from the `nlb_login` environment
# variable, so no secret is stored in the notebook itself.
auth_csv_file: str = os.environ['nlb_login']
info = pd.read_csv(auth_csv_file)

# The 'values' column carries the account name at index label 0 and the
# password at index label 1.
account_name: str = info.loc[0, 'values']
password: str = info.loc[1, 'values']

# Hand the credentials to the login helper and keep the browser it returns.
browser = log_in_nlb(browser, account_name, password)

### Load the loans page and parse the loans table

In [6]:
# Navigate the browser returned by the login step to the loans page.
loans_link = "https://www.nlb.gov.sg/mylibrary/Loans"
browser.get(loans_link)

# Fixed 5-second pause before reading the page source — presumably to give
# the loans table time to render; TODO confirm this is long enough on slow
# connections.
time.sleep(5)

# Parse the HTML currently held by the browser using the html5lib parser.
soup = bs(browser.page_source, "html5lib")

In [7]:
# Every <table> carrying exactly this class string is treated as a loans table.
loan_tables = soup.find_all("table", class_="table table-bordered table-striped table-list bg-white")

# Header texts in document order, truncated to the first five entries.
table_col = [
    header.text
    for loan_table in loan_tables
    for header in loan_table.find_all('th')
][:5]

# Flat list of every <td> text in document order; rows are rebuilt later.
table_cells = [
    cell.text
    for loan_table in loan_tables
    for cell in loan_table.find_all('td')
]

In [8]:
# quit() ends the whole WebDriver session (every window plus the chromedriver
# process). close() only closes the current window and can leave the driver
# process running after the notebook is done with it.
browser.quit()

### Prepare the raw data to push into Google Sheets

In [9]:
def _clean_cell(raw_text: str, label: str) -> str:
    """Tidy one scraped table cell.

    Newlines are removed, runs of spaces are collapsed to a single space and
    the result is stripped; then every occurrence of ``label`` (for example
    ``"Title: "``) is removed and the text is stripped again.

    Args:
        raw_text: Text of a ``<td>`` cell exactly as scraped.
        label: Literal label to remove from the cell text.

    Returns:
        The cleaned cell text.
    """
    flattened = re.sub(' +', ' ', raw_text.replace("\n", "")).strip()
    return flattened.replace(label, "").strip()


# The flat list of cell texts is regrouped into rows of five cells each,
# named in the order below.
loan_columns = ['no', 'title', 'code', 'due', 'renewed']

# Fail with a clear message if the scrape did not yield whole rows, instead
# of an opaque reshape error.
if len(table_cells) % len(loan_columns):
    raise ValueError(
        "Expected a multiple of {} table cells but scraped {}; "
        "the loans table layout may have changed.".format(len(loan_columns), len(table_cells))
    )

# Integer division avoids the float round-trip of int(len / 5); dtype=object
# keeps the cells as plain Python strings, including when there are no loans.
row_count = len(table_cells) // len(loan_columns)
loan_rows = np.array(table_cells, dtype=object).reshape(row_count, len(loan_columns))

# .copy() makes `books` an independent frame, so the column assignments below
# are not chained assignments on a slice of the five-column frame.
books = pd.DataFrame(loan_rows, columns=loan_columns)[['title', 'code', 'due']].copy()

# Label text that precedes the value inside each kept cell.
cell_labels = {'title': "Title: ", 'code': "Barcode: ", 'due': "Due on "}
for column, label in cell_labels.items():
    books[column] = [_clean_cell(text, label) for text in books[column]]

In [10]:
# Display the cleaned loans table (title, code, due) for a visual check.
books

Unnamed: 0,title,code,due
0,Beyond bitcoin : decentralised finance and the end of banks,B37191135H,19 Mar 2022
1,"Practical Docker with Python : build, release, and distribute your Python app with Docker",B37144716K,30 Mar 2022
2,Software engineering at Google : lessons learned from programming over time,B36384927I,30 Mar 2022


### Authenticate with Google and push the data into Google Sheets

In [11]:
# The path of the service-account key file is taken from the `gsheet_cred`
# environment variable, so no credential is stored in the notebook.
google_auth = os.environ['gsheet_cred']
gc = pygsheets.authorize(service_file=google_auth)

sh = gc.open('NLB Project')
wks = sh.worksheet_by_title("Current_borrowed")

# Wipe the rows written by a previous run (rows 2-17 of columns A-D).
wks.clear('A2:D17')

# Column D = column C minus E1 for exactly the rows written below.
# NOTE(review): E1 is not set by this notebook — presumably it holds the
# reference date on the sheet; confirm.
# With no loans the range would be the inverted "C2:C1", so the formula is
# only written when there is at least one row.
if not books.empty:
    wks.update_value('D2', "=ARRAYFORMULA(C2:C{}-E1)".format(books.shape[0] + 1))
wks.update_value('C19', "Average:")
# Fixed: the range end was missing its column letter ("D2:17"); the average
# is meant to cover D2:D17, the same rows that are cleared above.
wks.update_value('D19', "=AVERAGE(D2:D17)")

# Write the frame with its top-left corner at cell A1 (row 1, column 1).
wks.set_dataframe(books,(1,1))

### [Link](https://docs.google.com/spreadsheets/d/1s5oYU59jyU_QO3IIhCClyWGoC_MpW9L_h4l4djDUKO0/edit#gid=1021888748) to my Google Sheet