In [1]:
import os
import requests
import pandas as pd
from tqdm import tqdm
from glob import glob
from datetime import datetime
from zeep import Client, helpers

### Loading the parameters

In [2]:
# API key: first row of the `api` column in api.csv (read from the working directory).
API = pd.read_csv("api.csv")['api'].values[0]
# WSDL location of the catalogue service; the zeep client is built from it and
# is used by every service call further down.
PRODUCTION_URL = "https://openweb.nlb.gov.sg/OWS/CatalogueService.svc?singleWsdl"
client = Client(wsdl=PRODUCTION_URL)

### Looking at my books 

In [3]:
# Folder holding the book-price CSVs. Set the NLB_BOOK_PRICE_DIR environment
# variable to point elsewhere; the default is the path this notebook was written with.
BOOK_PRICE_DIR = os.environ.get("NLB_BOOK_PRICE_DIR", "/Users/cliff/main/sides/nlb/book_price")
# glob() returns matches in arbitrary order; sort so the combined frame is reproducible.
book_list = sorted(glob(os.path.join(BOOK_PRICE_DIR, "*.csv")))

In [4]:
# Read every CSV, then combine them in one call. DataFrame.append was removed in
# pandas 2.0, and appending inside a loop re-copies the growing frame each time.
book_frames = [pd.read_csv(book) for book in book_list]
# pd.concat raises on an empty list, so keep the old "empty frame" result in that case.
df = pd.concat(book_frames) if book_frames else pd.DataFrame()

In [5]:
# Spot-check one scraped title; in the output below it is paired with an unrelated given title.
df[df.scraped_title == "Functional Training and Beyond: Building the Ultimate Superfunctional Body and Mind (Building Muscle and Performance, Weight Training, Men's Health)"]

Unnamed: 0,given_title,scraped_title,price
0,The Emperors Handbook Meditations,Functional Training and Beyond: Building the U...,27.32


In [6]:
# Scraped titles to drop from the book list before searching the catalogue.
excluded_titles = [
    'The Cute Book of Kawaii Coloring: Learn Japanese Words by Coloring Cute Things',
    'High-Performance Without Pain: A Companion Workbook for High-Achievers and Those Aspiring To Be',
    "Functional Training and Beyond: Building the Ultimate Superfunctional Body and Mind (Building Muscle and Performance, Weight Training, Men's Health)",
]

# Keep every row whose scraped title is not in the list, then renumber the index from 0.
df = df[~df.scraped_title.isin(excluded_titles)].reset_index(drop=True)
df.shape

(234, 3)

In [7]:
# Preview the first five rows of the book list.
df.head()

Unnamed: 0,given_title,scraped_title,price
0,Seven steps to train your mind,Seven Steps to Train Your Mind,56.74
1,The Obstacle Is The Way,The Obstacle Is the Way: The Timeless Art of T...,23.54
2,Growth Hacker Marketing,Growth Hacker Marketing: A Primer on the Futur...,19.48
3,"Trust Me, I’m Lying: Confession of a Media Man...","Trust Me, I'm Lying: Confessions of a Media Ma...",22.89
4,"Making Habits, Breaking Habits","Making Habits, Breaking Habits: Why We Do Thin...",27.18


#### Search Feature 
- Quick search function for getting specific book details

In [51]:
def search_bk_by_title(title: str, max_records: int = 10,
                       media_code: str = 'BK', language: str = 'English') -> dict:
    """Build the keyword arguments for a catalogue ``Search`` call by title.

    No request is sent here; the returned dict is meant to be unpacked into
    ``client.service.Search(**payload)``.

    Parameters
    ----------
    title : str
        Search terms for the ``Title`` search field.
    max_records : int, default 10
        Value sent as ``MaximumRecords`` in the modifiers.
    media_code : str, default 'BK'
        Search terms for the ``MediaCode`` search field.
    language : str, default 'English'
        Search terms for the ``Language`` search field.

    Returns
    -------
    dict
        Payload with the ``APIKey``, ``SearchItems`` and ``Modifiers`` entries.
        With the default arguments it is identical to the previous hard-coded payload.
    """
    search_items = [
        { "SearchField": "Title", "SearchTerms": title },
        { "SearchField": "MediaCode", "SearchTerms": media_code },
        { "SearchField": "Language", "SearchTerms": language },
    ]

    modifiers = {
        "SortSchema": None,
        "StartRecordPosition": 1,
        "MaximumRecords": max_records,
        "SetId": None,
    }

    return {
        "APIKey": API,  # module-level key loaded in the parameters cell
        "SearchItems": { "SearchItem": search_items },
        "Modifiers": modifiers,
    }

def get_book_details_by_title(title: str) -> pd.DataFrame:
    """Search the catalogue for ``title`` and return the matches as a DataFrame.

    Parameters
    ----------
    title : str
        Title to search for; also written to the ``search_title`` column of
        every returned row.

    Returns
    -------
    pd.DataFrame
        One row per title in the response. If the search returns no titles, or
        the request fails, a single placeholder row is returned with ``None`` in
        the seven detail columns, so every searched title keeps at least one row.

    Notes
    -----
    Request failures are reported through ``warnings.warn`` rather than raised.
    ``KeyboardInterrupt`` and ``SystemExit`` are not caught, so a long batch of
    searches can still be interrupted.
    """
    import warnings  # local import keeps this cell independent of the imports cell

    placeholder_columns = ['BID', 'ISBN', 'TitleName', 'Author', 'PublishYear', 'MediaCode', 'MediaDesc']

    tmp_table = None
    try:
        response = helpers.serialize_object(client.service.Search(**search_bk_by_title(title)))
        # A response without titles is an ordinary "no match", not an error.
        titles = (response or {}).get('Titles')
        records = (titles or {}).get('Title')
        if records:
            tmp_table = pd.DataFrame(records)
    except Exception as err:
        # Best effort: keep the batch going, but make real failures visible so
        # they are not mistaken for "no match".
        warnings.warn(f"Search for {title!r} failed: {err!r}")

    if tmp_table is None:
        tmp_table = pd.DataFrame([[None] * len(placeholder_columns)], columns=placeholder_columns)

    tmp_table['search_title'] = title
    return tmp_table

In [53]:
# Accumulator for the search results. The two search loops below add to it,
# so re-run this cell before re-running them to avoid duplicated rows.
main = pd.DataFrame()

In [52]:
# Tested this
get_book_details_by_title("Predictive irrational")

Unnamed: 0,BID,ISBN,TitleName,Author,PublishYear,MediaCode,MediaDesc,search_title
0,,,,,,,,Predictive irrational


In [44]:
# Book-list rows whose given title contains 'Predict'.
df[df['given_title'].str.contains('Predict')]

Unnamed: 0,given_title,scraped_title,price
26,Predictive irrational,"Predictably Irrational, Revised and Expanded E...",24.18
205,The Physics of Wall Street: A Brief History of...,The Physics of Wall Street: A Brief History of...,56.06


In [54]:
# Search once per given title, then extend `main` with a single concat
# (DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call).
given_title_frames = [
    get_book_details_by_title(given_title)
    for given_title in tqdm(df['given_title'].tolist())
]
main = pd.concat([main, *given_title_frames])

100%|██████████████████████████████████████████████████████████████████████| 234/234 [02:59<00:00,  1.30it/s]


In [55]:
# Second pass: search once per scraped title and add the results to `main` in one
# concat (DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call).
scraped_title_frames = [
    get_book_details_by_title(scraped_title)
    for scraped_title in tqdm(df['scraped_title'].tolist())
]
main = pd.concat([main, *scraped_title_frames])

100%|██████████████████████████████████████████████████████████████████████| 234/234 [02:04<00:00,  1.88it/s]


In [56]:
# (rows, columns) of the accumulated search results.
main.shape

(1051, 8)

In [57]:
# Accumulated results whose search title contains 'Predict'.
main[main.search_title.str.contains('Predict')]

Unnamed: 0,BID,ISBN,TitleName,Author,PublishYear,MediaCode,MediaDesc,search_title
0,,,,,,,,Predictive irrational
0,14688206.0,0547317271 ()|9780547317274 (),The physics of Wall Street : a brief history o...,"Weatherall, James Owen.",2013.0,BK,Books,The Physics of Wall Street: A Brief History of...
0,,,,,,,,"Predictably Irrational, Revised and Expanded E..."
0,202137421.0,9780143125082 ((pbk.))|0143125087 ((pbk.)),The signal and the noise : why so many predict...,"Silver, Nate,",2015.0,BK,Books,The Signal and the Noise: Why So Many Predicti...
1,203468951.0,9781101595954 (electronic bk),The signal and the noise : why so many predict...,"Silver, Nate,",2012.0,BK,Books,The Signal and the Noise: Why So Many Predicti...
0,,,,,,,,The Physics of Wall Street: A Brief History of...


In [89]:
# Strip whitespace from the search titles up front so matched and unmatched rows
# use the same keys (previously the unmatched rows were re-added unstripped).
main_clean = main.assign(search_title=main.search_title.str.strip()).drop_duplicates()

# Matched rows: drop entries whose ISBN text mentions 'electronic', then keep one
# row per TitleName and one per (Author, search_title) after sorting by title and year.
matched = (
    main_clean[~main_clean.ISBN.fillna('x').str.contains('electronic')]
    .sort_values(['TitleName', 'PublishYear'])
    .drop_duplicates('TitleName')
    .drop_duplicates(['Author', 'search_title'])
)
matched = matched[matched.ISBN.notnull() & matched.TitleName.notnull()]

# Searches that returned nothing stay in the table as placeholder rows.
unmatched = main_clean[main_clean.TitleName.isnull()]

# pd.concat replaces DataFrame.append (removed in pandas 2.0); chaining instead of
# inplace=True avoids mutating a filtered slice.
sub_main = (
    pd.concat([matched, unmatched])
    .drop_duplicates()
    .sort_values(['search_title', 'TitleName'])
    .reset_index(drop=True)
)
print(sub_main.shape)
sub_main.head(10)

(371, 8)


Unnamed: 0,BID,ISBN,TitleName,Author,PublishYear,MediaCode,MediaDesc,search_title
0,12580833.0,0672327279 (paperback),1337 h4x0r h4ndb00k / Tapeworm.,"Tapeworm,",2005.0,BK,Books,1337 h4x0r H4ndb00k
1,14227569.0,9780307476463 (paperback)|0307593312 (hardcove...,1Q84 / Haruki Murakami ; translated from the J...,"Murakami, Haruki,",2011.0,BK,Books,1Q84
2,202848452.0,9780997722901 ((paperback) :)|0997722908 ((pap...,The global novel : writing the world in the 21...,"Kirsch, Adam,",2016.0,BK,Books,1Q84
3,200372552.0,9780141047973 (paperback)|0141047976 (paperback),23 things they don't tell you about capitalism...,"Chang, Ha-Joon,",2011.0,BK,Books,23 things they don't tell you about capitalism
4,,,,,,,,50 Artist You Should Know
5,202701875.0,9783791381695 ((paperback))|3791381695 ((paper...,50 artists you should know / Thomas Köster ; w...,"Köster, Thomas,",2016.0,BK,Books,50 Artists: You Should Know
6,205500298.0,9781492062417 (paperback)|1492062413 (paperback),97 things every data engineer should know : co...,,2021.0,BK,Books,97 Things Every Data Engineer Should Know
7,,,,,,,,A Data-Driven Company: 21 Lessons for large or...
8,,,,,,,,A Data-Driven Company: 21 lessons for large or...
9,,,,,,,,A Guide To A Good Life: The Ancient Art of Sto...


In [90]:
# Count how many rows each search title kept after de-duplication.
title_counts = sub_main.search_title.value_counts()
search_checks = title_counts.rename_axis('search_title').reset_index(name='cnts')

# Search titles that still map to more than one catalogue row.
ambiguous_searches = search_checks[search_checks.cnts > 1]
print(ambiguous_searches.shape)
ambiguous_searches

(37, 2)


Unnamed: 0,search_title,cnts
0,Tipping Point,8
1,Outliers,8
2,Essentialism,7
3,The End of Poverty,7
4,Breaking and Entering,6
5,Measure What Matters,6
6,Barking Up The Wrong Tree,5
7,The Paradox of Choice,5
8,Sensemaking,4
9,Crucial Conversations,4


In [96]:
# De-duplicated rows kept for the 'The End of Poverty' search.
sub_main[sub_main.search_title.str.contains('The End of Poverty')]

Unnamed: 0,BID,ISBN,TitleName,Author,PublishYear,MediaCode,MediaDesc,search_title
270,13603920,1586488244 (hardcover)|9781586488246 (hardcover),Building social business : the new kind of cap...,"Yunus, Muhammad,",2010,BK,Books,The End of Poverty
271,13079284,9787208071339 (),Pin qiong de zhong jie : wo men shi dai de jin...,"Sachs, Jeffrey.",2007,BK,Books,The End of Poverty
272,203943243,3030147630 (hardcover)|9783030147631 (hardcove...,The end of poverty : inequality and growth in ...,"Edward, Peter,",2019,BK,Books,The End of Poverty
273,200156330,9780385525817 ()|0385525818 (),The idealist : Jeffrey Sachs and the quest to ...,"Munk, Nina,",2013,BK,Books,The End of Poverty
274,200561886,9780199937875 (hardback acidfree paper)|019993...,The locust effect : why the end of poverty req...,"Haugen, Gary A.,",2014,BK,Books,The End of Poverty
275,202806971,9780316471893 ((hardcover))|0316471895 ((hardc...,Utopia for realists : how we can build the ide...,"Bregman, Rutger,",2017,BK,Books,The End of Poverty
276,202250421,9781612051406 (hc alkaline paper)|1612051405 (...,Watching human rights : the 101 best films / b...,"Gibney, Mark.",2013,BK,Books,The End of Poverty


In [97]:
# Fresh search for the same title, to compare the raw results with the rows kept in sub_main.
get_book_details_by_title('The End of Poverty')

Unnamed: 0,BID,ISBN,TitleName,Author,PublishYear,MediaCode,MediaDesc,search_title
0,13603920,1586488244 (hardcover)|9781586488246 (hardcover),Building social business : the new kind of cap...,"Yunus, Muhammad,",2010,BK,Books,The End of Poverty
1,203297824,9781101643280 (electronic bk),The end of poverty [electronic resource] : Eco...,"Sachs, Jeffrey D.",2006,BK,Books,The End of Poverty
2,12525586,0141018666 (paperback),The end of poverty : how we can make it happen...,"Sachs, Jeffrey.",2005,BK,Books,The End of Poverty
3,203943243,3030147630 (hardcover)|9783030147631 (hardcove...,The end of poverty : inequality and growth in ...,"Edward, Peter,",2019,BK,Books,The End of Poverty
4,203246958,,The end of poverty (summary) [electronic resou...,"Sachs, Jeffrey D.",2017,BK,Books,The End of Poverty
5,200156330,9780385525817 ()|0385525818 (),The idealist : Jeffrey Sachs and the quest to ...,"Munk, Nina,",2013,BK,Books,The End of Poverty
6,200561886,9780199937875 (hardback acidfree paper)|019993...,The locust effect : why the end of poverty req...,"Haugen, Gary A.,",2014,BK,Books,The End of Poverty
7,13079284,9787208071339 (),Pin qiong de zhong jie : wo men shi dai de jin...,"Sachs, Jeffrey.",2007,BK,Books,The End of Poverty
8,202806971,9780316471893 ((hardcover))|0316471895 ((hardc...,Utopia for realists : how we can build the ide...,"Bregman, Rutger,",2017,BK,Books,The End of Poverty
9,202250421,9781612051406 (hc alkaline paper)|1612051405 (...,Watching human rights : the 101 best films / b...,"Gibney, Mark.",2013,BK,Books,The End of Poverty


In [92]:
# Rows for search titles containing 'A Data-Driven'; the output below shows two
# placeholder rows whose titles differ only in letter case.
sub_main[sub_main.search_title.str.contains('A Data-Driven')]

Unnamed: 0,BID,ISBN,TitleName,Author,PublishYear,MediaCode,MediaDesc,search_title
7,,,,,,,,A Data-Driven Company: 21 Lessons for large or...
8,,,,,,,,A Data-Driven Company: 21 lessons for large or...


### Some data cleaning 

In [None]:
# Corrections for given titles, applied as literal substring replacements.
# The search loops read df['given_title'], so they need re-running for these to take effect.
given_title_fixes = {
    "Dark Pools: The rise of A.I. trading machines and the looming threat to Wall Street.": "Dark Pools",
    "Predictive irrational": "Predictably irrational",
}

# .str.replace is vectorised and leaves missing titles as NaN; the previous
# per-element str.replace raised on the first missing value.
for wrong_title, fixed_title in given_title_fixes.items():
    df['given_title'] = df['given_title'].str.replace(wrong_title, fixed_title, regex=False)

# na=False keeps the mask boolean even when a given title is missing.
df[df.given_title.str.contains("Predictably", na=False)]

In [None]:
# `get_book_details` is not defined anywhere in this notebook, so this cell failed
# on a fresh kernel; use the title-search helper defined above instead.
# NOTE(review): the term looks like an ISBN but, as in the original call, it is
# sent as a Title search - confirm the intent.
get_book_details_by_title("0007256531")

In [None]:
# `get_book_details` is not defined anywhere in this notebook, so this cell failed
# on a fresh kernel; use the title-search helper defined above instead.
get_book_details_by_title("Predictably irrational The Hidden Forces That Shape Our Decisions")

In [None]:
# NOTE(review): no cell in this notebook adds a `titlename` column to `df`
# (the outputs above show given_title, scraped_title, price) - TODO confirm
# which frame or column this was meant to inspect.
df[df.titlename.isnull()]

In [None]:
# NOTE(review): relies on a `titlename` column that no cell in this notebook
# creates on `df` - TODO confirm the intended frame or column.
df[df.titlename.notnull()].head(30)

In [None]:
# NOTE(review): `get_book_isbn` is not defined anywhere in this notebook, so this
# cell cannot run on a fresh kernel - TODO restore or replace the helper.
df['isbn'] = [get_book_isbn('Title', i) for i in tqdm(df['scraped_title'])]

In [None]:
# NOTE(review): calls `get_book_isbn`, which is not defined in this notebook.
get_book_isbn('Title', "Web Scraping with Python collecting")

In [None]:
# Rows with no ISBN; needs the `isbn` column that only the `get_book_isbn` cell above creates.
df[df.isbn.isnull()]

#### Available Titles by Library 

In [None]:
# Keyword arguments for the GetAvailabilityInfo call in the next cell,
# for one hard-coded BID and up to 100 records.
get_avail = {
    "APIKey": API,
    "BID": 204485571,
    "Modifiers" : {
        "SortSchema": None,
        "StartRecordPosition": 1,
        "MaximumRecords": 100,
        "SetId": None
    },
}

In [None]:
# Call the service's GetAvailabilityInfo operation with the payload built above.
avail_info = client.service.GetAvailabilityInfo(**get_avail)

In [None]:
# NOTE: this reuses the name `df`, replacing the book-list frame built earlier.
# Pull the per-item records out of the serialised response.
# (presumably "Items" wraps a single list of item dicts - TODO confirm against the API response)
item_records = pd.DataFrame(helpers.serialize_object(avail_info).get("Items").values()).T[0]

# One single-row frame per item, combined in a single concat
# (DataFrame.append was removed in pandas 2.0).
item_frames = [pd.DataFrame.from_dict(item, orient='index').T for item in item_records]
# pd.concat raises on an empty list, so keep the old "empty frame" result in that case.
df = pd.concat(item_frames) if item_frames else pd.DataFrame()

# Same BID that was requested in `get_avail`.
df['bid'] = 204485571
df.head()

#### Get Title Details 

In [None]:
# Keyword arguments for the GetTitleDetails call in the next cell; the lookup is
# by ISBN here, with the BID alternative left commented out.
title_inputs = {
    "APIKey": API,
    # "BID": 204485571,
    "ISBN": 9780007256532
}

In [None]:
# Call the service's GetTitleDetails operation and display the raw response.
title_details = client.service.GetTitleDetails(**title_inputs)
title_details

#### Read Recommendations - Doesn't seem to work

In [None]:
# get_recom = {
#     "APIKey": API,
#     "BID": 204485571,
#     "Modifiers" : {
#         "SortSchema": None,
#         "StartRecordPosition": 1,
#         "MaximumRecords": 100,
#         "SetId": None
#     },
# }

# PRODUCTION_URL = "http://openweb.nlb.gov.sg/ows/ReadAlikeService.svc"
# client = Client(wsdl=PRODUCTION_URL)
# vars(client.wsdl)

# get_recommendations = client.readalikeservice.GetRecommendationsForTitles(**get_recom)
# get_recommendations