In [1]:
# Endpoints Ref: https://platform.worldcat.org/api-explorer/apis/wcapi

import json
import os
import re
from urllib.parse import urlencode

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Column labels applied to the spreadsheet, in sheet order.
column_names = [
    'streaming', 'clio', 'wc_holdings', 'discard', 'title', 'bib_id', 'oclc_num',
    'format', 'author', 'holdings_sup', 'bib_sup', 'tag_965', 'lp', 'historic_circs',
    'comments', 'reserve_past_few', 'location',
]

# Load the sheet with the labels above, keep only rows that carry an OCLC number,
# renumber the rows from 0, add the two result columns (initialised to empty strings)
# and store the OCLC number as a 32-bit integer.
df = (
    pd.read_excel("cat_data.xlsx", names=column_names)
    .dropna(subset=['oclc_num'])
    .reset_index(drop=True)
    .assign(clio_y_n="", num_libs="")
    .astype({'oclc_num': 'int32'})
)

In [2]:
# Preview the first five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,streaming,clio,wc_holdings,discard,title,bib_id,oclc_num,format,author,holdings_sup,bib_sup,tag_965,lp,historic_circs,comments,reserve_past_few,location,clio_y_n,num_libs
0,n,n,132.0,n,Bright Red [sound recording] / Laurie Anderson.,9062442,227189789,MUSICAL SOUND RECORDING,"Anderson, Laurie.",Y,Y,,,,,,bar,,
1,,,,,"Chain : farm, factory, and the fate of our foo...",11043454,894893030,NONMUSICAL SOUND RECORDING,"Genoways, Ted,",Y,Y,,,,,,bar,,
2,n,n,39.0,m,Egmont [sound recording] / Beethoven.,398584,80373612,MUSICAL SOUND RECORDING,"Beethoven, Ludwig van, 1770-1827.",Y,Y,,y,,Is this an LP??? We don't even have the capaci...,,bar,,
3,y,y,733.0,y,Evolution [sound recording] / the United State...,4108195,44124520,MUSICAL SOUND RECORDING,United States Air Force Band.,Y,N,,,,,,bar,,
4,n,n,1.0,n,Guido Cantelli [sound recording].,9162461,427008859,MUSICAL SOUND RECORDING,,Y,Y,,,,,,bar,,


In [3]:
#### Creating URLs####

# WorldCat Search API settings.
# The key is read from the WORLDCAT_API_KEY environment variable so it does not have to be
# pasted into (and saved with) the notebook; it falls back to an empty string when unset.
API_Key = os.environ.get("WORLDCAT_API_KEY", "")
OCLC_Sym = "ZCU"  # value sent as the `oclcsymbol` request parameter
base_url = "http://www.worldcat.org/webservices/catalog"

#urls for cul system
def create_clio_urls(oclc):
    """Build the library-locations URL restricted to the institution in ``OCLC_Sym``.

    Parameters
    ----------
    oclc : int or str
        OCLC number of the record to look up.

    Returns
    -------
    str
        ``{base_url}/content/libraries/{oclc}?oclcsymbol=<OCLC_Sym>&wskey=<API_Key>``
    """
    # The query string must begin at "?"; anything placed before it becomes part of the
    # URL path. urlencode also percent-escapes reserved characters in the values.
    query = urlencode({"oclcsymbol": OCLC_Sym, "wskey": API_Key})
    return f"{base_url}/content/libraries/{oclc}?{query}"

#urls for number of libraries
def create_lib_urls(oclc):
    """Build the library-locations URL used to count the libraries holding a record.

    The request turns FRBR grouping off, asks for at most 100 libraries and requests a
    JSON response.

    Parameters
    ----------
    oclc : int or str
        OCLC number of the record to look up.

    Returns
    -------
    str
        ``{base_url}/content/libraries/{oclc}?frbrGrouping=off&maximumLibraries=100&format=json&wskey=<API_Key>``
    """
    # The query string must begin at "?"; anything placed before it becomes part of the
    # URL path. urlencode also percent-escapes reserved characters in the values.
    query = urlencode({
        "frbrGrouping": "off",
        "maximumLibraries": 100,
        "format": "json",
        "wskey": API_Key,
    })
    return f"{base_url}/content/libraries/{oclc}?{query}"

In [10]:
#### Actual API call, for now on a truncated version of the DF ####

#test df for api
#df_trun = df.head()

#api call to determine if oclc exists in the clio system and how many libraries have it

# We say that the holding exists in CLIO's catalogue if there is no message, because the message will say
# "Holding Not Found" if the holding doesn't exist in CLIO. Therefore, no message means it does exist.
def parse_response_clio(clio_url, row_index=None, timeout=30):
    """Fetch ``clio_url`` and record in ``df['clio_y_n']`` whether the holding was found.

    A response without a ``<message>`` element is recorded as "y"; a response that
    contains one is recorded as "n" (the service reports a missing holding through a
    message).

    Parameters
    ----------
    clio_url : str
        URL produced by ``create_clio_urls``.
    row_index : optional
        Label of the ``df`` row to update. When omitted, the global loop variable
        ``index`` is used, so existing one-argument call sites keep working.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout error,
        instead of blocking indefinitely.

    Returns
    -------
    str
        The flag written to the frame: "y" or "n".
    """
    if row_index is None:
        # Legacy behaviour: rely on the loop variable of the calling cell.
        row_index = index
    clio_response = requests.get(clio_url, timeout=timeout)
    clio_soup = BeautifulSoup(clio_response.content, "lxml")
    holding_flag = "n" if clio_soup.message else "y"
    df.at[row_index, 'clio_y_n'] = holding_flag
    return holding_flag

def _extract_total_lib_count(text):
    """Return the integer that follows the ``totalLibCount`` key in ``text``, or None."""
    # Capture only the first contiguous run of digits after the key (allowing a few
    # separator characters such as '":'), so digits belonging to neighbouring fields can
    # never be merged into the count.
    match = re.search(r'\btotalLibCount\b\D{0,4}(\d+)', text)
    return int(match.group(1)) if match else None


def parse_response_lib(lib_url, row_index=None, timeout=30):
    """Fetch ``lib_url`` and store the reported ``totalLibCount`` in ``df['num_libs']``.

    Parameters
    ----------
    lib_url : str
        URL produced by ``create_lib_urls``.
    row_index : optional
        Label of the ``df`` row to update. When omitted, the global loop variable
        ``index`` is used, so existing one-argument call sites keep working.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout error,
        instead of blocking indefinitely.

    Returns
    -------
    int or str
        The value written to the frame: the library count, or ``''`` when the response
        does not contain a ``totalLibCount`` value.
    """
    if row_index is None:
        # Legacy behaviour: rely on the loop variable of the calling cell.
        row_index = index
    lib_response = requests.get(lib_url, timeout=timeout)
    lib_soup = BeautifulSoup(lib_response.content, "html.parser")
    count = _extract_total_lib_count(lib_soup.text)
    # An empty string marks "count unavailable", matching the column's initial value.
    value = '' if count is None else count
    df.at[row_index, 'num_libs'] = value
    return value
In [5]:
# Query both endpoints for every record. The loop variable must stay named `index`:
# the parse_response_* helpers read it to decide which row of `df` to update.
for index, oclc_number in df['oclc_num'].items():
    parse_response_clio(create_clio_urls(oclc_number))
    parse_response_lib(create_lib_urls(oclc_number))

In [7]:
# Inspect the last ten rows to spot-check the filled-in result columns.
df.tail(10)

Unnamed: 0,streaming,clio,wc_holdings,discard,title,bib_id,oclc_num,format,author,holdings_sup,bib_sup,tag_965,lp,historic_circs,comments,reserve_past_few,location,clio_y_n,num_libs
8594,,,,,Åuvre pour piano [sound recording] / Heitor V...,2133593,28369923,MUSICAL SOUND RECORDING,"Villa-Lobos, Heitor.",Y,Y,,,,,,bsr,n,18.0
8595,,,,,Åuvre pour piano [sound recording] / Satie.,1705974,21746424,MUSICAL SOUND RECORDING,"Satie, Erik, 1866-1925.",Y,Y,,,,,,bsr,y,39.0
8596,,,,,Åuvre pour piano [sound recording]. Vol. II /...,1997107,28388015,MUSICAL SOUND RECORDING,"Shostakovich, DmitriiÌ Dmitrievich, 1906-1975.",Y,Y,,,,,,bsr,n,17.0
8597,,,,,Åuvres orchestrales [sound recording] = Orche...,2327284,33979600,MUSICAL SOUND RECORDING,"Poulenc, Francis, 1899-1963.",Y,Y,,,,,,bsr,n,26.0
8598,,,,,Åuvres pour deux pianos [sound recording] / C...,2091894,19632525,MUSICAL SOUND RECORDING,"Saint-SaeÌns, Camille, 1835-1921.",Y,Y,,,,,,bsr,n,38.0
8599,,,,,Åuvres pour piano [sound recording] / Debussy.,7188174,58650579,MUSICAL SOUND RECORDING,"Debussy, Claude, 1862-1918.",N,Y,,,,,,bsr,n,7.0
8600,,,,,Åuvres pour piano [sound recording] : MeÌlod...,10821261,881496641,MUSICAL SOUND RECORDING,"FaureÌ, Gabriel, 1845-1924.",Y,Y,,,,,,bsr,n,3.0
8601,,,,,Åuvres pour piano [sound recording] = Piano w...,1953170,29380629,MUSICAL SOUND RECORDING,"Chopin, FreÌdeÌric, 1810-1849.",N,Y,,,,,,bsr,n,
8602,,,,,Åuvres pour piano [sound recording] = piano w...,1948864,36784930,MUSICAL SOUND RECORDING,"Stravinsky, Igor, 1882-1971.",Y,Y,,,,,,bsr,n,8.0
8603,,,,,Åuvres symphoniques [sound recording] / Guill...,2230226,28300992,MUSICAL SOUND RECORDING,"Lekeu, Guillaume, 1870-1894.",Y,N,,,,,,bsr,y,29.0


In [8]:
# Persist the enriched catalogue (row index included) as CSV.
df.to_csv("updated_cat_data.csv", index=True)