* Purpose: Script to extract book metadata from WorldCat API results
* License: GPLv3 (Free Software) 
* Date: Oct 15, 2018
* Additional Notes: API GET Request: http://www.worldcat.org/webservices/catalog/content/62123162?wskey={built-in-api-key}&recordSchema=info%3Asrw%2Fschema%2F1%2Fdc

In [2]:
from urllib.request import urlopen
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
from geopy.geocoders import Nominatim

In [107]:
# Extract book metadata from a saved API result XML file into a DataFrame.
#
# Reads 'data/source/book_opensearch_metadata.xml', walks every
# <records>/<record> element and takes the FIRST occurrence of each
# Dublin Core field listed below. The result is `booK_metadata_df`, with one
# row per record and all values stored as strings.

# Namespace prefixes used by the XPath-style expressions in this cell.
ns = {'dc': 'http://purl.org/dc/elements/1.1/', 'oclcterms': 'http://purl.org/oclc/terms/', 'xmlns': 'http://www.loc.gov/zing/srw/', 'diag': 'http://www.loc.gov/zing/srw/diagnostic/'}

# Column order of the resulting frame. Passing it explicitly also gives an
# empty result the expected columns, so a later column-based export still works.
metadata_columns = ['oclc_number', 'title', 'creator', 'contributor', 'description', 'date', 'book_format', 'language', 'publisher']


def _first_text(record, tag):
    """Return the text of the first `tag` element of `record`, or "".

    `tag` is a namespace-prefixed element name (e.g. "dc:creator") looked up
    under the record's recordData/oclcdcs element. An absent element and an
    element with no text both yield the empty string.
    """
    element = record.find("xmlns:recordData/xmlns:oclcdcs/" + tag, ns)
    if element is None or element.text is None:
        return ""
    return element.text


tree = ET.parse('data/source/book_opensearch_metadata.xml')
root = tree.getroot()

# A response without a <records> element is treated as "no records" instead
# of failing with an IndexError.
records_ele = root.find("xmlns:records", ns)
record_list = records_ele.findall("xmlns:record", ns) if records_ele is not None else []

# Rows are collected in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
metadata_rows = []

for record in record_list:
    # Title and record identifier are required: a record lacking either one
    # raises IndexError here, exactly as before.
    title = record.findall("xmlns:recordData/xmlns:oclcdcs/dc:title", ns)[0].text
    oclc_number = record.findall("xmlns:recordData/xmlns:oclcdcs/oclcterms:recordIdentifier", ns)[0].text

    # Optional fields fall back to an empty string.
    creator = _first_text(record, "dc:creator")
    contributor = _first_text(record, "dc:contributor")
    description = _first_text(record, "dc:description")
    date = _first_text(record, "dc:date")
    book_format = _first_text(record, "dc:format")
    language = _first_text(record, "dc:language")
    publisher = _first_text(record, "dc:publisher")

    metadata_rows.append({'oclc_number': oclc_number, 'title': title, 'creator': creator, 'contributor': contributor, 'description': description, 'date': date, 'book_format': book_format, 'language': language, 'publisher': publisher})

booK_metadata_df = pd.DataFrame(metadata_rows, columns=metadata_columns)
    


In [108]:
# Preview the first ten extracted records.
booK_metadata_df.head(n=10)

Unnamed: 0,book_format,contributor,creator,date,description,language,oclc_number,publisher,title
0,"x, 253 Seiten.",,"Münch, Richard.",1994,,eng,311887493,Nelson-Hall,"Sociological theory / 1, From the 1850s to the..."
1,"xxx, 612 pages ; 24 cm","Sydie, R. A. (Rosalind Ann), 1940-","Adams, Bert N.",2002,"Originally published: Thousand Oaks, Calif. : ...",eng,62123162,Vistaar,Sociological theory
2,1 online resource : illustrations,"Wallace, Walter L.",,2017,First published 1969 by Transaction Publishers.,eng,995616403,Routledge,Sociological theory
3,"xviii, 603 pages ; 23 cm","Coser, Lewis A., 1913-2003.",,"1989, ©1982",5th ed.,eng,21769128,Waveland Press,Sociological theory : a book of readings
4,1 online resource,,"Parsons, Talcott.",2014,Title from resource description page (Recorded...,eng,893100430,Free Press,Essays in sociological theory
5,"XXV, 529 Seiten : Diagramme ; 24 cm",,"Turner, Jonathan H.,",[2004],"7. ed., [Nachdr.].",eng,549028728,Wadsworth Thomson,The structure of sociological theory
6,1 online resource (573 pages).,,"Martindale, Don.",2013,First Published in 1998. Routledge is an impri...,eng,863823284,Taylor and Francis,The Nature and Types of Sociological Theory.
7,"ix, 194 pages.",,"Rex, John.",2010,,eng,778811797,Routledge,Key problems of sociological theory
8,"xvi, 502 pages : illustrations ; 26 cm","Beeghley, Leonard.","Turner, Jonathan H.",©2012,7th ed.,eng,2011031071,Pine Forge Press,The emergence of sociological theory
9,"xxi, 569 Seiten 26 cm","Stepnisky, Jeffrey,","Ritzer, George,",,,eng,2016036790,,Classical sociological theory


In [109]:
# Write the extracted metadata to CSV with a header row, restricting and
# ordering the output to the listed columns. The DataFrame index is written
# too, because `index` is left at its default.
booK_metadata_df.to_csv(
    "data/extracted/book_opensearch_metadata.csv",
    columns=[
        "oclc_number",
        "title",
        "creator",
        "contributor",
        "description",
        "date",
        "book_format",
        "language",
        "publisher",
    ],
    header=True,
)