In [1]:
import pandas as pd
import glob
import os
import requests
import pprint
import time

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
all_files = glob.glob(r'../data/libib_exports/*.csv')

In [3]:
# df = pd.concat((pd.read_csv(f) for f in all_files), ignore_index=True)
all_data = []
for f in all_files:
    data = pd.read_csv(f)
    collection_name = os.path.splitext(os.path.basename(f))[0].replace("_", " ").title()
    data['collection'] = collection_name
    all_data.append(data)

df = pd.concat(all_data, ignore_index = True)

In [4]:
df.head()

Unnamed: 0,item_type,title,creators,first_name,last_name,ean_isbn13,upc_isbn10,description,publisher,publish_date,group,tags,notes,price,length,number_of_discs,number_of_players,age_group,ensemble,aspect_ratio,esrb,rating,review,review_date,status,began,completed,added,copies,collection
0,book,Chicka Chicka Boom Boom (Classic Board Books),"Bill Martin Jr., John Archambault",Bill,Jr.,9781442450707,1442450703,The complete edition of the bestselling childr...,Little Simon,2012-08-28,,,,,36.0,,,,,,,,,,,,,2025-06-19,1,Woodbine
1,book,Ten Gators in the Bed,Johnette Downing,Johnette,Downing,9781941879023,1941879020,"True to her Louisiana culture, Johnette brings...",River Road Press,2015-10-01,,,,,,,,,,,,,,,,,,2025-06-19,1,Woodbine
2,book,The Bible App For Kids Storybook Bible,YouVersion in partnership with OneHope Inc.,YouVersion,Inc.,9781630490652,1630490652,The Bible App for Kids Storybook Bible is an a...,"YouVersion and OneHope, Inc./ Winters Publishi...",2015-10-13,,,,,416.0,,,,,,,,,,,,,2025-06-19,1,Woodbine
3,book,"Payback (Stephen Coonts' Deep Black, Book 4)","Stephen Coonts, Jim DeFelice",Stephen,Coonts,9780312936983,312936982,RECRUITED:A crack team of cover agents.Word is...,St. Martin's Paperbacks,2005-10-04,,,,,480.0,,,,,,,,,,,,,2025-06-19,1,Woodbine
4,book,Gem Squash Tokoloshe,Rachel Zadok,Rachel,Zadok,9780330441193,330441191,"She just sat there hardly moving, staring at t...",Pan Books,2005-09-16,,,,,320.0,,,,,,,,,,,,,2025-06-19,1,Woodbine


In [5]:
# Now loop through and call APIs for each entry
for index, row in df.iterrows():
    
    book_isbn = row["ean_isbn13"]
    print(f"-- {row['title']}, ISBN: {book_isbn}")

    # Pull subjects data from Open Library API
    try:
        response = requests.get(f"https://openlibrary.org/isbn/{book_isbn}.json")
        open_lib_details = response.json()
        
        if 'subjects' in open_lib_details:
            subjects = open_lib_details['subjects']
            cleaned_subjects = ', '.join([subj.replace(',', '') for subj in subjects])
            df.at[index, 'subjects'] = cleaned_subjects
    
        if 'physical_format' in open_lib_details:
            df.at[index, 'physical_format'] = open_lib_details['physical_format']
    
        if 'weight' in open_lib_details:
            df.at[index, 'weight'] = open_lib_details['weight']
    
        if 'revision' in open_lib_details:
            df.at[index, 'revision'] = open_lib_details['revision']
    except:
        print(f"No Open Library details found for ISBN {book_isbn}")
        
    # Pull categories data from Google Books API
    try:
        response = requests.get(f"https://www.googleapis.com/books/v1/volumes?q=isbn:{book_isbn}")
        google_details = response.json()
    
        volume_info = google_details['items'][0]['volumeInfo']
        
        if 'categories' in volume_info:
            # Remove commas now, so I can accurately split categories on comma post-join
            cleaned_categories = ', '.join([cat.replace(',', '') for cat in volume_info['categories']])
            df.at[index, 'categories'] = cleaned_categories

        if 'language' in volume_info:
            df.at[index, 'language'] = volume_info['language']

        access_info = google_details['items'][0]['accessInfo']

        if 'publicDomain' in access_info:
            df.at[index, 'public_domain'] = access_info['publicDomain']
        
    except:
        print(f"No Google details found for ISBN {book_isbn}")
    

-- Chicka Chicka Boom Boom (Classic Board Books), ISBN: 9781442450707
-- Ten Gators in the Bed, ISBN: 9781941879023
-- The Bible App For Kids Storybook Bible, ISBN: 9781630490652
-- Payback (Stephen Coonts' Deep Black, Book 4), ISBN: 9780312936983
-- Gem Squash Tokoloshe, ISBN: 9780330441193
-- Preggatinis: Mixology for the Mom-to-Be, ISBN: 9781599214542
-- I Am Not Okay with This, ISBN: 9781683963318
-- Kismet: A Kayankaya Thriller (4) (Melville International Crime), ISBN: 9781935554233
-- Without You: A Memoir of Love, Loss, and the Musical Rent, ISBN: 9780743269773
-- Caught, ISBN: 9780525951582
-- The Princess and the Goblin, ISBN: 9781591667995
-- Always and Forever, Lara Jean (To All the Boys I've Loved Before), ISBN: 9781481430494
-- Long Lost (Myron Bolitar), ISBN: 9780525951056
-- Blow Fly, ISBN: 9780399150890
-- Deadly Election: A Flavia Albia Mystery (Flavia Albia Series), ISBN: 9781250063984
-- Recreating Your Self: Building Self-Esteem Through Imaging and Self-Hypnosis, IS

In [6]:
df_clean = df.drop(
    [
        "number_of_discs",
        "number_of_players",
        "age_group",
        "ensemble",
        "aspect_ratio",
        "esrb",
        "rating",
        "review",
        "review_date",
        "status",
        "began",
        "completed"
    ],
    axis = 1
)

In [7]:
df_clean.to_csv("../data/little_libraries_books_2025_06_27.csv", index = False)

In [8]:
df_clean.head()

Unnamed: 0,item_type,title,creators,first_name,last_name,ean_isbn13,upc_isbn10,description,publisher,publish_date,group,tags,notes,price,length,added,copies,collection,physical_format,revision,categories,language,public_domain,subjects,weight
0,book,Chicka Chicka Boom Boom (Classic Board Books),"Bill Martin Jr., John Archambault",Bill,Jr.,9781442450707,1442450703,The complete edition of the bestselling childr...,Little Simon,2012-08-28,,,,,36.0,2025-06-19,1,Woodbine,Paperback,10.0,Juvenile Nonfiction,en,False,,
1,book,Ten Gators in the Bed,Johnette Downing,Johnette,Downing,9781941879023,1941879020,"True to her Louisiana culture, Johnette brings...",River Road Press,2015-10-01,,,,,,2025-06-19,1,Woodbine,,1.0,Alligators,en,False,"Children's fiction, Family",0.014
2,book,The Bible App For Kids Storybook Bible,YouVersion in partnership with OneHope Inc.,YouVersion,Inc.,9781630490652,1630490652,The Bible App for Kids Storybook Bible is an a...,"YouVersion and OneHope, Inc./ Winters Publishi...",2015-10-13,,,,,416.0,2025-06-19,1,Woodbine,hardcover,3.0,Juvenile Nonfiction,en,False,,
3,book,"Payback (Stephen Coonts' Deep Black, Book 4)","Stephen Coonts, Jim DeFelice",Stephen,Coonts,9780312936983,312936982,RECRUITED:A crack team of cover agents.Word is...,St. Martin's Paperbacks,2005-10-04,,,,,480.0,2025-06-19,1,Woodbine,,10.0,Fiction,en,False,"Intelligence officers -- Fiction, Undercover o...",
4,book,Gem Squash Tokoloshe,Rachel Zadok,Rachel,Zadok,9780330441193,330441191,"She just sat there hardly moving, staring at t...",Pan Books,2005-09-16,,,,,320.0,2025-06-19,1,Woodbine,Paperback,9.0,Apartheid,en,False,Modern fiction,7.8 ounces
