In [34]:
import pandas as pd
import glob
import os
import requests
import pprint
import time

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [25]:
all_files = glob.glob(r'../data/*.csv')

In [26]:
# df = pd.concat((pd.read_csv(f) for f in all_files), ignore_index=True)
all_data = []
for f in all_files:
    data = pd.read_csv(f)
    collection_name = os.path.splitext(os.path.basename(f))[0].capitalize()
    data['collection'] = collection_name
    all_data.append(data)

df = pd.concat(all_data, ignore_index = True)

In [27]:
df.head()

Unnamed: 0,item_type,title,creators,first_name,last_name,ean_isbn13,upc_isbn10,description,publisher,publish_date,group,tags,notes,price,length,number_of_discs,number_of_players,age_group,ensemble,aspect_ratio,esrb,rating,review,review_date,status,began,completed,added,copies,collection
0,book,Lifelines,Leana Wen,Leana,Wen,9781250186232,1250186234,"From medical expert Leana Wen, MD,Lifelines is...",Henry Holt &amp; Company,2021-07-27,,,,,352.0,,,,,,,,,,,,,2025-06-13,1,Norfolk
1,book,Edith's Story: The True Story of a Young Girl'...,Edith Velmans,Edith,Velmans,9780553381108,0553381105,"In 1940, while the Germans occupied Holland, f...",Bantam,2001-01-02,,,,,256.0,,,,,,,,,,,,,2025-06-13,1,Norfolk
2,book,The Flight Attendant (Television Tie-In Edition),Chris Bohjalian,Chris,Bohjalian,9780593314005,059331400X,Now a limited series on HBO Max starring Kaley...,Knopf Doubleday Publishing Group,2020-12-08,,,,,368.0,,,,,,,,,,,,,2025-06-13,1,Norfolk
3,book,The Dalai Lama's Cat,David Michie,David,Michie,9781401940584,1401940587,Oh! How adorable! I didn&#039;t know you had a...,Hay House Visions,2012-10-01,,,,,240.0,,,,,,,,,,,,,2025-06-13,1,Maple
4,book,Joey Pigza Swallowed the Key (Joey Pigza Books),Jack Gantos,Jack,Gantos,9780064408332,64408337,Joey Pigza can&#039;t sit still. He can&#039;t...,HarperCollins,2000-04-30,,,,,160.0,,,,,,,,,,,,,2025-06-13,1,Maple


In [40]:
# Now loop through and call APIs for each entry
for index, row in df.iterrows():
    
    book_isbn = row["ean_isbn13"]
    print(f"-- {row['title']}, ISBN: {book_isbn}")

    # Pull subjects data from Open Library API
    try:
        response = requests.get(f"https://openlibrary.org/isbn/{book_isbn}.json")
        open_lib_details = response.json()
        
        if 'subjects' in open_lib_details:
            subjects = open_lib_details['subjects']
            cleaned_subjects = ', '.join([subj.replace(',', '') for subj in subjects])
            df.at[index, 'subjects'] = cleaned_subjects
    
        if 'physical_format' in open_lib_details:
            df.at[index, 'physical_format'] = open_lib_details['physical_format']
    
        if 'weight' in open_lib_details:
            df.at[index, 'weight'] = open_lib_details['weight']
    
        if 'revision' in open_lib_details:
            df.at[index, 'revision'] = open_lib_details['revision']
    except:
        print(f"No Open Library details found for ISBN {book_isbn}")
        
    # Pull categories data from Google Books API
    try:
        response = requests.get(f"https://www.googleapis.com/books/v1/volumes?q=isbn:{book_isbn}")
        google_details = response.json()
    
        volume_info = google_details['items'][0]['volumeInfo']
        
        if 'categories' in volume_info:
            # Remove commas now, so I can accurately split categories on comma post-join
            cleaned_categories = ', '.join([cat.replace(',', '') for cat in volume_info['categories']])
            df.at[index, 'categories'] = cleaned_categories

        if 'language' in volume_info:
            df.at[index, 'language'] = volume_info['language']

        access_info = google_details['items'][0]['accessInfo']

        if 'publicDomain' in access_info:
            df.at[index, 'public_domain'] = access_info['publicDomain']
        
    except:
        print(f"No Google details found for ISBN {book_isbn}")
    

-- Lifelines, ISBN: 9781250186232
-- Edith's Story: The True Story of a Young Girl's Courage and Survival During World War II, ISBN: 9780553381108
-- The Flight Attendant (Television Tie-In Edition), ISBN: 9780593314005
-- The Dalai Lama's Cat, ISBN: 9781401940584
-- Joey Pigza Swallowed the Key (Joey Pigza Books), ISBN: 9780064408332
-- Are You There God?  It's Me, Margaret, ISBN: 9780440404194
-- Hannah Arendt: For Love of the World, Second Edition, ISBN: 9780300105889
-- Heroes and Saints and Other Plays: Giving Up the Ghost, Shadow of a Man, Heroes and Saints, ISBN: 9780931122743
-- Ceremony (Contemporary American Fiction Series), ISBN: 9780140086836
-- And the Bridge Is Love, ISBN: 9780807063279
-- Sweet Theft: A Poet's Commonplace Book, ISBN: 9781619027138
-- Schindler's List, ISBN: 9780671880316
-- The Partition, ISBN: 9781636140315
-- Democracy Awakening: Notes on the State of America, ISBN: 9780593652961
-- Dogbert's Top Secret Management Handbook, ISBN: 9780887307881
-- Jack 

In [41]:
df_clean = df.drop(
    [
        "number_of_discs",
        "number_of_players",
        "age_group",
        "ensemble",
        "aspect_ratio",
        "esrb",
        "rating",
        "review",
        "review_date",
        "status",
        "began",
        "completed"
    ],
    axis = 1
)

In [45]:
df_clean.to_csv("../data/little_libraries_books_2025_06_18.csv", index = False)
df_clean.head(100)

Unnamed: 0,item_type,title,creators,first_name,last_name,ean_isbn13,upc_isbn10,description,publisher,publish_date,group,tags,notes,price,length,added,copies,collection,categories,physical_format,revision,language,public_domain,subjects,weight
0,book,Lifelines,Leana Wen,Leana,Wen,9781250186232,1250186234,"From medical expert Leana Wen, MD,Lifelines is...",Henry Holt &amp; Company,2021-07-27,,,,,352.0,2025-06-13,1,Norfolk,Social Science,hardcover,3.0,en,False,,
1,book,Edith's Story: The True Story of a Young Girl'...,Edith Velmans,Edith,Velmans,9780553381108,0553381105,"In 1940, while the Germans occupied Holland, f...",Bantam,2001-01-02,,,,,256.0,2025-06-13,1,Norfolk,Biography & Autobiography,Paperback,8.0,en,False,"Jewish girls, Personal narratives, Holocaust, ...",8.3 ounces
2,book,The Flight Attendant (Television Tie-In Edition),Chris Bohjalian,Chris,Bohjalian,9780593314005,059331400X,Now a limited series on HBO Max starring Kaley...,Knopf Doubleday Publishing Group,2020-12-08,,,,,368.0,2025-06-13,1,Norfolk,Fiction,paperback,4.0,en,False,,
3,book,The Dalai Lama's Cat,David Michie,David,Michie,9781401940584,1401940587,Oh! How adorable! I didn&#039;t know you had a...,Hay House Visions,2012-10-01,,,,,240.0,2025-06-13,1,Maple,Fiction,,6.0,en,False,"Fiction, Buddhism, Cats, Buddhist philosophy",
4,book,Joey Pigza Swallowed the Key (Joey Pigza Books),Jack Gantos,Jack,Gantos,9780064408332,64408337,Joey Pigza can&#039;t sit still. He can&#039;t...,HarperCollins,2000-04-30,,,,,160.0,2025-06-13,1,Maple,Juvenile Fiction,,23.0,en,False,Attention-deficit hyperactivity disorder -- Fi...,
5,book,"Are You There God? It's Me, Margaret",Judy Blume,Judy,Blume,9780440404194,440404193,"Are You There God? It&#039;s Me, Margaret No o...",Yearling,1972-01-01,,,,,160.0,2025-06-13,1,Maple,Juvenile Fiction,paperback,3.0,en,False,,
6,book,"Hannah Arendt: For Love of the World, Second E...",Elisabeth Young-Bruehl,Elisabeth,Young-Bruehl,9780300105889,300105886,"This highly acclaimed, prize-winning biography...",Yale University Press,2004-10-11,,,,,620.0,2025-06-13,1,Maple,Biography & Autobiography,Paperback,6.0,en,False,"Philosophers, Biography & Autobiography, Biogr...",1.9 pounds
7,book,Heroes and Saints and Other Plays: Giving Up t...,Cherrie Moraga,Cherrie,Moraga,9780931122743,931122740,Heroes and Saints &amp; Other Plays is Chicana...,West End Press,1994-12-31,,,,,149.0,2025-06-13,1,Maple,Drama,Paperback,15.0,en,False,"Drama texts: from c 1900 -, Ethnic studies, Mo...",7.8 ounces
8,book,Ceremony (Contemporary American Fiction Series),Leslie Marmon Silko,Leslie,Silko,9780140086836,140086838,"Tayo, a young Native American, has been a pris...",Penguin Books,1986-03-04,,,,,260.0,2025-06-13,1,Maple,Computers,,36.0,en,False,"World War 1939-1945 -- Veterans -- Fiction, La...",
9,book,And the Bridge Is Love,Faye Moskowitz,Faye,Moskowitz,9780807063279,807063274,,Beacon Press,1993-08-01,,,,,160.0,2025-06-13,1,Maple,Biography & Autobiography,Paperback,8.0,en,False,"Moskowitz Faye, Jewish women, Biography/Autobi...",6.9 ounces
