In [1]:
import json
import os

In [56]:
def save_json_files(json_path, folder):
    file_name = os.path.splitext(os.path.basename(json_path))[0]
    # Make it a plain strings with only letters and numbers
    file_name = ''.join(e for e in file_name if e.isalnum())
    file = json.loads(open(json_path).read())
    sub_folder = f'{folder}/{file_name}'
    os.makedirs(sub_folder, exist_ok=True)

    for key in file:
        # Create suffix with the matched keyword and the last part of the key
        suffix = f"{next((word for word in ['book', 'work', 'author', 'series'] if word in key), key)}_{key.split('.')[-1]}"
        with open(f'{sub_folder}/{file_name}_{suffix}.json', 'w') as f:
            json.dump(file[key], f, indent=4)
            print(f'Created {file_name}_{suffix}.json')


In [None]:
folder = 'book_details'
# For each json in the folder cann save_json_files
for file in os.listdir(folder):
    if file.endswith('.json'):
        save_json_files(f'{folder}/{file}', folder)
        

In [2]:
def extract_relevant_fields(json_path):
    file = json.loads(open(json_path).read())
    # Get the keys
    keys = file.keys()
    # from the key that contains 'book' get the values
    book = file[next((key for key in keys if 'Book' in key), None)]
    # from the key that contains 'work' get the values
    work = file[next((key for key in keys if 'Work' in key), None)]
    # from each key that contains 'author' get the values
    authors = [file[key] for key in keys if 'Contributor' in key]
    # from each key that contains 'serie' get the values
    series = [file[key] for key in keys if 'Series' in key]
    return book, work, authors, series

# Extract book details
def extract_book_details(book: dict):
    details = {}
    details['legacyId'] = book.get('legacyId')
    details['title'] = book.get('title')
    details['primaryContributorEdge_role'] = book.get('primaryContributorEdge').get('role')
    details['primaryContributorEdge_ref'] = book.get('primaryContributorEdge').get('node').get('__ref')
    # Extract secondaryContributorEdges and append them as a list of dictionaries in details
    details['secondaryContributorEdges'] = [
        {'role': contributor.get('role'), 'ref': contributor.get('node', {}).get('__ref')}
        for contributor in book.get('secondaryContributorEdges', [])
    ]
    details['bookSeries'] = [series.get('series', {}).get('__ref') for series in book.get('bookSeries', [])]
    details['bookGenres'] = [genre.get('genre', {}).get('name') for genre in book.get('bookGenres', [])]
    details['numPages'] = book.get('details').get('numPages')
    details['publicationTime'] = book.get('details').get('publicationTime')
    details['publisher'] = book.get('details').get('publisher')
    details['language'] = book.get('details').get('language').get('name')
    details['work_ref'] = book.get('work').get('__ref')
    return details

# Extract work details
def extract_work_details(work: dict):
    details = {}
    details['legacyId'] = work.get('legacyId')
    details['originalTitle'] = work.get('details').get('originalTitle')
    details['characters'] = [character.get('name') for character in work.get('details').get('characters', [])]
    details['editions_url'] = work.get('editions').get('webUrl')
    return details

# Extract author details
def extract_author_details(authors: list):
    details = []
    for author in authors:
        author_details = {}
        author_details['legacyId'] = author.get('legacyId')
        author_details['name'] = author.get('name')
        author_details['description'] = author.get('description')
        author_details['webUrl'] = author.get('webUrl')
        details.append(author_details)
    return details

# Extract series details
def extract_series_details(series: list):
    details = []
    for serie in series:
        serie_details = {}
        serie_details['title'] = serie.get('title')
        details.append(serie_details)
    return details

# Extract all the details
def extract_all_details(json_path):
    book, work, authors, series = extract_relevant_fields(json_path)
    return extract_book_details(book), extract_work_details(work), extract_author_details(authors), extract_series_details(series)

# Extract all the details from the json
book_details, work_details, author_details, series_details = extract_all_details('book_details/The_Flame_Bearer_(The_Saxon_Stories,_#10).json')

print('Book details:', book_details)

Book details: {'legacyId': 28949218, 'title': 'The Flame Bearer', 'primaryContributorEdge_role': 'Author', 'primaryContributorEdge_ref': 'Contributor:kca://author/amzn1.gr.author.v1.wlwi3fZJNsgRxAOMSIOIbw', 'secondaryContributorEdges': [], 'bookSeries': ['Series:kca://series/amzn1.gr.series.v1._wedZzHR7zE0Lx56fNmudA'], 'bookGenres': ['Historical Fiction', 'Fiction', 'Historical', 'Audiobook', 'Fantasy', 'Medieval', 'Adventure', 'War', 'British Literature', 'Novels'], 'numPages': 284, 'publicationTime': 1475737200000, 'publisher': 'HARPER COLLINS', 'language': 'English', 'work_ref': 'Work:kca://work/amzn1.gr.work.v1.Qs-mhpnPXcpKcbVxST-rSA'}


In [5]:
# Merge the details in a single dictionary (except authors_details)
merged_details = {**book_details, **work_details, 'series': series_details}
print('Merged details:', merged_details)

import csv
# Save the merged details in a csv file
def save_details_to_csv(details, csv_file):
    with open(csv_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=details.keys())
        writer.writeheader()
        writer.writerow(details)

save_details_to_csv(merged_details, 'merged_details.csv')

# Author details is a list of dicts with keys 'legacyId', 'name', 'description', convert each element in a row
def save_author_details_to_csv(author_details, csv_file):
    with open(csv_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=author_details[0].keys())
        writer.writeheader()
        writer.writerows(author_details)

save_author_details_to_csv(author_details, 'author_details.csv')



Merged details: {'legacyId': 49172708, 'title': 'The Flame Bearer', 'primaryContributorEdge_role': 'Author', 'primaryContributorEdge_ref': 'Contributor:kca://author/amzn1.gr.author.v1.wlwi3fZJNsgRxAOMSIOIbw', 'secondaryContributorEdges': [], 'bookSeries': ['Series:kca://series/amzn1.gr.series.v1._wedZzHR7zE0Lx56fNmudA'], 'bookGenres': ['Historical Fiction', 'Fiction', 'Historical', 'Audiobook', 'Fantasy', 'Medieval', 'Adventure', 'War', 'British Literature', 'Novels'], 'numPages': 284, 'publicationTime': 1475737200000, 'publisher': 'HARPER COLLINS', 'language': 'English', 'work_ref': 'Work:kca://work/amzn1.gr.work.v1.Qs-mhpnPXcpKcbVxST-rSA', 'originalTitle': 'The Flame Bearer', 'characters': ['Edward the Elder', 'Uhtred of Bebbanburg', 'Constantine II, King of Alba', 'Aethelflaed'], 'editions_url': 'https://www.goodreads.com/work/editions/49172708', 'series': [{'title': 'The Last Kingdom'}]}
