-
Notifications
You must be signed in to change notification settings - Fork 48
/
adding_wishlist_ol.py
210 lines (165 loc) · 5.85 KB
/
adding_wishlist_ol.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import ast
import csv
import json
import sys
import unittest
import olclient.common as common
import requests
from olclient.openlibrary import OpenLibrary
# Shared Open Library client, created once at import time; it is used by
# get_author_object() and add_book_via_olclient() below.
ol = OpenLibrary()
# Path of the wishlist CSV that is processed when this file is run as a script.
FILE = "ia-data/new_wishlist_salman_1000.csv"
class TestWishlistAddBook(unittest.TestCase):
    """Unit tests for the wishlist import helpers defined in this module.

    Only ``test_parse_wishlist_csv_row_to_dict`` is self-contained. The other
    tests call ``get_author_object`` (which queries the module-level ``ol``
    client) and ``get_bookcover`` (which issues an HTTP request), so they
    depend on those external services being reachable.
    """

    def test_parse_wishlist_csv_row_to_dict(self):
        """A seven-column row is mapped onto the expected book dictionary."""
        row = [
            "Larks in a paradise: New Zealand portraits",
            "['McNeish, James', 'Friedlander, Marti']",
            "eng",
            "1974",
            "16289249",
            "0002114976",
            "9780002114974",
        ]
        expected = {
            "title": "Larks in a paradise: New Zealand portraits",
            "authors": ["McNeish, James", "Friedlander, Marti"],
            "language": "eng",
            "date": "1974",
            "oclc": "16289249",
            "isbn10": "0002114976",
            "isbn13": "9780002114974",
        }
        # assertEqual (rather than assertTrue(a == b)) prints a diff on failure.
        self.assertEqual(parse_wishlist_csv_row_to_dict(row), expected)

    def test_get_author_object(self):
        """The returned author matches a plain ``common.Author`` by name and identifiers."""
        author = {"author_name": "JK Rowling"}
        expected = common.Author(name="JK Rowling")
        author_obj = get_author_object(
            author.get("author_name"),
            author.get("author_birth_date"),
            author.get("author_death_date"),
        )
        self.assertEqual(expected.name, author_obj.name)
        self.assertEqual(expected.identifiers, author_obj.identifiers)

    def test_get_bookcover(self):
        """A book whose cover image exists yields the cover URL."""
        row = [
            "Mom Goes to War(Light)",
            "[' Irene Aparici Martin']",
            "eng",
            "1974",
            "16289249",
            "8415503202",
            "9788415503200",
        ]
        book = parse_wishlist_csv_row_to_dict(row)
        expected_url = "https://images.betterworldbooks.com/841/9788415503200.jpg"
        self.assertEqual(expected_url, get_bookcover(book))

    def test_empty_bookcover(self):
        """A book without a cover image yields ``None``."""
        row = [
            "Alicia a trave s del espejo La caza del snark",
            "['Lewis Carroll']",
            "Spanish",
            "2002",
            "893562252",
            "9706664998",
            "9789706664990",
        ]
        book = parse_wishlist_csv_row_to_dict(row)
        self.assertIsNone(get_bookcover(book))
def process_csv(filename):
    """Read a wishlist CSV file and return all of its rows.

    Args:
        filename: Path of the CSV file to read, e.g.
            *new_wishlist_salman_1000.csv*.

    Returns:
        A list with one entry per CSV record. Each entry is the list of
        string fields produced by ``csv.reader``; an empty file gives ``[]``.
        Rows are returned unparsed - pass each one to
        ``parse_wishlist_csv_row_to_dict`` to obtain a book dictionary.

    Usage:
        >>> process_csv("wishlist.csv")  # doctest: +SKIP
        [["Some title", "['Last, First']", "eng", ...], ...]
    """
    # newline="" hands line-ending handling to the csv module, so line breaks
    # embedded inside quoted fields are returned exactly as stored.
    with open(filename, newline="") as infile:
        return list(csv.reader(infile))
def parse_wishlist_csv_row_to_dict(csv):
    """Convert one wishlist CSV row into a book dictionary.

    ``csv`` is a single, already split row (an indexable sequence of fields,
    such as one entry of the list returned by ``process_csv``). The first
    seven fields are read by position; any further fields are ignored.

    The second field must contain a Python list literal of author names; it
    is evaluated with ``ast.literal_eval``. All other fields are copied
    unchanged.

    Usage:
        >>> parse_wishlist_csv_row_to_dict(
        ...     ["A title", "['Doe, Jane']", "eng", "1974", "1", "2", "3"])
        {'title': 'A title', 'authors': ['Doe, Jane'], 'language': 'eng', 'date': '1974', 'oclc': '1', 'isbn10': '2', 'isbn13': '3'}
    """
    # Column names in the order in which they appear in a row.
    column_names = (
        "title",
        "authors",
        "language",
        "date",
        "oclc",
        "isbn10",
        "isbn13",
    )
    book = {}
    for position, column in enumerate(column_names):
        field = csv[position]
        # Only the authors column is a serialized list; the rest stay strings.
        book[column] = ast.literal_eval(field) if column == "authors" else field
    return book
import re
def get_author_object(author_name, author_birth_date=None, author_death_date=None):
    """Return an author object for an author name taken from a CSV row.

    The name is normalized before the lookup:

    * ``"Last, First"`` is reordered to ``"First Last"`` (only the first two
      comma-separated parts are used, as before);
    * parenthesized text is removed, repeatedly, so nested parentheses are
      removed as well;
    * leading/trailing whitespace is dropped and inner runs of whitespace are
      collapsed to a single space, so the reordering and the removals above
      do not leave stray blanks in the name that is looked up.

    The normalized name is passed to ``ol.Author.get_olid_by_name``. When that
    returns an OLID the object fetched with ``ol.get`` is returned; otherwise
    a new ``common.Author`` carrying the normalized name is returned.

    Args:
        author_name: Author name as found in the CSV row.
        author_birth_date: Accepted for interface compatibility; not used.
        author_death_date: Accepted for interface compatibility; not used.

    Usage:
        >>> get_author_object('Dan Brown')
    """
    if "," in author_name:
        name_parts = author_name.split(",")
        author_name = name_parts[1] + " " + name_parts[0]

    # Each pass removes the innermost "(...)" groups; stop once a pass finds
    # nothing left to remove.
    while True:
        author_name, removed = re.subn(r"\([^\(]*?\)", r"", author_name)
        if not removed:
            break

    # "Last, First" reordering and parenthetical removal leave extra blanks.
    author_name = " ".join(author_name.split())

    if author_olid := ol.Author.get_olid_by_name(author_name):
        return ol.get(author_olid)
    return common.Author(name=author_name)
def get_bookcover(book, timeout=10):
    """Return the Better World Books cover URL for *book*, or ``None``.

    The URL is built from the first three characters of the book's
    ``isbn10`` and its full ``isbn13``, and is then requested to check that
    the image exists.

    Args:
        book: Book dictionary; the ``isbn10`` and ``isbn13`` keys are read.
        timeout: Seconds to wait for the image server before giving up.

    Returns:
        The cover URL when the server answers with HTTP 200. ``None`` when
        either ISBN is missing or empty, when the request fails or times
        out, or when the server answers with any other status code.
    """
    isbn10 = book.get("isbn10")
    isbn13 = book.get("isbn13")
    # Without both ISBNs no meaningful URL can be built.
    if not isbn10 or not isbn13:
        return None

    url = f"https://images.betterworldbooks.com/{isbn10[0:3]}/{isbn13}.jpg"
    try:
        # A timeout keeps one unresponsive request from stalling the batch.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # A cover is optional: report the problem and carry on without one.
        print(f"Could not check book cover {url}: {error}")
        return None
    return url if response.status_code == 200 else None
def add_book_via_olclient(book, author_list, bookcover=None):
    """Create *book* on Open Library through the module-level ``ol`` client.

    Args:
        book: Book dictionary; the ``title``, ``date``, ``language``,
            ``isbn10``, ``isbn13`` and ``oclc`` keys are read.
        author_list: Author objects for the book. When it is empty (or
            ``None``) nothing is created and a message is printed instead.
        bookcover: Optional cover URL, attached to the book after creation.

    Returns:
        None. Failures while creating the book or attaching the cover are
        printed together with the underlying error instead of being raised,
        so that a batch run can continue with the next book.
    """
    title = book.get("title")
    if not author_list:
        print(f"No authors added. Work {title} has been skipped.")
        return

    # Define a Book Object
    new_book = common.Book(
        title=title,
        authors=author_list,
        publish_date=book.get("date"),
        language=book.get("language"),
    )

    # Add metadata like ISBN 10 and ISBN 13; identifiers that are missing or
    # empty in the row are left out rather than being stored as blanks.
    for id_type, key in (("isbn_10", "isbn10"), ("isbn_13", "isbn13"), ("oclc", "oclc")):
        identifier = book.get(key)
        if identifier:
            new_book.add_id(id_type, identifier)
    print(new_book)

    try:
        newer_book = ol.create_book(new_book)
    except Exception as error:
        # Broad on purpose (batch boundary), but the real cause is reported:
        # not every failure means that the book already exists.
        print(f"Failed to add book {title!r} (it may already exist): {error!r}")
        return

    if bookcover:
        try:
            newer_book.add_bookcover(bookcover)
        except Exception as error:
            print(f"Book {title!r} was added but its cover could not be set: {error!r}")
def process_book(book):
    """Look up authors and cover for *book*, then add it to Open Library.

    Args:
        book: Book dictionary as produced by
            ``parse_wishlist_csv_row_to_dict``; its ``authors`` entry is
            iterated to build the author objects.
    """
    # One author object per listed name.
    # Open question carried over from the original note: are the names
    # normalized at this point (e.g. "first last")?
    author_list = [get_author_object(name) for name in book.get("authors")]

    # Bookcover search, etc
    cover_url = get_bookcover(book)

    # Add book to Open Library via olclient
    add_book_via_olclient(book, author_list, cover_url)
if __name__ == "__main__":
    # Script entry point: read every row of the wishlist CSV, turn it into a
    # book dictionary and add that book to Open Library.
    # (To run the unit tests instead, call unittest.main() here.)
    for row in process_csv(FILE):
        process_book(parse_wishlist_csv_row_to_dict(row))
        print("Book has been processed")