From de47ebbf5f95510cad580cb72bf6a08c94cae022 Mon Sep 17 00:00:00 2001 From: dellsystem Date: Wed, 26 Apr 2023 13:51:07 -0700 Subject: [PATCH] Switch to scraping Goodreads rather than using API * management commands converted into one view, sync * updated tests * got rid of all the views related to using the goodreads api * got rid of redis dependency Closes #170 and probably some others too --- src/bookmarker/tests/test_authors.py | 96 ------ src/bookmarker/tests/test_books.py | 128 ------- src/bookmarker/tests/test_goodreadstools.py | 60 ++-- src/bookmarker/tests/test_redistools.py | 94 ------ src/bookmarker/urls.py | 5 - src/bookmarker/views.py | 313 ++++-------------- src/books/goodreadstools.py | 233 +++++++++---- .../management/commands/import_goodreads.py | 127 ------- .../management/commands/scrape_goodreads.py | 70 ---- .../management/commands/sync_goodreads.py | 61 ---- src/books/models.py | 110 +----- src/books/redistools.py | 206 ------------ src/books/tests.py | 3 - src/templates/add_author.html | 15 + src/templates/add_author_field.html | 13 - src/templates/add_book.html | 44 +++ src/templates/add_book_field.html | 13 - src/templates/author_form.html | 2 +- src/templates/book_form.html | 9 +- src/templates/keyboard.html | 39 --- src/templates/manual_import.html | 19 -- src/templates/menu.html | 3 +- src/templates/sync_goodreads.html | 187 +++++------ src/templates/view_book.html | 4 - 24 files changed, 433 insertions(+), 1421 deletions(-) delete mode 100644 src/bookmarker/tests/test_authors.py delete mode 100644 src/bookmarker/tests/test_books.py delete mode 100644 src/bookmarker/tests/test_redistools.py delete mode 100644 src/books/management/commands/import_goodreads.py delete mode 100644 src/books/management/commands/scrape_goodreads.py delete mode 100644 src/books/management/commands/sync_goodreads.py delete mode 100644 src/books/redistools.py delete mode 100644 src/books/tests.py delete mode 100644 src/templates/add_author_field.html delete mode 
100644 src/templates/add_book_field.html delete mode 100644 src/templates/manual_import.html diff --git a/src/bookmarker/tests/test_authors.py b/src/bookmarker/tests/test_authors.py deleted file mode 100644 index 39f3efb..0000000 --- a/src/bookmarker/tests/test_authors.py +++ /dev/null @@ -1,96 +0,0 @@ -from django.test import TestCase - -from books.models import Author, GoodreadsAuthor - - -class TestCreateFromGoodreads(TestCase): - def setUp(self): - Author.objects.create( - name='John Smith', - link='http://johnsmith.com', - slug='john-smith', - ) - - def test_new_author(self): - self.assertEqual(Author.objects.count(), 1) - self.assertEqual(GoodreadsAuthor.objects.count(), 0) - author = Author.objects.create_from_goodreads({ - 'id': '123', - 'name': 'Adam Smith', - 'link': 'link', - }) - self.assertEqual(Author.objects.count(), 2) - self.assertEqual(GoodreadsAuthor.objects.count(), 1) - - self.assertEqual(author.name, 'Adam Smith') - self.assertEqual(author.slug, 'adam-smith') - self.assertEqual(author.link, 'link') - - gr_author = author.goodreadsauthor_set.first() - self.assertEqual(gr_author.goodreads_id, '123') - self.assertEqual(gr_author.goodreads_link, 'link') - self.assertEqual(gr_author.author, author) - - def test_messy_author(self): - """e.g., api results for gr author 7020416""" - self.assertEqual(Author.objects.count(), 1) - self.assertEqual(GoodreadsAuthor.objects.count(), 0) - author = Author.objects.create_from_goodreads({ - 'id': '7020416', - 'name': 'Tony McMahon', - 'link': 'link', - }) - self.assertEqual(Author.objects.count(), 2) - self.assertEqual(GoodreadsAuthor.objects.count(), 1) - - self.assertEqual(author.name, 'Tony McMahon') - self.assertEqual(author.slug, 'tony-mcmahon') - self.assertEqual(author.link, 'link') - - gr_author = author.goodreadsauthor_set.first() - self.assertEqual(gr_author.goodreads_id, '7020416') - self.assertEqual(gr_author.goodreads_link, 'link') - self.assertEqual(gr_author.author, author) - - def 
test_same_slug(self): - """Add more authors with the same slug.""" - self.assertEqual(Author.objects.count(), 1) - self.assertEqual(GoodreadsAuthor.objects.count(), 0) - author = Author.objects.create_from_goodreads({ - 'id': '999', - 'name': 'John Smith', - 'link': 'link2', - }) - self.assertEqual(Author.objects.count(), 2) - self.assertEqual(GoodreadsAuthor.objects.count(), 1) - - self.assertEqual(author.name, 'John Smith 2') - self.assertEqual(author.slug, 'john-smith-2') - self.assertEqual(author.link, 'link2') - - gr_author = author.goodreadsauthor_set.first() - self.assertEqual(gr_author.goodreads_id, '999') - self.assertEqual(gr_author.goodreads_link, 'link2') - self.assertEqual(gr_author.author, author) - - # Now create a new author. - other_author = Author.objects.create_from_goodreads({ - 'id': '100', - 'name': 'John Smith', - 'link': 'link3', - }) - self.assertEqual(Author.objects.count(), 3) - self.assertEqual(GoodreadsAuthor.objects.count(), 2) - - self.assertEqual(other_author.name, 'John Smith 3') - self.assertEqual(other_author.slug, 'john-smith-3') - self.assertEqual(other_author.link, 'link3') - - other_gr_author = other_author.goodreadsauthor_set.first() - self.assertEqual(other_gr_author.goodreads_id, '100') - self.assertEqual(other_gr_author.goodreads_link, 'link3') - self.assertEqual(other_gr_author.author, other_author) - - def tearDown(self): - Author.objects.all().delete() - GoodreadsAuthor.objects.all().delete() diff --git a/src/bookmarker/tests/test_books.py b/src/bookmarker/tests/test_books.py deleted file mode 100644 index 52a913c..0000000 --- a/src/bookmarker/tests/test_books.py +++ /dev/null @@ -1,128 +0,0 @@ -from django.test import TestCase - -from books.models import Book, BookDetails - - -class TestCreateFromGoodreads(TestCase): - def setUp(self): - self.book = Book.objects.create_from_goodreads({ - 'id': '123', - 'title': 'Book: Something', - 'link': 'link', - 'format': 'Hardcover', - 'year': '2009', - 'isbn13': '9783531168050', 
- 'publisher': 'Publisher', - 'num_pages': '335', - 'image_url': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1356348356l/14700203._SX98_.jpg', - }) - - def test_long_title_with_colon(self): - book2 = Book.objects.create_from_goodreads({ - 'id': '2', - 'title': 'Portrait of the Manager as a Young Author: On Storytelling, Business, and Literature', - 'link': 'link2', - 'format': 'Hardcover', - 'year': '2009', - 'isbn13': '9783531168051', - 'publisher': 'Publisher', - 'num_pages': '100', - 'image_url': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1356348356l/14700203._SX98_.jpg', - }) - self.assertEqual(book2.slug, "portrait-of-the-manager-as-a-young-author") - - def test_long_title_without_colon(self): - book2 = Book.objects.create_from_goodreads({ - 'id': '2', - 'title': 'Portrait of the Manager as a Young Author, On Storytelling, Business, and Literature', - 'link': 'link2', - 'format': 'Hardcover', - 'year': '2009', - 'isbn13': '9783531168051', - 'publisher': 'Publisher', - 'num_pages': '100', - 'image_url': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1356348356l/14700203._SX98_.jpg', - }) - self.assertEqual(book2.slug, "portrait-of-the-manager-as-a-young-author-on") - - def test_long_title_without_spaces(self): - book2 = Book.objects.create_from_goodreads({ - 'id': '2', - 'title': 'Portrait of the Manager asayoungauthoronstorytellingbusinessandliterature', - 'link': 'link2', - 'format': 'Paperback', - 'year': '2009', - 'isbn13': '9783531168051', - 'publisher': 'Publisher', - 'num_pages': '100', - 'image_url': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1356348356l/14700203._SX98_.jpg', - }) - self.assertEqual(book2.slug, "portrait-of-the-manager") - - def test_long_title_without_spaces_at_all(self): - # Unlikely but you never know - book2 = Book.objects.create_from_goodreads({ - 'id': '2', - 'title': 
'Portraitofthemanagerasayoungauthoronstorytellingbusinessandliterature', - 'link': 'link2', - 'format': 'Paperback', - 'year': '2009', - 'isbn13': '9783531168051', - 'publisher': 'Publisher', - 'num_pages': '100', - 'image_url': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1356348356l/14700203._SX98_.jpg', - }) - self.assertEqual(book2.slug, "portraitofthemanagerasayoungauthoronstorytellingbu") - - def test_initial_book(self): - self.assertEqual(Book.objects.count(), 1) - self.assertEqual(BookDetails.objects.count(), 1) - - self.assertEqual(self.book.title, 'Book: Something') - self.assertEqual(self.book.slug, 'book') - self.assertEqual(self.book.image_url, 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1356348356l/14700203._SY475_.jpg') - - self.assertEqual(self.book.details.year, 2009) - self.assertEqual(self.book.details.num_pages, 335) - self.assertEqual(self.book.details.goodreads_id, '123') - - def test_new_book_same_slug(self): - new_book = Book.objects.create_from_goodreads({ - 'id': '999', - 'title': 'Book: Something Else', - 'link': 'link', - 'format': 'Hardcover', - 'year': '2019', - 'isbn13': '9783531168051', - 'publisher': 'Publisher 2', - 'num_pages': '500', - 'image_url': '', - }) - - self.assertEqual(new_book.title, 'Book: Something Else') - self.assertEqual(new_book.slug, 'book-2') - self.assertEqual(new_book.image_url, '') - - self.assertEqual(new_book.details.year, 2019) - self.assertEqual(new_book.details.num_pages, 500) - self.assertEqual(new_book.details.goodreads_id, '999') - - # Now create a whole new book with again the same slug. 
- third_book = Book.objects.create_from_goodreads({ - 'id': '100', - 'title': 'Book: Once More', - 'link': 'link2', - 'year': '2020', - 'isbn13': '9783531168052', - 'publisher': 'Publisher 3', - 'num_pages': '1000', - 'image_url': '', - }) - - self.assertEqual(third_book.title, 'Book: Once More') - self.assertEqual(third_book.slug, 'book-3') - self.assertEqual(third_book.image_url, '') - - self.assertEqual(third_book.details.year, 2020) - self.assertEqual(third_book.details.num_pages, 1000) - self.assertEqual(third_book.details.goodreads_id, '100') diff --git a/src/bookmarker/tests/test_goodreadstools.py b/src/bookmarker/tests/test_goodreadstools.py index 515d64a..e206c8d 100644 --- a/src/bookmarker/tests/test_goodreadstools.py +++ b/src/bookmarker/tests/test_goodreadstools.py @@ -1,32 +1,38 @@ -from unittest.mock import patch, Mock +import datetime + from django.test import TestCase from books import goodreadstools -class TestGoodreadstools(TestCase): - def setUp(self): - self.client = Mock() - self.redistools = Mock() - - def test_get_author_new(self): - with patch.multiple( - 'books.goodreadstools', - _client=self.client, - redistools=self.redistools - ): - self.redistools.get_author.return_value = None - self.client.find_author.return_value = None - self.assertEqual(None, goodreadstools.get_author_by_name('new')) - self.client.find_author.assert_called_once_with('new') - - def test_get_books_new(self): - with patch.multiple( - 'books.goodreadstools', - _client=self.client, - redistools=self.redistools - ): - self.redistools.get_book.return_value = None - self.client.search_books.return_value = [] - self.assertEqual([], goodreadstools.get_books_by_title('new')) - self.client.search_books.assert_called_once_with('new') +class TestParseNumPages(TestCase): + def test_invalid_input(self): + self.assertEqual(None, goodreadstools._parse_num_pages('blah')) + + def test_valid_input(self): + self.assertEqual(123, goodreadstools._parse_num_pages('123\np')) + + +class 
TestParseId(TestCase): + def test_with_hyphen(self): + self.assertEqual( + '37941942', + goodreadstools._parse_id('/book/show/37941942-the-flame') + ) + + def test_without_hyphen(self): + self.assertEqual( + '1188779', + goodreadstools._parse_id('/book/show/1188779.Money') + ) + + +class TestParseDate(TestCase): + def test_invalid_input(self): + self.assertEqual(None, goodreadstools._parse_date('')) + + def test_valid_input(self): + self.assertEqual( + datetime.date(2023, 2, 7), + goodreadstools._parse_date('Feb 07, 2023') + ) diff --git a/src/bookmarker/tests/test_redistools.py b/src/bookmarker/tests/test_redistools.py deleted file mode 100644 index 634f248..0000000 --- a/src/bookmarker/tests/test_redistools.py +++ /dev/null @@ -1,94 +0,0 @@ -from unittest.mock import patch, Mock -from django.test import TestCase -import json - -from books import redistools - - -class TestRedistools(TestCase): - def setUp(self): - self.client = Mock() - - def test_get_author_new(self): - with patch('books.redistools._client', self.client): - self.client.get.return_value = None - self.assertEquals(None, redistools.get_author('new')) - self.client.get.assert_called_once_with('author:new') - - def test_get_author_existing(self): - with patch('books.redistools._client', self.client): - self.client.get.return_value = '{"foo": "bar"}' - self.assertEquals( - {'foo': 'bar'}, - redistools.get_author('existing') - ) - self.client.get.assert_called_once_with('author:existing') - - def test_save_author(self): - with patch('books.redistools._client', self.client): - author_data = Mock( - gid='1234', - link='link', - books=[Mock(title='Title 1'), Mock(title='Title 2')], - ) - author_data.name = 'John Doe' # can't be set as a kwarg on Mock - redistools.save_author(author_data) - self.client.set.assert_called_once_with('author:1234', - json.dumps({ - 'id': '1234', - 'name': 'John Doe', - 'link': 'link', - 'titles': 'Title 1, Title 2' - }) - ) - - def test_get_book_none(self): - with 
patch('books.redistools._client', self.client): - self.client.get.return_value = None - self.assertEquals(None, redistools.get_book('new')) - self.client.get.assert_called_once_with('book:new') - - def test_get_book_existing(self): - with patch('books.redistools._client', self.client): - self.client.get.return_value = '{"foo": "bar"}' - self.assertEquals( - {'foo': 'bar'}, - redistools.get_book('existing') - ) - self.client.get.assert_called_once_with('book:existing') - - def test_save_book(self): - with patch('books.redistools._client', self.client): - mock_author = Mock(gid='999', link='link', books=[]) - mock_author.name = 'Naomi Klein' - book_data = Mock( - title='No Logo', - gid='1234', - link='link', - format='Hardcover', - publication_date=(None, None, 1999), # happens sometimes - isbn13='9780312421434', - publisher='Random House', - num_pages=None, # also happens sometimes - image_url='image', - authors=[mock_author], - ) - book = redistools.save_book(book_data) - self.assertEquals(2, self.client.set.call_count) - self.assertEquals({ - 'id': '1234', - 'title': 'No Logo', - 'link': 'link', - 'format': 'Hardcover', - 'year': 1999, - 'isbn13': '9780312421434', - 'publisher': 'Random House', - 'num_pages': None, - 'image_url': 'image', - 'authors': [{ - 'id': '999', - 'name': 'Naomi Klein', - 'link': 'link', - 'titles': 'No Logo', - }] - }, book) diff --git a/src/bookmarker/urls.py b/src/bookmarker/urls.py index 9b609f7..d73f39e 100644 --- a/src/bookmarker/urls.py +++ b/src/bookmarker/urls.py @@ -26,9 +26,7 @@ path('login', bookmarker.views.LoginView.as_view(), name='login'), re_path(r'^logout$', bookmarker.views.LogoutView.as_view(), name='logout'), re_path(r'^addbook$', bookmarker.views.add_book, name='add_book'), - re_path(r'^addbook/id$', bookmarker.views.add_book_from_id, name='add_book_from_id'), re_path(r'^addauthor$', bookmarker.views.add_author, name='add_author'), - re_path(r'^addauthor/id$', bookmarker.views.add_author_from_id, 
name='add_author_from_id'), re_path(r'^books/(?P\w+)$', bookmarker.views.view_books, name='view_books'), re_path(r'^authors$', bookmarker.views.view_all_authors, name='view_all_authors'), re_path(r'^terms$', bookmarker.views.view_all_terms, name='view_all_terms'), @@ -87,10 +85,7 @@ re_path(r'^faves$', bookmarker.views.view_faves, name='view_faves'), re_path(r'^search$', bookmarker.views.search, name='search'), re_path(r'^sync$', bookmarker.views.sync_goodreads, name='sync_goodreads'), - re_path(r'^import$', bookmarker.views.manual_import, name='manual_import'), re_path(r'^search.json$', bookmarker.views.search_json, name='search_json'), - re_path(r'^author_search.json$', bookmarker.views.author_search_json, name='author_search_json'), - re_path(r'^book_search.json$', bookmarker.views.book_search_json, name='book_search_json'), ] diff --git a/src/bookmarker/views.py b/src/bookmarker/views.py index c131438..88ff4bd 100644 --- a/src/bookmarker/views.py +++ b/src/bookmarker/views.py @@ -1,7 +1,6 @@ import collections import datetime import random -from dateutil import parser from django.contrib import messages from django.contrib.auth import views as auth_views @@ -13,10 +12,11 @@ from django.shortcuts import render, redirect, get_object_or_404 from django.urls import reverse from django.utils.http import urlencode +from django.utils.text import slugify from django.views.decorators.http import require_POST from activity.models import Action, CATEGORIES, FILTER_CATEGORIES -from books import goodreadstools, redistools, highlighter +from books import goodreadstools, highlighter from books.forms import NoteForm, SectionForm, ArtefactAuthorForm, BookForm, \ BookDetailsForm, AuthorForm, TagForm, \ MultipleSectionsForm @@ -569,43 +569,6 @@ def add_term(request, slug): return render(request, 'add_term.html', context) -@require_POST -@login_required -def add_author_from_id(request): - query = request.POST.get('q') - - # If the query parameter is missing, show the form. 
- if not query: - return redirect('add_author') - - # If there's a : in the goodreads ID, then it's being sent from the keyboard shortcut modal. - if ':' in query: - # Extract the goodreads ID from the posted parameter. - goodreads_id = query.split(':')[0] - else: - goodreads_id = query - - # See if author already exists. - gr_author = GoodreadsAuthor.objects.filter(goodreads_id=goodreads_id) - if gr_author.exists(): - messages.error(request, 'Author for ID {} already exists'.format(goodreads_id)) - return redirect(gr_author.first().author) - - # Author doesn't exist - fetch from goodreads api - author_data = goodreadstools.get_author_by_id(goodreads_id) - author = Author.objects.create_from_goodreads(author_data) - - Action.objects.create( - category='author', - primary_id=author.pk, - verb='added', - details=author.name, - ) - messages.success(request, 'Added author: {}'.format(author.name)) - - return redirect(author) - - @login_required def add_author(request): if request.POST.get('submit'): @@ -614,6 +577,15 @@ def add_author(request): if author_form.is_valid(): author = author_form.save() + # If the link is a goodreads link, automatically add the + # goodreads_author object + goodreads_id = goodreadstools.get_author_id(author.link) + if goodreads_id is not None: + author.goodreadsauthor_set.create( + goodreads_id=goodreads_id, + goodreads_link=author.link + ) + Action.objects.create( category='author', primary_id=author.pk, @@ -628,60 +600,29 @@ def add_author(request): 'Error creating author' ) else: - author_form = AuthorForm() + author_name = request.GET.get('name') + author_form = AuthorForm(initial={ + 'name': author_name, + 'link': request.GET.get('link'), + 'slug': slugify(author_name) if author_name else '', + }) + + # Check if the author in the QP already exists in our database + if author_name: + existing_authors = Author.objects.filter( + name__icontains=author_name.lower() + ) + else: + existing_authors = None context = { 'author_form': 
author_form, + 'existing_authors': existing_authors, } return render(request, 'add_author.html', context) -@require_POST -@login_required -def add_book_from_id(request): - query = request.POST.get('q') - - # If the query parameter is missing, show the form. - if not query: - return redirect('add_book') - - # If there's a : in the goodreads ID, then it's being sent from the keyboard shortcut modal. - if ':' in query: - # Extract the goodreads ID from the posted parameter. - goodreads_id = query.split(':')[0] - else: - goodreads_id = query - - # Check if book already exists. - book_qs = Book.objects.filter(details__goodreads_id=goodreads_id) - if book_qs.exists(): - messages.error(request, 'Book for ID {} already exists'.format(goodreads_id)) - return redirect(book_qs.first()) - - # Book doesn't exist, so get from goodreads api (or redis cache, if it exists). - book_data = goodreadstools.get_book_by_id(goodreads_id) - book = Book.objects.create_from_goodreads(book_data) - - messages.success(request, 'Added book: {}'.format(book.title)) - - for author_data in book_data['authors']: - gr_author = GoodreadsAuthor.objects.filter( - goodreads_id=author_data['id'] - ) - if gr_author.exists(): - author = gr_author.first().author - else: - # Author doesn't exist yet - must create. 
- author = Author.objects.create_from_goodreads(author_data) - messages.success(request, 'Added author: {}'.format(author.name)) - - book.details.authors.add(author) - book.details.default_authors.add(author) - - return redirect(book) - - @login_required def add_book(request): if request.POST.get('submit'): @@ -724,13 +665,43 @@ def add_book(request): 'Error with book form' ) else: - # Just setting the completed read field to true by default for now - book_form = BookForm(initial={'completed_read': True}) - details_form = BookDetailsForm() + title = request.GET.get('title') + book_form = BookForm(initial={ + # setting the completed read field to true by default for now + 'completed_read': True, + # filling in query params if provided + 'title': title, + 'image_url': request.GET.get('image_url'), + 'slug': slugify(title) if title else '', + }) + details_form = BookDetailsForm(initial={ + 'goodreads_id': request.GET.get('id'), + 'link': request.GET.get('link'), + 'isbn': request.GET.get('isbn'), + 'year': request.GET.get('year'), + 'format': request.GET.get('format'), + 'num_pages': request.GET.get('num_pages'), + 'start_date': request.GET.get('start_date'), + 'end_date': request.GET.get('end_date'), + 'verified': True, + }) + # Check if the book in the QP already exists in our database + if title: + existing_books = Book.objects.filter( + title__icontains=title.lower() + ) + else: + existing_books = None context = { 'book_form': book_form, 'details_form': details_form, + # this is just for pre-populating forms with query param data + 'author_name': request.GET.get('author_name'), + 'author_url': request.GET.get('author_url'), + 'author_slug': request.GET.get('author_slug'), + 'goodreads_url': request.GET.get('link'), + 'existing_books': existing_books, } return render(request, 'add_book.html', context) @@ -1293,98 +1264,6 @@ def view_section(request, section_id): return render(request, 'view_section.html', context) -# todo: this and author search should be part of a 
goodreads-connected app which caches intermediate results -def book_search_json(request): - query = request.GET.get('q') - results = [] - - # Only search goodreads if there's no one with this name in our db - existing_books = Book.objects.filter(title__icontains=query) - if existing_books.exists(): - for book in existing_books: - authors = [] - if book.details: - for author in book.details.authors.all(): - authors.append(author.name) - - authors.sort() - results.append({ - 'title': book.title, - 'description': ', '.join(authors), - 'image': '/static/img/bookmarker.png', - 'url': book.get_absolute_url(), - }) - else: - # Use the goodreads API - for book in goodreadstools.get_books_by_title(query): - description = [] - if book['format']: - description.append(book['format']) - if book['publisher']: - description.append(book['publisher']) - if book['num_pages']: - description.append(book['num_pages']) - - title = '{}: {}'.format(book['id'], book['title']) - authors = ', '.join(a['name']for a in book['authors']) - results.append({ - 'title': title, - 'price': authors, - 'description': ' / '.join(description), - 'image': '/static/img/goodreads.png', - }) - - return JsonResponse({ - 'results': results - }) - - -def author_search_json(request): - query = request.GET.get('q') - results = [] - - # Put a ! at the end of the query to allow saving. 
I'm sorry - should_save = query.endswith('!') - query = query.strip('!') - - # Only search goodreads if there's no one with this name in our db - existing_authors = Author.objects.filter(name__icontains=query) - if existing_authors.exists(): - # todo: urgh - for author in existing_authors: - titles = [] - books = set(author.books.values_list('book__title', flat=True)) - for book in books: - if book: - titles.append(book) - - publications = set(author.sections.values_list('book__title', flat=True)) - for publication in publications - books: - if publication: - titles.append(publication) - - titles.sort() - results.append({ - 'title': author.name, - 'description': ', '.join(titles[:10]), - 'image': '/static/img/bookmarker.png', - 'url': author.get_absolute_url(), - }) - else: - # Use the goodreads API - author = goodreadstools.get_author_by_name(query) - title = '{}: {}'.format(author['id'], author['name']) - results.append({ - 'title': title, - 'description': author['titles'], - 'image': '/static/img/goodreads.png', - }) - - return JsonResponse({ - 'results': results - }) - - def within_book_search_json(request, book_id): """Suggest notes/sections/terms with that keyword""" query = request.GET.get('q') @@ -1894,74 +1773,20 @@ def add_tag(request): return render(request, 'add_tag.html', context) -@login_required -def manual_import(request): - context = {} - return render(request, 'manual_import.html', context) - - @login_required def sync_goodreads(request): - if request.method == 'POST': - goodreads_id = request.POST.get('id') - action = request.POST.get('action') - review = redistools.get_review(goodreads_id) - - if review is None: - messages.error(request, "Invalid goodreads ID") - else: - # Delete it so it doesn't show up again. - redistools.ignore_review(goodreads_id) - if action == 'ignore': - messages.success(request, "Successfully ignored {}".format(review['book']['title'])) - else: - # If there is one existing book, use that one instead. 
- existing_books = Book.objects.filter(title=review['book']['title']) - if existing_books.count() == 1: - book = existing_books.first() - messages.success(request, 'Updated existing book: {}'.format(book.title)) - else: - book = Book.objects.create_from_goodreads(review['book']) - messages.success(request, 'Added book: {}'.format(book.title)) - - book.details.shelves = review['shelves'] - book.details.review = review['review'] - book.details.rating = review['rating'] - book.details.start_date = parser.parse( - review['start_date'] - ).date() - book.details.end_date = parser.parse( - review['end_date'] - ).date() - book.details.save() - - for author_data in review['book']['authors']: - gr_author = GoodreadsAuthor.objects.filter( - goodreads_id=author_data['id'] - ) - if gr_author.exists(): - author = gr_author.first().author - else: - # Author doesn't exist yet - must create. - author = Author.objects.create_from_goodreads(author_data) - messages.success(request, 'Added author: {}'.format(author.name)) - - book.details.authors.add(author) - book.details.default_authors.add(author) - - review = redistools.get_random_review() - if review: - # check if we already have a book with this title - existing_books = Book.objects.filter( - title=review['book']['title'] - ) - else: - existing_books = Book.objects.none() + # Scrape the goodreads website and show the results (paginated) + try: + page = int(request.GET.get('page', 1)) + except ValueError: + page = 1 + books = goodreadstools.get_books(page) context = { - 'review': review, - 'existing_books': existing_books, - 'total': redistools.count_reviews(), + 'books': books, + 'page': page, + 'previous_page': page - 1, + 'next_page': page + 1, } return render(request, 'sync_goodreads.html', context) diff --git a/src/books/goodreadstools.py b/src/books/goodreadstools.py index 35ec7ff..ea7bdf3 100644 --- a/src/books/goodreadstools.py +++ b/src/books/goodreadstools.py @@ -1,69 +1,176 @@ -import os +import bs4 from datetime import 
datetime +import requests +import urllib.parse + +from books.models import BookDetails, GoodreadsAuthor + + +def _parse_num_pages(field): + """Expect input like '123\n p'""" + if field: + text = field.strip() + number = text.splitlines()[0].strip() + if number: + try: + return int(number) + except ValueError: + # Just return None, it's fine, whatever + pass + +def _parse_id(field): + """Input format: /book/show/12345.optionaltitle-morestuff""" + return field.split('/')[-1].split('-')[0].split('.')[0] + +def _parse_date(field): + if field: + text = field.strip() + if ',' in text: + format_string = '%b %d, %Y' + else: + format_string = '%b %Y' + return datetime.strptime( + text, + format_string + ).date() + + +USER_ID = '60292716-wendy-liu' +BASE_URL = "https://www.goodreads.com" +READ_URL = BASE_URL + "/review/list/{}?shelf=read&per_page=100&sort=date_updated".format(USER_ID) +def get_books(page): + url = '{}&page={}'.format(READ_URL, page) + response = requests.get(url) + response.raise_for_status() + soup = bs4.BeautifulSoup(response.content.decode(), "html.parser") + rows = soup.select("table#books tbody tr") -from goodreads import client - -from books import redistools - - -_client = client.GoodreadsClient( - os.environ.get('GOODREADS_KEY'), - os.environ.get('GOODREADS_SECRET'), -) -_client.authenticate( - access_token=os.environ.get('GOODREADS_ACCESS_TOKEN'), - access_token_secret=os.environ.get('GOODREADS_ACCESS_SECRET'), -) - - -def get_author_by_name(name): - author = _client.find_author(name) - if author is not None: - return redistools.save_author2(author) - - -def get_author_by_id(goodreads_id): - cached_data = redistools.get_author(goodreads_id) - if cached_data: - return cached_data - - return redistools.save_author2(_client.author(goodreads_id)) - - -def get_books_by_title(title): books = [] - for book in _client.search_books(title): - books.append(redistools.save_book(book)) - - return books - - -def get_book_by_id(goodreads_id): - cached_data = 
redistools.get_book(goodreads_id) - if cached_data: - return cached_data + goodreads_book_ids = set() + goodreads_author_ids = set() + for row in rows: + # I HATE GOODREADS + title_tag = row.select('.field.title .value a')[0] + goodreads_url = title_tag['href'] + goodreads_id = _parse_id(goodreads_url) + title = title_tag.text.strip() + + author_tag = row.select('.field.author .value a')[0] + author_url = author_tag['href'] + author_id = _parse_id(author_url) + author_name = author_tag.text.strip() + + # Doing this kind of annoying if/else statement just cus i want to make + # _parse_date feel better for testing [operating on strings, not bs4 + # objects] + start_date = row.select('.date_started_value') + if start_date: + start_date = _parse_date(start_date[0].text) + else: + start_date = None + end_date = row.select('.date_read_value') + if end_date: + end_date = _parse_date(end_date[0].text) + else: + end_date = None + + book_format = row.select('.format .value')[0].text.strip() + isbn = row.select('.isbn13 .value')[0].text.strip() + num_pages = _parse_num_pages(row.select('.num_pages .value')[0].text) + image_url = row.select('img')[0]['src'] + + # If the publication year is missing or weirdly formatted (sometimes it + # just is for whatever reason, see if it's present in the + # date_pub_edition field instead + year = row.select('.date_pub .value')[0].text.strip() + if year == 'unknown' or ',' in year: + pub_date = row.select('.date_pub_edition .value')[0].text.strip() + year = pub_date.split(',')[-1].strip() + + book = { + 'id': goodreads_id, + 'url': BASE_URL + goodreads_url, + 'title': title, + 'start_date': start_date, + 'end_date': end_date, + 'format': book_format, + 'isbn': isbn, + 'num_pages': num_pages, + 'image_url': image_url, + 'year': year, # convert this to number? 
maybe + # TODO: author, publisher name + 'author_url': BASE_URL + author_url, + 'author_name': author_name, + 'author_id': author_id, + } + goodreads_book_ids.add(goodreads_id) + goodreads_author_ids.add(author_id) + books.append(book) + + # Figure out which of the books and authors are already in our DB + details_query = BookDetails.objects.filter(goodreads_id__in=goodreads_book_ids) + details_dict = {} + for d in details_query: + details_dict[d.goodreads_id] = d + + author_query = GoodreadsAuthor.objects.filter(goodreads_id__in=goodreads_author_ids) + author_dict = {} + for a in author_query: + author_dict[a.goodreads_id] = a.author + + # Now go back through the books list to update the status + for book in books: + a = author_dict.get(book['author_id']) + if a: + book['author'] = a + else: + # flip the author name around (last, first to first, last) + author_name = ' '.join(book['author_name'].split(', ')[::-1]) + book['author_params'] = urllib.parse.urlencode({ + 'name': author_name, + 'link': book['author_url'], + }) + + d = details_dict.get(book['id']) + if d: + b = d.book + book['is_processed'] = b.is_processed + book['slug'] = b.slug + book['dates_match'] = ( + d.start_date == book['start_date'] and + d.end_date == book['end_date'] + ) + if d.start_date is None and d.end_date is None: + dates_comment = 'No BM dates' + else: + dates_comment = '{} - {}'.format( + d.start_date, d.end_date + ) + book['dates_comment'] = dates_comment + else: + # Create a URL to quickly create the book (query params) + book['book_params'] = urllib.parse.urlencode({ + 'id': book['id'], + 'title': book['title'], + 'link': book['url'], # todo: url to link. 
def get_author_id(link):
    """Return the Goodreads author ID embedded in an author URL.

    The ID is taken to be everything between the module-level ``AUTHOR_URL``
    prefix (``BASE_URL + '/author/show/'``) and the first ``.`` that follows
    it; if there is no ``.``, the whole remainder is returned.

    Args:
        link: URL string to inspect.

    Returns:
        The ID portion as a string when ``link`` starts with ``AUTHOR_URL``,
        otherwise ``None``.
    """
    if not link.startswith(AUTHOR_URL):
        return None
    # Slice off the exact prefix. str.lstrip(AUTHOR_URL) would instead strip
    # any leading run of the *characters* found in AUTHOR_URL, which eats the
    # start of an ID that happens to begin with one of those characters.
    return link[len(AUTHOR_URL):].split('.')[0]
author_link = input('{} Goodreads link: '.format(author)).strip() - if author_link.startswith(AUTHOR_LINK_PREFIX): - break - else: - print('BAD AUTHOR LINK - TRY AGAIN!!!') - author_id = author_link.removeprefix(AUTHOR_LINK_PREFIX) - author_id = author_id.split('.')[0] - print('Author ID: {}'.format(author_id)) - author_object = Author.objects.create_from_goodreads({ - 'name': author, - 'id': author_id, - 'link': author_link - }) - - # TODO: create the book - image_url = input('Book image URL: ') - book = Book.objects.create_from_goodreads({ - 'id': goodreads_id, - 'link': BASE_URL + goodreads_id, - 'year': year, - 'isbn13': isbn, - 'publisher': publisher, - 'num_pages': num_pages, - 'image_url': image_url, - 'title': title, - 'shelves': shelves, - 'review': review, - 'format': format, - 'end_date': end_date, - }) - - book.details.authors.add(author_object) - book.details.default_authors.add(author_object) - else: - continue - - d = details.first() - to_update = {} - - - if not d.isbn and isbn: - to_update['isbn'] = isbn - if not d.rating and rating: - to_update['rating'] = rating - if not d.publisher and publisher: - to_update['publisher'] = publisher - if not d.format and format: - to_update['format'] = format - if not d.num_pages and num_pages: - to_update['num_pages'] = num_pages - if not d.year and year: - to_update['year'] = year - if not d.end_date and end_date: - to_update['end_date'] = end_date - if not d.shelves and shelves: - to_update['shelves'] = shelves - if not d.review and review: - to_update['review'] = review - - if to_update: - print("Updating: {}, {} =======".format( - goodreads_id, title) - ) - print(to_update) - details.update(**to_update) diff --git a/src/books/management/commands/scrape_goodreads.py b/src/books/management/commands/scrape_goodreads.py deleted file mode 100644 index 3564d79..0000000 --- a/src/books/management/commands/scrape_goodreads.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Inspired by https://github.com/rixx/goodreads-to-sqlite 
(just the scrape -function). -I don't know why the params dict isn't working (I just hardcoded the params in -the URL -This is only for updating dates of books that are already in the db -""" -import bs4 -from datetime import datetime -import requests - -from django.core.management.base import BaseCommand - -from books import redistools, goodreadstools -from books.models import Book, BookDetails - - -USER_ID = '60292716-wendy-liu' -BASE_URL = "https://www.goodreads.com/" -URL = BASE_URL + "review/list/{}?shelf=read&per_page=100&sort=date_updated".format(USER_ID) -class Command(BaseCommand): - def handle(self, **options): - """ - params = { - #"utf8": "✓", - "per_page": "100", # Maximum allowed page size - "sort": "date_updated", - "page": 0, - } - """ - date_counter = 0 - while True: - #print("Page", params['page']) - #params["page"] += 1 - #response = requests.get(URL, data=params) - response = requests.get(URL) - response.raise_for_status() - soup = bs4.BeautifulSoup(response.content.decode(), "html.parser") - - rows = soup.select("table#books tbody tr") - for row in rows: - title_tag = row.select('.field.title .value a')[0] - goodreads_id = title_tag['href'].split('/')[-1].split('-')[0] - title = title_tag.text.strip() - - details = BookDetails.objects.filter(goodreads_id=goodreads_id) - if not details.exists(): - continue - details = details.first() - - date = row.select(".date_read_value") - if not date: - continue - - date = date[0].text.strip() - if ',' in date: - format_string = '%b %d, %Y' - else: - format_string = '%b %Y' - read_at = datetime.strptime( - date, - format_string - ).date() - date_counter += 1 - if not details.end_date: - print("Updating read date", goodreads_id, title, read_at) - details.end_date = read_at - details.save() - - if not soup.select("a[rel=next]"): - break diff --git a/src/books/management/commands/sync_goodreads.py b/src/books/management/commands/sync_goodreads.py deleted file mode 100644 index 1d295d7..0000000 --- 
a/src/books/management/commands/sync_goodreads.py +++ /dev/null @@ -1,61 +0,0 @@ -import time - -from django.core.management.base import BaseCommand - -from books import redistools, goodreadstools -from books.models import Book, BookDetails - - -def update_details(details, data): - updated_fields = set() - # Doing it manually instead of using an update() just in case the data is somehow missing in goodreads - if data['rating'] and not details.rating: - details.rating = data['rating'] - updated_fields.add('rating') - - if data['review'] and not details.review: - details.review = data['review'] - updated_fields.add('review') - - if data['start_date'] and not details.start_date: - details.start_date = data['start_date'] - updated_fields.add('start_date') - - if data['end_date'] and not details.end_date: - details.end_date = data['end_date'] - updated_fields.add('end_date') - - if data['shelves'] and not details.shelves: - details.shelves = data['shelves'] - updated_fields.add('shelves') - - if updated_fields: - print("Updating details for {}".format(details.book.title)) - for field in updated_fields: - print("Changed {} to {}".format( - field, data[field] - )) - details.save() - - -class Command(BaseCommand): - def handle(self, **options): - page = 1 - while True: - reviews = goodreadstools.get_read_shelf(page=page) - - if not reviews: - break - - page += 1 - - for review in reviews: - goodreads_id = review['book']['id'] - details = BookDetails.objects.filter(goodreads_id=goodreads_id) - if details.exists(): - for instance in details.all(): - update_details(instance, review) - elif not redistools.is_ignored(goodreads_id): - print("Saving", goodreads_id) - redistools.save_review(goodreads_id, review) - print("Done page {}".format(page)) diff --git a/src/books/models.py b/src/books/models.py index bcf1ef8..c9fda17 100644 --- a/src/books/models.py +++ b/src/books/models.py @@ -16,47 +16,6 @@ from .utils import int_to_roman, roman_to_int -class 
AuthorManager(models.Manager): - def create_from_goodreads(self, author_data): - """Only call this if you're sure the goodreads author doesn't already - exist.""" - # Clean the name (get rid of extraneous whitespace). - name = ' '.join(author_data['name'].split()) - slug = slugify(name) - - # If the author already exists for this slug, assume it's different, - # and give this author a different slug. - existing_author = Author.objects.filter(slug=slug) - if existing_author.exists(): - i = 2 - while True: - potential_slug = "{}-{}".format(slug, i) - if Author.objects.filter(slug=potential_slug).exists(): - i += 1 - else: - slug = potential_slug - name = "{} {}".format(name, i) - break - - # Create the author and the goodreads author. - author = Author.objects.create( - name=name, - link=author_data['link'], - slug=slug, - ) - author.goodreadsauthor_set.create( - goodreads_id=author_data['id'], - goodreads_link=author_data['link'], - ) - Action.objects.create( - category='author', - primary_id=author.pk, - details=name, - verb='added', - ) - return author - - class ReadingGoal(models.Model): name = models.CharField(max_length=50) @@ -80,9 +39,8 @@ def __str__(self): class Author(models.Model): name = models.CharField(max_length=100) - link = models.URLField(help_text='Only needed if no goodreads author') + link = models.URLField() slug = models.SlugField(unique=True) - objects = AuthorManager() class Meta: ordering = ['name'] @@ -100,71 +58,6 @@ class GoodreadsAuthor(models.Model): goodreads_link = models.URLField() -GR_IMAGE_URL_RE = re.compile(r'_SX98_.jpg$') -class BookManager(models.Manager): - def create_from_goodreads(self, book_data): - details = BookDetails.objects.create( - goodreads_id=book_data['id'], - link=book_data['link'], - year=int(book_data['year']) if book_data['year'] else None, - isbn=book_data['isbn13'], - publisher=book_data['publisher'], - num_pages=int(book_data['num_pages']) if book_data['num_pages'] else None, - 
shelves=book_data['shelves'], - review=book_data['review'], - format=book_data['format'], - end_date=book_data['end_date'] - ) - - # Replace the SX98_.jpg at the end with SX475_.jpg - image_url = book_data['image_url'] - #if image_url: - # image_url = GR_IMAGE_URL_RE.sub('_SY475_.jpg', image_url) - - # If there's a :, strip out everything after it for the slug. - title = book_data['title'] - slug = slugify(title.split(':')[0]) - # Make sure the slug is 50 characters or less - if len(slug) > 50: - # First try cutting it to the part before the last - - # as long as it's not obscenely short - slug = slug[:50] - last_dash = slug.rfind('-') - if last_dash > 10: - slug = slug[:last_dash] - - # If the book already exists for this slug, give this book a number - # after the slug. - existing_book = Book.objects.filter(slug=slug) - if existing_book.exists(): - i = 2 - while True: - potential_slug = "{}-{}".format(slug, i) - if Book.objects.filter(slug=potential_slug).exists(): - i += 1 - else: - slug = potential_slug - break - - is_read = book_data['end_date'] is not None - book = Book.objects.create( - details=details, - title=title, - image_url=image_url, - slug=slug, - completed_read=is_read - ) - - # Create the relevant action, too. 
- Action.objects.create( - category='book', - primary_id=book.pk, - details=title, - verb='added', - ) - - return book - class RatingField(models.IntegerField): def __init__(self, min_value=0, max_value=5, **kwargs): self.min_value, self.max_value = min_value, max_value @@ -237,7 +130,6 @@ def __str__(self): class Book(models.Model): """If details are None, then it's a publication, not a book.""" - objects = BookManager() details = models.OneToOneField(BookDetails, on_delete=models.CASCADE, blank=True, null=True) title = models.CharField(max_length=255) diff --git a/src/books/redistools.py b/src/books/redistools.py deleted file mode 100644 index 2c3d8ab..0000000 --- a/src/books/redistools.py +++ /dev/null @@ -1,206 +0,0 @@ -import json -import redis - -from django.conf import settings - - -_client = redis.StrictRedis( - host=settings.REDIS_HOST, - port=settings.REDIS_PORT, - db=0 -) - - -AUTHOR_KEY = 'author:{}' -def get_author(goodreads_id): - # return none if nothing is stored - key = AUTHOR_KEY.format(goodreads_id) - - value = _client.get(key) - if value is not None: - return json.loads(value) - - -def save_author2(author_data, titles=None): - key = AUTHOR_KEY.format(author_data.gid) - - # If author_data is empty, save it anyway. - if author_data is None: - value = None - else: - # Figure out the titles from the author's books, if any. - if titles is None: - titles = ', '.join( - sorted([book.title for book in author_data.books]) - ) - - value = { - 'id': author_data.gid, - 'name': author_data.name, - 'link': author_data.link, - 'titles': titles, - } - - # If value is None, dumps and loads cancel out - _client.set(key, json.dumps(value)) - - # Return the dictionary being stored - return value - - - -def save_author(author_data, titles=None): - key = AUTHOR_KEY.format(author_data['id']) - - # If author_data is empty, save it anyway. - if author_data is None: - value = None - else: - # Figure out the titles from the author's books, if any. 
- if titles is None: - titles = 'unknown' - """ - titles = ', '.join( - sorted([book.title for book in author_data.books]) - ) - """ - - value = { - 'id': author_data['id'], - 'name': author_data['name'], - 'link': author_data['link'], - 'titles': titles, - } - - # If value is None, dumps and loads cancel out - _client.set(key, json.dumps(value)) - - # Return the dictionary being stored - return value - - -BOOK_KEY = 'book:{}' -def get_book(goodreads_id): - # return none if nothing is stored - key = BOOK_KEY.format(goodreads_id) - - value = _client.get(key) - if value is not None: - return json.loads(value) - - -def save_book2(book_data): - key = BOOK_KEY.format(book_data.gid) - if book_data is None: - value = None - else: - authors = [] - for author_data in book_data.authors: - # While we have the author data, store that too. - author = save_author2(author_data, titles=book_data.title) - authors.append(author) - - value = { - 'id': book_data.gid, - 'title': book_data.title, - 'link': book_data.link, - 'format': book_data.format, - 'year': book_data.publication_date[2], - 'isbn13': book_data.isbn13, - 'publisher': book_data.publisher, - 'num_pages': book_data.num_pages, - 'image_url': book_data.image_url, - 'authors': authors, - } - - # If value is None, dumps and loads cancel out - _client.set(key, json.dumps(value)) - - # Return the dictionary being stored - return value - - - -def save_book(book_data): - key = BOOK_KEY.format(book_data['id']['#text']) - if book_data is None: - value = None - else: - if 'author' in book_data['authors']: - authors = [save_author(book_data['authors']['author'], titles=book_data['title'])] - else: - print('missing author key:') - print(book_data['authors']) - """ - authors = [] - for author_data in book_data['authors']: - # While we have the author data, store that too. 
- author = save_author(author_data, titles=book_data['title']) - authors.append(author) - """ - - value = { - 'id': book_data['id']['#text'], - 'title': book_data['title'], - 'link': book_data['link'], - 'format': book_data['format'], - 'year': book_data['publication_year'], - 'isbn13': book_data['isbn13'], - 'publisher': book_data['publisher'], - 'num_pages': book_data['num_pages'], - 'image_url': book_data['image_url'], - 'authors': authors, - } - - # If value is None, dumps and loads cancel out - print('saving book') - print(value) - print(book_data) - input('waiting') - _client.set(key, json.dumps(value)) - - # Return the dictionary being stored - return value - - -IGNORED_KEY = 'ignored' -REVIEW_KEY = 'reviews:{}' -def ignore_review(goodreads_id): - _client.sadd(IGNORED_KEY, goodreads_id) - _client.delete(REVIEW_KEY.format(goodreads_id)) - - -def is_ignored(goodreads_id): - return _client.sismember(IGNORED_KEY, goodreads_id) - - -def save_review(goodreads_id, review): - key = REVIEW_KEY.format(goodreads_id) - _client.set(key, json.dumps(review)) - - -def count_reviews(): - return len(_client.keys(REVIEW_KEY.format('*'))) - - -def load_review(key): - value = _client.get(key) - if value is not None: - value = json.loads(value) - # fix bad isbns - TODO: remove - isbn = value['book']['isbn13'] - if type(isbn) == dict: - value['book']['isbn13'] = '' - return value - - -def get_review(goodreads_id): - key = REVIEW_KEY.format(goodreads_id) - return load_review(key) - - -def get_random_review(): - # Returns one random review. - keys = _client.keys(REVIEW_KEY.format('*')) - for key in keys: - return load_review(key) diff --git a/src/books/tests.py b/src/books/tests.py deleted file mode 100644 index 7ce503c..0000000 --- a/src/books/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. 
diff --git a/src/templates/add_author.html b/src/templates/add_author.html index 19b918b..ee94af8 100644 --- a/src/templates/add_author.html +++ b/src/templates/add_author.html @@ -7,5 +7,20 @@ {% endblock %} {% block content %} +{% if existing_authors %} +
+
+ Author may already exist +
+
+ {% for a in existing_authors %} +
+ {{ a.name }} ({{ a.slug}}): + {{ a.link|urlize }} +
+ {% endfor %} +
+
+{% endif %} {% include 'author_form.html' %} {% endblock %} diff --git a/src/templates/add_author_field.html b/src/templates/add_author_field.html deleted file mode 100644 index eeb2b55..0000000 --- a/src/templates/add_author_field.html +++ /dev/null @@ -1,13 +0,0 @@ -
-
-
-
Author
- - -
- {% csrf_token %} -
-
diff --git a/src/templates/add_book.html b/src/templates/add_book.html index e838314..1b603d7 100644 --- a/src/templates/add_book.html +++ b/src/templates/add_book.html @@ -7,6 +7,50 @@ {% endblock %} {% block content %} +{% if existing_books %} +
+
+ Book may already exist +
+
+ {% for b in existing_books %} +
+ {{ b.title }} ({{ b.slug}}) +
+ {% endfor %} +
+
+{% endif %} + +{% if author_slug %} +
+
+ Author already exists +

+ {{ author_name }} +

+
+
+{% elif author_name %} +
+ +
+ Author does not exist yet +

+ {{ author_name }} +

+
+
+{% endif %} +{% if goodreads_url %} +
+
+ Pre-populated from scraping Goodreads +

{{ goodreads_url|urlize }}

+
+
+{% endif %} + {% include 'book_form.html' %} {% endblock %} diff --git a/src/templates/add_book_field.html b/src/templates/add_book_field.html deleted file mode 100644 index 292f6d1..0000000 --- a/src/templates/add_book_field.html +++ /dev/null @@ -1,13 +0,0 @@ -
-
-
-
{{ label }}
- - -
- {% csrf_token %} -
-
diff --git a/src/templates/author_form.html b/src/templates/author_form.html index bb1061a..1d7f7d3 100644 --- a/src/templates/author_form.html +++ b/src/templates/author_form.html @@ -11,7 +11,7 @@
{{ author_form.slug }}
diff --git a/src/templates/book_form.html b/src/templates/book_form.html index 2ff16d4..103d6eb 100644 --- a/src/templates/book_form.html +++ b/src/templates/book_form.html @@ -19,7 +19,12 @@ {{ details_form.isbn }} -
+ {% comment %} + the error is just to draw attention to the fact that this field + doesn't get automatically populated from scraping goodreads + (because the page we're scraping doesn't include this data) + {% endcomment %} +
{{ details_form.publisher }}
@@ -112,7 +117,7 @@ {% endif %}
-
+
{{ details_form.shelves }}
diff --git a/src/templates/keyboard.html b/src/templates/keyboard.html index 3e4c963..fd6f16f 100644 --- a/src/templates/keyboard.html +++ b/src/templates/keyboard.html @@ -3,42 +3,3 @@
- - - - - \ No newline at end of file diff --git a/src/templates/manual_import.html b/src/templates/manual_import.html deleted file mode 100644 index d23f870..0000000 --- a/src/templates/manual_import.html +++ /dev/null @@ -1,19 +0,0 @@ -{% extends "base.html" %} - -{% load humanize %} -{% load markdown_filter %} - -{% block title %}Manual import from Goodreads{% endblock %} - -{% block breadcrumbs %} -
Manual import from Goodreads
-{% endblock %} - -{% block content %} -
-{% include 'add_author_field.html' %} -
-
-{% include 'add_book_field.html' with label='Book' %} -
-{% endblock %} diff --git a/src/templates/menu.html b/src/templates/menu.html index a237704..159c83b 100644 --- a/src/templates/menu.html +++ b/src/templates/menu.html @@ -58,7 +58,8 @@
diff --git a/src/templates/sync_goodreads.html b/src/templates/sync_goodreads.html index 12481bb..71524f6 100644 --- a/src/templates/sync_goodreads.html +++ b/src/templates/sync_goodreads.html @@ -10,102 +10,97 @@ {% endblock %} {% block content %} - -{% if review %} -
-
-

{{ total }} total

- {% if existing_books %} -

Found existing

- +
+ -{% else %} -
Nothing to sync. Try reading some more books?
-{% endif %} +
+ + + + + + + + + + + + + + + + {% for book in books %} + + + {% if book.slug %} + + {% else %} + + {% endif %} + + {% if book.author %} + + {% else %} + + {% endif %} + + + {% if book.is_processed %} + + {% else %} + + {% endif %} + + {% if book.dates_match %} + + {% else %} + + {% endif %} + + {% endfor %} + +
Book IDBook title (in BM?)Author IDAuthor name (in BM?)Start dateEnd dateProcessed?Dates match?
{{ book.id }} + {{ book.title }} + + {{ book.title }} + {{ book.author_id }} + + {{ book.author_name }} + + {{ book.author_name }}{{ book.start_date }}{{ book.end_date }} + + + {{ book.dates_comment }} +
+ {% endblock %} diff --git a/src/templates/view_book.html b/src/templates/view_book.html index 5aefed8..5c5aa78 100644 --- a/src/templates/view_book.html +++ b/src/templates/view_book.html @@ -86,10 +86,6 @@

{% endif %} - {% else %} - {% if request.user.is_staff %} - {% include 'add_book_field.html' with label='Add more' %} - {% endif %} {% endif %}

{% endif %}