Skip to content

Commit

Permalink
Merge branch 'main' into production
Browse files Browse the repository at this point in the history
  • Loading branch information
Mouse Reeve committed Oct 2, 2023
2 parents 8a12870 + bcfd4d2 commit 1093e95
Show file tree
Hide file tree
Showing 88 changed files with 8,146 additions and 5,282 deletions.
6 changes: 5 additions & 1 deletion bookwyrm/activitypub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@

from .base_activity import ActivityEncoder, Signature, naive_parse
from .base_activity import Link, Mention, Hashtag
from .base_activity import ActivitySerializerError, resolve_remote_id
from .base_activity import (
ActivitySerializerError,
resolve_remote_id,
get_representative,
)
from .image import Document, Image
from .note import Note, GeneratedNote, Article, Comment, Quotation
from .note import Review, Rating
Expand Down
7 changes: 5 additions & 2 deletions bookwyrm/activitypub/base_activity.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" basics for an activitypub serializer """
from __future__ import annotations
from dataclasses import dataclass, fields, MISSING
from json import JSONEncoder
import logging
Expand Down Expand Up @@ -72,8 +73,10 @@ class ActivityObject:

def __init__(
self,
activity_objects: Optional[list[str, base_model.BookWyrmModel]] = None,
**kwargs: dict[str, Any],
activity_objects: Optional[
dict[str, Union[str, list[str], ActivityObject, base_model.BookWyrmModel]]
] = None,
**kwargs: Any,
):
"""this lets you pass in an object with fields that aren't in the
dataclass, which it ignores. Any field in the dataclass is required or
Expand Down
2 changes: 1 addition & 1 deletion bookwyrm/activitystreams.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _get_audience(self, status): # pylint: disable=no-self-use
trace.get_current_span().set_attribute("status_privacy", status.privacy)
trace.get_current_span().set_attribute(
"status_reply_parent_privacy",
status.reply_parent.privacy if status.reply_parent else None,
status.reply_parent.privacy if status.reply_parent else status.privacy,
)
# direct messages don't appear in feeds, direct comments/reviews/etc do
if status.privacy == "direct" and status.status_type == "Note":
Expand Down
6 changes: 4 additions & 2 deletions bookwyrm/importers/calibre_import.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
""" handle reading a csv from calibre """
from typing import Any, Optional

from bookwyrm.models import Shelf

from . import Importer
Expand All @@ -9,7 +11,7 @@ class CalibreImporter(Importer):

service = "Calibre"

def __init__(self, *args, **kwargs):
def __init__(self, *args: Any, **kwargs: Any):
# Add timestamp to row_mappings_guesses for date_added to avoid
# integrity error
row_mappings_guesses = []
Expand All @@ -23,6 +25,6 @@ def __init__(self, *args, **kwargs):
self.row_mappings_guesses = row_mappings_guesses
super().__init__(*args, **kwargs)

def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]:
    """Return the shelf to use for an imported row.

    Args:
        normalized_row: the normalized import row; it is not inspected.

    Returns:
        Always ``Shelf.TO_READ``.
    """
    # Calibre export does not indicate which shelf to use. Use a default one for now
    return Shelf.TO_READ
51 changes: 35 additions & 16 deletions bookwyrm/importers/importer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
""" handle reading a csv from an external service, defaults are from Goodreads """
import csv
from datetime import timedelta
from typing import Iterable, Optional

from django.utils import timezone
from bookwyrm.models import ImportJob, ImportItem, SiteSettings
from bookwyrm.models import ImportJob, ImportItem, SiteSettings, User


class Importer:
Expand Down Expand Up @@ -35,36 +37,47 @@ class Importer:
}

# pylint: disable=too-many-locals
def create_job(
    self, user: User, csv_file: Iterable[str], include_reviews: bool, privacy: str
) -> ImportJob:
    """Check over a csv and create a database entry for the job.

    Args:
        user: the user the import job is created for.
        csv_file: iterable of csv lines, parsed using ``self.delimiter``.
        include_reviews: stored on the job as ``include_reviews``.
        privacy: stored on the job as ``privacy``.

    Returns:
        The newly created ImportJob. When the import limit is enforced and
        already used up, the job is completed immediately without items.

    Raises:
        ValueError: if the csv contains no data rows.
    """
    csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter)
    rows = list(csv_reader)
    if not rows:
        raise ValueError("CSV file is empty")

    # fieldnames is typed as optional; hand over a copy so the reader's own
    # list of field names is never modified
    fieldnames = csv_reader.fieldnames
    mappings = self.create_row_mappings(list(fieldnames)) if fieldnames else {}

    job = ImportJob.objects.create(
        user=user,
        include_reviews=include_reviews,
        privacy=privacy,
        mappings=mappings,
        source=self.service,
    )

    enforce_limit, allowed_imports = self.get_import_limit(user)
    if enforce_limit and allowed_imports <= 0:
        # nothing may be imported: close the job right away
        job.complete_job()
        return job
    for index, entry in enumerate(rows):
        # stop once the remaining allowance has been used up
        if enforce_limit and index >= allowed_imports:
            break
        self.create_item(job, index, entry)
    return job

def update_legacy_job(self, job):
def update_legacy_job(self, job: ImportJob) -> None:
"""patch up a job that was in the old format"""
items = job.items
headers = list(items.first().data.keys())
first_item = items.first()
if first_item is None:
return

headers = list(first_item.data.keys())
job.mappings = self.create_row_mappings(headers)
job.updated_date = timezone.now()
job.save()
Expand All @@ -75,24 +88,24 @@ def update_legacy_job(self, job):
item.normalized_data = normalized
item.save()

def create_row_mappings(self, headers: list[str]) -> dict[str, Optional[str]]:
    """Guess what the headers mean.

    Walks ``self.row_mappings_guesses`` in order and assigns to each key the
    first header whose lowercased name is among that key's guesses. A header
    is used for at most one key.

    Args:
        headers: the column names found in the file; left unmodified.

    Returns:
        A dict with one entry per key in ``self.row_mappings_guesses``; the
        value is the matching header, or None when no header matched.
    """
    # track unclaimed headers on a copy so the caller's list is not consumed
    unclaimed = list(headers)
    mappings: dict[str, Optional[str]] = {}
    for key, guesses in self.row_mappings_guesses:
        value = next((h for h in unclaimed if h.lower() in guesses), None)
        if value:
            # a header can only be claimed by the first key that matches it
            unclaimed.remove(value)
        mappings[key] = value
    return mappings

def create_item(self, job: ImportJob, index: int, data: dict[str, str]) -> None:
    """Create and save an import item.

    Args:
        job: the import job the item belongs to; its ``mappings`` are used
            to normalize the row.
        index: stored on the item as ``index``.
        data: the raw row, stored on the item as ``data``.
    """
    normalized = self.normalize_row(data, job.mappings)
    # the shelf is derived from the normalized row and stored alongside it
    normalized["shelf"] = self.get_shelf(normalized)
    ImportItem(job=job, index=index, data=data, normalized_data=normalized).save()

def get_shelf(self, normalized_row):
def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]:
"""determine which shelf to use"""
shelf_name = normalized_row.get("shelf")
if not shelf_name:
Expand All @@ -103,11 +116,15 @@ def get_shelf(self, normalized_row):
]
return shelf[0] if shelf else None

# pylint: disable=no-self-use
def normalize_row(
    self, entry: dict[str, str], mappings: dict[str, Optional[str]]
) -> dict[str, Optional[str]]:
    """Create the formatted row of data from a raw row.

    Args:
        entry: the raw row, keyed by the file's own header names.
        mappings: normalized field name -> header name (or None).

    Returns:
        A dict keyed by the normalized field names. A field is None when it
        has no mapped header or the header is missing from ``entry``.
    """
    return {
        field: entry.get(header) if header else None
        for field, header in mappings.items()
    }

def get_import_limit(self, user): # pylint: disable=no-self-use
# pylint: disable=no-self-use
def get_import_limit(self, user: User) -> tuple[int, int]:
"""check if import limit is set and return how many imports are left"""
site_settings = SiteSettings.objects.get()
import_size_limit = site_settings.import_size_limit
Expand All @@ -125,7 +142,9 @@ def get_import_limit(self, user): # pylint: disable=no-self-use
allowed_imports = import_size_limit - imported_books
return enforce_limit, allowed_imports

def create_retry_job(self, user, original_job, items):
def create_retry_job(
self, user: User, original_job: ImportJob, items: list[ImportItem]
) -> ImportJob:
"""retry items that didn't import"""
job = ImportJob.objects.create(
user=user,
Expand Down
20 changes: 14 additions & 6 deletions bookwyrm/importers/librarything_import.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@
""" handle reading a tsv from librarything """
import re
from typing import Optional

from bookwyrm.models import Shelf

from . import Importer


def _remove_brackets(value: Optional[str]) -> Optional[str]:
return re.sub(r"\[|\]", "", value) if value else None


class LibrarythingImporter(Importer):
"""csv downloads from librarything"""

service = "LibraryThing"
delimiter = "\t"
encoding = "ISO-8859-1"

# pylint: disable=no-self-use
def normalize_row(
    self, entry: dict[str, str], mappings: dict[str, Optional[str]]
) -> dict[str, Optional[str]]:
    """Create the formatted row of data from a raw row.

    Every mapped value has its square brackets removed. The ``isbn_13``
    field is then split on ``", "`` and replaced by the second entry, or by
    None when fewer than two entries are present.

    Args:
        entry: the raw row, keyed by the file's own header names.
        mappings: normalized field name -> header name (or None).

    Returns:
        A dict keyed by the normalized field names.
    """
    normalized = {
        k: _remove_brackets(entry.get(v) if v else None)
        for k, v in mappings.items()
    }
    # NOTE(review): presumably the column lists the ISBN-10 first and the
    # ISBN-13 second -- confirm against a real export
    isbn_13 = value.split(", ") if (value := normalized.get("isbn_13")) else []
    normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 1 else None
    return normalized

def get_shelf(self, normalized_row):
def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]:
if normalized_row["date_finished"]:
return Shelf.READ_FINISHED
if normalized_row["date_started"]:
Expand Down
4 changes: 3 additions & 1 deletion bookwyrm/importers/openlibrary_import.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
""" handle reading a csv from openlibrary"""
from typing import Any

from . import Importer


Expand All @@ -7,7 +9,7 @@ class OpenLibraryImporter(Importer):

service = "OpenLibrary"

def __init__(self, *args: Any, **kwargs: Any):
    """Add OpenLibrary's key columns to the header guesses.

    All arguments are passed through to the parent initializer.
    """
    # Nothing has been set on the instance yet, so the attribute resolves to
    # the list defined on the class. Appending to it in place would add the
    # same entries again on every instantiation and change the guesses for
    # everything else sharing that list, so build a per-instance list instead.
    self.row_mappings_guesses = [
        *self.row_mappings_guesses,
        ("openlibrary_key", ["edition id"]),
        ("openlibrary_work_key", ["work id"]),
    ]
    super().__init__(*args, **kwargs)
81 changes: 63 additions & 18 deletions bookwyrm/isbn/isbn.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
""" Use the range message from isbn-international to hyphenate ISBNs """
import os
from typing import Optional
from xml.etree import ElementTree
from xml.etree.ElementTree import Element

import requests

from bookwyrm import settings


def _get_rules(element: Element) -> list[Element]:
if (rules_el := element.find("Rules")) is not None:
return rules_el.findall("Rule")
return []


class IsbnHyphenator:
"""Class to manage the range message xml file and use it to hyphenate ISBNs"""

Expand All @@ -15,58 +24,94 @@ class IsbnHyphenator:
)
__element_tree = None

def update_range_message(self) -> None:
    """Download the range message xml file and save it locally.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an error status. The local file is not
            written in that case.
    """
    # requests applies no timeout by default; without one a stalled server
    # would block the caller indefinitely
    response = requests.get(self.__range_message_url, timeout=15)
    # never replace the local file with the body of an error response
    response.raise_for_status()
    with open(self.__range_file_path, "w", encoding="utf-8") as file:
        file.write(response.text)
    # drop the cached tree so the new file is parsed on next use
    self.__element_tree = None

def hyphenate(self, isbn_13: Optional[str]) -> Optional[str]:
    """Hyphenate the given ISBN-13 number using the range message.

    Args:
        isbn_13: an unhyphenated ISBN-13, or None.

    Returns:
        None when ``isbn_13`` is None. The hyphenated ISBN when both a
        registration group and a registrant are found. Otherwise the input
        is returned unchanged.
    """
    if isbn_13 is None:
        return None

    # The lookups below slice the ISBN and convert the pieces with int(),
    # which raises ValueError on empty or non-digit pieces (e.g. a short or
    # already hyphenated value). Treat such input as "failed to hyphenate".
    if len(isbn_13) != 13 or not isbn_13.isdecimal():
        return isbn_13

    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    gs1_prefix = isbn_13[:3]
    reg_group = self.__find_reg_group(isbn_13, gs1_prefix)
    if reg_group is None:
        return isbn_13  # failed to hyphenate

    registrant = self.__find_registrant(isbn_13, gs1_prefix, reg_group)
    if registrant is None:
        return isbn_13  # failed to hyphenate

    # whatever lies between the registrant and the final digit
    publication = isbn_13[len(gs1_prefix) + len(reg_group) + len(registrant) : -1]
    check_digit = isbn_13[-1:]
    return "-".join((gs1_prefix, reg_group, registrant, publication, check_digit))

def __find_reg_group(self, isbn_13: str, gs1_prefix: str) -> Optional[str]:
    """Find the registration group digits of ``isbn_13``.

    Looks up the ``EAN.UCC`` entry whose ``Prefix`` equals ``gs1_prefix``
    and checks its rules in order. Only the first entry with a matching
    prefix is considered.

    Args:
        isbn_13: the unhyphenated ISBN.
        gs1_prefix: the leading prefix of ``isbn_13``.

    Returns:
        The digits following the prefix that fall within a rule's range, or
        None when no prefix entry or no rule matches.
    """
    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    ucc_prefixes_el = self.__element_tree.find("EAN.UCCPrefixes")
    if ucc_prefixes_el is None:
        return None

    for ean_ucc_el in ucc_prefixes_el.findall("EAN.UCC"):
        if (
            prefix_el := ean_ucc_el.find("Prefix")
        ) is not None and prefix_el.text == gs1_prefix:
            for rule_el in _get_rules(ean_ucc_el):
                length_el = rule_el.find("Length")
                if length_el is None:
                    continue
                # a missing or empty length is treated like length 0
                length = int(text) if (text := length_el.text) else 0
                if length == 0:
                    continue

                range_el = rule_el.find("Range")
                if range_el is None or range_el.text is None:
                    continue

                # compare only the first ``length`` digits of each bound
                reg_grp_range = [int(x[:length]) for x in range_el.text.split("-")]
                reg_group = isbn_13[len(gs1_prefix) : len(gs1_prefix) + length]
                if reg_grp_range[0] <= int(reg_group) <= reg_grp_range[1]:
                    return reg_group
            # the prefix matched but none of its rules did
            return None
    return None

def __find_registrant(self, isbn_13, gs1_prefix, reg_group):
def __find_registrant(
self, isbn_13: str, gs1_prefix: str, reg_group: str
) -> Optional[str]:
from_ind = len(gs1_prefix) + len(reg_group)
for group_el in self.__element_tree.find("RegistrationGroups").findall("Group"):
if group_el.find("Prefix").text == "-".join((gs1_prefix, reg_group)):
for rule_el in group_el.find("Rules").findall("Rule"):
length = int(rule_el.find("Length").text)

if self.__element_tree is None:
self.__element_tree = ElementTree.parse(self.__range_file_path)

reg_groups_el = self.__element_tree.find("RegistrationGroups")
if reg_groups_el is None:
return None

for group_el in reg_groups_el.findall("Group"):
if (
prefix_el := group_el.find("Prefix")
) is not None and prefix_el.text == "-".join((gs1_prefix, reg_group)):
for rule_el in _get_rules(group_el):
length_el = rule_el.find("Length")
if length_el is None:
continue
length = int(text) if (text := length_el.text) else 0
if length == 0:
continue

range_el = rule_el.find("Range")
if range_el is None or range_el.text is None:
continue
registrant_range = [
int(x[:length]) for x in rule_el.find("Range").text.split("-")
int(x[:length]) for x in range_el.text.split("-")
]
registrant = isbn_13[from_ind : from_ind + length]
if registrant_range[0] <= int(registrant) <= registrant_range[1]:
Expand Down
Loading

0 comments on commit 1093e95

Please sign in to comment.