diff --git a/fatcat_scholar/djvu.py b/fatcat_scholar/djvu.py index 58d6761..d626238 100644 --- a/fatcat_scholar/djvu.py +++ b/fatcat_scholar/djvu.py @@ -16,7 +16,7 @@ def djvu_extract_leaf_texts( if only_leaves: max_leaf = max(only_leaves) elem_iter = ET.iterparse(blob, ["start", "end"]) - for (event, element) in elem_iter: + for event, element in elem_iter: if event == "start": continue if not (element.tag == "OBJECT" and event == "end"): diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py index 38fe8b1..1cfad1e 100644 --- a/fatcat_scholar/issue_db.py +++ b/fatcat_scholar/issue_db.py @@ -358,7 +358,7 @@ def load_counts(self, es_client: Any) -> None: ) cur: Any = self.db.cursor() count = 0 - for (sim_pubid, container_ident) in all_pub_containers: + for sim_pubid, container_ident in all_pub_containers: count += 1 if count % 500 == 0: print(f" {count}...", file=sys.stderr) diff --git a/fatcat_scholar/kafka.py b/fatcat_scholar/kafka.py index 9fd43cf..556ede5 100644 --- a/fatcat_scholar/kafka.py +++ b/fatcat_scholar/kafka.py @@ -85,7 +85,6 @@ def create_kafka_consumer( """ def _on_rebalance(consumer: Any, partitions: Any) -> None: - for p in partitions: if p.error: raise KafkaException(p.error) @@ -160,7 +159,6 @@ def create_kafka_producer(kafka_brokers: List[str]) -> Producer: return Producer(config) def run(self) -> Counter: - if self.batch_timeout_sec: signal.signal(signal.SIGALRM, self._timeout_handler) diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py index 14e5fed..53ac0df 100644 --- a/fatcat_scholar/schema.py +++ b/fatcat_scholar/schema.py @@ -496,7 +496,6 @@ def contrib_affiliation(contrib: ReleaseContrib) -> Optional[str]: def es_abstracts_from_grobid(tei_doc: GrobidDocument) -> List[ScholarAbstract]: - if tei_doc.abstract: body = scrub_text(tei_doc.abstract) if body: @@ -505,7 +504,6 @@ def es_abstracts_from_grobid(tei_doc: GrobidDocument) -> List[ScholarAbstract]: def es_abstracts_from_release(release: ReleaseEntity) -> List[ScholarAbstract]: - d = dict() for abst in release.abstracts: if abst.lang not in d: @@ -518,7 +516,6 @@ def es_abstracts_from_release(release: ReleaseEntity) -> List[ScholarAbstract]: def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: - container_name = release.extra and release.extra.get("container_name") container_sherpa_color = None @@ -622,7 +619,6 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: def es_release_from_release(release: ReleaseEntity) -> ScholarRelease: - if release.container: container_name = release.container.name container_ident = release.container.redirect or release.container.ident diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py index ebfa6ef..1003cec 100644 --- a/fatcat_scholar/search.py +++ b/fatcat_scholar/search.py @@ -228,7 +228,6 @@ def apply_filters(search: Search, query: FulltextQuery) -> Search: def process_query(query: FulltextQuery) -> FulltextHits: - if not query.q: return do_fulltext_search(query) @@ -295,7 +294,6 @@ def do_lookup_query(lookup: str) -> FulltextHits: def do_fulltext_search( query: FulltextQuery, deep_page_limit: int = 2000 ) -> FulltextHits: - search = Search(using=es_client, index=settings.ELASTICSEARCH_QUERY_FULLTEXT_INDEX) if query.collapse_key: diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index c7d49d3..e8c4164 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -122,7 +122,6 @@ def es_sim_from_sim(sim: Dict[str, Any]) -> ScholarSim: def es_biblio_from_sim(sim: Dict[str, Any]) -> ScholarBiblio: - issue_meta = sim["issue_item_metadata"]["metadata"] pub_meta = sim["pub_item_metadata"]["metadata"] @@ -264,7 +263,6 @@ def es_fulltext_from_grobid( def es_fulltext_from_pdftotext( raw_text: str, pdf_meta: Optional[dict], re: ReleaseEntity, fe: FileEntity ) -> Optional[ScholarFulltext]: - if raw_text and len(raw_text) > MAX_BODY_CHARS: raw_text = raw_text[:MAX_BODY_CHARS] ret = ScholarFulltext( @@ -281,7 +279,6 @@ def es_fulltext_from_html( re: ReleaseEntity, wc: WebcaptureEntity, ) -> Optional[ScholarFulltext]: - if not wc.archive_urls or not html_fulltext.get("tei_xml"): return None @@ -407,7 +404,6 @@ def biblio_metadata_hacks(biblio: ScholarBiblio) -> ScholarBiblio: # noqa: C901 def generate_tags( biblio: ScholarBiblio, primary_release: Optional[ReleaseEntity] ) -> List[str]: - tags = [] # tags @@ -466,7 +462,6 @@ def check_exclude_web(biblio: ScholarBiblio) -> bool: def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]: - tags: List[str] = [] work_ident: Optional[str] = None sim_issue: Optional[str] = None diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py index d3a1460..ed5e3ec 100644 --- a/fatcat_scholar/web.py +++ b/fatcat_scholar/web.py @@ -202,7 +202,6 @@ def web_search( lang: LangPrefix = Depends(LangPrefix), content: ContentNegotiation = Depends(ContentNegotiation), ) -> Any: - if content.mimetype == "application/json": return search(query) hits: Optional[FulltextHits] = None @@ -250,7 +249,6 @@ def web_feed_rss( query: FulltextQuery = Depends(FulltextQuery), lang: LangPrefix = Depends(LangPrefix), ) -> fastapi_rss.RSSResponse: - # override some query params for feeds original_query = query.q if query.q: @@ -314,7 +312,6 @@ def web_work( lang: LangPrefix = Depends(LangPrefix), content: ContentNegotiation = Depends(ContentNegotiation), ) -> Any: - if content.mimetype == "application/json": return get_work(work_ident) diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index fa1a7bc..ae5ebc1 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -216,7 +216,6 @@ def fetch_webcapture_html_fulltext( wc: WebcaptureEntity, release_ident: str, ) -> Optional[Dict[str, Any]]: - primary_resources = [cdx for cdx in wc.cdx if cdx.url == wc.original_url] if not primary_resources or primary_resources[0].mimetype != "text/html": return None diff --git a/fatcat_scholar/worker.py b/fatcat_scholar/worker.py index ef1a8c7..591d2e4 100644 --- a/fatcat_scholar/worker.py +++ b/fatcat_scholar/worker.py @@ -115,7 +115,6 @@ def __init__(self, es_client: Any, es_index: str, **kwargs: Any): self.es_index = es_index def process_batch(self, batch: List[dict]) -> None: - bulk_actions = [] for obj in batch: bundle = IntermediateBundle.from_json(obj) diff --git a/tests/test_djvu_parse.py b/tests/test_djvu_parse.py index 621bf2e..38692d0 100644 --- a/tests/test_djvu_parse.py +++ b/tests/test_djvu_parse.py @@ -4,7 +4,6 @@ def test_djvu_extract_leaf_texts() -> None: - # https://archive.org/details/ERIC_ED441501 with open("tests/files/ERIC_ED441501_djvu.xml", "r") as f: blob = f.read() diff --git a/tests/test_issue_db.py b/tests/test_issue_db.py index 6aab879..648982a 100644 --- a/tests/test_issue_db.py +++ b/tests/test_issue_db.py @@ -6,7 +6,6 @@ def test_issue_db_basics() -> None: - api_conf = fatcat_openapi_client.Configuration() api_conf.host = settings.FATCAT_API_HOST api = fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(api_conf)) diff --git a/tests/test_refs_transform.py b/tests/test_refs_transform.py index 4b1b588..9e80d97 100644 --- a/tests/test_refs_transform.py +++ b/tests/test_refs_transform.py @@ -7,7 +7,6 @@ def test_transform_refs_grobid() -> None: - with open("tests/files/example_grobid.tei.xml", "r") as f: blob = f.read() @@ -45,7 +44,6 @@ def test_transform_refs_grobid() -> None: def test_transform_refs_crossref() -> None: - with open("tests/files/example_crossref_record.json", "r") as f: record = json.loads(f.read()) diff --git a/tests/test_transform.py b/tests/test_transform.py index fd14533..17c0301 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -14,7 +14,6 @@ def test_es_release_from_release() -> None: - with open("tests/files/release_hsmo6p4smrganpb3fndaj2lon4.json", "r") as f: release = entity_from_json(f.read(), ReleaseEntity) @@ -27,7 +26,6 @@ def test_es_release_from_release() -> None: def test_es_biblio_from_release() -> None: - with open("tests/files/release_hsmo6p4smrganpb3fndaj2lon4.json", "r") as f: release = entity_from_json(f.read(), ReleaseEntity) @@ -43,13 +41,11 @@ def test_es_biblio_from_release() -> None: def test_run_refs() -> None: - with open("tests/files/work_iarm6swodra2bcrzhxrfaah7py_bundle.json", "r") as f: run_refs(f.readlines()) def test_run_transform() -> None: - with open("tests/files/work_iarm6swodra2bcrzhxrfaah7py_bundle.json", "r") as f: run_transform(f.readlines()) diff --git a/tests/test_web.py b/tests/test_web.py index 33d80ed..375707c 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -81,7 +81,6 @@ def test_basic_routes(client: Any) -> None: def test_basic_search(client: Any, mocker: Any) -> None: - rv = client.get("/search") assert rv.status_code == 200 @@ -105,7 +104,6 @@ def test_basic_search(client: Any, mocker: Any) -> None: def test_basic_rss_feed(client: Any, mocker: Any) -> None: - with open("tests/files/elastic_fulltext_search.json") as f: elastic_resp = json.loads(f.read()) @@ -127,7 +125,6 @@ def test_basic_rss_feed(client: Any, mocker: Any) -> None: def test_basic_work_landing_page(client: Any, mocker: Any) -> None: - with open("tests/files/elastic_fulltext_get.json") as f: elastic_resp = json.loads(f.read()) @@ -149,7 +146,6 @@ def test_basic_work_landing_page(client: Any, mocker: Any) -> None: def test_basic_access_redirect(client: Any, mocker: Any) -> None: - with open("tests/files/elastic_fulltext_get.json") as f: elastic_resp = json.loads(f.read()) @@ -184,7 +180,6 @@ def test_basic_access_redirect(client: Any, mocker: Any) -> None: def test_access_redirect_fallback(client: Any, mocker: Any) -> None: - with open("tests/files/elastic_fulltext_get.json") as f: elastic_resp = json.loads(f.read()) @@ -280,9 +275,9 @@ def test_access_redirect_fallback(client: Any, mocker: Any) -> None: assert b"Access Location Not Found" in rv.content assert b"archive.org/download/some/thing.else.pdf" in rv.content + @pytest.mark.skip(reason="todo: requires a mocked fatcat API client, not just es") def test_access_redirect_encoding(client: Any, mocker: Any) -> None: - with open("tests/files/elastic_get_work_a6gvpil4brdgzhqyaog3ftngqe.json") as f: elastic_ia_resp = json.loads(f.read()) with open("tests/files/elastic_get_work_ao5l3ykgbvg2vfpqe2y5qold5y.json") as f: diff --git a/tests/test_work_pipeline.py b/tests/test_work_pipeline.py index bc8a79a..de7a448 100644 --- a/tests/test_work_pipeline.py +++ b/tests/test_work_pipeline.py @@ -13,7 +13,6 @@ @responses.activate def test_run_transform(mocker: Any) -> None: - issue_db = IssueDB(settings.SCHOLAR_ISSUEDB_PATH) issue_db.init_db()