Skip to content

Commit

Permalink
Merge pull request #278 from miguelgrc/move_derive_categories
Browse files: browse the repository at this point in the history
arxiv parser: remove duplicates from inspire categories
  • Loading branch information
ammirate committed Sep 27, 2019
2 parents ffc2ea9 + 9752620 commit 203afd9
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 45 deletions.
3 changes: 2 additions & 1 deletion hepcrawl/parsers/arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from inspire_schemas.utils import classify_field
from inspire_utils.date import PartialDate
from inspire_utils.helpers import maybe_int, remove_tags
from inspire_utils.dedupers import dedupe_list

from ..mappings import CONFERENCE_WORDS, THESIS_WORDS
from ..utils import (
Expand Down Expand Up @@ -92,7 +93,7 @@ def parse(self):
self.builder.add_document_type(self.document_type)
normalized_categories = [classify_field(arxiv_cat)
for arxiv_cat in self.arxiv_categories]
self.builder.add_inspire_categories(normalized_categories, 'arxiv')
self.builder.add_inspire_categories(dedupe_list(normalized_categories), 'arxiv')

return self.builder.record

Expand Down
8 changes: 0 additions & 8 deletions tests/functional/arxiv/fixtures/arxiv_expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,6 @@
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "Other"
Expand Down Expand Up @@ -417,10 +413,6 @@
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "General Physics"
Expand Down
16 changes: 0 additions & 16 deletions tests/functional/arxiv/fixtures/arxiv_expected_single.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,22 +74,6 @@
{
"source": "arxiv",
"term": "Theory-HEP"
},
{
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "Math and Math Physics"
}
],
"abstracts": [
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/responses/arxiv/sample_arxiv_record10.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
</header>
<metadata>
<arXiv xmlns="http://arxiv.org/OAI/arXiv/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://arxiv.org/OAI/arXiv/ http://arxiv.org/OAI/arXiv.xsd">
<id>1606.04259</id><created>2016-06-14</created><authors><author><keyname>Battista</keyname><forenames>Emmanuele</forenames></author></authors><title>Extreme Regimes in Quantum Gravity</title><categories>hep-th</categories><comments>Phd thesis in Fundamental and Applied Physics presented at University
<id>1606.04259</id><created>2016-06-14</created><authors><author><keyname>Battista</keyname><forenames>Emmanuele</forenames></author></authors><title>Extreme Regimes in Quantum Gravity</title><categories>math.AP hep-th math-ph math.DG math.MP nlin.SI</categories><comments>Phd thesis in Fundamental and Applied Physics presented at University
&quot;Federico II&quot; (Naples, Italy) on 29th April 2016</comments><license>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</license><abstract> The thesis is divided into two parts. In the first part the low-energy limit
of quantum gravity is analysed, whereas in the second we deal with the
high-energy domain. In the first part, by applying the effective field theory
Expand Down
47 changes: 28 additions & 19 deletions tests/unit/responses/arxiv/sample_arxiv_record10_parsed.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"errors": [],
"results_uri": "scrapy_feed_uri",
"log_file": "scrapy_log_file",
"errors": [],
"results_uri": "scrapy_feed_uri",
"log_file": "scrapy_log_file",
"results_data": [
{
"_collections": ["Literature"],
"preprint_date": "2016-06-14",
"preprint_date": "2016-06-14",
"curated": false,
"citeable": true,
"license": [
Expand All @@ -14,54 +14,63 @@
"material": "preprint",
"license": "arXiv nonexclusive-distrib 1.0"
}
],
],
"public_notes": [
{
"source": "arXiv",
"source": "arXiv",
"value": "Phd thesis in Fundamental and Applied Physics presented at University\n \"Federico II\" (Naples, Italy) on 29th April 2016"
}
],
],
"authors": [
{
"full_name": "Battista, Emmanuele"
}
],
],
"titles": [
{
"source": "arXiv",
"source": "arXiv",
"title": "Extreme Regimes in Quantum Gravity"
}
],
],
"arxiv_eprints": [
{
"value": "1606.04259",
"categories": [
"hep-th"
],
"value": "1606.04259"
"math.AP",
"hep-th",
"math-ph",
"math.DG",
"math.MP",
"nlin.SI"
]
}
],
"inspire_categories": [
{
"source": "arxiv",
"term": "Math and Math Physics"
},
{
"source": "arxiv",
"term": "Theory-HEP"
}
],
"document_type": [
"thesis"
],
],
"abstracts": [
{
"source": "arXiv",
"source": "arXiv",
"value": "The thesis is divided into two parts. In the first part the low-energy limit of quantum gravity is analysed, whereas in the second we deal with the high-energy domain. In the first part, by applying the effective field theory point of view to the quantization of general relativity, detectable, though tiny, quantum effects in the position of Newtonian Lagrangian points of the Earth-Moon system are found. In order to make more realistic the quantum corrected model proposed, the full three-body problem where the Earth and the Moon interact with a generic massive body and the restricted four-body problem involving the perturbative effects produced by the gravitational presence of the Sun in the Earth-Moon system are also studied. After that, a new quantum theory having general relativity as its classical counterpart is analysed. By exploiting this framework, an innovative interesting prediction involving the position of Lagrangian points within the context of general relativity is described. Furthermore, the new pattern provides quantum corrections to the relativistic coordinates of Earth-Moon libration points of the order of few millimetres. The second part of the thesis deals with the Riemannian curvature characterizing the boosted form assumed by the Schwarzschild-de Sitter metric. The analysis of the Kretschmann invariant and the geodesic equation shows that the spacetime possesses a \"scalar curvature singularity\" within a 3-sphere and that it is possible to define what we here call \"boosted horizon\", a sort of elastic wall where all particles are surprisingly pushed away, suggesting that such \"boosted geometries\" are ruled by a sort of \"antigravity effect\". Eventually, the equivalence with the coordinate shift method is invoked in order to demonstrate that all $\\delta^2$ terms appearing in the Riemann curvature tensor give vanishing contribution in distributional sense."
}
],
],
"acquisition_source": {
"datetime": "2016-06-14T00:00:00",
"source": "arXiv",
"method": "hepcrawl",
"source": "arXiv",
"method": "hepcrawl",
"submission_number": "scrapy_job"
}
}
],
],
"job_id": "scrapy_job"
}

0 comments on commit 203afd9

Please sign in to comment.