Skip to content

Commit

Permalink
Merge pull request #381 from Wikidata/bot-edit-summary
Browse files Browse the repository at this point in the history
add edit summaries in claims and references, bot task 1 and 3
  • Loading branch information
marfox committed Feb 12, 2020
2 parents 6dd61a2 + 0ab4359 commit 1aabe54
Showing 1 changed file with 105 additions and 25 deletions.
130 changes: 105 additions & 25 deletions soweego/ingester/wikidata_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,28 @@
)
RETRIEVED_REFERENCE.setTarget(TIMESTAMP)

###
# BEGIN: Edit summaries
###
# Approved task 1: identifiers addition
# https://www.wikidata.org/wiki/Wikidata:Requests_for_permissions/Bot/Soweego_bot
IDENTIFIERS_SUMMARY = '[[Wikidata:Requests_for_permissions/Bot/Soweego_bot|bot task 1]] with P887 reference, see [[Topic:V6cc1thgo09otfw5#flow-post-v7i05rpdja1b3wzk|discussion]]'

# Approved task 2: URL-based validation, criterion 2
# https://www.wikidata.org/wiki/Wikidata:Requests_for_permissions/Bot/Soweego_bot_2
URL_VALIDATION_SUMMARY = (
'[[Wikidata:Requests_for_permissions/Bot/Soweego_bot_2|bot task 2]]'
)

# Approved task 3: works by people
# https://www.wikidata.org/wiki/Wikidata:Requests_for_permissions/Bot/Soweego_bot_3
WORKS_SUMMARY = (
'[[Wikidata:Requests_for_permissions/Bot/Soweego_bot_3|bot task 3]]'
)
###
# END: Edit summaries
###

# We also support Twitter
SUPPORTED_TARGETS = target_database.supported_targets() ^ {TWITTER}

Expand Down Expand Up @@ -153,7 +175,8 @@ def identifiers_cli(catalog, entity, identifiers, sandbox):
claim (Richard Hell, Discogs artist ID, 266995)
reference (stated in, Discogs), (retrieved, today)
reference (based on heuristic, artificial intelligence),
(retrieved, today)
"""
if sandbox:
LOGGER.info('Running on the Wikidata sandbox item ...')
Expand Down Expand Up @@ -245,11 +268,18 @@ def works_cli(catalog, statements, sandbox):
person,
person_pid,
person_tid,
is_imdb,
is_imdb=is_imdb,
summary=WORKS_SUMMARY,
)
else:
_add_or_reference_works(
work, predicate, person, person_pid, person_tid, is_imdb
work,
predicate,
person,
person_pid,
person_tid,
is_imdb=is_imdb,
summary=WORKS_SUMMARY,
)


Expand All @@ -276,9 +306,16 @@ def add_identifiers(
vocabulary.SANDBOX_1,
qid,
)
_add_or_reference(vocabulary.SANDBOX_1, catalog_pid, tid)
_add_or_reference(
vocabulary.SANDBOX_1,
catalog_pid,
tid,
summary=IDENTIFIERS_SUMMARY,
)
else:
_add_or_reference(qid, catalog_pid, tid)
_add_or_reference(
qid, catalog_pid, tid, summary=IDENTIFIERS_SUMMARY
)


def add_people_statements(statements: Iterable, sandbox: bool) -> None:
Expand Down Expand Up @@ -331,10 +368,17 @@ def add_works_statements(
person_pid,
person_tid,
is_imdb=is_imdb,
summary=WORKS_SUMMARY,
)
else:
_add_or_reference_works(
work, predicate, person, person_pid, person_tid, is_imdb=is_imdb
work,
predicate,
person,
person_pid,
person_tid,
is_imdb=is_imdb,
summary=WORKS_SUMMARY,
)


Expand Down Expand Up @@ -383,6 +427,7 @@ def _add_or_reference_works(
person_pid: str,
person_tid: str,
is_imdb=False,
summary=None,
) -> None:
# Parse value into an item in case of QID
qid = search(QID_REGEX, person)
Expand All @@ -398,7 +443,12 @@ def _add_or_reference_works(
person = pywikibot.ItemPage(REPO, qid.group())

subject_item, claims = _essential_checks(
work, predicate, person, person_pid=person_pid, person_tid=person_tid
work,
predicate,
person,
person_pid=person_pid,
person_tid=person_tid,
summary=summary,
)
if None in (subject_item, claims):
return
Expand All @@ -413,6 +463,7 @@ def _add_or_reference_works(
person,
person_pid=person_pid,
person_tid=person_tid,
summary=summary,
):
return

Expand All @@ -423,11 +474,16 @@ def _add_or_reference_works(
person,
person_pid=person_pid,
person_tid=person_tid,
summary=summary,
)


def _add_or_reference(subject: str, predicate: str, value: str) -> None:
subject_item, claims = _essential_checks(subject, predicate, value)
def _add_or_reference(
subject: str, predicate: str, value: str, summary=None
) -> None:
subject_item, claims = _essential_checks(
subject, predicate, value, summary=summary
)

if None in (subject_item, claims):
return
Expand All @@ -437,7 +493,7 @@ def _add_or_reference(subject: str, predicate: str, value: str) -> None:
# If 'official website' property has the same value -> add reference
# See https://www.wikidata.org/wiki/User_talk:Jura1#Thanks_for_your_feedback_on_User:Soweego_bot_task_2
if _check_for_same_value(
claims, subject, vocabulary.OFFICIAL_WEBSITE, value
claims, subject, vocabulary.OFFICIAL_WEBSITE, value, summary=summary
):
return

Expand All @@ -454,6 +510,7 @@ def _add_or_reference(subject: str, predicate: str, value: str) -> None:
predicate,
value,
case_insensitive=case_insensitive,
summary=summary,
)


Expand All @@ -465,14 +522,22 @@ def _handle_addition(
case_insensitive=False,
person_pid=None,
person_tid=None,
summary=None,
):
given_predicate_claims = claims.get(predicate)
subject_qid = subject_item.getID()

# No claim with the given predicate -> add statement
if not given_predicate_claims:
LOGGER.debug('%s has no %s claim', subject_qid, predicate)
_add(subject_item, predicate, value, person_pid, person_tid)
_add(
subject_item,
predicate,
value,
person_pid,
person_tid,
summary=summary,
)
return

if case_insensitive:
Expand All @@ -493,7 +558,14 @@ def _handle_addition(
LOGGER.debug(
'%s has no %s claim with value %s', subject_qid, predicate, value
)
_add(subject_item, predicate, value, person_pid, person_tid)
_add(
subject_item,
predicate,
value,
person_pid,
person_tid,
summary=summary,
)
return

# Claim with the given predicate and value -> add reference
Expand All @@ -503,12 +575,12 @@ def _handle_addition(
if case_insensitive:
for claim in given_predicate_claims:
if claim.getTarget().lower() == value:
_reference(claim, person_pid, person_tid)
_reference(claim, person_pid, person_tid, summary=summary)
return

for claim in given_predicate_claims:
if claim.getTarget() == value:
_reference(claim, person_pid, person_tid)
_reference(claim, person_pid, person_tid, summary=summary)


def _handle_redirect_and_dead(qid):
Expand All @@ -527,7 +599,7 @@ def _handle_redirect_and_dead(qid):


def _essential_checks(
subject, predicate, value, person_pid=None, person_tid=None
subject, predicate, value, person_pid=None, person_tid=None, summary=None
):
item, data = _handle_redirect_and_dead(subject)

Expand All @@ -537,21 +609,27 @@ def _essential_checks(
# No data at all
if not data:
LOGGER.warning('%s has no data at all', subject)
_add(item, predicate, value, person_pid, person_tid)
_add(item, predicate, value, person_pid, person_tid, summary=summary)
return None, None

claims = data.get('claims')
# No claims
if not claims:
LOGGER.warning('%s has no claims', subject)
_add(item, predicate, value, person_pid, person_tid)
_add(item, predicate, value, person_pid, person_tid, summary=summary)
return None, None

return item, claims


def _check_for_same_value(
subject_claims, subject, predicate, value, person_pid=None, person_tid=None
subject_claims,
subject,
predicate,
value,
person_pid=None,
person_tid=None,
summary=None,
):
given_predicate_claims = subject_claims.get(predicate)
if given_predicate_claims:
Expand All @@ -563,7 +641,7 @@ def _check_for_same_value(
predicate,
value,
)
_reference(claim, person_pid, person_tid)
_reference(claim, person_pid, person_tid, summary=summary)
return True
return False

Expand Down Expand Up @@ -603,18 +681,18 @@ def _get_works_args(catalog):
return is_imdb, person_pid


def _add(subject_item, predicate, value, person_pid, person_tid):
def _add(subject_item, predicate, value, person_pid, person_tid, summary=None):
claim = pywikibot.Claim(REPO, predicate)
claim.setTarget(value)
subject_item.addClaim(claim)
subject_item.addClaim(claim, summary=summary)
LOGGER.debug('Added claim: %s', claim.toJSON())
_reference(claim, person_pid, person_tid)
_reference(claim, person_pid, person_tid, summary=summary)
LOGGER.info(
'Added (%s, %s, %s) statement', subject_item.getID(), predicate, value
)


def _reference(claim, person_pid, person_tid):
def _reference(claim, person_pid, person_tid, summary=None):
if None in (person_pid, person_tid):
reference_log = (
f'({BASED_ON_HEURISTIC_REFERENCE.getID()}, {vocabulary.ARTIFICIAL_INTELLIGENCE}), '
Expand All @@ -623,7 +701,8 @@ def _reference(claim, person_pid, person_tid):

try:
claim.addSources(
[BASED_ON_HEURISTIC_REFERENCE, RETRIEVED_REFERENCE]
[BASED_ON_HEURISTIC_REFERENCE, RETRIEVED_REFERENCE],
summary=summary,
)

LOGGER.info('Added %s reference node', reference_log)
Expand All @@ -647,7 +726,8 @@ def _reference(claim, person_pid, person_tid):
BASED_ON_HEURISTIC_REFERENCE,
tid_reference,
RETRIEVED_REFERENCE,
]
],
summary=summary,
)

LOGGER.info('Added %s reference node', reference_log)
Expand Down

0 comments on commit 1aabe54

Please sign in to comment.