diff --git a/bot/wikidata/WikidataBot.py b/bot/wikidata/WikidataBot.py index 66b75ac..96f3c32 100644 --- a/bot/wikidata/WikidataBot.py +++ b/bot/wikidata/WikidataBot.py @@ -1,4 +1,6 @@ # wikidata_bot.py +import re + import pywikibot from pywikibot.pagegenerators import WikidataSPARQLPageGenerator @@ -7,7 +9,22 @@ def __init__(self): self.site = pywikibot.Site().data_repository() def _clean_city_name(self, city_name): - return city_name.replace("[", "").replace("]", "").strip() + """ + Clean the city name from brackets or alternative names + It could be found in the wikitext in the following formats: + - [[City name]] -- remove the brackets + - [[City name|Alt name]] -- remove the brackets and the alternative name + - City Name -- keep as it is + :param city_name: City name to clean + :return: The cleaned city name + """ + # Remove brackets + city_name = city_name.replace("[[", "").replace("]]", "") + + # Remove alternative names + city_name = re.sub(r'\|.*', '', city_name) + + return city_name def get_wikidata_entity_by_wikipedia_article_name(self,article_name, alt, lang='it'): diff --git a/bot/wikivoyage/VoyBot.py b/bot/wikivoyage/VoyBot.py index d105599..9ae1e0a 100644 --- a/bot/wikivoyage/VoyBot.py +++ b/bot/wikivoyage/VoyBot.py @@ -43,6 +43,7 @@ def process_wikidata_in_citylist(self, templates): """ wd_bot = WikidataBot() for template in templates: + # Conditions is_target_template = (template.name == CITY_TEMPLATE_ITEM_NAME or template.name == DESTINATION_TEMPLATE_ITEM_NAME) @@ -69,7 +70,7 @@ def _process_wikidata(self, name, wikidata_id, template): self.write_log_line(f"{self.current_page} -- No wikidata item found for {name}") else: pywikibot.logging.stdout(f"\tFound wikidata item for {name}: {wikidata_id}") - template.add("wikidata", wikidata_id) + template.add("wikidata", wikidata_id, before="descrizione", preserve_spacing=True) def write_log_line(self, text, file="logs/citylist_log.log"): """