Skip to content

Commit

Permalink
Use church mapping tree in make_KMB_info (#6)
Browse files Browse the repository at this point in the history
Implement matching of specific categories for churches in Sweden.

Task: https://phabricator.wikimedia.org/T176935
  • Loading branch information
Vesihiisi committed Sep 28, 2017
1 parent 0d1d77b commit 96942c3
Show file tree
Hide file tree
Showing 3 changed files with 3,567 additions and 3,482 deletions.
2 changes: 1 addition & 1 deletion importer/load_church_cats.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def add_if_likely_church(church_cat, church_dict):
name = church_cat.title(withNamespace=False)
name = name.partition(',')[0]
if any(name.lower().endswith(end) for end in endings):
church_dict[name] = church_cat.title()
church_dict[name] = church_cat.title(withNamespace=False)


def has_subcats(cat):
Expand Down
32 changes: 27 additions & 5 deletions importer/make_KMB_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def load_mappings(self, update_mappings):
photographer_file = os.path.join(MAPPINGS_DIR, 'photographers.json')
kmb_files_file = os.path.join(MAPPINGS_DIR, 'kmb_files.json')
commonscat_file = os.path.join(MAPPINGS_DIR, 'commonscat.json')
church_file = os.path.join(MAPPINGS_DIR, 'churches.json')
photographer_page = 'Institution:Riksantikvarieämbetet/KMB/creators'

if update_mappings:
Expand Down Expand Up @@ -135,6 +136,8 @@ def load_mappings(self, update_mappings):

self.mappings['countries'] = common.open_and_read_file(
countries_file, as_json=True)
self.mappings['churches'] = common.open_and_read_file(
church_file, as_json=True)
self.mappings['tags'] = common.open_and_read_file(
tags_file, as_json=True)
self.mappings['primary_classes'] = common.open_and_read_file(
Expand Down Expand Up @@ -584,6 +587,16 @@ def __init__(self, initial_data, kmb_info):
self.log = kmb_info.log
self.commons = pywikibot.Site('commons', 'commons')

def get_exact_match_church(self):
    """
    Try to find the correct category for a church in Sweden.

    The Commons category of the item's municipality is looked up in the
    'kommun' mapping and used as key into the 'churches' mapping. If that
    municipality has an entry whose key equals this item's name, the
    mapped category is added to self.content_cats.

    Returns True if a church category was added, otherwise False.
    """
    if not self.kommun:
        return False

    mappings = self.kmb_info.mappings
    # A municipality that is missing from the mapping, or that has no
    # 'commonscat' value, is treated as "no match" instead of raising.
    muni_entry = mappings['kommun'].get(self.kommun) or {}
    muni_cat_name = muni_entry.get('commonscat')
    churches_municip = mappings["churches"].get(muni_cat_name)
    if not churches_municip or self.namn not in churches_municip:
        return False

    self.content_cats.add(churches_municip[self.namn])
    return True

def get_exact_cat_from_name(self, cache):
"""
Try to find a category with the same name as item.
Expand All @@ -594,7 +607,15 @@ def get_exact_cat_from_name(self, cache):
categories are then removed.
"""
exact_match = False
exact_category_from_name = self.kmb_info.category_exists(self.namn, cache)

# churches are done separately
if ("Religionsutövning - kyrkor" in self.item_classes):
if self.get_exact_match_church():
return

# Not a church, more generalised guesswork
exact_category_from_name = self.kmb_info.category_exists(self.namn,
cache)
if exact_category_from_name:
exact_category_from_name = pywikibot.Page(self.commons, self.namn)
parent_cats = exact_category_from_name.categories()
Expand All @@ -605,11 +626,12 @@ def get_exact_cat_from_name(self, cache):
# if its parent(s) is in this item's cat,
# we can assume it's correct
self.content_cats.discard(cat_name)
if exact_match:
exact_category_title = exact_category_from_name.title(
withNamespace=False)
self.content_cats.add(exact_category_title)

if exact_match is True:
exact_category_title = exact_category_from_name.title(withNamespace=False)
self.content_cats.add(exact_category_title)
else:
if not exact_match:
self.meta_cats.add('needing categorisation (no exact match)')

def get_other_versions(self):
Expand Down

0 comments on commit 96942c3

Please sign in to comment.