Vesihiisi · Vesihiisi · Aug 10, 2017 · May 20, 2017 · May 20, 2017 · May 31, 2017
diff --git a/importer/AtDe.py b/importer/AtDe.py
@@ -0,0 +1,177 @@
+from Monument import Monument
+import importer_utils as utils
+
+
+MAPPING_DIR = "mappings"
+
+
+class AtDe(Monument):
+
+    def get_type_keyword(self):
+        """
+        Extract type of monument from name.
+
+        Does matching of name against list of monument
+        types from
+        https://www.wikidata.org/wiki/Wikidata:WikiProject_WLM/Mapping_tables/at_(de)/types
+        """
+        raw_name = self.name.lower()
+        types = self.data_files["types"]["mappings"]
+        keywords = list(types.keys())
+        keywords = [x.lower() for x in keywords]
+        return utils.get_longest_match(raw_name, keywords)
+
+    def set_type(self):
+        """
+        Set specific P31, if possible.
+
+        Default P31 is cultural property.
+        If extracting a type keyword succeeded,
+        use the more specific item as P31.
+        """
+        types = self.data_files["types"]["mappings"]
+        type_match = self.get_type_keyword()
+        if type_match:
+            if isinstance(type_match, list):
+                # multiple matches - report
+                self.add_to_report("type", self.name, "is")
+            else:
+                # single match - add
+                match_item = [types[x]["items"]
+                              for x in types if x.lower() == type_match][0][0]
+                self.substitute_statement("is", match_item)
+        else:
+            # no matches - report
+            self.add_to_report("type", self.name, "is")
+
+    def update_labels(self):
+        """
+        Set the labels.
+
+        The data does not contain any markup,
+        so we're adding them as is.
+        """
+        self.add_label("de", self.name)
+
+    def set_descriptions(self):
+        """
+        Set descriptions in several languages.
+
+        Base format: $type in $country
+        If specific type in German exists, substitute for $type.
+        If municipality name exists, insert in all languages
+        """
+        type_match = self.get_type_keyword()
+        municipality_name = self.get_municipality_name()
+        base_desc_german_english = "{} in {}"
+        base_desc_swedish = "{} i {}"
+        place_german = "Österreich"
+        place_english = "Austria"
+        place_swedish = "Österike"
+        type_german = "Denkmalgeschütztes Objekt"
+        type_english = "cultural property"
+        type_swedish = "kulturarv"
+
+        if isinstance(type_match, str):
+            type_german = type_match.title()
+        if municipality_name:
+            place_german = municipality_name
+            place_swedish = "{}, {}".format(municipality_name, place_swedish)
+            place_english = "{}, {}".format(municipality_name, place_english)
+        desc_german = base_desc_german_english.format(
+            type_german, place_german)
+        desc_english = base_desc_german_english.format(
+            type_english, place_english)
+        desc_swedish = base_desc_swedish.format(type_swedish, place_swedish)
+
+        self.add_description("de", desc_german)
+        self.add_description("en", desc_english)
+        self.add_description("sv", desc_swedish)
+
+    def set_street_address(self):
+        """
+        Set the street address.
+
+        Most of these look good. Patterns for invalid ones:
+        * 251, gegenüber
+          Filtering by "starts with digit" and "contains comma",
+          as we can't rely only on "starts with digit" because
+          10. Oktober-Straße 18 is legit.
+        * zwischen Entensteinstraße 36 u. 38
+          Filtering by "zwischen".
+        * 76
+          Filtering by containing only digits.
+        """
+        bad_address = None
+        if self.has_non_empty_attribute("adresse"):
+            if (utils.first_char_is_number(self.adresse) and
+                    "," in self.adresse):
+                bad_address = self.adresse
+            elif "zwischen" in self.adresse:
+                bad_address = self.adresse
+            elif self.adresse.isdigit():
+                bad_address = self.adresse
+            else:
+                address = self.adresse
+                self.add_statement(
+                    "located_street", address)
+        if bad_address:
+            self.add_to_report("adresse", self.adresse, "located_street")
+
+    def set_heritage_id(self):
+        """Set the heritage ID, P2951."""
+        self.add_statement("austria_heritage_id", self.objektid)
+
+    def set_monuments_all_id(self):
+        """Map which column in table to ID in monuments_all."""
+        self.monuments_all_id = self.objektid
+
+    def get_municipality_name(self):
+        """Get municipality name based on gemeindekennzahl."""
+        all_codes = self.data_files["municipalities"]
+        municip_code = str(self.gemeindekennzahl)
+        match = [x["itemLabel"]
+                 for x in all_codes if x["value"] == municip_code]
+        if len(match) == 1:
+            return match[0]
+
+    def set_adm_location(self):
+        """
+        Set the administrative location (municipality).
+
+        Using the gemeindekennzahl field, municipality ID,
+        mapped to municipality WD items in external file.
+        """
+        all_codes = self.data_files["municipalities"]
+        if self.has_non_empty_attribute("gemeindekennzahl"):
+            municip_code = str(self.gemeindekennzahl)
+            match = [x["item"]
+                     for x in all_codes if x["value"] == municip_code]
+            if len(match) == 0:
+                self.add_to_report("gemeindekennzahl",
+                                   municip_code, "located_adm")
+            else:
+                self.add_statement("located_adm", match[0])
+
+    def exists_with_monument_article(self, language):
+        return super().exists_with_monument_article("de", "artikel")
+
+    def __init__(self, db_row_dict, mapping, data_files, existing, repository):
+        Monument.__init__(self, db_row_dict, mapping,
+                          data_files, existing, repository)
+        self.set_monuments_all_id()
+        self.set_changed()
+        self.wlm_source = self.create_wlm_source(self.monuments_all_id)
+        self.update_labels()
+        self.set_descriptions()
+        self.set_is()
+        self.set_type()
+        self.set_country()
+        self.set_image("foto")
+        self.set_heritage()
+        self.set_heritage_id()
+        self.set_adm_location()
+        self.set_street_address()
+        self.set_coords(("lat", "lon"))
+        self.set_commonscat()
+        self.set_wd_item(self.find_matching_wikidata(mapping))
diff --git a/importer/importer.py b/importer/importer.py
@@ -1,5 +1,6 @@
 from CmFr import CmFr
 from CzCs import CzCs
+from AtDe import AtDe
 from DkBygningDa import DkBygningDa
 from DkFortidsDa import DkFortidsDa
 from EeEt import EeEt
@@ -85,9 +86,12 @@ def create_connection(arguments):
                    "monuments_pt_(pt)": {"class": PtPt, "data_files": {}},
                    "monuments_ro_(ro)": {"class": RoRo, "data_files": {}},
                    "monuments_xk_(sq)": {"class": XkSq, "data_files": {}},
-                   "monuments_ie_(en)": {"class": IeEn, "data_files": {"counties": "ireland_counties.json"}},
+                   "monuments_ie_(en)": {"class": IeEn, "data_files":{"counties": "ireland_counties.json"}},
                    "monuments_za_(en)": {"class": ZaEn, "data_files": {}},
                    "monuments_cm_(fr)": {"class": CmFr, "data_files": {}},
+                   "monuments_at_(de)": {"class": AtDe, "data_files": {"municipalities":
+                                                                       "austria_municipalities.json",
+                                                                       "types": "at_(de)_types.json"}},
                    "monuments_dk-bygninger_(da)": {"class": DkBygningDa,
                                                    "data_files": {},
                                                    "subclass_downloads": {"settlement": "Q486972"}},
@@ -102,7 +106,7 @@ def create_connection(arguments):
                                          "data_files": {}},
                    "monuments_se-bbr_(sv)": {"class": SeBbrSv,
                                              "data_files": {"functions": "se-bbr_(sv)_functions.json",
-                                             "settlements": "sweden_settlements.json"}},
+                                                            "settlements": "sweden_settlements.json"}},
                    "monuments_ee_(et)": {"class": EeEt,
                                          "data_files": {"counties": "estonia_counties.json"}},
                    "monuments_se-fornmin_(sv)":
@@ -256,7 +260,8 @@ def get_items(connection,
     problem_reports = []
     wikidata_site = utils.create_site_instance("wikidata", "wikidata")
     for row in database_rows:
-        monument = class_to_use(row, mapping, data_files, existing, wikidata_site)
+        monument = class_to_use(row, mapping, data_files,
+                                existing, wikidata_site)
         problem_report = monument.get_report()
         if table:
             raw_data = "<pre>" + str(row) + "</pre>\n"

diff --git a/importer/importer_utils.py b/importer/importer_utils.py
@@ -207,6 +207,44 @@ def parse_year(text):
     return year
 
 
+def get_longest_string(in_list):
+    """
+    Get the longest string(s) in a list.
+
+    :param in_list: list of strings
+    :return: single string if there's only one with the max length,
+             or a list of strings if there are several.
+    """
+    if len(in_list) == 0:
+        return None
+    max_length = max(len(x) for x in in_list)
+    matches = [x for x in in_list if len(x) == max_length]
+    if len(matches) == 1:
+        return matches[0]
+    else:
+        return matches
+
+
+def get_longest_match(word, keywords):
+    """
+    Given a list of keywords, get longest keyword that overlaps with input.
+
+    A naive attempt to match words in languages that use
+    compound nouns written together. Given a string and a list of
+    keywords, return the longest of these keywords that's
+    contained in the input string. That way, if the keyword list
+    contains both a simple word ("bro") and its compound ("järnvägsbro"),
+    we only get the more specific one:
+        * "götaälvsbron" -> "bro"
+        * "en stor järnvägsbro" -> "järnvägsbro"
+    """
+    matches = []
+    for k in keywords:
+        if k in word:
+            matches.append(k)
+    return get_longest_string(matches)
+
+
 def remove_characters(text, string_of_chars_to_remove):
     translator = str.maketrans(
         {key: None for key in string_of_chars_to_remove})
@@ -265,6 +303,11 @@ def last_char_is_vowel(text):
     return is_vowel(get_last_char(text))
 
 
+def first_char_is_number(text):
+    """Check if string starts with a number."""
+    return text[0].isdigit()
+
+
 def socken_to_q(socken, landskap):
     if last_char_is_vowel(socken) or get_last_char(socken) == "s":
         socken_name = socken + " socken"

diff --git a/importer/mappings/at_(de).json b/importer/mappings/at_(de).json
@@ -0,0 +1,21 @@
+{
+    "country": {
+        "item": "Q40",
+        "name": "Austria"
+    },
+    "country_code": "at",
+    "default_is": {
+        "item": "Q2065736",
+        "name": "cultural property"
+    },
+    "heritage": {
+        "item": "Q1188447",
+        "name": "Denkmalgeschutztes Objekt"
+    },
+    "language": "de",
+    "table_name": "at",
+    "unique": {
+        "comment": "Cultural heritage database in Austria ObjektID ",
+        "property": "P2951"
+    }
+}