Skip to content

Commit

Permalink
Spain (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
Vesihiisi committed Oct 31, 2017
1 parent 796ca10 commit cfc1e8b
Show file tree
Hide file tree
Showing 9 changed files with 32,985 additions and 4 deletions.
157 changes: 157 additions & 0 deletions importer/EsEs.py
@@ -0,0 +1,157 @@
from Monument import Monument, Dataset
import importer_utils as utils
import importer as importer
import dateparser


class EsEs(Monument):
    """Monument subclass for the Spanish (es) dataset in Spanish (es)."""

    def set_heritage_with_date(self):
        """
        Set heritage status (bien interes cultural).

        Optionally, with start date qualifier taken from the 'fecha'
        field. If the date cannot be parsed, the raw value is added to
        the report and the status is added without a qualifier.
        """
        heritage = self.mapping["heritage"]["item"]
        if not self.has_non_empty_attribute("fecha"):
            self.add_statement("heritage_status", heritage)
            return

        # 20 de febrero de 1985
        es_date = dateparser.parse(self.fecha, languages=['es'])
        date_dict = None
        # dateparser.parse returns None for input it cannot parse;
        # treat that the same way as a failed conversion.
        if es_date is not None:
            try:
                date_dict = utils.datetime_object_to_dict(es_date)
            except ValueError:
                date_dict = None

        if date_dict is None:
            self.add_to_report("fecha", self.fecha, "start_time")
            self.add_statement("heritage_status", heritage)
            return

        qualifier = {"start_time": {"time_value": date_dict}}
        self.add_statement("heritage_status", heritage, qualifier)

    def set_adm_location(self):
        """
        Set the administrative location (located_adm).

        A municipality with exactly one wikilink is resolved through
        that link; otherwise its markup-free name is looked up in the
        municipalities data file. If no municipality item is found,
        the lowercased 'provincia_iso' value is looked up in the
        provinces data file. Values that do not give exactly one match
        are added to the report.
        """
        adm_q = None
        municip_dic = self.data_files["municipalities"]
        province_dic = self.data_files["provinces"]
        if utils.count_wikilinks(self.municipio) == 1:
            adm_q = utils.q_from_first_wikilink("es", self.municipio)
        else:
            # sometimes they're in ''
            municip_raw = utils.remove_markup(self.municipio)
            municip_match = utils.get_item_from_dict_by_key(
                dict_name=municip_dic,
                search_term=municip_raw,
                search_in="itemLabel")

            if len(municip_match) == 1:
                adm_q = municip_match[0]
            else:
                self.add_to_report("municipio",
                                   self.municipio,
                                   "located_adm")
        if not adm_q:
            # Fall back on the province when no municipality was found.
            province_raw = self.provincia_iso.lower()
            prov_match = utils.get_item_from_dict_by_key(
                dict_name=province_dic,
                search_term=province_raw,
                search_in="itemLabel")
            if len(prov_match) == 1:
                adm_q = prov_match[0]
            else:
                self.add_to_report("provincia_iso",
                                   self.provincia_iso,
                                   "located_adm")

        if adm_q:
            self.add_statement("located_adm", adm_q)

    def set_special_is(self):
        """
        Replace the 'is' statement with a more specific one.

        The lowercased 'tipobic' value is matched against the tipobic
        mappings. Exactly one match replaces the existing 'is'
        statement; otherwise the value is added to the report.
        """
        is_dic = self.data_files["tipobic"]["mappings"]
        if self.has_non_empty_attribute("tipobic"):
            s_raw = self.tipobic.lower()
            s_match = utils.get_matching_items_from_dict(value=s_raw,
                                                         dict_name=is_dic)
            if len(s_match) == 1:
                self.remove_statement("is")
                self.add_statement("is", s_match[0])
            else:
                self.add_to_report("tipobic", self.tipobic, "is")

    def update_labels(self):
        """Add the markup-free 'nombre' value as the Spanish label."""
        spanish = utils.remove_markup(self.nombre)
        self.add_label("es", spanish)

    def update_descriptions(self):
        """
        Add Spanish and English descriptions.

        The English description names the place: the municipality if
        available, otherwise 'lugar', followed by the country.
        """
        place = "Spain"
        if self.has_non_empty_attribute("municipio"):
            place = "{}, {}".format(
                utils.remove_markup(self.municipio), place)
        elif self.has_non_empty_attribute("lugar"):
            place = "{}, {}".format(utils.remove_markup(self.lugar), place)
        desc_dic = {"es": "Bien de Interés Cultural",
                    "en": "cultural property in {}".format(place)}
        for lg, description in desc_dic.items():
            self.add_description(lg, description)

    def set_location(self):
        """
        Add location statement (P276).

        Extract possible location from wikilinked value.
        Run after set_adm_location and compared with
        its result, to avoid using the same statement
        for location and adm location.
        """
        if not self.has_non_empty_attribute("lugar"):
            return
        if utils.count_wikilinks(self.lugar) != 1:
            self.add_to_report("lugar", self.lugar, "location")
            return

        adm_loc = self.get_statement_values("located_adm")
        loc_q = utils.q_from_first_wikilink("es", self.lugar)
        # Skip an unresolved wikilink, as set_adm_location does for its
        # own lookup, so that no empty-valued statement is added.
        if loc_q and adm_loc and loc_q != adm_loc[0]:
            self.add_statement("location", loc_q)

    def set_heritage_id(self):
        """
        Set Bien Interes Cultural as heritage_id.

        Also use it as a distinguisher in case of duplicate
        label/description pairs.
        """
        bic_code = str(self.bic)
        self.add_disambiguator(bic_code, 'es')
        self.add_statement("bien_de_interes", bic_code)

    def set_monuments_all_id(self):
        """Map which column name in specific table to ID in monuments_all."""
        self.monuments_all_id = self.bic

    def __init__(self, db_row_dict, mapping, data_files, existing, repository):
        """
        Build the data object for one database row.

        Rows whose 'bic' value is one of the known placeholders are
        marked as not to be uploaded and are not processed further.
        """
        Monument.__init__(self, db_row_dict, mapping,
                          data_files, existing, repository)
        if self.bic in ["", "0", "-", "s/n"]:
            self.upload = False
            return
        self.set_monuments_all_id()
        self.set_changed()
        self.wlm_source = self.create_wlm_source(self.monuments_all_id)
        self.set_heritage_with_date()
        self.set_heritage_id()
        self.set_country()
        self.set_adm_location()
        # Must run after set_adm_location, whose result it compares with.
        self.set_location()
        self.set_is()
        # Must run after set_is, whose statement it may replace.
        self.set_special_is()
        self.set_coords(("lat", "lon"))
        self.set_commonscat()
        self.set_image("imagen")
        self.update_labels()
        self.update_descriptions()
        self.set_wd_item(self.find_matching_wikidata(mapping))


if __name__ == "__main__":
    # Command line entry point for importer.
    cli_args = importer.handle_args()
    local_files = {
        "municipalities": "spain_municipalities.json",
        "provinces": "spain_provinces.json"}
    downloads = {"tipobic": "es (es)/tipobic"}

    es_dataset = Dataset("es", "es", EsEs)
    es_dataset.data_files = local_files
    es_dataset.lookup_downloads = downloads
    importer.main(cli_args, es_dataset)
14 changes: 14 additions & 0 deletions importer/Monument.py
Expand Up @@ -213,6 +213,20 @@ def substitute_statement(self, prop_name, value, quals=None, refs=None):
self.remove_statement(prop_name)
self.add_statement(prop_name, value, quals, refs)

def get_statement_values(self, prop_name):
    """
    Retrieve list of all statement values with given property.

    The values are read from the statements of the data object,
    e.g. get_statement_values("country") -> ['Q29']

    :param prop_name: name of the property,
        as stated in the props library file
    :return: list with the 'value' of every statement stored under
        the property; an empty list if there are no such statements
    """
    base = self.wd_item["statements"]
    prop = self.props[prop_name]
    # Always return a list, so callers can iterate or test truthiness
    # without first checking for None.
    if prop not in base:
        return []
    return [x['value'] for x in base[prop]]

def set_wd_item(self, wd_item):
"""Associate the data object with a Wikidata item."""
if wd_item is not None:
Expand Down
2 changes: 1 addition & 1 deletion importer/importer_utils.py
Expand Up @@ -96,7 +96,7 @@ def remove_markup(text):
remove_br = re.compile('<br.*?>\W*', re.I)
text = remove_br.sub(' ', text)
text = " ".join(text.split())
if "[" in text:
if "[" in text or "''" in text:
text = wparser.parse(text)
text = text.strip_code()
return remove_multiple_spaces(text.strip())
Expand Down
2 changes: 1 addition & 1 deletion importer/mappings/P31_blacklist.json
Expand Up @@ -59,4 +59,4 @@
"item": "Q515",
"comment": "city"
}
]
]
21 changes: 21 additions & 0 deletions importer/mappings/es_(es).json
@@ -0,0 +1,21 @@
{
"country": {
"item": "Q29",
"name": "Spain"
},
"country_code": "es",
"default_is": {
"item": "Q2065736",
"name": "cultural property"
},
"heritage": {
"item": "Q23712",
"name": "Bien de Interés Cultural"
},
"language": "es",
"table_name": "es",
"unique": {
"comment": "code Bien de Interés Cultural",
"property": "P808"
}
}
5 changes: 3 additions & 2 deletions importer/mappings/props_general.json
Expand Up @@ -45,5 +45,6 @@
"architectural_style": "P149",
"heritage_brussels": "P3600",
"heritage_georgia": "P4166",
"heritage_iran": "P1369"
}
"heritage_iran": "P1369",
"bien_de_interes": "P808"
}

0 comments on commit cfc1e8b

Please sign in to comment.