Skip to content

Commit

Permalink
Merge branch 'master' into 106-revisar-inconsistencias-en-federacion-…
Browse files Browse the repository at this point in the history
…de-datasets
  • Loading branch information
lrromero committed Mar 7, 2018
2 parents 66119e3 + 87e2805 commit eb793f9
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 58 deletions.
35 changes: 29 additions & 6 deletions pydatajson/core.py
Expand Up @@ -153,7 +153,7 @@ def _update_validation_response(error, response):
"path": list(error.path),
# La instancia validada es irrelevante si el error es de tipo 1
"instance": (None if error.validator == "required" else
error.instance)
error.instance)
}

# Identifico a qué nivel de jerarquía sucedió el error.
Expand Down Expand Up @@ -678,7 +678,7 @@ def generate_harvestable_catalogs(self, catalogs, harvest='all',
catalog["dataset"] = [
dataset for dataset in catalog["dataset"]
if (catalog_url, dataset.get("title")) in
datasets_to_harvest
datasets_to_harvest
]
else:
catalog["dataset"] = []
Expand Down Expand Up @@ -858,7 +858,7 @@ def generate_catalog_readme(self, catalog, export_path=None):
"federated_datasets": indicators["datasets_federados_cant"],
"not_federated_datasets": indicators["datasets_no_federados_cant"],
"not_federated_datasets_pct": (
100.0 - indicators["datasets_federados_pct"]),
100.0 - indicators["datasets_federados_pct"]),
"not_federated_datasets_list": not_federated_datasets_list,
"federated_removed_datasets_list": federated_removed_datasets_list,
"federated_datasets_list": federated_datasets_list,
Expand Down Expand Up @@ -1046,9 +1046,32 @@ def generate_dataset_documentation(self, dataset_identifier,
else:
return text

def make_catalogs_backup(self, catalogs=None, catalog_ids=None,
local_dir="catalog", with_data=False):
"""Realiza copia de los datos y metadatos de uno o más catálogos."""
def make_catalogs_backup(self, catalogs=None,
                         local_catalogs_dir=".",
                         copy_metadata=True, copy_data=True):
    """Make a backup copy of the data and metadata of one or more catalogs.

    Args:
        catalogs (list or dict): List of catalogs (elements that can be
            interpreted by DataJson as catalogs) or a dict whose keys are
            taken as the catalog_identifier:
                {
                    "modernizacion":
                    "http://infra.datos.gob.ar/catalog/modernizacion/data.json"
                }
            When a list is given, the ids are read from each catalog's
            catalog_identifier, and catalogs without a catalog_identifier
            are ignored. When a dict is given, the keys replace the
            catalog_identifier values (these are not read).
        local_catalogs_dir (str): Local directory in which the
            "catalog/..." folder with all the catalogs will be created.
        copy_metadata (bool): If true, generate the data.json and
            catalog.xlsx files.
        copy_data (bool): If true, download every distribution of every
            catalog.

    Returns:
        None
    """

    # TODO: implement this function
    pass
Expand Down
2 changes: 2 additions & 0 deletions pydatajson/writers.py
Expand Up @@ -416,6 +416,8 @@ def _generate_field_table(catalog):
field, "field", ["dataset", "distribution"])
tab_field["dataset_title"] = catalog.get_dataset(
tab_field["dataset_identifier"]).get("title")
tab_field["distribution_title"] = catalog.get_distribution(
tab_field["distribution_identifier"]).get("title")
fields.append(tab_field)

# agrega todas las keys nuevas que no estén trackeadas
Expand Down
94 changes: 60 additions & 34 deletions tests/test_ckan_integration.py
Expand Up @@ -12,7 +12,8 @@

class PushTestCase(unittest.TestCase):
CKAN_VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'),
cassette_library_dir=os.path.join("tests", "cassetes", "ckan_integration", "push_dataset"),
cassette_library_dir=os.path.join(
"tests", "cassetes", "ckan_integration", "push_dataset"),
filter_headers=['Authorization', 'X-CKAN-API-Key'],
record_mode='once')

Expand All @@ -26,8 +27,10 @@ def get_sample(cls, sample_filename):
@CKAN_VCR.use_cassette()
def setUp(self):
    """Create the CKAN client and load both sample catalogs."""
    # HTTP traffic is replayed from the cassette, not hit live.
    self.portal = RemoteCKAN(self.portal_url, apikey=self.apikey)
    full_sample = self.get_sample('full_data.json')
    justice_sample = self.get_sample('catalogo_justicia.json')
    self.full_catalog = pydatajson.DataJson(full_sample)
    self.justice_catalog = pydatajson.DataJson(justice_sample)

@CKAN_VCR.use_cassette()
def tearDown(self):
Expand All @@ -36,11 +39,13 @@ def tearDown(self):
justice_dataset = self.justice_catalog.datasets[0]
justice_name = title_to_name(justice_dataset['title'])
try:
self.portal.call_action('dataset_purge', data_dict={'id': full_name})
self.portal.call_action(
'dataset_purge', data_dict={'id': full_name})
except NotFound:
pass
try:
self.portal.call_action('dataset_purge', data_dict={'id': justice_name})
self.portal.call_action(
'dataset_purge', data_dict={'id': justice_name})
except NotFound:
pass

Expand All @@ -54,7 +59,7 @@ def test_dataset_is_created_correctly(self):
dataset_id = dataset['identifier']
return_id = push_dataset_to_ckan(catalog, catalog_id, "oficina-de-muestra", dataset_id,
self.portal_url, self.apikey)
self.assertEqual(return_id, catalog_id+'_'+dataset_id)
self.assertEqual(return_id, catalog_id + '_' + dataset_id)

@CKAN_VCR.use_cassette()
def test_dataset_is_updated_correctly(self):
Expand All @@ -68,9 +73,9 @@ def test_dataset_is_updated_correctly(self):
return_id = push_dataset_to_ckan(catalog, catalog_id, "oficina-de-muestra", dataset_id,
self.portal_url, self.apikey)

data_dict = {'id': catalog_id+'_'+dataset_id}
data_dict = {'id': catalog_id + '_' + dataset_id}
package = self.portal.call_action('package_show', data_dict=data_dict)
self.assertEqual(return_id, catalog_id+'_'+dataset_id)
self.assertEqual(return_id, catalog_id + '_' + dataset_id)
self.assertEqual('updated description', package['notes'])

@CKAN_VCR.use_cassette()
Expand Down Expand Up @@ -98,31 +103,42 @@ def test_resources_swapped_correctly(self):
justice_dataset['distribution'], full_dataset['distribution']

data_dict = {'id': full_package_id}
full_package = self.portal.call_action('package_show', data_dict=data_dict)
full_package = self.portal.call_action(
'package_show', data_dict=data_dict)
data_dict = {'id': justice_package_id}
justice_package = self.portal.call_action('package_show', data_dict=data_dict)
justice_package = self.portal.call_action(
'package_show', data_dict=data_dict)

self.assertEqual(len(full_package['resources']), len(justice_dataset['distribution']))
self.assertEqual(len(justice_package['resources']), len(full_dataset['distribution']))
self.assertEqual(len(full_package['resources']), len(
justice_dataset['distribution']))
self.assertEqual(len(justice_package['resources']), len(
full_dataset['distribution']))

for resource, justice_distribution in zip(full_package['resources'], justice_dataset['distribution']):
self.assertEqual('same-catalog-id_'+justice_distribution['identifier'], resource['id'])
self.assertEqual('same-catalog-id_' +
justice_distribution['identifier'], resource['id'])

for resource, full_distribution in zip(justice_package['resources'], full_dataset['distribution']):
self.assertEqual('same-catalog-id_'+full_distribution['identifier'], resource['id'])
self.assertEqual('same-catalog-id_' +
full_distribution['identifier'], resource['id'])


class RemoveTestCase(unittest.TestCase):
CKAN_VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'),
cassette_library_dir=os.path.join("tests", "cassetes", "ckan_integration", "remove_dataset"),
cassette_library_dir=os.path.join(
"tests", "cassetes", "ckan_integration", "remove_dataset"),
filter_headers=['Authorization', 'X-CKAN-API-Key'],
record_mode='once')

test_datasets = [{'id': '1.1', 'owner_org': 'org-1', 'author': 'author_a', 'name': 'data1_1'},
{'id': '2.1', 'owner_org': 'org-2', 'author': 'author_a', 'name': 'data2_1'},
{'id': '2.2', 'owner_org': 'org-2', 'author': 'author_b', 'name': 'data2_2'},
{'id': '3.1', 'owner_org': 'org-3', 'author': 'author_a', 'name': 'data3_1'},
{'id': '3.2', 'owner_org': 'org-3', 'author': 'author_b', 'name': 'data3_2'},
{'id': '2.1', 'owner_org': 'org-2',
'author': 'author_a', 'name': 'data2_1'},
{'id': '2.2', 'owner_org': 'org-2',
'author': 'author_b', 'name': 'data2_2'},
{'id': '3.1', 'owner_org': 'org-3',
'author': 'author_a', 'name': 'data3_1'},
{'id': '3.2', 'owner_org': 'org-3',
'author': 'author_b', 'name': 'data3_2'},
{'id': '3.3', 'owner_org': 'org-3', 'author': 'author_c', 'name': 'data3_3'}]

portal_url = 'http://localhost:8080'
Expand All @@ -133,7 +149,8 @@ def setUp(self):
self.ckan_portal = RemoteCKAN(self.portal_url, apikey=self.apikey)
for dataset in self.test_datasets:
try:
self.ckan_portal.call_action('dataset_purge', data_dict={'id': dataset['id']})
self.ckan_portal.call_action(
'dataset_purge', data_dict={'id': dataset['id']})
except NotFound:
continue
for dataset in self.test_datasets:
Expand All @@ -143,42 +160,50 @@ def setUp(self):
def tearDown(self):
    """Purge every fixture dataset, skipping ones that no longer exist."""
    for fixture in self.test_datasets:
        payload = {'id': fixture['id']}
        try:
            self.ckan_portal.call_action('dataset_purge', data_dict=payload)
        except NotFound:
            # Already removed by the test itself; nothing to clean up.
            continue

@CKAN_VCR.use_cassette()
def test_remove_dataset_by_id(self):
    """Removing by identifier deletes the matching dataset from the portal."""
    criteria = {'dataset': {'identifier': '1.1'}}
    remove_datasets_from_ckan(
        self.portal_url, self.apikey, filter_in=criteria)
    remaining = self.ckan_portal.call_action('package_list')
    self.assertTrue('data1_1' not in remaining)

@CKAN_VCR.use_cassette()
def test_remove_dataset_by_title(self):
    """Removing by title deletes the matching dataset from the portal."""
    criteria = {'dataset': {'title': 'data3_3'}}
    remove_datasets_from_ckan(
        self.portal_url, self.apikey, filter_in=criteria)
    remaining = self.ckan_portal.call_action('package_list')
    self.assertTrue('data3_3' not in remaining)

@CKAN_VCR.use_cassette()
def test_remove_dataset_by_organization(self):
    """Filtering by organization removes every dataset owned by it."""
    remove_datasets_from_ckan(
        self.portal_url, self.apikey, organization='org-2')
    remaining = self.ckan_portal.call_action('package_list')
    # Both org-2 fixtures must be gone, regardless of author.
    for purged in ('data2_1', 'data2_2'):
        self.assertTrue(purged not in remaining)

@CKAN_VCR.use_cassette()
def test_remove_dataset_by_publisher_and_organization(self):
    """Publisher filter and organization combine as an AND condition."""
    publisher = {'name': 'author_b', 'mbox': None}
    criteria = {'dataset': {'publisher': publisher}}
    remove_datasets_from_ckan(
        self.portal_url, self.apikey, filter_in=criteria, organization='org-3')
    remaining = self.ckan_portal.call_action('package_list')
    # Only the author_b dataset inside org-3 is targeted.
    self.assertTrue('data3_2' not in remaining)

@CKAN_VCR.use_cassette()
def test_remove_dataset_by_filter_out(self):
filter_out = {'dataset': {'publisher': {'name': 'author_b', 'mbox': None}}}
remove_datasets_from_ckan(self.portal_url, self.apikey, filter_out=filter_out)
filter_out = {'dataset': {'publisher': {
'name': 'author_b', 'mbox': None}}}
remove_datasets_from_ckan(
self.portal_url, self.apikey, filter_out=filter_out)
package_list = self.ckan_portal.call_action('package_list')
self.assertTrue('data2_2' in package_list)
self.assertTrue('data3_2' in package_list)
Expand All @@ -187,8 +212,10 @@ def test_remove_dataset_by_filter_out(self):

@CKAN_VCR.use_cassette()
def test_remove_dataset_by_filter_out_and_organization(self):
filter_out = {'dataset': {'publisher': {'name': 'author_b', 'mbox': None}}}
remove_datasets_from_ckan(self.portal_url, self.apikey, filter_out=filter_out, organization='org-3')
filter_out = {'dataset': {'publisher': {
'name': 'author_b', 'mbox': None}}}
remove_datasets_from_ckan(
self.portal_url, self.apikey, filter_out=filter_out, organization='org-3')
package_list = self.ckan_portal.call_action('package_list')
self.assertTrue('data3_1' not in package_list)
self.assertTrue('data3_3' not in package_list)
Expand All @@ -197,9 +224,8 @@ def test_remove_dataset_by_filter_out_and_organization(self):
def test_empty_query_result(self):
    """A filter that matches nothing must leave the package list unchanged."""
    criteria = {'dataset': {'identifier': '4.4'}}
    before = self.ckan_portal.call_action('package_list')
    remove_datasets_from_ckan(
        self.portal_url, self.apikey, filter_in=criteria, organization='org-4')
    after = self.ckan_portal.call_action('package_list')
    self.assertEqual(len(before), len(after))

Expand Down

0 comments on commit eb793f9

Please sign in to comment.