Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding diff dataset command #109

Merged
merged 9 commits into from Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
88 changes: 88 additions & 0 deletions README.md
Expand Up @@ -282,6 +282,94 @@ update_resource.update_resource(ckan_host=CKAN_HOST,
resource_id=resource_id)
```

### Check diff before updating a dataset

#### CLI

Running the `diff` command

```bash
dpckan dataset diff --datapackage some-path/datapackage.json
Differences detected:
- On field title
- CKAN value A vowel letters dataset for tests CHANGED
- DataPackage value A vowel letters dataset for tests
Equal fields: version, url, license_id, owner_org, tags, notes

```

#### Via Python code

Using the python `diff_dataset` function

```python
import os
from dpckan.diff_dataset import diff_dataset

CKAN_HOST = os.environ.get('CKAN_HOST')
CKAN_KEY = os.environ.get('CKAN_KEY')
datapackage_path = 'local/path/para/datapackage.json'

# A chamada de funções via código Python exige passagem de todos os argumentos
diffs, oks = diff_dataset(
ckan_host=CKAN_HOST,
ckan_key=CKAN_KEY,
datapackage=datapackage_path
)

diffs
[{'field_name': 'title', 'ckan_value': 'A vowel letters dataset for tests CHANGED', 'datapackage_value': 'A vowel letters dataset for tests'}]

oks
['version', 'url', 'license_id', 'owner_org', 'tags', 'notes']

```


### Check diff for resources

#### CLI

Running the `diff` command for a resource

```bash
dpckan resource diff --datapackage some-path/datapackage.json --resource-name="This is the actual data"
Differences detected:
- On field format
- CKAN value: CSV
- DataPackage value: csv
Equal fields: description
```

#### Via Python code

Using the Python `diff_resource` function

```python
import os
from dpckan.diff_resource import diff_resource

CKAN_HOST = os.environ.get('CKAN_HOST')
CKAN_KEY = os.environ.get('CKAN_KEY')
datapackage_path = 'local/path/para/datapackage.json'
# e.g. datapackage_path = 'dpckan/tests/data-samples/datapackage-example/datapackage.json'
resource_name = 'This is the actual data'

# A chamada de funções via código Python exige passagem de todos os argumentos
diffs, oks = diff_resource(
ckan_host=CKAN_HOST,
ckan_key=CKAN_KEY,
datapackage=datapackage_path,
resource_name=resource_name
)

diffs
[{'field_name': 'format', 'ckan_value': 'CSV', 'datapackage_value': 'csv'}]

oks
['description']
```

## Desenvolvimento

### Contribuir para o projeto
Expand Down
5 changes: 5 additions & 0 deletions dpckan/cli.py
Expand Up @@ -3,6 +3,9 @@
from dpckan.update_dataset import update_cli
from dpckan.create_resource import create_resource_cli
from dpckan.update_resource import update_resource_cli
from dpckan.diff_dataset import diff_dataset_cli
from dpckan.diff_resource import diff_resource_cli


@click.group(context_settings=dict(help_option_names=["-h", "--help"]))
def cli():
Expand All @@ -20,6 +23,7 @@ def dataset():

dataset.add_command(create_cli)
dataset.add_command(update_cli)
dataset.add_command(diff_dataset_cli)

@cli.group()
def resource():
Expand All @@ -30,3 +34,4 @@ def resource():

resource.add_command(create_resource_cli, 'create')
resource.add_command(update_resource_cli, 'update')
resource.add_command(diff_resource_cli, 'diff')
98 changes: 98 additions & 0 deletions dpckan/diff_dataset.py
@@ -0,0 +1,98 @@
import sys
import click
from ckanapi import RemoteCKAN
from dpckan.validations import run_validations
from dpckan.functions import (
load_complete_datapackage,
is_dataset_published,
dataset_diff
)


def diff_dataset(ckan_host, ckan_key, datapackage):
    """
    Detect changes between a datapackage and its published CKAN dataset.

    Parameters:

    -------

    ckan_host: string

    Host or environment of the CKAN instance holding the dataset.
    Example: https://demo.ckan.org/

    ckan_key: string

    CKAN key of the user for that environment.

    datapackage: string

    Local path to the datapackage.json file.

    Returns:

    -------

    Whatever ``dataset_diff`` returns, unchanged. Callers unpack it as a
    ``(diffs, oks)`` pair: ``diffs`` holds one dict per differing field
    (keys ``field_name``, ``ckan_value``, ``datapackage_value``) and ``oks``
    holds the names of the fields that are equal on both sides.

    Raises:

    -------

    Exception
    When ``is_dataset_published`` reports that the dataset does not exist
    on the CKAN instance.

    """
    package = load_complete_datapackage(datapackage)
    # NOTE(review): presumably raises/aborts on invalid input - confirm in
    # dpckan.validations; its return value is not used here.
    run_validations(ckan_host, ckan_key, package)

    # One client serves both the existence check and the diff; building a
    # second identical RemoteCKAN added nothing.
    ckan_instance = RemoteCKAN(ckan_host, apikey=ckan_key)
    if not is_dataset_published(ckan_instance, package):
        raise Exception('Conjunto de dados nao existente.')

    return dataset_diff(ckan_instance, package)


@click.command(name='diff')
@click.option('--ckan-host', '-H', envvar='CKAN_HOST', required=True,
              help="Ckan host, exemplo: https://demo.ckan.org/")  # -H is used so that -h stays the conventional help flag
@click.option('--ckan-key', '-k', envvar='CKAN_KEY', required=True,
              help="Ckan key autorizando o usuário a realizar publicações/atualizações em datasets")
@click.option('--datapackage', '-dp', required=True, default='datapackage.json')
def diff_dataset_cli(ckan_host, ckan_key, datapackage):
    """
    Report the differences between a datapackage and its CKAN dataset.

    Parameters:

    ----------

    ckan_host: string (may be omitted when the CKAN_HOST environment variable is set)

    Host or environment of the CKAN instance holding the dataset.
    Example: https://demo.ckan.org/

    ckan_key: string (may be omitted when the CKAN_KEY environment variable is set)

    CKAN key of the user for that environment.

    datapackage: string (defaults to ``datapackage.json`` in the current directory)

    Local path to the datapackage.json file.

    Output:

    -------

    Prints every differing field with its CKAN and DataPackage values,
    followed by the list of fields that are equal on both sides.
    """
    differences, equal_fields = diff_dataset(ckan_host, ckan_key, datapackage)

    # First section: the fields whose values diverge.
    if len(differences) > 0:
        click.echo("Differences detected:")
        for entry in differences:
            # Each value is looked up right before it is printed so that a
            # missing key fails at the same point in the output as before.
            field_name = entry['field_name']
            click.echo(f" - On field {field_name}")
            ckan_value = entry['ckan_value']
            click.echo(f" - CKAN value {ckan_value}")
            datapackage_value = entry['datapackage_value']
            click.echo(f" - DataPackage value {datapackage_value}")
    else:
        click.echo("There are no differences")

    # Second section: the fields that match.
    if len(equal_fields) > 0:
        joined_names = ', '.join(equal_fields)
        click.echo("Equal fields: {}".format(joined_names))
    else:
        click.echo("No equal field found")
106 changes: 106 additions & 0 deletions dpckan/diff_resource.py
@@ -0,0 +1,106 @@
import sys
import click
from ckanapi import RemoteCKAN
from dpckan.validations import run_validations
from dpckan.functions import (
load_complete_datapackage,
is_dataset_published,
resource_diff
)


def diff_resource(ckan_host, ckan_key, datapackage, resource_name):
    """
    Detect changes between a datapackage resource and its CKAN resource.

    Parameters:

    -------

    ckan_host: string

    Host or environment of the CKAN instance holding the dataset.
    Example: https://demo.ckan.org/

    ckan_key: string

    CKAN key of the user for that environment.

    datapackage: string

    Local path to the datapackage.json file.

    resource_name: string

    Name of the resource to compare; it is passed to ``resource_diff``
    together with the loaded datapackage.

    Returns:

    -------

    Whatever ``resource_diff`` returns, unchanged. Callers unpack it as a
    ``(diffs, oks)`` pair: ``diffs`` holds one dict per differing field
    (``field_name`` plus, where available, ``ckan_value``,
    ``datapackage_value`` and/or ``error``) and ``oks`` holds the names of
    the fields that are equal on both sides.

    Raises:

    -------

    Exception
    When ``is_dataset_published`` reports that the dataset does not exist
    on the CKAN instance.

    """
    package = load_complete_datapackage(datapackage)
    # NOTE(review): presumably raises/aborts on invalid input - confirm in
    # dpckan.validations; its return value is not used here.
    run_validations(ckan_host, ckan_key, package)

    # One client serves both the existence check and the diff; building a
    # second identical RemoteCKAN added nothing.
    ckan_instance = RemoteCKAN(ckan_host, apikey=ckan_key)
    if not is_dataset_published(ckan_instance, package):
        raise Exception('Conjunto de dados nao existente.')

    return resource_diff(ckan_instance, package, resource_name)


@click.command()
@click.option('--ckan-host', '-H', envvar='CKAN_HOST', required=True,
              help="Ckan host, exemplo: https://demo.ckan.org/")  # -H is used so that -h stays the conventional help flag
@click.option('--ckan-key', '-k', envvar='CKAN_KEY', required=True,
              help="Ckan key autorizando o usuário a realizar publicações/atualizações em datasets")
@click.option('--datapackage', '-dp', required=True, default='datapackage.json')
@click.option('--resource-name', '-rn', required=True)
def diff_resource_cli(ckan_host, ckan_key, datapackage, resource_name):
    """
    Report the differences between a datapackage resource and its CKAN resource.

    Parameters:

    ----------

    ckan_host: string (may be omitted when the CKAN_HOST environment variable is set)

    Host or environment of the CKAN instance holding the dataset.
    Example: https://demo.ckan.org/

    ckan_key: string (may be omitted when the CKAN_KEY environment variable is set)

    CKAN key of the user for that environment.

    datapackage: string (defaults to ``datapackage.json`` in the current directory)

    Local path to the datapackage.json file.

    resource_name: string

    Name of the resource, as declared in datapackage.json, to compare.

    Output:

    -------

    Prints every differing field with whichever of its CKAN value,
    DataPackage value and error are non-empty, followed by the list of
    fields that are equal on both sides.
    """
    differences, equal_fields = diff_resource(
        ckan_host, ckan_key, datapackage, resource_name
    )

    # First section: the fields whose values diverge.
    if len(differences) > 0:
        click.echo("Differences detected:")
        for entry in differences:
            field_name = entry['field_name']
            click.echo(f" - On field {field_name}")

            # Each detail is optional: absent or falsy values are skipped.
            ckan_value = entry.get('ckan_value')
            if ckan_value:
                click.echo(f" - CKAN value: {ckan_value}")

            datapackage_value = entry.get('datapackage_value')
            if datapackage_value:
                click.echo(f" - DataPackage value: {datapackage_value}")

            error = entry.get('error')
            if error:
                click.echo(f" - Error: {error}")
    else:
        click.echo("There are no differences")

    # Second section: the fields that match.
    if len(equal_fields) > 0:
        joined_names = ', '.join(equal_fields)
        click.echo("Equal fields: {}".format(joined_names))
    else:
        click.echo("No equal field found")