## Cabeçalho

In [224]:
%load_ext autoreload
%autoreload 2

from pprint import pprint
import sys
from ckanapi import RemoteCKAN
import ckanapi.errors
from ckanapi.errors import NotFound, ValidationError
import pandas as pd
import random
import numpy as np
import copy

from pathlib import Path
import json

from migration_functions import *

# from ckan_migration import Migrate

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Download packages

In [220]:
# Base URLs of the three CKAN instances this notebook works with.
LOCAL_CKAN_URL = 'http://localhost:5000'
DEV_CKAN_URL = 'https://staging.basedosdados.org'
PROD_CKAN_URL = 'https://basedosdados.org'

# Download the packages of each instance. `download_packages` is not defined
# in this notebook; presumably it comes from the `migration_functions` star
# import. The results are later iterated as a sequence of package dicts.
# NOTE(review): the local instance is downloaded with the 'prod' label, not a
# dedicated local one -- confirm this is intended.
local_packages = download_packages(LOCAL_CKAN_URL,'prod')
dev_packages = download_packages(DEV_CKAN_URL,'dev')
prod_packages = download_packages(PROD_CKAN_URL,'prod')

## Validation

In [None]:

# Snapshot to validate; rebind to dev_packages or prod_packages to check
# another instance.
packages = local_packages

# Checks that are currently switched off; uncomment a line to run it.
#list_resources_with_no_dataset_id(packages)
#list_tables_with_no_columns(packages)
#list_external_links_with_missing_fields(packages)

# Every resource needs an entity before `observation_level` can be made a
# required field on bdm_table, external_link and information_request.
list_resources_with_no_entity(packages)
list_columns_with_missing_fields(packages)


## Migration

In [148]:
class Migrator:
    """Pair a CKAN client with one package dict and run actions on it.

    Every public method issues a single CKAN action for the wrapped
    package. A ``NotFound`` raised by the call is printed rather than
    propagated; any other exception reaches the caller unchanged.
    """

    def __init__(self, ckan_remote: RemoteCKAN, package_dict):
        self.ckan_remote = ckan_remote
        self.package_dict = package_dict

    def _invoke(self, action_name, by_name=False):
        """Run ``action_name`` on the remote, printing any ``NotFound``.

        When ``by_name`` is false the whole package dict is expanded into
        keyword arguments; otherwise only ``id=<package name>`` is sent.
        """
        try:
            endpoint = getattr(self.ckan_remote.action, action_name)
            if by_name:
                endpoint(id=self.package_dict['name'])
            else:
                endpoint(**self.package_dict)
        except NotFound as missing:
            print(missing)

    def create(self):
        """Send the package dict to the ``package_create`` action."""
        self._invoke('package_create')

    def update(self):
        """Send the package dict to the ``package_update`` action."""
        self._invoke('package_update')

    def purge(self):
        """Call ``dataset_purge`` with the package name as ``id``."""
        self._invoke('dataset_purge', by_name=True)

    def delete(self):
        """Call ``package_delete`` with the package name as ``id``."""
        self._invoke('package_delete', by_name=True)

    def validate(self):
        """Send the package dict to the ``bd_dataset_validate`` action."""
        self._invoke('bd_dataset_validate')

In [225]:

# Work on a deep copy so the downloaded production snapshot stays untouched.
packages = copy.deepcopy(prod_packages)
updated_packages = []

# Ordered migration pipeline. Each entry is (function, extra positional
# arguments); every function is called with the current package dict first
# and its return value is passed to the next entry. Order is significant.
migration_steps = (
    (delete_package_dataset_id, ()),
    (replace_missing_dataset_ids, ()),
    (create_missing_entity_fields, ()),
    (create_format_field, ()),
    (create_short_description_field, ()),
    (create_partner_organization_field, ()),

    # Package-level fields that are removed.
    (delete_package_field, ('spatial_coverage',)),
    (delete_package_field, ('temporal_coverage',)),
    (delete_package_field, ('update_frequency',)),
    (delete_package_field, ('entity',)),
    (delete_package_field, ('time_unit',)),
    (delete_package_field, ('download_type',)),

    # migrate_notes_field is deliberately not part of the pipeline: it is
    # blocked by https://github.com/basedosdados/website/issues/227 (custom
    # package fields; the original note says they will need to leave CKAN).

    (standardize_old_spatial_coverage, ()),
    (migrate_spatial_coverage, ()),
    (migrate_country_ip_address_required, ()),
    (migrate_temporal_coverage, ()),
    (migrate_observation_level, ()),
    (migrate_partitions_field, ()),
    (migrate_measurement_unit_field, ()),

    # Fields removed from bdm_table resources.
    (delete_resource_field, ('bdm_table', 'entity')),
    (delete_resource_field, ('bdm_table', 'identifying_columns')),
    (delete_resource_field, ('bdm_table', 'covered_by_dictionary')),
    (delete_resource_field, ('bdm_table', 'time_unit')),
    (delete_resource_field, ('bdm_table', 'formato')),
    (delete_resource_field, ('bdm_table', 'bdm_file_size')),

    (migrate_data_cleaning_code_url, ()),

    (migrate_external_link_title_to_name, ()),

    # Fields removed from external_link resources.
    (delete_resource_field, ('external_link', 'entity')),
    (delete_resource_field, ('external_link', 'identifying_columns')),
    (delete_resource_field, ('external_link', 'time_unit')),
    (delete_resource_field, ('external_link', 'formato')),
    (delete_resource_field, ('external_link', 'title')),

    (migrate_time_unit, ()),

    (replace_missing_column_fields, ()),
)

for i, package in enumerate(packages):
    print(i, '-', package['name'])

    # Thread the package through every step in order.
    updated_package = package
    for step, extra_args in migration_steps:
        updated_package = step(updated_package, *extra_args)

    updated_packages.append(updated_package)
	

	

0 - br-senado-cpipandemia
1 - br-sp-gov-orcamento
2 - br-me-caged
3 - br-cvm-administradores-carteira
4 - br-cvm-oferta-publica-distribuicao
5 - br-ana-atlas-esgotos
6 - br-bd-diretorios-brasil
7 - centennia-historical-atlas
8 - br-anatel-banda-larga-fixa
9 - br-me-siconfi
10 - br-ba-feiradesantana-camara-leis
11 - br-ms-cnes
12 - br-ana-reservatorios
13 - br-ibge-ipca15
14 - br-ibge-inpc
15 - br-ibge-populacao
16 - br-anatel-telefonia-movel
17 - br-abrinq-oca
18 - br-ibge-ipca
19 - br-tse-eleicoes
20 - br-ms-sinan
21 - br-ms-vacinacao-covid19
22 - br-ons-energia-armazenada
23 - br-ibge-pib
24 - mundo-onu-adh
25 - br-sp-alesp
26 - br-geobr-mapas
27 - br-ibge-pnad
28 - br-poder360-pesquisas
29 - justica-aberta
30 - br-ibge-pnadc
31 - br-bd-indicadores
32 - br-me-rais
33 - br-bd-diretorios-mundo
34 - mundo-bm-learning-poverty
35 - br-ms-imunizacoes
36 - br-mme-consumo-energia-eletrica
37 - br-inep-indicador-nivel-socioeconomico
38 - br-inep-formacao-docente
39 - br-ms-sinasc
40 - lincham

In [238]:

import os

# API keys are read from the environment so that no credential lives in the
# notebook. Export CKAN_LOCAL_API_KEY / CKAN_DEV_API_KEY before running; an
# unset variable falls back to an empty key, as the dev client used before.
# NOTE: the token that used to be hard-coded here has been committed and
# should be treated as exposed -- revoke it and issue a new one.
ckan_remote_local = RemoteCKAN(
    LOCAL_CKAN_URL,
    apikey=os.environ.get('CKAN_LOCAL_API_KEY', ''),
)
ckan_remote_dev = RemoteCKAN(
    DEV_CKAN_URL,
    apikey=os.environ.get('CKAN_DEV_API_KEY', ''),
)

# Instance targeted by the purge/create cells below.
ckan_remote = ckan_remote_dev

# Package names per instance, for quick membership checks.
local_names = [package['name'] for package in local_packages]
dev_names = [package['name'] for package in dev_packages]
prod_names = [package['name'] for package in prod_packages]




## Purge packages

In [239]:

# DESTRUCTIVE: purges every package listed in dev_packages from whichever
# instance `ckan_remote` currently points at. Migrator.purge prints a
# NotFound instead of raising it, so already-missing packages are skipped.
for i, package in enumerate(dev_packages):
    print(i, ' - ', package['name'])
    Migrator(ckan_remote, package).purge()


0  -  br-ms-cnes
1  -  br-camara-atividade-legislativa
2  -  br-camara-dados-abertos
3  -  br-me-cno
4  -  br-cgu-pessoal-executivo-federal
5  -  world-competitiveness-ranking
6  -  young-lives
7  -  worldwide-mobile-data-pricing
8  -  worldwide-bureaucracy-indicators-wwbi
9  -  world-war-i-document-archive
10  -  world-values-survey
11  -  world-urbanization-prospects-wup
12  -  world-uncertainty-index-wui
13  -  world-stadiums
14  -  world-sea-temperatures
15  -  world-press-freedom-index
16  -  world-poverty-clock
17  -  world-population-prospects-wpp
18  -  worldpop
19  -  worldometer
20  -  world-management-survey-wms
21  -  world-input-output-database-wiod
22  -  world-inequality-database
23  -  world-higher-education-database-whed
24  -  world-happiness-report
25  -  world-governance-indicators
26  -  world-giving-index-wgi
27  -  world-fertility-surveys-wfs
28  -  world-digital-library-wdl
29  -  world-development-indicators
30  -  world-database-on-protected-areas-wdpa
31  -  

## Create packages

In [208]:

# Dataset whose resource list is temporarily truncated before creation, and
# how many resources are kept (waiting to solve issue #website/219).
CENSO_PACKAGE_NAME = 'br-ibge-censo-demografico'
CENSO_RESOURCE_LIMIT = 29

for i, updated_package in enumerate(updated_packages):
    print(i, ' - ', updated_package['name'])

    # Build the payload on a copy so updated_packages keeps its ids.
    # The ids are stripped before creation -- presumably so the target
    # instance assigns fresh ones; confirm against the CKAN setup.
    updated_package_without_id = copy.deepcopy(updated_package)
    updated_package_without_id.pop('id', None)
    for resource in updated_package_without_id['resources']:
        resource.pop('id', None)

    # Apply the truncation to the payload that is actually sent. Previously
    # the trimmed copy was handed to a Migrator that was never used, so the
    # full resource list was still submitted.
    if updated_package_without_id['name'] == CENSO_PACKAGE_NAME:
        updated_package_without_id['resources'] = (
            updated_package_without_id['resources'][:CENSO_RESOURCE_LIMIT]
        )

    # Best effort: report the failure and carry on with the next package.
    try:
        ckan_remote.action.package_create(**updated_package_without_id)
    except Exception as e:
        print(e)


0  -  br-ms-cnes
1  -  br-camara-atividade-legislativa
2  -  br-camara-dados-abertos
3  -  br-me-cno
4  -  br-cgu-pessoal-executivo-federal
5  -  world-competitiveness-ranking
6  -  young-lives
7  -  worldwide-mobile-data-pricing
8  -  worldwide-bureaucracy-indicators-wwbi
9  -  world-war-i-document-archive
10  -  world-values-survey
11  -  world-urbanization-prospects-wup
12  -  world-uncertainty-index-wui
13  -  world-stadiums
14  -  world-sea-temperatures
15  -  world-press-freedom-index
16  -  world-poverty-clock
17  -  world-population-prospects-wpp
18  -  worldpop
19  -  worldometer
20  -  world-management-survey-wms
21  -  world-input-output-database-wiod
22  -  world-inequality-database
23  -  world-higher-education-database-whed
24  -  world-happiness-report
25  -  world-governance-indicators
26  -  world-giving-index-wgi
27  -  world-fertility-surveys-wfs
28  -  world-digital-library-wdl
29  -  world-development-indicators
30  -  world-database-on-protected-areas-wdpa
31  -  