In [None]:
# Importing modules
from tqdm           import tqdm
from ckanapi        import RemoteCKAN
from basedosdados   import read_sql
from ckanapi.errors import NotFound, ValidationError

import os
import json
import requests
import pandas as pd
import ckanapi.errors

In [None]:
# Creating migration class 

class Migrator:
    """Sends a single package dict to a CKAN instance through its action API.

    Attributes:
        ckan_remote: client whose ``action`` namespace receives the calls.
        package_dict: package payload, expanded as keyword arguments on every call.
    """

    def __init__(self, ckan_remote: RemoteCKAN, package_dict):
        self.ckan_remote, self.package_dict = ckan_remote, package_dict

    def _run_action(self, action_name):
        """Call the named CKAN action with the package as keyword arguments.

        A ``NotFound`` error is printed and suppressed; any other exception
        propagates to the caller.
        """
        try:
            getattr(self.ckan_remote.action, action_name)(**self.package_dict)
        except NotFound as missing:
            print(missing)

    def update(self):
        """Send the package to the ``package_update`` action."""
        self._run_action("package_update")

    def validate(self):
        """Send the package to the ``bd_dataset_validate`` action."""
        self._run_action("bd_dataset_validate")

In [None]:

def download_packages(ORIGINAL_CKAN_URL, env):
    """Download the packages of a CKAN instance and cache each one on disk.

    Queries the ``package_search`` action of the CKAN API and writes every
    returned package as JSON to ``/tmp/packages/{env}/{name}``, where ``name``
    is the package's ``"name"`` field.

    Args:
        ORIGINAL_CKAN_URL: base URL of the CKAN instance; the API path is
            appended to it as-is.
        env: environment label, used as the cache sub-directory name.

    Returns:
        The list of package dicts found under ``result -> results`` in the
        API response.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
    """
    # NOTE(review): CKAN may cap `rows` server-side; confirm 3000 is honoured,
    # otherwise this silently returns only part of the catalogue.
    api_url = ORIGINAL_CKAN_URL + "/api/3/action/package_search?q=&rows=3000"

    # NOTE(review): verify=False disables TLS certificate verification. Kept
    # to preserve existing behaviour; confirm it is still needed for the
    # remote (non-local) hosts.
    response = requests.get(api_url, verify=False)
    # Fail with a clear HTTP error instead of a KeyError on an error payload.
    response.raise_for_status()
    packages_list = response.json()["result"]["results"]

    # Create the cache directory once; it does not depend on the package.
    target_dir = f"/tmp/packages/{env}"
    os.makedirs(target_dir, exist_ok=True)

    for package in packages_list:
        name = package["name"]
        # The context manager closes (and flushes) the file after each dump.
        with open(f"{target_dir}/{name}", "w") as package_file:
            json.dump(package, package_file)

    return packages_list

In [None]:
def remove_data_cleaning_url(package):
    """Strip ``data_cleaning_code_url`` from every ``bdm_table`` resource.

    The package dict is modified in place and also returned, so the result is
    the same object that was passed in. Resources of any other type, or with
    no ``resource_type`` key, are left untouched. A package without a
    ``resources`` entry is returned unchanged.

    Args:
        package: package dict, optionally holding a ``"resources"`` list of
            resource dicts.

    Returns:
        The same ``package`` object, after the clean-up.
    """
    for resource in package.get("resources") or []:
        if resource.get("resource_type") == "bdm_table":
            # pop with a default is a no-op when the key is already absent.
            resource.pop("data_cleaning_code_url", None)

    return package

In [None]:
# Global variables 

# Base URL of each CKAN deployment handled by this notebook.
LOCAL_CKAN_URL = "http://localhost"
DEV_CKAN_URL = "https://staging.basedosdados.org"
PROD_CKAN_URL = "https://basedosdados.org"

# API keys are read from the environment; a missing variable raises KeyError here.
CKAN_API_KEY_LOCAL = os.environ["CKAN_API_KEY_LOCAL"]
CKAN_API_KEY_DEV = os.environ["CKAN_API_KEY_DEV"]
CKAN_API_KEY_PROD = os.environ["CKAN_API_KEY_PROD"]


In [None]:
# Downloading packages

# One download per environment; each call returns that environment's package list.
local_packages = download_packages(LOCAL_CKAN_URL, "local")
dev_packages = download_packages(DEV_CKAN_URL, "dev")
prod_packages = download_packages(PROD_CKAN_URL, "prod")

In [None]:
# Removing data cleaning url from local packages

# Each package dict is cleaned in place and returned, so `update_packages`
# holds the same objects as `local_packages`.
update_packages = [
    remove_data_cleaning_url(package) for package in tqdm(local_packages)
]

In [None]:
# Instantiating ckanapi RemoteCKAN object

# Client for the local CKAN instance, authenticated with the local API key.
ckan_remote = RemoteCKAN(
    LOCAL_CKAN_URL,
    apikey=CKAN_API_KEY_LOCAL,
)

In [None]:
# Validating and updating packages changes

# Stop at the first package that fails. tqdm wraps the list itself (not
# enumerate) so the progress bar knows the total number of packages.
for i, package in enumerate(tqdm(update_packages)):
    try:
        migration = Migrator(ckan_remote, package)
        migration.validate()
        migration.update()
    except Exception as error:
        # Report which package failed and why, not just its index.
        # KeyboardInterrupt / SystemExit are deliberately not trapped here.
        print(f"{i}: package {package.get('name')!r} failed with {error!r}")
        break

In [None]:
# Dev packages 

# Each dev package dict is cleaned in place and returned.
update_packages = [
    remove_data_cleaning_url(package) for package in tqdm(dev_packages)
]

# NOTE: this rebinds the notebook-level `update_packages` and `ckan_remote`;
# any cell run after this one that uses those names will target dev.
ckan_remote = RemoteCKAN(DEV_CKAN_URL, apikey=CKAN_API_KEY_DEV)

# Iterate the list directly so tqdm can show the total; the index was unused.
# There is no error handling here: the first exception other than NotFound
# aborts the cell.
for package in tqdm(update_packages):
    migration = Migrator(ckan_remote, package)
    migration.validate()
    migration.update()

In [None]:
# Prod packages

# Each prod package dict is cleaned in place and returned.
update_packages = [
    remove_data_cleaning_url(package) for package in tqdm(prod_packages)
]

# NOTE: this rebinds the notebook-level `update_packages` and `ckan_remote`;
# any cell run after this one that uses those names will target prod.
ckan_remote = RemoteCKAN(PROD_CKAN_URL, apikey=CKAN_API_KEY_PROD)

# Iterate the list directly so tqdm can show the total; the index was unused.
# There is no error handling here: the first exception other than NotFound
# aborts the cell.
for package in tqdm(update_packages):
    migration = Migrator(ckan_remote, package)
    migration.validate()
    migration.update()