In [1]:
from ckanapi import RemoteCKAN
import ckanapi.errors
from ckanapi.errors import NotFound, ValidationError
import pandas as pd
from basedosdados import read_sql
import requests
import os
import json
from tqdm import tqdm

In [2]:
class Migrator:
    """Run CKAN package actions against one remote for one package dict.

    Every public method calls a single action on ``ckan_remote.action``.
    A ``NotFound`` raised by that call is printed instead of propagated;
    any other exception is left to bubble up. No method returns the
    action's result.
    """

    def __init__(self, ckan_remote: RemoteCKAN, package_dict):
        self.ckan_remote = ckan_remote
        self.package_dict = package_dict

    def _call_action(self, action_name, payload):
        """Call ``action_name`` with ``payload`` as keyword arguments.

        The payload is passed as a plain dict (not ``**kwargs``) so that no
        package key can ever collide with this helper's own parameters.
        """
        try:
            action = getattr(self.ckan_remote.action, action_name)
            action(**payload)
        except NotFound as not_found:
            print(not_found)

    def create(self):
        """Create the package from the full package dict."""
        self._call_action("package_create", self.package_dict)

    def update(self):
        """Update the package using the full package dict."""
        self._call_action("package_update", self.package_dict)

    def purge(self):
        """Purge the dataset identified by the package's ``name``."""
        self._call_action("dataset_purge", {"id": self.package_dict["name"]})

    def delete(self):
        """Delete the package identified by the package's ``name``."""
        self._call_action("package_delete", {"id": self.package_dict["name"]})

    def validate(self):
        """Run the ``bd_dataset_validate`` action on the full package dict."""
        self._call_action("bd_dataset_validate", self.package_dict)

In [3]:
def download_packages(ORIGINAL_CKAN_URL, env):
    """Fetch the packages of a CKAN instance and dump each one to disk.

    Calls the ``package_search`` action (requesting up to 3000 rows) and
    writes every returned package as JSON to ``packages/{env}/{name}``,
    where ``name`` is the package's ``"name"`` field.

    Args:
        ORIGINAL_CKAN_URL: Base URL of the CKAN instance; the API path is
            appended to it directly.
        env: Name of the sub-directory of ``packages/`` that receives the dumps.

    Returns:
        The list of package dicts taken from the search response.
    """
    api_url = ORIGINAL_CKAN_URL + "/api/3/action/package_search?q=&rows=3000"
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as-is so existing runs still work; confirm it is really needed.
    packages = requests.get(api_url, verify=False).json()["result"]["results"]

    # Create the target directory once instead of re-checking it per package.
    out_dir = f"packages/{env}"
    os.makedirs(out_dir, exist_ok=True)

    for p in packages:
        name = p["name"]
        # The context manager guarantees the dump is flushed and the file closed.
        with open(f"{out_dir}/{name}", "w") as fh:
            json.dump(p, fh)
    return packages

In [4]:
def get_number_rows(package):
    """Fill in ``number_rows`` for the package's ``bdm_table`` resources.

    For every resource whose ``resource_type`` is ``"bdm_table"`` and whose
    ``number_rows`` is missing or an empty string, runs a ``COUNT(*)`` query
    against ``basedosdados.{dataset_id}.{table_id}`` and stores the result
    as an int. If the query (or the conversion of its result) fails,
    ``number_rows`` is set to ``None``.

    Args:
        package: Package dict with a ``"resources"`` list. It is modified
            in place.

    Returns:
        The same ``package`` object that was passed in.
    """
    for resource in package["resources"]:
        # Resources without a resource_type are simply not bdm tables.
        if resource.get("resource_type") != "bdm_table":
            continue
        # Only fill the value when it is absent or blank.
        if "number_rows" in resource and resource["number_rows"] != "":
            continue

        dataset_id = resource['dataset_id']
        table_id = resource['table_id']
        try:
            query = f"SELECT COUNT(*) AS n_rows FROM `basedosdados.{dataset_id}.{table_id}`"
            n_rows = read_sql(query=query, billing_project_id='basedosdados-dev', from_file=True)['n_rows'].to_list()[0]
            resource["number_rows"] = int(n_rows)
        except Exception:
            # Best effort: a failed count is recorded as None. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit stop a long-running loop.
            resource["number_rows"] = None

    return package

In [5]:
# Base URLs of the three CKAN instances this notebook talks to.
LOCAL_CKAN_URL = "http://localhost:5000"
DEV_CKAN_URL = "https://staging.basedosdados.org"
PROD_CKAN_URL = "https://basedosdados.org"

# Each instance is dumped to its own packages/<env> directory. The local dump
# previously shared "dev" with the staging dump, so same-named files from one
# overwrote the other.
local_packages = download_packages(LOCAL_CKAN_URL, "local")
dev_packages = download_packages(DEV_CKAN_URL, "dev")
prod_packages = download_packages(PROD_CKAN_URL, "prod")

In [6]:
# CKAN API keys are read from the environment so that no credential is stored
# in the notebook. Export CKAN_API_KEY_DEV / CKAN_API_KEY_PROD before starting
# the kernel; an unset variable yields an empty key.
# NOTE: the token that used to be hardcoded here must be treated as leaked
# and revoked.
api_key_dev = os.environ.get("CKAN_API_KEY_DEV", "")
api_key_prod = os.environ.get("CKAN_API_KEY_PROD", "")

# Local packages

In [13]:
# Fill in missing row counts for the packages downloaded from the local instance.
update_packages = [get_number_rows(package) for package in tqdm(local_packages)]

ckan_remote = RemoteCKAN(LOCAL_CKAN_URL, apikey=api_key_dev)

# Iterate the list directly: wrapping enumerate() hid the length from tqdm,
# so the progress bar had no total, and the index was never used.
for package in tqdm(update_packages):
    migration = Migrator(ckan_remote, package)
    migration.validate()
    migration.update()

# DEV packages

In [8]:
# Fill in missing row counts for the packages downloaded from the DEV instance.
# This cell used to iterate local_packages, which pushed the local instance's
# package dicts to DEV and left dev_packages unused.
update_packages = [get_number_rows(package) for package in tqdm(dev_packages)]

ckan_remote = RemoteCKAN(DEV_CKAN_URL, apikey=api_key_dev)

# Iterate the list directly so tqdm can show a total; the index was unused.
for package in tqdm(update_packages):
    migration = Migrator(ckan_remote, package)
    migration.validate()
    migration.update()

# PROD packages

In [13]:
# Fill in missing row counts for the packages downloaded from the PROD instance.
# This cell used to iterate local_packages and authenticate with api_key_dev,
# leaving prod_packages and api_key_prod unused.
update_packages = [get_number_rows(package) for package in tqdm(prod_packages)]

ckan_remote = RemoteCKAN(PROD_CKAN_URL, apikey=api_key_prod)

# Iterate the list directly so tqdm can show a total; the index was unused.
for package in tqdm(update_packages):
    migration = Migrator(ckan_remote, package)
    migration.validate()
    migration.update()