From 975d85a2a0b48ce02f1ddf2ad2240a7989e07a33 Mon Sep 17 00:00:00 2001 From: Vinicius Date: Mon, 6 May 2024 22:21:01 -0300 Subject: [PATCH] feat: update backend and metadata modules --- .mergify.yml | 16 - .pre-commit-config.yaml | 17 +- docs/docs/api_reference_python.md | 7 +- python-package/basedosdados/__init__.py | 11 +- python-package/basedosdados/backend.py | 497 ++++++++++++++++++ .../basedosdados/backend/__init__.py | 298 ----------- python-package/basedosdados/constants.py | 5 +- .../basedosdados/{upload => core}/base.py | 2 +- .../{templates => core}/config.toml | 0 python-package/basedosdados/download/base.py | 68 --- .../basedosdados/download/download.py | 80 ++- .../basedosdados/download/metadata.py | 488 +++++------------ .../basedosdados/templates/dataset/README.md | 7 - .../templates/dataset/dataset_description.txt | 43 -- .../basedosdados/templates/table/publish.sql | 30 -- .../templates/table/table_description.txt | 107 ---- .../basedosdados/upload/connection.py | 2 +- python-package/basedosdados/upload/dataset.py | 2 +- python-package/basedosdados/upload/storage.py | 2 +- python-package/basedosdados/upload/table.py | 16 +- python-package/pyproject.toml | 3 - python-package/tests/conftest.py | 2 +- 22 files changed, 687 insertions(+), 1016 deletions(-) delete mode 100644 .mergify.yml create mode 100644 python-package/basedosdados/backend.py delete mode 100644 python-package/basedosdados/backend/__init__.py rename python-package/basedosdados/{upload => core}/base.py (99%) rename python-package/basedosdados/{templates => core}/config.toml (100%) delete mode 100644 python-package/basedosdados/download/base.py delete mode 100644 python-package/basedosdados/templates/dataset/README.md delete mode 100644 python-package/basedosdados/templates/dataset/dataset_description.txt delete mode 100644 python-package/basedosdados/templates/table/publish.sql delete mode 100644 python-package/basedosdados/templates/table/table_description.txt diff --git a/.mergify.yml b/.mergify.yml deleted file mode 100644 index 9d432bba0..000000000 --- a/.mergify.yml +++ /dev/null @@ -1,16 +0,0 @@ -pull_request_rules: - - name: Automatic update for PRs - conditions: - - -conflict # skip PRs with conflicts - - -draft # filter-out GH draft PRs - actions: - update: - - name: Warn author on conflicts - conditions: - - conflict - actions: - comment: - message: "@{{author}} esse pull request tem conflitos 😩" - label: - add: - - conflict \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f75737485..b0c342d2a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,20 +21,5 @@ repos: rev: v0.2.0 hooks: - id: ruff - args: [--fix] + args: [--fix, --select, I] - id: ruff-format - - - repo: https://github.com/returntocorp/semgrep - rev: v1.30.0 - hooks: - - id: semgrep - language: python - args: [ - "--error", - "--config", - "auto", - "--exclude-rule", - "python.lang.security.audit.subprocess-shell-true.subprocess-shell-true", - "--exclude-rule", - "yaml.github-actions.security.third-party-action-not-pinned-to-commit-sha.third-party-action-not-pinned-to-commit-sha", - ] diff --git a/docs/docs/api_reference_python.md b/docs/docs/api_reference_python.md index 8e98f5edf..a70b32eaa 100644 --- a/docs/docs/api_reference_python.md +++ b/docs/docs/api_reference_python.md @@ -4,8 +4,7 @@ Esta API é composta por funções com 2 tipos de funcionalidade: - Módulos para **requisição de dados**: para aquele(as) que desejam - somente consultar os dados e metadados do 
nosso projeto (ou qualquer outro - projeto no Google Cloud). + somente consultar os dados e metadados do nosso projeto. - Classes para **gerenciamento de dados** no Google Cloud: para aqueles(as) que desejam subir dados no nosso projeto (ou qualquer outro @@ -15,7 +14,7 @@ Esta API é composta por funções com 2 tipos de funcionalidade: ## Módulos (Requisição de dados) -::: basedosdados.download.download +::: basedosdados.download.metadata handler: python rendering: show_root_heading: no @@ -25,7 +24,7 @@ Esta API é composta por funções com 2 tipos de funcionalidade: docstring_options: replace_admonitions: no -::: basedosdados.download.metadata +::: basedosdados.download.download handler: python rendering: show_root_heading: no diff --git a/python-package/basedosdados/__init__.py b/python-package/basedosdados/__init__.py index 0fa9067c2..6e816a927 100644 --- a/python-package/basedosdados/__init__.py +++ b/python-package/basedosdados/__init__.py @@ -1,7 +1,6 @@ """ Importing the module will automatically import the submodules. """ -# flake8: noqa import os import sys @@ -14,15 +13,11 @@ from basedosdados.backend import Backend from basedosdados.constants import config, constants -from basedosdados.download.base import reauth from basedosdados.download.download import download, read_sql, read_table from basedosdados.download.metadata import ( - get_dataset_description, - get_table_columns, - get_table_description, - get_table_size, - list_dataset_tables, - list_datasets, + get_columns, + get_datasets, + get_tables, search, ) from basedosdados.upload.connection import Connection diff --git a/python-package/basedosdados/backend.py b/python-package/basedosdados/backend.py new file mode 100644 index 000000000..c5bb8429c --- /dev/null +++ b/python-package/basedosdados/backend.py @@ -0,0 +1,497 @@ +""" +Module for interacting with the backend. +""" +from typing import Any, Dict + +from loguru import logger +from requests import get + +try: + from gql import Client, gql + from gql.transport.requests import RequestsHTTPTransport + + _backend_dependencies = True +except ImportError: + _backend_dependencies = False + +from basedosdados.constants import constants +from basedosdados.exceptions import ( + BaseDosDadosException, + BaseDosDadosMissingDependencyException, +) + + +class SingletonMeta(type): + """Singleton Meta to avoid multiple instances of a class""" + + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super().__call__(*args, **kwargs) + return cls._instances[cls] + + +class Backend(metaclass=SingletonMeta): + def __init__(self, search_url: str = None, graphql_url: str = None): + """ + Backend class to communicate with the backend. + + Args: + graphql_url (str): URL of the GraphQL endpoint. + """ + self.search_url: str = search_url or constants.BACKEND_SEARCH_URL.value + self.graphql_url: str = graphql_url or constants.BACKEND_GRAPHQL_URL.value + self.graphql_client: "Client" = self._get_client() + + def get_datasets( + self, + dataset_id: str = None, + dataset_name: str = None, + page: int = 1, + page_size: int = 10, + ): + """ + Get a list of available datasets, + either by `dataset_id` or `dataset_name` + + Args: + dataset_id(str): dataset slug in google big query (gbq). + dataset_name(str): dataset name in base dos dados metadata. + + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. + + Returns: + list[dict]: List of datasets. 
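+
+        Example (illustrative sketch; the dataset name below is hypothetical):
+            >>> backend = Backend()
+            >>> page = backend.get_datasets(dataset_name="populacao", page_size=5)
+            >>> slugs = [dataset["slug"] for dataset in page["items"]]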
+ """ + + query = """ + query ($first: Int!, $offset: Int!) { + allDataset(first: $first, offset: $offset) { + edges { + node { + slug + name + description + organization { + name + } + tags { + edges { + node { + name + } + } + } + themes { + edges { + node { + name + } + } + } + createdAt + updatedAt + } + } + totalCount + } + } + """ + variables = {"first": page_size, "offset": (page - 1) * page_size} + + extra = None + if dataset_id: + extra = f'id: "{dataset_id}"' + if dataset_name: + extra = f'name_Icontains: "{dataset_name}"' + if extra: + query = query.replace("$offset)", f"$offset, {extra})") + + return self._execute_query(query, variables, page, page_size).get("allDataset") + + def get_tables( + self, + dataset_id: str = None, + table_id: str = None, + table_name: str = None, + page: int = 1, + page_size: int = 10, + ): + """ + Get a list of available tables, + either by `dataset_id`, `table_id` or `table_name` + + Args: + dataset_id(str): dataset slug in google big query (gbq). + table_id(str): table slug in google big query (gbq). + table_name(str): table name in base dos dados metadata. + + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. + + Returns: + list[dict]: List of tables. + """ + + query = """ + query ($first: Int!, $offset: Int!) { + allTable(first: $first, offset: $offset) { + edges { + node { + slug + name + description + numberRows + numberColumns + uncompressedFileSize + + } + } + totalCount + } + } + """ + variables = {"first": page_size, "offset": (page - 1) * page_size} + + extra = None + if table_id: + extra = f'id: "{table_id}"' + if dataset_id: + extra = f'dataset_id: "{dataset_id}"' + if table_name: + extra = f'name_Icontains: "{table_name}"' + if extra: + query = query.replace("$offset)", f"$offset, {extra})") + + return self._execute_query(query, variables, page, page_size).get("allTable") + + def get_columns( + self, + table_id: str = None, + column_id: str = None, + column_name: str = None, + page: int = 1, + page_size: int = 10, + ): + """ + Get a list of available columns, + either by `table_id`, `column_id` or `column_name` + + Args: + table_id(str): table slug in google big query (gbq). + column_id(str): column slug in google big query (gbq). + column_name(str): table name in base dos dados metadata. + + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. + + Returns: + list[dict]: List of tables. + """ + + query = """ + query ($first: Int!, $offset: Int!) { + allColumn(first: $first, offset: $offset) { + edges { + node { + name + description + observations + bigqueryType { + name + } + } + } + totalCount + } + } + """ + variables = {"first": page_size, "offset": (page - 1) * page_size} + + extra = None + if column_id: + extra = f'id: "{column_id}"' + if table_id: + extra = f'table_id: "{table_id}"' + if column_name: + extra = f'name_Icontains: "{column_name}"' + if extra: + query = query.replace("$offset)", f"$offset, {extra})") + + return self._execute_query(query, variables, page, page_size).get("allColumn") + + def search(self, q: str = None, page: int = 1, page_size: int = 10) -> list[dict]: + """ + Search for datasets, querying all available metadata for the term `q` + + Args: + q(str): search term. + + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. 
+ + Returns: + dict: page of tables. + """ + response = get( + url=self.search_url, + params={"q": q, "page": page, "page_size": page_size}, + ) + if response.status_code not in [200]: + raise BaseDosDadosException(response.text) + return response.json() + + def get_dataset_config(self, dataset_id: str) -> Dict[str, Any]: + """ + Get dataset configuration. + Args: + dataset_id (str): The ID for the dataset. + Returns: + Dict: Dataset configuration. + """ + query = """ + query ($dataset_id: ID!){ + allDataset(id: $dataset_id) { + edges { + node { + slug + name + descriptionPt + createdAt + updatedAt + themes { + edges { + node { + namePt + } + } + } + tags { + edges { + node { + namePt + } + } + } + organization { + namePt + } + } + } + } + } + """ + dataset_id = self._get_dataset_id_from_name(dataset_id) + if dataset_id: + variables = {"dataset_id": dataset_id} + response = self._execute_query(query=query, variables=variables) + return self._simplify_graphql_response(response).get("allDataset")[0] + else: + return {} + + def get_table_config(self, dataset_id: str, table_id: str) -> Dict[str, Any]: + """ + Get table configuration. + Args: + dataset_id (str): The ID for the dataset. + table_id (str): The ID for the table. + Returns: + Dict: Table configuration. + """ + + query = """ + query ($table_id: ID!){ + allTable(id: $table_id) { + edges { + node { + slug + dataset { + slug + organization { + slug + } + } + namePt + descriptionPt + columns { + edges { + node { + name + isInStaging + isPartition + descriptionPt + observations + bigqueryType { + name + } + } + } + } + } + } + } + } + """ + table_id = self._get_table_id_from_name( + gcp_dataset_id=dataset_id, gcp_table_id=table_id + ) + + if table_id: + variables = {"table_id": table_id} + response = self._execute_query(query=query, variables=variables) + return self._simplify_graphql_response(response).get("allTable")[0] + else: + return {} + + def _get_dataset_id_from_name(self, gcp_dataset_id): + query = """ + query ($gcp_dataset_id: String!){ + allCloudtable(gcpDatasetId: $gcp_dataset_id) { + edges { + node { + table { + dataset { + _id + } + } + } + } + } + } + """ + + variables = {"gcp_dataset_id": gcp_dataset_id} + response = self._execute_query(query=query, variables=variables) + r = {} if response is None else self._simplify_graphql_response(response) + if r.get("allCloudtable", []) != []: + return r.get("allCloudtable")[0].get("table").get("dataset").get("_id") + msg = f"{gcp_dataset_id} not found. Please create the metadata first in {self.graphql_url}" + logger.info(msg) + return None + + def _get_table_id_from_name(self, gcp_dataset_id, gcp_table_id): + query = """ + query ($gcp_dataset_id: String!, $gcp_table_id: String!){ + allCloudtable(gcpDatasetId: $gcp_dataset_id, gcpTableId: $gcp_table_id) { + edges { + node { + table { + _id + } + } + } + } + } + """ + + if gcp_dataset_id: + variables = { + "gcp_dataset_id": gcp_dataset_id, + "gcp_table_id": gcp_table_id, + } + + response = self._execute_query(query=query, variables=variables) + r = {} if response is None else self._simplify_graphql_response(response) + if r.get("allCloudtable", []) != []: + return r.get("allCloudtable")[0].get("table").get("_id") + msg = f"No table {gcp_table_id} found in {gcp_dataset_id}. Please create in {self.graphql_url}" + logger.info(msg) + return None + + def _get_client( + self, headers: Dict[str, str] = None, fetch_schema_from_transport: bool = False + ) -> "Client": + """ + Get a GraphQL client. 
+ + Args: + headers (Dict[str, str], optional): Headers to be passed to the client. + Defaults to None. + fetch_schema_from_transport (bool, optional): Whether to fetch the schema + from the transport. Defaults to False. + + Returns: + Client: GraphQL client. + """ + if not _backend_dependencies: + raise BaseDosDadosMissingDependencyException( + "Optional dependencies for backend interaction are not installed. " + 'Please install basedosdados with the "upload" extra, such as:' + "\n\npip install basedosdados[upload]" + ) + transport = RequestsHTTPTransport( + url=self.graphql_url, headers=headers, use_json=True + ) + return Client( + transport=transport, fetch_schema_from_transport=fetch_schema_from_transport + ) + + def _execute_query( + self, + query: str, + variables: Dict[str, str] = None, + page: int = 1, + page_size: int = 10, + ) -> Dict[str, Any]: + """ + Execute a GraphQL query. + + Args: + query (str): GraphQL query. + variables (Dict[str, str], optional): Variables to be passed to the query. Defaults to None. + + Returns: + Dict: GraphQL response. + """ + try: + response = self.graphql_client.execute( + gql(query), variable_values=variables + ) + except Exception as e: + logger.error( + f"The API URL in the config.toml file may be incorrect " + f"or the API might be temporarily unavailable!\n" + f"Error executing query: {e}." + ) + return self._simplify_response(response or {}, page, page_size) + + def _simplify_response( + self, response: dict, page: int = 1, page_size: int = 10 + ) -> dict: + """ + Simplify the graphql response + + Args: + response: the graphql response + + Returns: + dict: the simplified graphql response + """ + if response is None: + return {} + if response == {}: + return {} + + output_ = {} + for key, value in response.items(): + if isinstance(value, list) and key == "edges": + output_["items"] = [ + self._simplify_response(v).get("node") for v in value + ] + elif isinstance(value, dict): + output_[key] = self._simplify_response(value) + else: + output_[key] = value + + if "totalCount" in output_: + output_["page"] = page + output_["page_size"] = page_size + output_["page_total"] = int(output_.pop("totalCount") / page_size) + + return output_ diff --git a/python-package/basedosdados/backend/__init__.py b/python-package/basedosdados/backend/__init__.py deleted file mode 100644 index 4e862ed21..000000000 --- a/python-package/basedosdados/backend/__init__.py +++ /dev/null @@ -1,298 +0,0 @@ -""" -Module for interacting with the backend. -""" -from typing import Any, Dict - -from loguru import logger - -try: - from gql import Client, gql - from gql.transport.requests import RequestsHTTPTransport - - _backend_dependencies = True -except ImportError: - _backend_dependencies = False - -from basedosdados.exceptions import BaseDosDadosMissingDependencyException - - -class Backend: - def __init__(self, graphql_url: str): - """ - Backend class for interacting with the backend. - - Args: - graphql_url (str): URL of the GraphQL endpoint. - """ - self._graphql_url: str = graphql_url - - @property - def graphql_url(self) -> str: - """ - GraphQL endpoint URL. - """ - return self._graphql_url - - def _get_client( - self, headers: Dict[str, str] = None, fetch_schema_from_transport: bool = False - ) -> "Client": - """ - Get a GraphQL client. - - Args: - headers (Dict[str, str], optional): Headers to be passed to the client. Defaults to - None. - fetch_schema_from_transport (bool, optional): Whether to fetch the schema from the - transport. Defaults to False. 
- - Returns: - Client: GraphQL client. - """ - if not _backend_dependencies: - raise BaseDosDadosMissingDependencyException( - "Optional dependencies for backend interaction are not installed. " - 'Please install basedosdados with the "upload" extra, such as:' - "\n\npip install basedosdados[upload]" - ) - transport = RequestsHTTPTransport( - url=self.graphql_url, headers=headers, use_json=True - ) - return Client( - transport=transport, fetch_schema_from_transport=fetch_schema_from_transport - ) - - def _execute_query( - self, - query: str, - variables: Dict[str, str] = None, - client: "Client" = None, - headers: Dict[str, str] = None, - fetch_schema_from_transport: bool = False, - ) -> Dict[str, Any]: - """ - Execute a GraphQL query. - - Args: - query (str): GraphQL query. - variables (Dict[str, str], optional): Variables to be passed to the query. Defaults - to None. - client (Client, optional): GraphQL client. Defaults to None. - headers (Dict[str, str], optional): Headers to be passed to the client. Defaults to - None. - fetch_schema_from_transport (bool, optional): Whether to fetch the schema from the - transport. Defaults to False. - - Returns: - Dict: GraphQL response. - """ - if not _backend_dependencies: - raise BaseDosDadosMissingDependencyException( - "Optional dependencies for backend interaction are not installed. " - 'Please install basedosdados with the "upload" extra, such as:' - "\n\npip install basedosdados[upload]" - ) - if not client: - client = self._get_client( - headers=headers, fetch_schema_from_transport=fetch_schema_from_transport - ) - try: - return client.execute(gql(query), variable_values=variables) - except Exception as e: - msg = f"The API URL in the config.toml file may be incorrect or the API might be temporarily unavailable!\nError executing query: {e}." - logger.error(msg) - return None - - def _get_dataset_id_from_name(self, gcp_dataset_id): - query = """ - query ($gcp_dataset_id: String!){ - allCloudtable(gcpDatasetId: $gcp_dataset_id) { - edges { - node { - table { - dataset { - _id - } - } - } - } - } - } - """ - - variables = {"gcp_dataset_id": gcp_dataset_id} - response = self._execute_query(query=query, variables=variables) - r = {} if response is None else self._simplify_graphql_response(response) - if r.get("allCloudtable", []) != []: - return r.get("allCloudtable")[0].get("table").get("dataset").get("_id") - msg = f"{gcp_dataset_id} not found. Please create the metadata first in {self.graphql_url}" - logger.info(msg) - return None - - def _get_table_id_from_name(self, gcp_dataset_id, gcp_table_id): - query = """ - query ($gcp_dataset_id: String!, $gcp_table_id: String!){ - allCloudtable(gcpDatasetId: $gcp_dataset_id, gcpTableId: $gcp_table_id) { - edges { - node { - table { - _id - } - } - } - } - } - """ - - if gcp_dataset_id: - variables = { - "gcp_dataset_id": gcp_dataset_id, - "gcp_table_id": gcp_table_id, - } - - response = self._execute_query(query=query, variables=variables) - r = {} if response is None else self._simplify_graphql_response(response) - if r.get("allCloudtable", []) != []: - return r.get("allCloudtable")[0].get("table").get("_id") - msg = f"No table {gcp_table_id} found in {gcp_dataset_id}. Please create in {self.graphql_url}" - logger.info(msg) - return None - - def get_dataset_config(self, dataset_id: str) -> Dict[str, Any]: - """ - Get dataset configuration. - - Args: - dataset_id (str): The ID for the dataset. - - Returns: - Dict: Dataset configuration. 
- """ - query = """ - query ($dataset_id: ID!){ - allDataset(id: $dataset_id) { - edges { - node { - slug - name - descriptionPt - createdAt - updatedAt - themes { - edges { - node { - namePt - } - } - } - tags { - edges { - node { - namePt - } - } - } - organization { - namePt - } - } - } - } - } - - """ - dataset_id = self._get_dataset_id_from_name(dataset_id) - if dataset_id: - variables = {"dataset_id": dataset_id} - response = self._execute_query(query=query, variables=variables) - return self._simplify_graphql_response(response).get("allDataset")[0] - else: - return {} - - def get_table_config(self, dataset_id: str, table_id: str) -> Dict[str, Any]: - """ - Get table configuration. - - Args: - dataset_id (str): The ID for the dataset. - table_id (str): The ID for the table. - - Returns: - Dict: Table configuration. - """ - - query = """ - query ($table_id: ID!){ - allTable(id: $table_id) { - edges { - node { - slug - dataset { - slug - organization { - slug - } - } - namePt - descriptionPt - columns { - edges { - node { - name - isInStaging - isPartition - descriptionPt - observations - bigqueryType { - name - } - } - } - } - } - } - } - } - """ - table_id = self._get_table_id_from_name( - gcp_dataset_id=dataset_id, gcp_table_id=table_id - ) - - if table_id: - variables = {"table_id": table_id} - response = self._execute_query(query=query, variables=variables) - return self._simplify_graphql_response(response).get("allTable")[0] - else: - return {} - - def _simplify_graphql_response(self, response: dict) -> dict: - """ - Simplify the graphql response - Args: - response: the graphql response - Returns: - dict: the simplified graphql response - """ - if response == {}: # pragma: no cover - return {} - - output_ = {} - - for key in response: - try: - if ( - isinstance(response[key], dict) - and response[key].get("edges") is not None # noqa - ): - output_[key] = [ - v.get("node") - for v in list( - map(self._simplify_graphql_response, response[key]["edges"]) - ) - ] - elif isinstance(response[key], dict): - output_[key] = self._simplify_graphql_response(response[key]) - else: - output_[key] = response[key] - except TypeError as e: - logger.error(f"Erro({e}): {key} - {response[key]}") - return output_ diff --git a/python-package/basedosdados/constants.py b/python-package/basedosdados/constants.py index 6154d649f..f6c3880a0 100644 --- a/python-package/basedosdados/constants.py +++ b/python-package/basedosdados/constants.py @@ -13,9 +13,10 @@ class config: Configuration for the project. 
""" - verbose: bool = True billing_project_id: str = None project_config_path: str = None + + verbose: bool = True from_file: bool = False @@ -33,3 +34,5 @@ class constants(Enum): REFRESH_TOKEN_URL: str = "/api/token/refresh/" VERIFY_TOKEN_URL: str = "/api/token/verify/" TEST_ENDPOINT: str = "/api/v1/private/bigquerytypes/" + BACKEND_SEARCH_URL: str = "https://backend.basedosdados.org/search" + BACKEND_GRAPHQL_URL: str = "https://backend.basedosdados.org/graphql" diff --git a/python-package/basedosdados/upload/base.py b/python-package/basedosdados/core/base.py similarity index 99% rename from python-package/basedosdados/upload/base.py rename to python-package/basedosdados/core/base.py index cbbb8b145..ceaf778d8 100644 --- a/python-package/basedosdados/upload/base.py +++ b/python-package/basedosdados/core/base.py @@ -20,7 +20,7 @@ from google.oauth2 import service_account from loguru import logger -from basedosdados.backend import Backend +from basedosdados import Backend from basedosdados.constants import config, constants warnings.filterwarnings("ignore") diff --git a/python-package/basedosdados/templates/config.toml b/python-package/basedosdados/core/config.toml similarity index 100% rename from python-package/basedosdados/templates/config.toml rename to python-package/basedosdados/core/config.toml diff --git a/python-package/basedosdados/download/base.py b/python-package/basedosdados/download/base.py deleted file mode 100644 index 190cfc39a..000000000 --- a/python-package/basedosdados/download/base.py +++ /dev/null @@ -1,68 +0,0 @@ -""" -Functions for manage auth and credentials -""" -import sys - -from functools import lru_cache - -import pydata_google_auth -from google.cloud import bigquery, storage - -from basedosdados.upload.base import Base - -SCOPES = [ - "https://www.googleapis.com/auth/cloud-platform", -] - - -def reauth(): - """ - Reauth user credentials - """ - - pydata_google_auth.get_user_credentials( - SCOPES, credentials_cache=pydata_google_auth.cache.REAUTH - ) - - -def credentials(from_file=False, reauth=False): - """ - Get user credentials - """ - - # check if is running in colab - if "google.colab" in sys.modules: - from google.colab import auth - - auth.authenticate_user() - return None - - if from_file: - return Base()._load_credentials(mode="prod") - - if reauth: - return pydata_google_auth.get_user_credentials( - SCOPES, credentials_cache=pydata_google_auth.cache.REAUTH - ) - - return pydata_google_auth.get_user_credentials( - SCOPES, - ) - - -@lru_cache(256) -def google_client(billing_project_id, from_file, reauth): - """ - Get Google Cloud client for bigquery and storage - """ - - return dict( - bigquery=bigquery.Client( - credentials=credentials(from_file=from_file, reauth=reauth), - project=billing_project_id, - ), - storage=storage.Client( - credentials=credentials(from_file=from_file, reauth=reauth), - project=billing_project_id, - ), - ) diff --git a/python-package/basedosdados/download/download.py b/python-package/basedosdados/download/download.py index 357d333a5..28807f778 100644 --- a/python-package/basedosdados/download/download.py +++ b/python-package/basedosdados/download/download.py @@ -5,18 +5,13 @@ import os import re import shutil +import sys import time -from functools import partialmethod - +from functools import lru_cache, partialmethod from pathlib import Path -import pandas_gbq -from google.cloud import bigquery, bigquery_storage_v1 -from pandas_gbq.gbq import GenericGBQException -from pydata_google_auth.exceptions import 
PyDataCredentialsError - from basedosdados.constants import config -from basedosdados.download.base import credentials, google_client +from basedosdados.core.base import Base from basedosdados.exceptions import ( BaseDosDadosAccessDeniedException, BaseDosDadosAuthorizationException, @@ -24,19 +19,11 @@ BaseDosDadosInvalidProjectIDException, BaseDosDadosNoBillingProjectIDException, ) - - -def _set_config_variables(billing_project_id, from_file): - """ - Set billing_project_id and from_file variables - """ - - # standard billing_project_id configuration - billing_project_id = billing_project_id or config.billing_project_id - # standard from_file configuration - from_file = from_file or config.from_file - - return billing_project_id, from_file +from google.cloud import bigquery, bigquery_storage_v1, storage +from pandas_gbq import read_gbq +from pandas_gbq.gbq import GenericGBQException +from pydata_google_auth import cache, get_user_credentials +from pydata_google_auth.exceptions import PyDataCredentialsError def read_sql( @@ -78,13 +65,12 @@ def read_sql( timeout=3600 * 2, ) - return pandas_gbq.read_gbq( + return read_gbq( query, - credentials=credentials(from_file=from_file, reauth=reauth), - project_id=billing_project_id, + project_id=config.billing_project_id, use_bqstorage_api=use_bqstorage_api, + credentials=_credentials(from_file=config.from_file, reauth=reauth), ) - except GenericGBQException as e: if "Reason: 403" in str(e): raise BaseDosDadosAccessDeniedException from e @@ -230,7 +216,7 @@ def download( "Either table_id, dataset_id or query should be filled." ) - client = google_client(billing_project_id, from_file, reauth) + client = _google_client(billing_project_id, from_file, reauth) # makes sure that savepath is a filepath and not a folder savepath = _sets_savepath(savepath) @@ -521,3 +507,45 @@ def _sets_savepath(savepath): ) return savepath + + +def _credentials( + from_file: bool = False, + reauth: bool = False, + scopes: list[str] = ["https://www.googleapis.com/auth/cloud-platform"], +): + """ + Get user credentials + """ + + if "google.colab" in sys.modules: + from google.colab import auth + + auth.authenticate_user() + return None + + if from_file: + return Base()._load_credentials(mode="prod") + + if reauth: + return get_user_credentials(scopes, credentials_cache=cache.REAUTH) + + return get_user_credentials(scopes) + + +@lru_cache(256) +def _google_client(billing_project_id: str, from_file: bool, reauth: bool): + """ + Get Google Cloud client for bigquery and storage + """ + + return dict( + bigquery=bigquery.Client( + credentials=_credentials(from_file=from_file, reauth=reauth), + project=billing_project_id, + ), + storage=storage.Client( + credentials=_credentials(from_file=from_file, reauth=reauth), + project=billing_project_id, + ), + ) diff --git a/python-package/basedosdados/download/metadata.py b/python-package/basedosdados/download/metadata.py index d7d387bf0..bffe1b509 100644 --- a/python-package/basedosdados/download/metadata.py +++ b/python-package/basedosdados/download/metadata.py @@ -1,415 +1,165 @@ """ Functions to get metadata from BD's API """ -import math +from functools import wraps -from collections import defaultdict +from basedosdados.backend import Backend -import pandas as pd -import requests -from google.cloud import bigquery +def check_input(f): + """Checks if the number of inputs is valid""" -def _safe_fetch(url: str): - """ - Safely fetchs urls and, if somehting goes wrong, informs user what is the possible cause - """ - response = None - 
try: - response = requests.get(url, timeout=10) - response.raise_for_status() - except requests.exceptions.HTTPError as errh: - print("Http Error:", errh) - except requests.exceptions.ConnectionError as errc: - print("Error Connecting:", errc) - except requests.exceptions.Timeout as errt: - print("Timeout Error:", errt) - except requests.exceptions.RequestException as err: - print("This url doesn't appear to exists:", err) - - return response + @wraps(f) + def wrapper(*args, **kwargs): + if sum([a is not None for a in args]) > 1: + raise ValueError("At most one of the inputs must be non null") + return f(*args, **kwargs) + return wrapper -def _dict_from_page(json_response): - """ - Generate a dict from BD's API response with dataset_id and description as keys - """ - temp_dict = { - "dataset_id": [ - dataset["name"] for dataset in json_response["result"]["datasets"] - ], - "description": [ - dataset["notes"] if "notes" in dataset.keys() else None - for dataset in json_response["result"]["datasets"] - ], - } - - return temp_dict +def inject_backend(f): + """Inject backend instance if doesn't exists""" -def _fix_size(s, step=80): - final = "" + @wraps(f) + def wrapper(*args, **kwargs): + if "backend" not in kwargs: + kwargs["backend"] = Backend() + return f(*args, **kwargs) - for l in s.split(" "): # noqa - final += (l + " ") if len(final.split("\n")[-1]) < step else "\n" + return wrapper - return final - -def _print_output(df): - """Prints dataframe contents as print blocks - Args: - df (pd.DataFrame): table to be printed +@check_input +@inject_backend +def get_datasets( + dataset_id: str = None, + dataset_name: str = None, + page: int = 1, + page_size: int = 10, + backend: Backend = None, +) -> list[dict]: """ + Get a list of available datasets, + either by `dataset_id` or `dataset_name` - columns = df.columns - step = 80 - print() - for _, row in df.iterrows(): - for c in columns: - print(_fix_size(f"{c}: \n\t{row[c]}")) - print("-" * (step + 15)) - print() - - -def _handle_output(verbose, output_type, df, col_name=None): - """Handles datasets and tables listing outputs based on user's choice. - Either prints it to the screen or returns it as a `list` object. Args: - verbose (bool): amount of verbosity - output_type (str): type of output - df (pd.DataFrame, bigquery.Dataset or bigquery.Table): table containing datasets metadata - col_name (str): name of column with id's data - """ + dataset_id(str): dataset slug in google big query (gbq). + dataset_name(str): dataset name in base dos dados metadata. - df_is_dataframe = isinstance(df, pd.DataFrame) - df_is_bq_dataset_or_table = isinstance(df, bigquery.Table) - df_is_bq_dataset_or_table |= isinstance(df, bigquery.Dataset) - - if verbose is True and df_is_dataframe: - _print_output(df) - - elif verbose is True and df_is_bq_dataset_or_table: - print(df.description) - - elif verbose is False: - if output_type == "list": - return df[col_name].to_list() - if output_type == "str": - return df.description - if output_type == "records": - return df.to_dict("records") - msg = '`output_type` argument must be set to "list", "str" or "records".' - raise ValueError(msg) - raise TypeError("`verbose` argument must be of `bool` type.") - - -def list_datasets(with_description=False, verbose=True): - """ - This function uses `bd_dataset_search` website API - enpoint to retrieve a list of available datasets. - - Args: - with_description (bool): Optional - If True, fetch short dataset description for each dataset. - verbose (bool): Optional. 
- If set to True, information is printed to the screen. If set to False, a list object is returned. + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. Returns: - list | stdout - """ - # first request is made separately since we need to now the number of pages before the iteration - page_size = 100 # this function will only made more than one requisition if there are more than 100 datasets in the API response - url = f"https://basedosdados.org/api/3/action/bd_dataset_search?q=&resource_type=bdm_table&page=1&page_size={page_size}" - response = _safe_fetch(url) - json_response = response.json() - n_datasets = json_response["result"]["count"] - n_pages = math.ceil(n_datasets / page_size) - temp_dict = _dict_from_page(json_response) - - temp_dicts = [temp_dict] - for page in range(2, n_pages + 1): - url = f"https://basedosdados.org/api/3/action/bd_dataset_search?q=&resource_type=bdm_table&page={page}&page_size={page_size}" - response = _safe_fetch(url) - json_response = response.json() - temp_dict = _dict_from_page(json_response) - temp_dicts.append(temp_dict) - - dataset_dict = defaultdict(list) - - for d in temp_dicts: - for key, value in d.items(): - dataset_dict[key].append(value) - - # flat inner lists - dataset_dict["dataset_id"] = [ - item for sublist in dataset_dict["dataset_id"] for item in sublist - ] - dataset_dict["description"] = [ - item for sublist in dataset_dict["description"] for item in sublist - ] - # select desired output using dataset_id info. Note that the output is either a standardized string or a list #pylint: disable=C0301 - if verbose & (with_description is False): - return _print_output(pd.DataFrame.from_dict(dataset_dict)[["dataset_id"]]) - if verbose & with_description: - return _print_output( - pd.DataFrame.from_dict(dataset_dict)[["dataset_id", "description"]] - ) - if (verbose is False) & (with_description is False): - return dataset_dict["dataset_id"] - if (verbose is False) & with_description: - return [ - { - "dataset_id": dataset_dict["dataset_id"][k], - "description": dataset_dict["description"][k], - } - for k in range(len(dataset_dict["dataset_id"])) - ] - raise ValueError( - "`verbose` and `with_description` argument must be of `bool` type." - ) - - -def list_dataset_tables( - dataset_id, - with_description=False, - verbose=True, -): - """ - Fetch table_id for tables available at the specified dataset_id. Prints the information on screen or returns it as a list. + dict: List of datasets. + """ + result = backend.get_datasets(dataset_id, dataset_name, page, page_size) + for item in result.get("items", []) or []: + item["organization"] = item.get("organization", {}).get("name") + item["tags"] = [i.get("name") for i in item.get("tags", {}).get("items")] + item["themes"] = [i.get("name") for i in item.get("themes", {}).get("items")] + return result + + +@check_input +@inject_backend +def get_tables( + dataset_id: str = None, + table_id: str = None, + table_name: str = None, + page: int = 1, + page_size: int = 10, + backend: Backend = None, +) -> list[dict]: + """ + Get a list of available tables, + either by `dataset_id`, `table_id` or `table_name` Args: - dataset_id (str): Optional. - Dataset id returned by list_datasets function - limit (int): - Field to limit the number of results - with_description (bool): Optional - If True, fetch short table descriptions for each table that match the search criteria. - verbose (bool): Optional. 
- If set to True, information is printed to the screen. If set to False, a list object is returned. - - Returns: - stdout | list - """ - - dataset_id = dataset_id.replace( - "-", "_" - ) # The dataset_id pattern in the bd_dataset_search endpoint response uses a hyphen as a separator, while in the endpoint urls that specify the dataset_id parameter the separator used is an underscore. See issue #1079 - - url = f"https://basedosdados.org/api/3/action/bd_bdm_dataset_show?dataset_id={dataset_id}" - - response = _safe_fetch(url) - - json_response = response.json() - - dataset = json_response["result"] - # this dict has all information need to output the function - table_dict = { - "table_id": [ - dataset["resources"][k]["name"] - for k in range(len(dataset["resources"])) - if dataset["resources"][k]["resource_type"] == "bdm_table" - ], - "description": [ - dataset["resources"][k]["description"] - for k in range(len(dataset["resources"])) - if dataset["resources"][k]["resource_type"] == "bdm_table" - ], - } - # select desired output using table_id info. Note that the output is either a standardized string or a list - if verbose & (with_description is False): - return _print_output(pd.DataFrame.from_dict(table_dict)[["table_id"]]) - if verbose & with_description: - return _print_output( - pd.DataFrame.from_dict(table_dict)[["table_id", "description"]] - ) - if (verbose is False) & (with_description is False): - return table_dict["table_id"] - if (verbose is False) & with_description: - return [ - { - "table_id": table_dict["table_id"][k], - "description": table_dict["description"][k], - } - for k in range(len(table_dict["table_id"])) - ] + dataset_id(str): dataset slug in google big query (gbq). + table_id(str): table slug in google big query (gbq). + table_name(str): table name in base dos dados metadata. - raise ValueError( - "`verbose` and `with_description` argument must be of `bool` type." - ) - - -def get_dataset_description( - dataset_id, - verbose=True, -): - """ - Prints the full dataset description. - - Args: - dataset_id (str): Required. - Dataset id available in list_datasets. - verbose (bool): Optional. - If set to True, information is printed to the screen. If set to False, data is returned as a `str`. + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. Returns: - stdout | str + dict: List of tables. """ - url = f"https://basedosdados.org/api/3/action/bd_bdm_dataset_show?dataset_id={dataset_id}" - - response = _safe_fetch(url) - json_response = response.json() + return backend.get_tables(dataset_id, table_id, table_name, page, page_size) - description = json_response["result"]["notes"] - if verbose: - return print(description) - return description - - -def get_table_description( - dataset_id, - table_id, - verbose=True, -): +@check_input +@inject_backend +def get_columns( + table_id: str = None, + column_id: str = None, + columns_name: str = None, + page: int = 1, + page_size: int = 10, + backend: Backend = None, +) -> list[dict]: """ - Prints the full table description. + Get a list of available columns, + either by `table_id`, `column_id` or `column_name` Args: - dataset_id (str): Required. - Dataset id available in list_datasets. - table_id (str): Required. - Table id available in list_dataset_tables - verbose (bool): Optional. - If set to True, information is printed to the screen. If set to False, data is returned as a `str`. 
- - Returns: - stdout | str - """ + table_id(str): table slug in google big query (gbq). + column_id(str): column slug in google big query (gbq). + column_name(str): table name in base dos dados metadata. - url = f"https://basedosdados.org/api/3/action/bd_bdm_table_show?dataset_id={dataset_id}&table_id={table_id}" - - response = _safe_fetch(url) - - json_response = response.json() - - description = json_response["result"]["description"] - - if verbose: - return print(description) - return description - - -def get_table_columns( - dataset_id, - table_id, - verbose=True, -): - """ - Fetch the names, types and descriptions for the columns in the specified table. Prints - information on screen. - Args: - dataset_id (str): Required. - Dataset id available in list_datasets. - table_id (str): Required. - Table id available in list_dataset_tables - verbose (bool): Optional. - If set to True, information is printed to the screen. If set to False, data is returned as a `list` of `dict`s. + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. Returns: - stdout | list + dict: List of tables. """ - url = f"https://basedosdados.org/api/3/action/bd_bdm_table_show?dataset_id={dataset_id}&table_id={table_id}" - - response = _safe_fetch(url) - - json_response = response.json() + result = backend.get_columns(table_id, column_id, columns_name, page, page_size) + for item in result.get("items", []) or []: + item["bigquery_type"] = item.pop("bigqueryType", {}).get("name") + return result - columns = json_response["result"]["columns"] - if verbose: - return _print_output(pd.DataFrame(columns)) - return columns - - -def get_table_size( - dataset_id, - table_id, - verbose=True, -): - """Use a query to get the number of rows and size (in Mb) of a table. - - WARNING: this query may cost a lot depending on the table. - - Args: - dataset_id (str): Optional. - Dataset id available in basedosdados. It should always come with table_id. - table_id (str): Optional. - Table id available in basedosdados.dataset_id. - It should always come with dataset_id. - verbose (bool): Optional. - If set to True, information is printed to the screen. If set to False, data is returned as a `list` of `dict`s. +@check_input +@inject_backend +def search( + q: str = None, + page: int = 1, + page_size: int = 10, + backend: Backend = None, +) -> list[dict]: """ - url = f"https://basedosdados.org/api/3/action/bd_bdm_table_show?dataset_id={dataset_id}&table_id={table_id}" - - response = _safe_fetch(url) - - json_response = response.json() - - size = json_response["result"]["size"] - - if size is None: - return print("Size not available") - if verbose: - return _print_output(pd.DataFrame(size)) - return size - - -def search(query, order_by): - """This function works as a wrapper to the `bd_dataset_search` website API - enpoint. + Search for datasets, querying all available metadata for the term `q` Args: - query (str): - String to search in datasets and tables' metadata. - order_by (str): score|popular|recent - Field by which the results will be ordered. + q(str): search term. + + page(int): page for pagination. + page_size(int): page size for pagination. + backend(Backend): backend instance, injected automatically. Returns: - pd.DataFrame: - Response from the API presented as a pandas DataFrame. Each row is - a table. Each column is a field identifying the table. + dict: List of datasets and metadata. 
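+
+    Example (illustrative; the search term is hypothetical):
+        >>> import basedosdados as bd
+        >>> results = bd.search(q="censo", page_size=5)
+        >>> slugs = [result["slug"] for result in results]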
""" - - # validate order_by input - if order_by not in ["score", "popular", "recent"]: - raise ValueError( - f'order_by must be score, popular or recent. Received "{order_by}"' + items = [] + for item in backend.search(q, page, page_size).get("results", []): + items.append( + { + "slug": item.get("slug"), + "name": item.get("name"), + "description": item.get("description"), + "n_tables": item.get("n_tables"), + "n_raw_data_sources": item.get("n_raw_data_sources"), + "n_information_requests": item.get("n_information_requests"), + "organization": { + "slug": item.get("organizations", [{}])[0].get("slug"), + "name": item.get("organizations", [{}])[0].get("name"), + }, + } ) - - url = f"https://basedosdados.org/api/3/action/bd_dataset_search?q={query}&order_by={order_by}&resource_type=bdm_table" - - response = _safe_fetch(url) - - json_response = response.json() - - dataset_dfs = [] - # first loop identify the number of the tables in each datasets - for dataset in json_response["result"]["datasets"]: - tables_dfs = [] - len(dataset["resources"]) - # second loop extracts tables' information for each dataset - for table in dataset["resources"]: - data_table = pd.DataFrame( - {k: str(table[k]) for k in list(table.keys())}, index=[0] - ) - tables_dfs.append(data_table) - # append tables' dataframes for each dataset - data_ds = tables_dfs[0].append(tables_dfs[1:]).reset_index(drop=True) - dataset_dfs.append(data_ds) - # append datasets' dataframes - df = dataset_dfs[0].append(dataset_dfs[1:]).reset_index(drop=True) - - return df + return items diff --git a/python-package/basedosdados/templates/dataset/README.md b/python-package/basedosdados/templates/dataset/README.md deleted file mode 100644 index d6e62676c..000000000 --- a/python-package/basedosdados/templates/dataset/README.md +++ /dev/null @@ -1,7 +0,0 @@ -Como capturar os dados de {{ dataset_id }}? - -1. Para capturar esses dados, basta verificar o link dos dados originais indicado em `dataset_config.yaml` no item `website`. - -2. Caso tenha sido utilizado algum código de captura ou tratamento, estes estarão contidos em `code/`. Se o dado publicado for em sua versão bruta, não existirá a pasta `code/`. - -Os dados publicados estão disponíveis em: https://basedosdados.org/dataset/{{ dataset_id | replace("_","-") }} diff --git a/python-package/basedosdados/templates/dataset/dataset_description.txt b/python-package/basedosdados/templates/dataset/dataset_description.txt deleted file mode 100644 index bc8001c8e..000000000 --- a/python-package/basedosdados/templates/dataset/dataset_description.txt +++ /dev/null @@ -1,43 +0,0 @@ -{% macro input(var) -%} -{% if ( - (var is not none) and - (("<" not in var | string) and (">" not in var | string)) -) -%} -{{- caller() }} -{%- endif %} -{%- endmacro -%} - -{{ description }} - -Para saber mais acesse: -Website: {{ url_ckan }} -Github: https://github.com/basedosdados/mais/ - -Ajude a manter o projeto :) -Apoia-se: https://apoia.se/basedosdados - -Instituição (Quem mantém os dados oficiais?) 
------------ -Nome: {{ organization -}} - -{% if website is defined %} -{% call input(website[0]) -%} -Onde encontrar os dados ------------------------ -{% if (website is not none) -%} -{% for partition in website -%} -- {{ partition }} -{% endfor -%} -{%- endif %} -{% endcall -%} -{% endif %} - -{% call input(groups[0]) -%} -Grupos ------- -{% if (groups is not none) -%} -{% for partition in groups -%} -- {{ partition }} -{% endfor -%} -{%- endif %} -{% endcall -%} diff --git a/python-package/basedosdados/templates/table/publish.sql b/python-package/basedosdados/templates/table/publish.sql deleted file mode 100644 index d7b86b4f1..000000000 --- a/python-package/basedosdados/templates/table/publish.sql +++ /dev/null @@ -1,30 +0,0 @@ -/* - -Query para publicar a tabela. - -Esse é o lugar para: - - modificar nomes, ordem e tipos de colunas - - dar join com outras tabelas - - criar colunas extras (e.g. logs, proporções, etc.) - -Qualquer coluna definida aqui deve também existir em `table_config.yaml`. - -# Além disso, sinta-se à vontade para alterar alguns nomes obscuros -# para algo um pouco mais explícito. - -TIPOS: - - Para modificar tipos de colunas, basta substituir STRING por outro tipo válido. - - Exemplo: `SAFE_CAST(column_name AS NUMERIC) column_name` - - Mais detalhes: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types - -*/ -{% set project = project_id_prod %} -CREATE VIEW {{ project }}.{{ dataset_id }}.{{ table_id }} AS -SELECT -{% for column in columns|list + partition_columns|list -%} -{%- if not loop.last -%} - SAFE_CAST({{ column }} AS STRING) {{ column }}, -{% else -%} - SAFE_CAST({{ column }} AS STRING) {{ column }} -{% endif -%}{% endfor -%} -from {{ project_id }}.{{ dataset_id }}_staging.{{ table_id }} as t diff --git a/python-package/basedosdados/templates/table/table_description.txt b/python-package/basedosdados/templates/table/table_description.txt deleted file mode 100644 index 6c4f93ff6..000000000 --- a/python-package/basedosdados/templates/table/table_description.txt +++ /dev/null @@ -1,107 +0,0 @@ -{% macro input(var) -%} -{% if ( - (var is not none) and - (("<" not in var | string) and (">" not in var | string)) -) -%} -{{- caller() }} -{%- endif %} -{%- endmacro -%} - -{{ description }} - -Para saber mais acesse: -Website: {{ url_ckan }} -Github: {{ url_github }} - -Ajude a manter o projeto :) -Apoia-se: https://apoia.se/basedosdados - -Publicado por -------------- -Nome: {{ published_by.name -}} -{% call input(published_by.code_url ) %} -Código: {{ published_by.code_url }} -{%- endcall -%} -{% call input(published_by.website ) %} -Website: {{ published_by.website }} -{%- endcall -%} -{% call input(published_by.email ) %} -Email: {{ published_by.email }} -{%- endcall -%} - -{% if data_cleaned_by is defined %} -Tratado por ------------ -Nome: {{ data_cleaned_by.name -}} -{% call input(data_cleaned_by.code_url) %} -Código: {{ data_cleaned_by.code_url }} -{%- endcall -%} -{% call input(data_cleaned_by.website) %} -Website: {{ data_cleaned_by.website }} -{%- endcall -%} -{% call input(data_cleaned_by.email) %} -Email: {{ data_cleaned_by.email }} -{%- endcall %} -{% endif %} - - -{% call input(partitions) -%} -Partições (Filtre a tabela por essas colunas para economizar dinheiro e tempo) ---------- -{% if (partitions is not none) -%} -{% for partition in partitions -%} -- {{ partition }} -{% endfor -%} -{%- endif %} -{% endcall -%} - -{% if identifying_columns is defined %} -{% call input(identifying_columns[0]) -%} -Colunas identificando linhas 
unicamente -------------------- -{% if (identifying_columns is not none) -%} -{% for partition in identifying_columns -%} -- {{ partition }} -{% endfor -%} -{%- endif %} -{% endcall -%} -{% endif %} - -{% if temporal_coverage is defined %} -{% call input(temporal_coverage[0]) -%} -Cobertura Temporal ------------------- -{% if (temporal_coverage is not none) -%} -{% for partition in temporal_coverage -%} -- {{ partition }} -{% endfor -%} -{%- endif %} -{% endcall -%} -{% endif %} - -{% if spatial_coverage is defined %} -{% call input(spatial_coverage[0]) -%} -Cobertura Espacial ------------------- -{% if (spatial_coverage is not none) -%} -{% for partition in spatial_coverage -%} -- {{ partition }} -{% endfor -%} -{%- endif %} -{% endcall -%} -{% endif %} - -{% if data_cleaning_description is defined %} -{% call input(data_cleaning_description) -%} -Tratamento ----------- -{{ data_cleaning_description }} -{% endcall %} -{% endif %} - -{% if update_frequency is defined %} -{%- call input(update_frequency) -%} -Frequencia de Atualização -------------------------- -{{ update_frequency }}{% endcall %} -{% endif %} diff --git a/python-package/basedosdados/upload/connection.py b/python-package/basedosdados/upload/connection.py index b050fec78..a09371b32 100644 --- a/python-package/basedosdados/upload/connection.py +++ b/python-package/basedosdados/upload/connection.py @@ -16,7 +16,7 @@ GetConnectionRequest, ) -from basedosdados.upload.base import Base +from basedosdados.core.base import Base class Connection(Base): diff --git a/python-package/basedosdados/upload/dataset.py b/python-package/basedosdados/upload/dataset.py index 2ea8bf2a8..5584e5490 100644 --- a/python-package/basedosdados/upload/dataset.py +++ b/python-package/basedosdados/upload/dataset.py @@ -8,7 +8,7 @@ from google.cloud import bigquery from loguru import logger -from basedosdados.upload.base import Base +from basedosdados.core.base import Base class Dataset(Base): diff --git a/python-package/basedosdados/upload/storage.py b/python-package/basedosdados/upload/storage.py index 739c1854c..f6291fe0e 100644 --- a/python-package/basedosdados/upload/storage.py +++ b/python-package/basedosdados/upload/storage.py @@ -12,7 +12,7 @@ from tqdm import tqdm from basedosdados.exceptions import BaseDosDadosException -from basedosdados.upload.base import Base +from basedosdados.core.base import Base # google retryble exceptions. 
References: https://googleapis.dev/python/storage/latest/retry_timeout.html#module-google.cloud.storage.retry diff --git a/python-package/basedosdados/upload/table.py b/python-package/basedosdados/upload/table.py index 1e6af80c5..8257d5b52 100644 --- a/python-package/basedosdados/upload/table.py +++ b/python-package/basedosdados/upload/table.py @@ -17,7 +17,7 @@ from loguru import logger from basedosdados.exceptions import BaseDosDadosException -from basedosdados.upload.base import Base +from basedosdados.core.base import Base from basedosdados.upload.connection import Connection from basedosdados.upload.dataset import Dataset from basedosdados.upload.datatypes import Datatype @@ -298,20 +298,6 @@ def _get_cross_columns_from_bq_api(self): api = self._get_columns_metadata_from_api() api_columns = api.get("partition_columns") + api.get("columns") - # bq_columns_list = [col.get("name") for col in bq_columns] - # api_columns_list = [col.get("name") for col in api_columns] - - # not_in_api_columns = [ - # col for col in bq_columns_list if col not in api_columns_list - # ] - # not_in_bq_columns = [ - # col for col in api_columns_list if col not in bq_columns_list - # ] - # print("bq_columns_list", len(bq_columns_list)) - # print("api_columns_list", len(api_columns_list)) - # print("not_in_api_columns", not_in_api_columns) - # print("not_in_bq_columns", not_in_bq_columns) - if api_columns != []: for bq_col in bq_columns: for api_col in api_columns: diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 5181d9b3b..dffd6c91e 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -51,9 +51,6 @@ all = ["gql", "pandavro", "requests-toolbelt"] avro = ["pandavro"] upload = ["gql", "requests-toolbelt"] -[tool.taskipy.tasks] -lint = "semgrep scan --error --config auto --exclude-rule yaml.github-actions.security.third-party-action-not-pinned-to-commit-sha.third-party-action-not-pinned-to-commit-sha && ruff check ." - [pytest] addopts = "-p no:warnings" diff --git a/python-package/tests/conftest.py b/python-package/tests/conftest.py index 55a7f09cd..b62b2c8b7 100644 --- a/python-package/tests/conftest.py +++ b/python-package/tests/conftest.py @@ -13,7 +13,7 @@ from basedosdados import Metadata # TODO: deprecate from basedosdados import Dataset, Storage, Table -from basedosdados.upload.base import Base +from basedosdados.core.base import Base DATASET_ID = "pytest" TABLE_ID = "pytest"
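
Usage sketch: with this patch applied, the metadata helpers are exposed at the package root. A minimal, illustrative flow (the dataset, table, and search values below are hypothetical) would be:

    import basedosdados as bd

    datasets = bd.get_datasets(dataset_name="populacao")    # paginated dict with "items"
    tables = bd.get_tables(dataset_id="br_ibge_populacao")
    columns = bd.get_columns(table_id="municipio")
    hits = bd.search(q="censo", page_size=5)                # list of dataset summaries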