In [6]:
import requests
import pandas as pd
from time import sleep
class CkanConsumer:
    """Minimal client for the CKAN ``datastore_search`` API action of one portal.

    An instance is bound to a single portal host; :meth:`request` downloads
    every record of a datastore resource, page by page, into a DataFrame.
    """

    def __init__(self, main_url):
        """Build the endpoint URL from the portal host name (given without scheme)."""
        self.url = f'https://{main_url}/api/action/datastore_search'

    def _fetch_result(self, params, retry_wait, max_retries):
        """Request a single page and return the ``result`` object of the JSON body.

        Any response whose status code is not 200 is logged and retried after
        ``retry_wait`` seconds. Once ``max_retries`` retries have failed a
        ``RuntimeError`` is raised; ``max_retries=None`` retries forever.
        """
        resource_id = params['resource_id']
        offset = params['offset']
        failures = 0
        while True:
            response = requests.get(self.url, params=params)
            if response.status_code == 200:
                return response.json()['result']
            failures += 1
            print(f'[ERROR] - Response code {response.status_code} from {self.url} on resource {resource_id} at offset {offset}')
            if max_retries is not None and failures > max_retries:
                raise RuntimeError(
                    f'Giving up on {self.url} for resource {resource_id} at offset {offset}: '
                    f'last response code was {response.status_code} after {failures} attempts'
                )
            print(f'[INFO] - Waiting {retry_wait} seconds before trying again')
            sleep(retry_wait)

    def request(self, resource_id, page_size=1000, retry_wait=5, max_retries=5) -> pd.DataFrame:
        """Download all records of ``resource_id`` and return them as a DataFrame.

        Args:
            resource_id: Identifier of the datastore resource to read.
            page_size: Value sent as ``limit`` on every request.
            retry_wait: Seconds to sleep between attempts after a non-200 response.
            max_retries: Retries allowed per page before giving up; ``None``
                means retry indefinitely.

        Returns:
            A DataFrame built from the accumulated ``records`` of every page.

        Raises:
            RuntimeError: If a page still fails after ``max_retries`` retries.
        """
        params = {
            'limit': page_size,
            'offset': 0,
            'resource_id': resource_id
        }
        data = []
        total = None  # taken from the first successful page

        # Every page, including the first, goes through the same retrying fetch.
        while total is None or len(data) < total:
            params['offset'] = len(data)
            print(f'[INFO] - Getting data from {self.url} on resource {resource_id} at offset {len(data)}')
            result = self._fetch_result(params, retry_wait, max_retries)
            if total is None:
                total = result['total']

            records = result['records']
            if not records:
                # An empty page cannot advance the offset; stop rather than
                # re-requesting the same offset forever.
                if len(data) < total:
                    print(f'[WARNING] - Expected {total} records but {self.url} returned no more after {len(data)} on resource {resource_id}')
                break
            data.extend(records)

        print(f'[INFO] - Total of {len(data)} records retrieved from {self.url} on resource {resource_id}')
        # Reassemble the accumulated records into a single dataframe
        return pd.DataFrame(data)

In [7]:
# Create one consumer for the dados.ufrn.br portal, then call
# ufrn_consumer.request(resource_id) once per resource; each call returns the
# resource's records as a dataframe.
ufrn_consumer = CkanConsumer('dados.ufrn.br')
cursos_ufrn = ufrn_consumer.request('a10bc434-9a2d-491a-ae8c-41cf643c35bc')
docentes_ufrn = ufrn_consumer.request('6a8e5461-e748-45c6-aac6-432188d88dde')
discentes_ufrn = ufrn_consumer.request('14afbb6c-395e-411c-b24d-0e494cb95866')

[INFO] - Getting data from https://dados.ufrn.br/api/action/datastore_search on resource a10bc434-9a2d-491a-ae8c-41cf643c35bc at offset 0
[INFO] - Total of 120 records retrieved from https://dados.ufrn.br/api/action/datastore_search on resource a10bc434-9a2d-491a-ae8c-41cf643c35bc
[INFO] - Getting data from https://dados.ufrn.br/api/action/datastore_search on resource 6a8e5461-e748-45c6-aac6-432188d88dde at offset 0
[INFO] - Getting data from https://dados.ufrn.br/api/action/datastore_search on resource 6a8e5461-e748-45c6-aac6-432188d88dde at offset 1000
[INFO] - Getting data from https://dados.ufrn.br/api/action/datastore_search on resource 6a8e5461-e748-45c6-aac6-432188d88dde at offset 2000
[INFO] - Total of 2772 records retrieved from https://dados.ufrn.br/api/action/datastore_search on resource 6a8e5461-e748-45c6-aac6-432188d88dde
[INFO] - Getting data from https://dados.ufrn.br/api/action/datastore_search on resource 14afbb6c-395e-411c-b24d-0e494cb95866 at offset 0
[INFO] - Gettin

In [8]:
# Preview the first rows of the dataframe fetched above.
# NOTE(review): the rendered rows include personal names and enrollment
# numbers; consider clearing this output before sharing the notebook.
discentes_ufrn.head()

Unnamed: 0,status,nome_unidade_gestora,ano_ingresso,nome_discente,periodo_ingresso,tipo_discente,nome_curso,modalidade_educacao,sexo,sigla_nivel_ensino,nome_unidade,id_curso,forma_ingresso,id_unidade_gestora,matricula,id_unidade,_id,nivel_ensino
0,ATIVO,CENTRO DE TECNOLOGIA,2022,AARON VINICIUS MAIA SOBRINHO,1,REGULAR,ENGENHARIA DE PETRÓLEO,PRESENCIAL,M,G,CENTRO DE TECNOLOGIA,2071220.0,SiSU,445.0,20220017174,445.0,1,GRADUAÇÃO
1,ATIVO,CENTRO DE TECNOLOGIA,2022,ABÃ BARBOSA DA SILVA,2,REGULAR,ENGENHARIA MECÂNICA,PRESENCIAL,M,G,CENTRO DE TECNOLOGIA,2000031.0,SiSU,445.0,20220057240,445.0,2,GRADUAÇÃO
2,ATIVO,"CENTRO DE CIÊNCIAS HUMANAS, LETRAS E ARTES",2022,ABDA BEATRIZ DE ARAUJO PINHEIRO,1,REGULAR,BACHARELADO INTERDISCIPLINAR EM HUMANIDADES,PRESENCIAL,F,G,INSTITUTO HUMANITAS DE ESTUDOS INTEGRADOS,149717829.0,SiSU,442.0,20220046390,30304.0,3,GRADUAÇÃO
3,ATIVO,,2022,ABDALLAH SAID SALEH YUSUF,2,ESPECIAL,,,M,G,,,ALUNO EM MOBILIDADE NACIONAL,,20220077682,,4,GRADUAÇÃO
4,CANCELADO,UNIVERSIDADE FEDERAL DO RIO GRANDE DO NORTE,2022,ABEL BARBOSA DE SOUZA NETO,1,REGULAR,CURSO TÉCNICO DA METRÓPOLE DIGITAL,SEMI-PRESENCIAL,M,T,INSTITUTO METROPOLE DIGITAL,96054058.0,PROCESSO SELETIVO,605.0,20223009354,6069.0,5,TÉCNICO


In [2]:
import os
from urllib.parse import quote_plus

import pymongo
from pymongo import database

def get_mongo_db(institute: str) -> "database.Database":
    """Connect to the MongoDB cluster and return the ``{institute}_dbacademic`` database.

    The password for the ``academic_admin`` user is read from the
    ``DBACADEMIC_MONGO_PASSWORD`` environment variable so that no credential
    is stored in the notebook.

    Args:
        institute: Prefix of the database name (``f"{institute}_dbacademic"``).

    Returns:
        The database handle obtained from the client.

    Raises:
        RuntimeError: If ``DBACADEMIC_MONGO_PASSWORD`` is unset or empty.
    """
    # NOTE(review): a password was previously committed in this cell; it should
    # be rotated, since it remains in the notebook's history.
    password = os.environ.get("DBACADEMIC_MONGO_PASSWORD")
    if not password:
        raise RuntimeError(
            "Set the DBACADEMIC_MONGO_PASSWORD environment variable before calling get_mongo_db()"
        )

    # Percent-escape the password so reserved URI characters cannot break parsing.
    client = pymongo.MongoClient(
        f"mongodb+srv://academic_admin:{quote_plus(password)}@dbacademic-cluster.enzu3ui.mongodb.net/?retryWrites=true&w=majority"
    )
    return client.get_database(f"{institute}_dbacademic")

def insert_many(database: "database.Database", collection_name: str, data: list) -> dict:
    """Insert ``data`` into ``collection_name`` and report the outcome.

    Args:
        database: Database handle that owns the target collection.
        collection_name: Name of the collection to write to.
        data: Documents to insert.

    Returns:
        A dict with ``"status"`` (``"Success"`` or ``"Error"``) and
        ``"inserted"``, the number of documents written by this call.
        An empty ``data`` list is a successful no-op with ``"inserted": 0``.
    """
    if not data:
        # The driver rejects an empty document list; nothing to write is not an error.
        return {"status": "Success", "inserted": 0}

    collection = database.get_collection(collection_name)
    try:
        result = collection.insert_many(data)
    except Exception as e:
        print(e)
        # Bulk write failures carry a details mapping with the number of
        # documents written before the failure; other errors have none.
        details = getattr(e, "details", None) or {}
        return {
            "status"  : "Error",
            "inserted": details.get("nInserted", 0)
        }
    return {
        "status"  : "Success",
        "inserted": len(result.inserted_ids)
    }