In [1]:
import os
import json
import requests
import logging
import re
import time
import datetime
import math

from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed


from requests.adapters import HTTPAdapter, Retry

In [2]:
# Folder that receives every downloaded file, expressed relative to the
# notebook's working directory.
BASE_FOLDER = "downloads"
BASE_FOLDER_PATH = "./{}".format(BASE_FOLDER)

In [3]:
class Downloader:
    """Queue of ``(url, folder)`` jobs that are downloaded concurrently.

    Jobs are queued with :meth:`add` and fetched by :meth:`run`, which saves
    every HTTP 200 response body into ``folder`` under the last path segment
    of the final (post-redirect) URL.

    Args:
        session: Optional object exposing a ``get(url, allow_redirects=...,
            timeout=...)`` method (for example a ``requests.Session`` with
            retry adapters mounted). When omitted, the module-level
            ``requests`` API is used.
            NOTE(review): the session is shared by all worker threads --
            confirm that is acceptable for the session type you pass.
    """

    # File name used when the URL has no usable last path segment.
    DEFAULT_FILENAME = "no_name.json"

    def __init__(self, session=None) -> None:
        # Pending (url, folder) jobs; emptied by run().
        self.poll = []
        # Number of jobs at the start of the latest run().
        # (The attribute name keeps its historical spelling for compatibility.)
        self.inital_size = 0
        # Number of jobs of the latest run() that have not completed yet.
        self.current_size = 0
        # Timestamp of the start of the latest run(); None before the first run.
        self.initial_time = None
        # (url, reason) pairs for the jobs of the latest run() that were not saved.
        self.failed = []
        self.session = session

    def add(self, item):
        """Queue one job; ``item`` is a ``(url, folder)`` tuple."""
        self.poll.append(item)

    def clear(self):
        """Drop every queued job."""
        self.poll = []

    def get_printable_status(self):
        """Return a ``"<remaining> of <total> left"`` progress string."""
        return "{} of {} left".format(self.current_size, self.inital_size)

    @staticmethod
    def _filename_from_url(url, default=DEFAULT_FILENAME):
        """Return the text after the last ``/`` of ``url``.

        Falls back to ``default`` when the URL has no ``/`` or ends with one,
        so the caller never ends up with an empty file name.
        """
        if '/' not in url:
            return default
        return url.rsplit('/', 1)[1] or default

    def run(self, max_workers=8, timeout=60):
        """Download every queued job and write the successful ones to disk.

        Args:
            max_workers: Size of the thread pool.
            timeout: Seconds passed to each ``get`` call as its ``timeout``,
                so a stalled server cannot block a worker forever.

        Jobs that raise a network error or return a status other than 200 are
        reported with ``print``, recorded in ``self.failed`` and skipped; the
        remaining jobs are still processed. The queue is empty afterwards.
        """
        def _worker(url, path):
            # Resolved lazily so ``requests`` is only needed when no session was injected.
            client = self.session if self.session is not None else requests
            try:
                response = client.get(url, allow_redirects=True, timeout=timeout)
            except OSError as exc:
                # requests' RequestException derives from IOError/OSError, so this
                # covers connection errors and timeouts without aborting the batch.
                return (url, path, None, exc)
            return (url, path, response, None)

        # Setup
        self.inital_size = len(self.poll)
        self.current_size = self.inital_size
        self.initial_time = datetime.datetime.now()
        self.failed = []

        print('Running downloader for {} urls'.format(self.inital_size))

        # Take ownership of the queue so jobs added later belong to the next run.
        jobs, self.poll = self.poll, []

        with ThreadPoolExecutor(max_workers=max_workers) as exe:
            futures = [exe.submit(_worker, *job) for job in jobs]

            for future in as_completed(futures):
                self.current_size = self.current_size - 1

                url, path, r, error = future.result()
                if error is not None:
                    print("URL {} failed: {}".format(url, error))
                    self.failed.append((url, str(error)))
                    continue

                if not r.status_code == 200:
                    print("URL {} returned staus {}".format(r.url, r.status_code))
                    self.failed.append((r.url, "status {}".format(r.status_code)))
                    continue

                # exist_ok avoids the check-then-create race of os.path.exists().
                os.makedirs(path, exist_ok=True)
                file_path = os.path.join(path, self._filename_from_url(r.url))

                with open(file_path, 'wb') as f:
                    f.write(r.content)

#print(get_status((datetime.datetime.now()-before).total_seconds(), poll_size, actual_poll_size))
#def get_status(elapsed_seconds, initial_size, current_size, bar_size=50):
#            output = '[%s%s] %d%% in %d seconds'
#            percent = 1 - current_size / initial_size
#            output = output % (('#' * math.floor(percent*bar_size)) , ('-' * math.floor(bar_size - percent*bar_size)), (percent*100), elapsed_seconds)
#            output = output + " ({} of {})".format(initial_size - current_size, initial_size)
#            return output

In [4]:
# Single Downloader instance shared by every stage below: each stage queues its
# (url, folder) jobs on it with add() and then calls run().
my_downloader = Downloader()

1 - Get the config file (F1)  
2 - For each _pleito_ and each election:  
2.1 - Get the election configuration file with cities and states (F2)  
3 - For each state in F2:  
3.1 - Get the file with cities, zones and sections (F3)  
4 - For each state/city/zone/section in F3, get the voting-result config file (F4)  
5 - For each F4, fetch the results files (F5)

In [5]:
# STAGE 1
# Fetch the root configuration file (F1) into the base download folder.
URL_CONFIG_FILE = "https://resultados.tse.jus.br/oficial/comum/config/ele-c.json"

stage1_job = (URL_CONFIG_FILE, BASE_FOLDER_PATH)
my_downloader.add(stage1_job)
my_downloader.run()

Running downloader for 1 urls


In [6]:
# STAGE 2
# Read the root config file (F1, "ele-c.json") and download the municipality
# config file (F2) of the selected election.
F1_FILE_PATH = "{}/{}".format(BASE_FOLDER_PATH, "ele-c.json")
if not os.path.isfile(F1_FILE_PATH):
    raise Exception("F1 file does not exists in {}".format(F1_FILE_PATH)) 

# Parse F1 up front so the file handle is released before any download starts.
with open(F1_FILE_PATH, 'r', encoding='utf-8') as f_json:
    json_obj = json.load(f_json)

CICLO = json_obj['c']
PLEITOS = json_obj['pl']

for pleito in PLEITOS:
    cd_pleito = pleito['cd']
    for eleicao in pleito['e']:
        cd_eleicao = eleicao['cd']
        # Only election code '544' is queued; every other election is ignored.
        if cd_eleicao == '544':
            eleicao_config_path = "{}/{}/{}/config".format(BASE_FOLDER_PATH, CICLO, cd_eleicao)
            URL_CONFIG_ELEICAO = 'https://resultados.tse.jus.br/{}/{}/{}/config/mun-e{}-cm.json'.format("oficial", CICLO, cd_eleicao, cd_eleicao.zfill(6))
            my_downloader.add((URL_CONFIG_ELEICAO, eleicao_config_path))
    # One download batch per pleito.
    my_downloader.run()

Running downloader for 1 urls


In [7]:
# STAGE 3
# For every election whose F2 file is on disk, queue the per-state file (F3)
# that lists cities, zones and sections, then download the whole batch at once.
F1_FILE_PATH = "{}/{}".format(BASE_FOLDER_PATH, "ele-c.json")
if not os.path.isfile(F1_FILE_PATH):
    raise Exception("F1 file does not exists in {}".format(F1_FILE_PATH)) 

with open(F1_FILE_PATH, 'r', encoding='utf-8') as f_json:
    json_obj = json.load(f_json)

CICLO = json_obj['c']
PLEITOS = json_obj['pl']

for pleito in PLEITOS:
    cd_pleito = pleito['cd']
    for eleicao in pleito['e']:
        cd_eleicao = eleicao['cd']
        F2_CONFIG_PATH = "{}/{}/{}/config".format(BASE_FOLDER_PATH, CICLO, cd_eleicao)
        F2_FILE_PATH = "{}/{}".format(F2_CONFIG_PATH, 'mun-e{}-cm.json'.format(cd_eleicao.zfill(6)))

        # Elections without a downloaded F2 file are skipped silently.
        if not os.path.isfile(F2_FILE_PATH):
            continue
        print(F2_FILE_PATH)

        with open(F2_FILE_PATH, 'r', encoding='utf-8') as f2_json:
            municipios_json = json.load(f2_json)

        F3_CONFIG_PATH = "{}/{}/arquivo-urna/{}/config".format(BASE_FOLDER_PATH, CICLO, cd_pleito)
        for uf in municipios_json['abr']:
            UF_CODE = uf['cd'].lower()
            F3_CONFIG_PATH_UF = "{}/{}".format(F3_CONFIG_PATH,UF_CODE)
            URL_CONFIG_MUNICIPIOS = 'https://resultados.tse.jus.br/{}/{}/arquivo-urna/{}/config/{}/{}-p{}-cs.json'.format("oficial", CICLO, cd_pleito, UF_CODE, UF_CODE, cd_pleito.zfill(6))
            my_downloader.add((URL_CONFIG_MUNICIPIOS, F3_CONFIG_PATH_UF))

# Single batch covering every state queued above.
my_downloader.run()



./downloads/ele2022/544/config/mun-e000544-cm.json
Running downloader for 28 urls


In [8]:
# STAGE 4
# For every state/city/zone/section listed in the F3 files, download the
# per-section "-aux.json" file (F4) unless it is already on disk.
F1_FILE_PATH = "{}/{}".format(BASE_FOLDER_PATH, "ele-c.json")
if not os.path.isfile(F1_FILE_PATH):
    raise Exception("F1 file does not exists in {}".format(F1_FILE_PATH)) 

with open(F1_FILE_PATH, 'r', encoding='utf-8') as f_json:
    json_obj = json.load(f_json)

CICLO = json_obj['c']
PLEITOS = json_obj['pl']

for pleito in PLEITOS:
    cd_pleito = pleito['cd']
    for eleicao in pleito['e']:
        cd_eleicao = eleicao['cd']
        F2_CONFIG_PATH = "{}/{}/{}/config".format(BASE_FOLDER_PATH, CICLO, cd_eleicao)
        F2_FILE_PATH = "{}/{}".format(F2_CONFIG_PATH, 'mun-e{}-cm.json'.format(cd_eleicao.zfill(6)))

        # Elections without a downloaded F2 file are skipped.
        if not os.path.isfile(F2_FILE_PATH):
            continue

        with open(F2_FILE_PATH, 'r', encoding='utf-8') as f2_json:
            municipios_json = json.load(f2_json)

        F3_CONFIG_PATH = "{}/{}/arquivo-urna/{}/config".format(BASE_FOLDER_PATH, CICLO, cd_pleito)

        for uf in municipios_json['abr']:
            UF_CODE = uf['cd'].lower()

            F3_CONFIG_PATH_UF = "{}/{}".format(F3_CONFIG_PATH,UF_CODE)
            F3_FILE = '{}/{}'.format(F3_CONFIG_PATH_UF, '{}-p{}-cs.json'.format(UF_CODE, cd_pleito.zfill(6)))

            # The stage 3 download may have failed for this state (the downloader
            # only reports non-200 responses); skip it instead of crashing on open().
            if not os.path.isfile(F3_FILE):
                print("F3 file not found, skipping state: {}".format(F3_FILE))
                continue

            with open(F3_FILE, 'r', encoding='utf-8') as esf_json:
                estado_json = json.load(esf_json)

            for cidade in estado_json['abr'][0]['mu']:
                cd_cidade = cidade['cd']
                for zona in cidade['zon']:
                    cd_zona = zona['cd']
                    for secao in zona['sec']:
                        cd_secao = secao['ns']

                        DADOS_URNA_PATH = "{}/{}/arquivo-urna/{}/dados/{}/{}/{}/{}".format(BASE_FOLDER_PATH, CICLO, cd_pleito, UF_CODE, cd_cidade, cd_zona.zfill(4), cd_secao.zfill(4))
                        DADOS_URNA_FILENAME = 'p{}-{}-m{}-z{}-s{}-aux.json'.format(cd_pleito.zfill(6), UF_CODE, cd_cidade.zfill(5),cd_zona.zfill(4), cd_secao.zfill(4))
                        URL_DADOS_URNA = 'https://resultados.tse.jus.br/{}/{}/arquivo-urna/{}/dados/{}/{}/{}/{}/{}'.format("oficial", CICLO, cd_pleito, UF_CODE, cd_cidade, cd_zona.zfill(4), cd_secao.zfill(4), DADOS_URNA_FILENAME)

                        # Already downloaded by a previous run: nothing to do.
                        if os.path.isfile('{}/{}'.format(DADOS_URNA_PATH, DADOS_URNA_FILENAME)):
                            continue

                        my_downloader.add((URL_DADOS_URNA, DADOS_URNA_PATH))

            # One download batch per state.
            my_downloader.run()
        



Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls


In [9]:
# STAGE 5
# For every section's "-aux.json" file (F4), download the voting-machine files
# it lists (F5) into a folder named after the file set's hash, skipping files
# that are already on disk.
F1_FILE_PATH = "{}/{}".format(BASE_FOLDER_PATH, "ele-c.json")
if not os.path.isfile(F1_FILE_PATH):
    raise Exception("F1 file does not exists in {}".format(F1_FILE_PATH)) 

with open(F1_FILE_PATH, 'r', encoding='utf-8') as f_json:
    json_obj = json.load(f_json)

CICLO = json_obj['c']
PLEITOS = json_obj['pl']

# Sections whose aux file is not on disk (e.g. its stage 4 download failed).
# Counted and reported once at the end to avoid flooding the cell output.
missing_aux_files = 0

for pleito in PLEITOS:
    cd_pleito = pleito['cd']
    for eleicao in pleito['e']:
        cd_eleicao = eleicao['cd']
        F2_CONFIG_PATH = "{}/{}/{}/config".format(BASE_FOLDER_PATH, CICLO, cd_eleicao)
        F2_FILE_PATH = "{}/{}".format(F2_CONFIG_PATH, 'mun-e{}-cm.json'.format(cd_eleicao.zfill(6)))

        # Elections without a downloaded F2 file are skipped.
        if not os.path.isfile(F2_FILE_PATH):
            continue

        with open(F2_FILE_PATH, 'r', encoding='utf-8') as f2_json:
            municipios_json = json.load(f2_json)

        F3_CONFIG_PATH = "{}/{}/arquivo-urna/{}/config".format(BASE_FOLDER_PATH, CICLO, cd_pleito)

        for uf in municipios_json['abr']:
            UF_CODE = uf['cd'].lower()

            F3_CONFIG_PATH_UF = "{}/{}".format(F3_CONFIG_PATH,UF_CODE)
            F3_FILE = '{}/{}'.format(F3_CONFIG_PATH_UF, '{}-p{}-cs.json'.format(UF_CODE, cd_pleito.zfill(6)))

            # The stage 3 download may have failed for this state; skip it
            # instead of crashing on open().
            if not os.path.isfile(F3_FILE):
                print("F3 file not found, skipping state: {}".format(F3_FILE))
                continue

            with open(F3_FILE, 'r', encoding='utf-8') as esf_json:
                estado_json = json.load(esf_json)

            for cidade in estado_json['abr'][0]['mu']:
                cd_cidade = cidade['cd']
                for zona in cidade['zon']:
                    cd_zona = zona['cd']
                    for secao in zona['sec']:
                        cd_secao = secao['ns']

                        DADOS_URNA_PATH = "{}/{}/arquivo-urna/{}/dados/{}/{}/{}/{}".format(BASE_FOLDER_PATH, CICLO, cd_pleito, UF_CODE, cd_cidade, cd_zona.zfill(4), cd_secao.zfill(4))
                        URNA_DADOS_FILENAME = 'p{}-{}-m{}-z{}-s{}-aux.json'.format(cd_pleito.zfill(6), UF_CODE, cd_cidade.zfill(5),cd_zona.zfill(4), cd_secao.zfill(4))

                        aux_file_path = '{}/{}'.format(DADOS_URNA_PATH, URNA_DADOS_FILENAME)
                        if not os.path.isfile(aux_file_path):
                            missing_aux_files += 1
                            continue

                        with open(aux_file_path, 'r', encoding='utf-8') as aux_json:
                            urna_files_json = json.load(aux_json)

                        # Only the first entry of 'hashes' is used; check its status
                        # before reading any other key of the entry.
                        hashes = urna_files_json.get('hashes') or []
                        if not hashes or hashes[0]['st'] == "Sem arquivo":
                            continue

                        urna_hash = hashes[0]['hash']
                        DADOS_URNA_HASH_PATH = '{}/{}'.format(DADOS_URNA_PATH, urna_hash)

                        for file in hashes[0]['nmarq']:
                            # Already downloaded by a previous run: nothing to do.
                            if os.path.isfile('{}/{}'.format(DADOS_URNA_HASH_PATH, file)):
                                continue

                            URL_DADOS_URNA = 'https://resultados.tse.jus.br/{}/{}/arquivo-urna/{}/dados/{}/{}/{}/{}/{}/{}'.format("oficial", CICLO, cd_pleito, UF_CODE, cd_cidade, cd_zona.zfill(4), cd_secao.zfill(4), urna_hash, file)
                            my_downloader.add((URL_DADOS_URNA, DADOS_URNA_HASH_PATH))

                # One download batch per city.
                my_downloader.run()

if missing_aux_files:
    print("Skipped {} sections without an aux file".format(missing_aux_files))

        



Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running downloader for 0 urls
Running do