In [1]:
# Load IPython's autoreload extension so edited project modules can be re-imported.
%load_ext autoreload
# Mode 0 turns automatic reloading off; modules are refreshed only by an explicit %autoreload call.
%autoreload 0

In [41]:
import functools
import json
import os

import numpy as np
import pandas as pd
from scrapy.utils.project import get_project_settings
from tqdm.notebook import tqdm

from tse.common.ballot_box_files import (BallotBoxFileType,
                                         get_ballot_box_files_map,
                                         read_ballot_box_logs)
from tse.common.pathinfo import PathInfo
from tse.parsers import SectionAuxParser, SectionsConfigParser, CityConfigParser

In [50]:
# One-off manual reload of already-imported modules (automatic reloading is disabled via %autoreload 0).
%autoreload

In [5]:
# Project-wide configuration, read once from the Scrapy project settings.
settings = get_project_settings()

# Values reused by the cells below.
plea = settings["PLEA"]
elections = settings["ELECTIONS"]
states = settings["STATES"]

# Echo the active configuration so the rendered notebook records what it ran against.
display(
    settings["ENVIRONMENT"],
    settings["CYCLE"],
    plea,
    elections,
    " ".join(states),
)


'oficial'

'ele2022'

'406'

['544', '546', '548']

'br ac al am ap ba ce df es go ma mg ms mt pa pb pe pi pr rj rn ro rr rs sc se sp to zz'

In [6]:
@functools.lru_cache(500000)
def load_json(path):
    """Read and parse a JSON file from the local data directory, memoised by path.

    Args:
        path: Project path of the file; it is resolved to a local file through
            ``PathInfo.get_local_path(settings, path)``. Must be hashable
            because it is used as the ``lru_cache`` key.

    Returns:
        The decoded JSON document. ``lru_cache`` hands the *same* object to
        every caller that passes the same ``path``, so treat the result as
        read-only.

    Raises:
        OSError: If the resolved file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding, so
    # non-ASCII text in the data (e.g. city names) is read the same everywhere.
    with open(PathInfo.get_local_path(settings, path), "r", encoding="utf-8") as f:
        return json.load(f)

In [11]:
def get_sections():
    """Yield one ``(key, value)`` pair per section listed in the state configs.

    For every entry of ``states`` except ``"br"``, the state's sections config
    is loaded and expanded into ``(city, zone, section)`` triples; each
    section's aux file is then loaded and parsed.

    Yields:
        ``((state, city, zone, section), (hash, hashdate, filenames))`` where
        the second tuple is whatever ``SectionAuxParser.get_files`` returns
        for that section's aux data.
    """
    # "br" is skipped — presumably the nationwide entry, which has no sections
    # config of its own (TODO confirm).
    for state in (candidate for candidate in states if candidate != "br"):
        config_data = load_json(PathInfo.get_sections_config_path(plea, state))

        for city, zone, section in SectionsConfigParser.expand_sections(config_data):
            aux_data = load_json(
                PathInfo.get_section_aux_path(plea, state, city, zone, section)
            )
            file_hash, hash_date, filenames = SectionAuxParser.get_files(aux_data)

            section_key = (state, city, zone, section)
            section_files = (file_hash, hash_date, filenames)
            yield section_key, section_files

# Materialise every section into a dict keyed by (state, city, zone, section).
# NOTE: total=472075 is a hard-coded section count used only to size the
# progress bar; the bar will be off if the underlying data changes.
all_section_files = {
    section_key: section_files
    for section_key, section_files in tqdm(get_sections(), total=472075)
}

  0%|          | 0/472075 [00:00<?, ?it/s]

In [17]:
# One row per section, indexed by (state, city, zone, section).
mux = pd.MultiIndex.from_tuples(
    all_section_files.keys(),
    names=["state", "city", "zone", "section"],
)
df_all_section_files = pd.DataFrame(
    data=[*all_section_files.values()],
    index=mux,
    columns=["hash", "hashdate", "files"],
)
df_all_section_files

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,hash,hashdate,files
state,city,zone,section,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ac,1066,4,77,395459446c754b34572b56304a706a6a413454646f6f5a...,2022-10-02 19:06:03,"[o00406-0106600040077.vscmr, o00406-0106600040..."
ac,1066,4,78,76366235735557583732434d586f62466765505a6c4169...,2022-10-02 18:52:38,"[o00406-0106600040078.vscmr, o00406-0106600040..."
ac,1066,4,79,7137566a703779784b472d63676a2d324e743856527946...,2022-10-02 18:52:43,"[o00406-0106600040079.logjez, o00406-010660004..."
ac,1066,4,80,7071334531373673564c445a787a6d626772646c36704a...,2022-10-02 19:05:44,"[o00406-0106600040080.bu, o00406-0106600040080..."
ac,1066,4,115,5257727135536c66657735443649706f57394637457556...,2022-10-02 19:06:08,"[o00406-0106600040115.bu, o00406-0106600040115..."
...,...,...,...,...,...,...
zz,39187,1,1428,,NaT,
zz,39187,1,3011,,NaT,
zz,39225,1,931,,NaT,
zz,99180,1,1228,4f31656c37516d52532d756c362d444378377a4c35597a...,2022-10-02 22:57:46,"[o00406-9918000011228.logsajez, o00406-9918000..."


In [28]:
# Number of sections in each (state, city, zone), largest zones first.
(
    df_all_section_files
    .groupby(["state", "city", "zone"])
    .size()
    .sort_values(ascending=False)
)

state  city   zone
sp     71072  375     787
              374     754
              372     742
              253     691
              376     690
                     ... 
zz     29912  1         1
       29904  1         1
       29890  1         1
       29882  1         1
       99473  1         1
Length: 6283, dtype: int64

In [51]:
def get_cities():
    """Yield one ``(key, value)`` pair per city in the cities config.

    The config is located with ``PathInfo.get_cities_config_path`` using the
    first entry of ``elections`` and expanded by
    ``CityConfigParser.expand_cities``.

    Yields:
        ``((state, city), (city_ibge, name, is_capital, zones))``.
    """
    config_data = load_json(PathInfo.get_cities_config_path(elections[0]))
    expanded = CityConfigParser.expand_cities(config_data)

    for state, city, city_ibge, name, is_capital, zones in expanded:
        city_key = (state, city)
        city_info = (city_ibge, name, is_capital, zones)
        yield city_key, city_info

# Materialise every city into a dict keyed by (state, city).
all_cities = {city_key: city_info for city_key, city_info in get_cities()}

In [52]:
# One row per city, indexed by (state, city).
mux = pd.MultiIndex.from_tuples(
    all_cities.keys(),
    names=["state", "city"],
)
df_all_cities = pd.DataFrame(
    data=[*all_cities.values()],
    index=mux,
    columns=["city_ibge", "name", "is_capital", "zones"],
)
df_all_cities

Unnamed: 0_level_0,Unnamed: 1_level_0,city_ibge,name,is_capital,zones
state,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ac,1120,1200013,ACRELÂNDIA,False,[8]
ac,1570,1200054,ASSIS BRASIL,False,[6]
ac,1058,1200104,BRASILÉIA,False,[6]
ac,1007,1200138,BUJARI,False,[9]
ac,1015,1200179,CAPIXABA,False,[2]
...,...,...,...,...,...
to,96199,1721109,TOCANTÍNIA,False,[5]
to,73458,1721257,TUPIRAMA,False,[23]
to,73237,1721307,TUPIRATINS,False,[6]
to,96652,1722081,WANDERLÂNDIA,False,[27]


In [85]:
# Look the city up by name first; this is how its (state, city) key is found.
# Only the last expression of a cell is rendered automatically, so the by-name
# result has to be displayed explicitly or it is silently discarded.
display(df_all_cities[df_all_cities["name"] == "SÃO PAULO"])

# Then fetch the same city directly by its (state, city) index key.
df_all_cities.loc["sp", "71072"]

city_ibge                                               3550308
name                                                  SÃO PAULO
is_capital                                                 True
zones         [20, 248, 250, 256, 373, 376, 392, 2, 247, 249...
Name: (sp, 71072), dtype: object

In [95]:
# Section to inspect, as (state, city, zone, section). Defined once so the row
# lookup and the path built from it cannot drift apart when the sample changes.
sample_section = ("ac", "1066", "4", "77")

row = df_all_section_files.loc[sample_section]

# Pick the log file out of the section's file list.
log_filename = get_ballot_box_files_map(row["files"])[BallotBoxFileType.LOG]

# Project path of that log file for the sampled section and its hash.
log_path = PathInfo.get_ballot_box_file_path(plea, *sample_section, row["hash"], log_filename)
log_path

'arquivo-urna/406/dados/ac/01066/0004/0077/395459446c754b34572b56304a706a6a413454646f6f5a6f5664426f5169564241506566444932644f75493d/o00406-0106600040077.logjez'

In [96]:
# Read the log archive located in the previous cell into a {name: DataFrame} dict.
logs = dict(read_ballot_box_logs(PathInfo.get_local_path(settings, log_path)))

# Select the entry by the file name resolved earlier instead of repeating it as
# a literal, so this cell keeps working when a different section is sampled.
# NOTE(review): for the current sample the key equals log_filename
# ("o00406-0106600040077.logjez"); confirm read_ballot_box_logs always keys by
# the archive file name.
df = logs[log_filename]
df

Unnamed: 0,timestamp,level,id_ballot_box,app,message,hash
0,2022-09-22 14:11:11,INFO,67305985,LOGD,Início das operações do logd,8205594207814213261
1,2022-09-22 14:11:11,INFO,67305985,LOGD,Urna ligada em 22/09/2022 às 14:09:59,4421815302842738656
2,2022-09-22 14:11:11,INFO,67305985,SCUE,Iniciando aplicação - Oficial - 1º turno,396608720597996448
3,2022-09-22 14:11:11,INFO,67305985,SCUE,Versão da aplicação: 8.26.0.0 - Onça-pintada,17957230827251748098
4,2022-09-22 14:11:13,INFO,67305985,SCUE,Urna operando com rede elétrica,15157650888632883953
...,...,...,...,...,...,...
5356,2022-10-02 15:52:57,INFO,67305985,VOTA,Gerando arquivo de resultado [.ver] + [Início],612076369282010836
5357,2022-10-02 15:52:57,INFO,67305985,VOTA,Gerando arquivo de resultado [.ver] + [Término],14799536291532103694
5358,2022-10-02 15:52:57,INFO,67305985,VOTA,Gerando arquivo de resultado [.chvtp] + [Início],17677498444101028370
5359,2022-10-02 15:52:57,INFO,67305985,VOTA,Gerando arquivo de resultado [.chvtp] + [Término],12710055081428421582
