# Regras Regex dos Atos

### Importações

In [2]:
import re
import pandas as pd

from os import listdir
from os.path import isfile, join

### Atos a serem extraídos

In [3]:
df = pd.DataFrame([["Aposentadorias", "'CONCEDER', 'APOSENTAR'", "Não"],
                  ["Reversões", "REVERTER A ATIVIDADE", "Não"],
                  ["Atos tonados sem efeitos (Aposentadoria)", "TORNAR SEM EFEITO", "Sim"]],
                  index = [4, 5, 6],
                  columns = ["Atos", "Regex", "Multiplos"])

style_df = pd.DataFrame(df).style.set_properties(**{'text-align': 'left'}).set_table_styles([ dict(selector='th', props=[('text-align','left')] ) ])
style_df


Unnamed: 0,Atos,Regex,Multiplos
4,Aposentadorias,"'CONCEDER', 'APOSENTAR'",Não
5,Reversões,REVERTER A ATIVIDADE,Não
6,Atos tonados sem efeitos (Aposentadoria),TORNAR SEM EFEITO,Sim


### Funções de Auxílio

In [4]:
def print_dataframe(df):
    style_df = (df.style.set_properties(**{'text-align': 'left'})
                                        .set_table_styles([ dict(selector='th',
                                                                 props=[('text-align','left')])])
                   )
    return style_df

def get_txts(path):
    """Collect the files stored exactly two directory levels below *path*.

    The tree is expected to look like ``path/<year>/<month>/<file>``: every
    non-file entry of *path* is treated as a year folder, every non-file entry
    of a year folder as a month folder, and only regular files directly inside
    a month folder are returned.

    Entries are visited in sorted order at every level, so the returned list
    is deterministic. ``os.listdir`` gives no ordering guarantee, and callers
    index into the result by position.

    Returns:
        list[str]: joined paths (prefixed with *path*) of the files found.
    """
    txts = []
    for year_name in sorted(listdir(path)):
        year_dir = join(path, year_name)
        if isfile(year_dir):
            continue  # stray file at the year level
        for month_name in sorted(listdir(year_dir)):
            month_dir = join(year_dir, month_name)
            if isfile(month_dir):
                continue  # stray file at the month level
            txts.extend(
                join(month_dir, file_name)
                for file_name in sorted(listdir(month_dir))
                if isfile(join(month_dir, file_name))
            )
    return txts


# def print_database_dfs(files_path, ato):
#     res_dfs = []
#     for txt in files_path:
#         txt_str = open(txt, "r").read()
#         ret = Retirements(txt_str)
#         if not ret.data_frame.empty:
#             res_dfs.append(ret.data_frame)

#     rets_final = pd.concat([pd.DataFrame(df) for df in res_dfs],
#                             ignore_index=True)
#     print_dataframe(rets_final)


### Diretórios

In [6]:
# Root folders of the two text corpora used by this notebook.
dodfs_space_dir = "../data/dodfs_txt_espaco"
dodfs_n_dir = "../data/dodfs_txt_barra_n"

# File listings for each corpus, gathered with get_txts().
dodfs_space_files = get_txts(dodfs_space_dir)
dodfs_n_files = get_txts(dodfs_n_dir)

# Results folder; not referenced by the code visible in this section.
output = "./results"

## Classes

In [7]:
# Load the second file of the "barra_n" corpus as a sample text for trying
# out the extractor classes. The context manager closes the handle once the
# content has been read (the original left it open).
with open(dodfs_n_files[1], "r") as test_file:
    test_txt = test_file.read()

### Aposentadorias

In [None]:
class Retirements(Regex):
    """Locate retirement acts ("Aposentadoria") in a text and extract fields.

    ``find_all``, ``find_in_act`` and ``_build_dataframe`` come from the
    ``Regex`` base class, which is defined outside this section.

    After construction:
        _raw_acts:  dict mapping the captured process reference to the act body.
        _acts:      list of dicts, one per act, with the extracted fields.
        data_frame: whatever ``_build_dataframe()`` returns.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = ["Tipo do Ato", "SEI", "Nome", "Matrícula",
                         "Tipo de Aposentadoria", "Cargo", "Classe",
                         "Padrao", "Quadro", "Fundamento Legal", "Orgao",
                         "Vigencia", "Matricula SIAPE"]

        # One pattern per field, applied to the body of each act. Insertion
        # order matters: it defines the key order of every act dict.
        # Raw strings avoid invalid-escape warnings for ``\s``; the pattern
        # text itself is unchanged.
        # NOTE: inside ``[...]`` a ``|`` is a literal character, so classes
        # such as ``[C|c]`` also accept ``|``. Kept verbatim on purpose.
        # Empty patterns mark fields that have no extraction rule yet.
        self.rules = {
            "nome": r"\s([^,]*?),\smatricula",
            "matricula": r"matricula\s?n?o?\s([\s\S]*?)[,|\s]",
            "tipo_ret": r"",
            "cargo": r"Cargo\s[d|D]?[e|E]?\s([\s\S]*?),",
            "classe": r"[C|c]lasse\s([\s\S]*?),",
            "padrao": r"[p|P]adr[a|ã]o\s([\s\S]*?),",
            "quadro": r"d?[e|a|o]?(Quadro[\s\S]*?)[,|;|.]",
            "fundamento": r"nos\stermos\sdo\s([\s\S]*?),\sa?\s",
            "orgao": r"Lotacao: ([\s\S]*?)[.]",
            "vigencia": r"",
            "siape": r"[S|s][I|i][A|a][P|p][E|e]\s[N|n]?[o|O]?\s([\s\S]*?)[,| | .]",
        }

        self._raw_acts = self._extract_instances()
        self._acts = self._acts_props()
        self.data_frame = self._build_dataframe()

    def _act_props(self, sei, act_raw):
        """Build the field dict for one act.

        A field keeps the match only when ``find_in_act`` yields exactly one
        element (the single-target unpacking enforces that); in every other
        case the field is set to the string ``"nan"``.
        """
        act = {"tipo_ato": "Aposentadoria", "sei": sei}
        for key, pattern in self.rules.items():
            try:
                act[key], = self.find_in_act(pattern, act_raw)
            except Exception:
                # Best effort: no match, several matches or an unusable
                # pattern all degrade to the "nan" placeholder.
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dicts for every act found by ``_extract_instances``."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Split the text into acts.

        An act starts at ``APOSENTAR`` or ``CONCEDER APOSENTADORIA`` and runs
        up to the first ``Processo ...`` reference, which ends at ``.``
        followed by whitespace.

        Returns:
            dict: captured process reference -> act body. Acts that share a
            reference overwrite each other (the last one wins).
        """
        start = r"(APOSENTAR|CONCEDER\sAPOSENTADORIA),?\s?"
        body = r"([\s\S]*?)"
        end = r"[P|p]rocesso:?\s[s|S]?[e|E]?[i|I]?\s?[n|N]?[o|O]?\s?([\s\S]*?)[.]\s"
        found = self.find_all(start + body + end)
        # Each instance is unpacked as (start keyword, body, process reference).
        return {sei: act_body for _keyword, act_body, sei in found}

### Reversões

In [None]:
class Revertions(Regex):
    """Locate reversion acts ("reverter a atividade") and extract their fields.

    ``find_all``, ``find_in_act`` and ``_build_dataframe`` come from the
    ``Regex`` base class, which is defined outside this section.

    After construction:
        _raw_acts:  dict mapping the captured process reference to the act body.
        _acts:      list of dicts, one per act, with the extracted fields.
        data_frame: whatever ``_build_dataframe()`` returns.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = ["Tipo do Ato", "SEI", "Nome", "Matrícula", "Cargo",
                         "Classe", "Padrao", "Quadro", "Fundamento Legal",
                         "Orgao", "Vigencia", "Matricula SIAPE"]

        # One pattern per field, applied to the body of each act. Insertion
        # order matters: it defines the key order of every act dict.
        # Raw strings avoid invalid-escape warnings for ``\s``; the pattern
        # text itself is unchanged.
        # NOTE: inside ``[...]`` a ``|`` is a literal character, so classes
        # such as ``[C|c]`` also accept ``|``. Kept verbatim on purpose.
        # Empty patterns mark fields that have no extraction rule yet.
        self.rules = {
            "nome": r"\s([^,]*?),\smatricula",
            "matricula": r"matricula\s?n?o?\s([\s\S]*?)[,| ]",
            "cargo": r"[C|c]argo\s[d|D]?[e|E]?\s([\s\S]*?),",
            "classe": r"[C|c]lasse\s([\s\S]*?),",
            "padrao": r"[p|P]adr[a|ã]o\s([\s\S]*?),",
            "quadro": r"d?[e|a|o]?(Quadro[\s\S]*?)[,|;|.]",
            "fundamento": r"nos\stermos\sdo\s([\s\S]*?),\sa?\s",
            "orgao": r"Lotacao: ([\s\S]*?)[.]",
            "vigencia": r"",
            "siape": r"",
        }

        self._raw_acts = self._extract_instances()
        self._acts = self._acts_props()
        self.data_frame = self._build_dataframe()

    def _act_props(self, sei, act_raw):
        """Build the field dict for one act.

        A field keeps the match only when ``find_in_act`` yields exactly one
        element (the single-target unpacking enforces that); in every other
        case the field is set to the string ``"nan"``.
        """
        act = {"tipo_ato": "Reversão", "sei": sei}
        for key, pattern in self.rules.items():
            try:
                act[key], = self.find_in_act(pattern, act_raw)
            except Exception:
                # Best effort: no match, several matches or an unusable
                # pattern all degrade to the "nan" placeholder.
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dicts for every act found by ``_extract_instances``."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Split the text into acts (searched case-insensitively).

        An act starts at ``reverter a atividade`` and runs up to the first
        ``Processo ...`` reference, which ends at ``.`` followed by whitespace.

        Returns:
            dict: captured process reference -> act body. Acts that share a
            reference overwrite each other (the last one wins).
        """
        start = r"(reverter\sa\satividade),?\s?"
        body = r"([\s\S]*?)"
        end = r"[P|p]rocesso:?\s[s|S]?[e|E]?[i|I]?\s?[n|N]?[o|O]?\s?([\s\S]*?)[.]\s"
        # TODO(review): alternative terminators were drafted here but never
        # used in the rule: "Processo de Reversao", "Processo de Reversao
        # Sigiloso" and "Processo de Reversao PGDF SEI". Wire them in if those
        # variants need to be recognised.
        found = self.find_all(start + body + end, re.IGNORECASE)
        # Each instance is unpacked as (start keyword, body, process reference).
        return {sei: act_body for _keyword, act_body, sei in found}


## Abono de Permanência

In [11]:
class stayAllowance(Regex):
    """Locate "Abono de Permanência" acts in a text (work in progress).

    ``find_all`` and ``find_in_act`` come from the ``Regex`` base class, which
    is defined outside this section. Only the raw instances are collected on
    construction; field extraction and DataFrame building are not called yet.
    """

    def __init__(self, text):
        super().__init__(text)
        # Fix: a comma was missing after "Órgão", which silently merged it
        # with "Processo GDF/SEI" into a single column name.
        self._columns = ["Nome do Servidor", "Matrícula", "Cargo Efetivo", "Classe",
                         "Padrão", "Quadro pessoal permanente ou Suplementar",
                         "Fundamento Legal do abono de permanência", "Órgão",
                         "Processo GDF/SEI", "Vigencia", "Matricula SIAPE"]

        # One pattern per field, applied to the body of each act.
        # Raw strings avoid invalid-escape warnings; ``\n`` inside a raw
        # pattern is read by the regex engine as a newline, so the same text
        # is matched as before.
        # NOTE: inside ``[...]`` a ``|`` is a literal character, so classes
        # such as ``[C|c]`` also accept ``|``. Kept verbatim on purpose.
        # NOTE(review): "processo" and "vigencia" have no capturing group,
        # unlike the other rules. Confirm this is intended.
        self.rules = {
            "nome": r"\s([^,]*?),\smatricula",
            "matricula": r"matricula\s?n?o?\s([\s\S]*?)[,|\s]",
            "cargo": r"Cargo\s[d|D]?[e|E]?\s([\s\S]*?),",
            "classe": r"[C|c]lasse\s([\s\S]*?),",
            "padrao": r"[p|P]adr[a|ã]o\s([\s\S]*?),",
            "quadro": r"d?[e|a|o]?(Quadro[\s\S]*?)[,|;|.]",
            "fundamento": r"nos\stermos\sdo\s([\s\S]*?),\sa?\s",
            "orgao": r"Lotacao: ([\s\S]*?)[.]",
            "processo": r"Processo SEI: [\s\S]*?\.\n",
            "vigencia": r"a contar de [\s\S]*?\,",
            "siape": r"[S|s][I|i][A|a][P|p][E|e]\s[N|n]?[o|O]?\s([\s\S]*?)[,| | .]",
        }

        self._raw_acts = self._extract_instances()
        # TODO: self._acts_props() and self._build_dataframe() are not called
        # here yet, so no _acts / data_frame attributes are set.

    def _act_props(self, sei, act_raw):
        """Build the field dict for one act.

        A field keeps the match only when ``find_in_act`` yields exactly one
        element (the single-target unpacking enforces that); in every other
        case the field is set to the string ``"nan"``.
        """
        act = {"tipo_ato": "Abono Permência", "sei": sei}
        for key, pattern in self.rules.items():
            try:
                act[key], = self.find_in_act(pattern, act_raw)
            except Exception:
                # Best effort: no match, several matches or an unusable
                # pattern all degrade to the "nan" placeholder.
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dicts for every entry of ``_raw_acts``."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Find the acts with a compiled, case-insensitive pattern.

        An act starts at ``Abono DE PERMANENCIA ao`` / ``... equiva`` and ends
        at the first ``.`` followed by a newline.

        Returns:
            dict built from the unpacked instances (last value -> middle value).
        """
        # The original also assembled a start/body/end rule that was
        # overwritten before use; only the effective pattern is kept.
        rule = re.compile(r"(Abono DE PERMANENCIA (ao|equiva)[\s\S]*?\.\n)",
                          re.IGNORECASE)
        found = self.find_all(rule)
        results = {}
        for instance in found:
            # NOTE(review): the pattern has two capturing groups but three
            # values are unpacked. If find_all returns re.findall-style
            # tuples this raises ValueError on the first match. Confirm
            # against the Regex base class.
            _start, body, sei = instance
            results[sei] = body
        return results

## Retificações - (Aposentadorias)

In [None]:
class ratificationsRetirements(Regex):
    """Locate rectifications of retirement acts in a text (work in progress).

    ``find_all`` and ``find_in_act`` come from the ``Regex`` base class, which
    is defined outside this section. Only the raw instances are collected on
    construction; field extraction and DataFrame building are not called yet.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = ["Tipo de Documento", "Número do documento", "Data do documento ",
                         "Número do DODF", "Data do DODF", "Página do DODF", "Nome do Servidor",
                         "Matrícula", "Cargo", "Classe", "Padrao", "Matricula SIAPE",
                         "Informação Errada", "Informação Corrigida"]

        # One pattern per field, applied to the body of each act.
        # Raw strings avoid invalid-escape warnings for ``\s``; the pattern
        # text itself is unchanged.
        # NOTE: inside ``[...]`` a ``|`` is a literal character, so classes
        # such as ``[C|c]`` also accept ``|``. Kept verbatim on purpose.
        # Empty patterns mark fields that have no extraction rule yet.
        self.rules = {
            "Tipo doc": r"",
            "Num doc": r"",
            "Data doc": r"",
            "Num dodf": r"",
            "Data dodf": r"",
            "Pag dodf": r"",
            "nome": r"\s([^,]*?),\smatricula",
            "matricula": r"matricula\s?n?o?\s([\s\S]*?)[,|\s]",
            "cargo": r"Cargo\s[d|D]?[e|E]?\s([\s\S]*?),",
            "classe": r"[C|c]lasse\s([\s\S]*?),",
            "padrao": r"[p|P]adr[a|ã]o\s([\s\S]*?),",
            "siape": r"[S|s][I|i][A|a][P|p][E|e]\s[N|n]?[o|O]?\s([\s\S]*?)[,| | .]",
            "Info errada": r"",
            "Info corrigida": r"",
        }

        self._raw_acts = self._extract_instances()
        # TODO: self._acts_props() and self._build_dataframe() are not called
        # here yet, so no _acts / data_frame attributes are set.

    def _act_props(self, sei, act_raw):
        """Build the field dict for one act.

        A field keeps the match only when ``find_in_act`` yields exactly one
        element (the single-target unpacking enforces that); in every other
        case the field is set to the string ``"nan"``.
        """
        act = {"tipo_ato": "Retificações-(Aposentadorias)", "sei": sei}
        for key, pattern in self.rules.items():
            try:
                act[key], = self.find_in_act(pattern, act_raw)
            except Exception:
                # Best effort: no match, several matches or an unusable
                # pattern all degrade to the "nan" placeholder.
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dicts for every entry of ``_raw_acts``."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Find the rectification acts and print the collected dict.

        An act starts at ``RETIFICAR,`` followed on the same line by
        ``ato que concedeu aposentadoria`` and ends at the first ``.``
        followed by a newline.

        Returns:
            dict built from the unpacked instances (last value -> the value
            before it).
        """
        start = r"(RETIFICAR,.*?ato\sque\sconcedeu\saposentadoria[\s\S]*?)"
        end = r"(\.\n)"
        found = self.find_all(start + end)
        results = {}
        for instance in found:
            # NOTE(review): the last group always matches exactly ".\n", so if
            # find_all returns re.findall-style tuples every act lands on the
            # same key and only the last one survives. Confirm and pick a
            # real identifier as the key.
            *_leading, body, sei = instance
            results[sei] = body

        # Debug output, shown as the notebook cell output on construction.
        print(results)
        return results

## Substituição de Funções

In [None]:
class roleReplacement(Regex):
    """Locate role-substitution acts ("DESIGNAR ... para substituir") in a text.

    Work in progress: ``find_all`` and ``find_in_act`` come from the ``Regex``
    base class, which is defined outside this section. Only the raw instances
    are collected on construction; field extraction and DataFrame building
    are not called yet.
    """

    def __init__(self, text):
        super().__init__(text)
        # Fix: a comma was missing after "Matrícula do Servidor a ser
        # Substituido", which silently merged it with "Cargo" into a single
        # column name.
        self._columns = ["Nome do Servidor Substituto", "Matrícula do Servidor Substituto",
                         "Nome do Servidor a ser Substituido",
                         "Matrícula do Servidor a ser Substituido",
                         "Cargo", "Símbolo do cargo do servidor substituto",
                         "Cargo comissionado objeto da substituição",
                         "Símbolo do cargo comissionado objeto da substituição",
                         "Hierarquia da Lotação", "Órgão", "Data Inicial da Vigência",
                         "Data Final de Vigência", "Matrícula SIAPE", "Motivo"]

        # One pattern per field, applied to the body of each act.
        # Raw strings avoid invalid-escape warnings for ``\s``; the pattern
        # text itself is unchanged.
        # NOTE: inside ``[...]`` a ``|`` is a literal character, so classes
        # such as ``[d|D]`` also accept ``|``. Kept verbatim on purpose.
        # Empty patterns mark fields that have no extraction rule yet.
        self.rules = {
            "Nome Serv Substituto": r"",
            "Matricula Serv Substituto": r"",
            "Nome do Servidor a ser Substituido": r"",
            "Matrícula do Servidor a ser Substituido": r"",
            "cargo": r"Cargo\s[d|D]?[e|E]?\s([\s\S]*?),",
            "Símbolo do cargo do servidor substituto": r"",
            "Cargo comissionado objeto da substituição": r"",
            "Hierarquia da Lotação": r"",
            "orgao": r"Lotacao: ([\s\S]*?)[.]",
            "Data Inicial da Vigência": r"",
            "Data Final de Vigência": r"",
            "siape": r"[S|s][I|i][A|a][P|p][E|e]\s[N|n]?[o|O]?\s([\s\S]*?)[,| | .]",
            "Motivo": r"",
        }

        self._raw_acts = self._extract_instances()
        # TODO: self._acts_props() and self._build_dataframe() are not called
        # here yet, so no _acts / data_frame attributes are set.

    def _act_props(self, sei, act_raw):
        """Build the field dict for one act.

        A field keeps the match only when ``find_in_act`` yields exactly one
        element (the single-target unpacking enforces that); in every other
        case the field is set to the string ``"nan"``.
        """
        act = {"tipo_ato": "Substituição de Funções", "sei": sei}
        for key, pattern in self.rules.items():
            try:
                act[key], = self.find_in_act(pattern, act_raw)
            except Exception:
                # Best effort: no match, several matches or an unusable
                # pattern all degrade to the "nan" placeholder.
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dicts for every entry of ``_raw_acts``."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Find the substitution acts and print the collected dict.

        An act starts at ``DESIGNAR``, must contain ``para substituir`` on the
        same line and ends at the first ``.`` followed by a newline.

        Returns:
            dict built from the unpacked instances (third value -> second value).
        """
        start = r"(DESIGNAR)"
        body = r"(.*?para\ssubstituir[\s\S]*?)"
        end = r"(\.\n)"
        found = self.find_all(start + body + end)
        results = {}
        for instance in found:
            # NOTE(review): the third group always matches exactly ".\n", so
            # if find_all returns re.findall-style tuples every act lands on
            # the same key and only the last one survives. Confirm and pick a
            # real identifier as the key.
            _keyword, act_body, sei = instance
            results[sei] = act_body

        # Debug output, shown as the notebook cell output on construction.
        print(results)
        return results

## Exoneração

In [None]:
class Exoneracao(Regex):
    """Locate dismissal acts ("EXONERAR") in a text and extract their fields.

    ``find_all``, ``find_in_act`` and ``_build_dataframe`` come from the
    ``Regex`` base class, which is defined outside this section.

    After construction:
        _raw_acts:  list with the body of every act found.
        _acts:      list of dicts, one per act, with the extracted fields.
        data_frame: whatever ``_build_dataframe()`` returns.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = ['nome', 'matricula', 'simbolo', 'cargo_comissao', 'lotacao',
                         'orgao', 'vigencia', 'pedido', 'cargo_efetivo', 'siape',
                         'motivo']
        # One pattern per field, applied to the body of each act. Insertion
        # order matters: it defines the key order of every act dict.
        # NOTE: inside ``[...]`` a ``|`` is a literal character, so classes
        # such as ``[S|s]`` also accept ``|``. Kept verbatim on purpose.
        # Empty patterns mark fields that have no extraction rule yet.
        self.rules = {
            "nome": r"([A-ZÀ-Ž\s]+[A-ZÀ-Ž])",
            "matricula": r"matr[í|i]cula\s?n?o?\s([\s\S]*?)[,|\s]",
            "simbolo": r"[S|s][í|i]mbolo\s?n?o?\s([\s\S]*?)[,|\s]",
            "cargo_comissao": r"",
            "lotacao": r"",
            "orgao": r"",
            "vigencia": r"",
            "pedido": r"(a pedido)",
            "cargo_efetivo": r"",
            "siape": r"[S|s][I|i][A|a][P|p][E|e]\s[N|n]*[o|O]*\s?([\s\S]*?)[,| | .]",
            "motivo": r"",
        }
        self._raw_acts = self._extract_instances()
        self._acts = self._acts_props()
        self.data_frame = self._build_dataframe()

    def _act_props(self, act_raw):
        """Build the field dict for one act.

        A field keeps the match only when ``find_in_act`` yields exactly one
        element (the single-target unpacking enforces that); in every other
        case the field is set to the string ``"nan"``.
        """
        act = {}
        for key, pattern in self.rules.items():
            try:
                act[key], = self.find_in_act(pattern, act_raw)
            except Exception:
                # Best effort: no match, several matches or an unusable
                # pattern all degrade to the "nan" placeholder.
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dicts for every act body in ``_raw_acts``."""
        return [self._act_props(raw) for raw in self._raw_acts]

    def _extract_instances(self):
        """Return the body of every act, in the order ``find_all`` yields them.

        An act starts at ``EXONERAR`` and ends at the first ``.`` that follows.
        """
        start = r"(EXONERAR)"
        body = r"([\s\S]*?)"
        end = r"\."  # raw string: "\." is an invalid escape in a plain literal
        found = self.find_all(start + body + end)
        # Each instance is unpacked as (start keyword, body).
        return [act_body for _keyword, act_body in found]

## Nomeação

In [None]:
class NomeacaoComissionados(Regex):
    """Locate appointment acts ("NOMEAR") in a text and extract their fields.

    ``find_all``, ``find_in_act`` and ``_build_dataframe`` come from the
    ``Regex`` base class, which is defined outside this section.

    The cell originally contained this class twice, with the second copy
    fused onto the first copy's final ``return results`` line (a syntax
    error). The two copies were identical, so a single definition is kept.

    After construction:
        _raw_acts:  list with the body of every act found.
        _acts:      list of dicts, one per act, with the extracted fields.
        data_frame: whatever ``_build_dataframe()`` returns.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = ['nome', 'cargo_efetivo', 'matricula', 'siape', 'simbolo',
                         'cargo_comissao', 'lotacao', 'orgao']
        # One pattern per field, applied to the body of each act. Insertion
        # order matters: it defines the key order of every act dict.
        # NOTE: inside ``[...]`` a ``|`` is a literal character, so classes
        # such as ``[S|s]`` also accept ``|``. Kept verbatim on purpose.
        # Empty patterns mark fields that have no extraction rule yet.
        self.rules = {
            "nome": r"(^[A-ZÀ-Ž\s]+[A-ZÀ-Ž])",
            "cargo_efetivo": r"",
            "matricula": r"matr[í|i]cula\s?n?o?\s([\s\S]*?)[,|\s]",
            "siape": r"[S|s][I|i][A|a][P|p][E|e]\s[N|n]?[o|O]?\s([\s\S]*?)[,| | .]",
            "simbolo": r"[S|s][í|i]mbolo\s?n?o?\s([\s\S]*?)[,|\s]",
            "cargo_comissao": r"",
            "lotacao": r"",
            "orgao": r"",
        }
        self._raw_acts = self._extract_instances()
        self._acts = self._acts_props()
        self.data_frame = self._build_dataframe()

    def _act_props(self, act_raw):
        """Build the field dict for one act.

        A field keeps the match only when ``find_in_act`` yields exactly one
        element (the single-target unpacking enforces that); in every other
        case the field is set to the string ``"nan"``.
        """
        act = {}
        for key, pattern in self.rules.items():
            try:
                act[key], = self.find_in_act(pattern, act_raw)
            except Exception:
                # Best effort: no match, several matches or an unusable
                # pattern all degrade to the "nan" placeholder.
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dicts for every act body in ``_raw_acts``."""
        return [self._act_props(raw) for raw in self._raw_acts]

    def _extract_instances(self):
        """Return the body of every act, in the order ``find_all`` yields them.

        An act starts at ``NOMEAR`` and ends at the first ``.`` that follows.
        """
        start = r"(NOMEAR)"
        body = r"([\s\S]*?)"
        end = r"\."  # raw string: "\." is an invalid escape in a plain literal
        found = self.find_all(start + body + end)
        # Each instance is unpacked as (start keyword, body).
        return [act_body for _keyword, act_body in found]