# Regras Regex dos Atos

### Importações

In [None]:
import re
import pandas as pd

from os import listdir
from os.path import isfile, join



### Atos a serem extraídos

In [None]:
df = pd.DataFrame([["Aposentadorias", "'CONCEDER', 'APOSENTAR'", "Não"],
                  ["Reversões", "REVERTER A ATIVIDADE", "Não"],
                  ["Atos tonados sem efeitos (Aposentadoria)", "TORNAR SEM EFEITO", "Sim"]],
                  index = [4, 5, 6],
                  columns = ["Atos", "Regex", "Multiplos"])

style_df = pd.DataFrame(df).style.set_properties(**{'text-align': 'left'}).set_table_styles([ dict(selector='th', props=[('text-align','left')] ) ])
style_df


In [None]:
def print_dataframe(df):
    style_df = (df.style.set_properties(**{'text-align': 'left'})
                                        .set_table_styles([ dict(selector='th',
                                                                 props=[('text-align','left')])])
                   )
    return style_df

def get_txts(path):
    """Collect the files stored two directory levels below *path*.

    The expected layout is ``path/<year>/<month>/<file>``. Entries that are
    regular files at the first two levels are ignored, and only regular
    files are collected at the third level. The result follows the order
    in which ``listdir`` yields the entries.
    """

    def non_file_entries(parent):
        # Anything that is not a regular file is treated as a sub-folder.
        return [
            join(parent, entry)
            for entry in listdir(parent)
            if not isfile(join(parent, entry))
        ]

    collected = []
    for year_dir in non_file_entries(path):
        for month_dir in non_file_entries(year_dir):
            for entry in listdir(month_dir):
                candidate = join(month_dir, entry)
                if isfile(candidate):
                    collected.append(candidate)
    return collected



### Diretórios

In [None]:
# Input folders holding the converted text files, plus the results folder.
# NOTE(review): judging by the folder names, the two inputs presumably differ
# in how line breaks were rendered (spaces vs. "\n") -- confirm.
dodfs_space_dir = "./data/dodfs_txt_espaco"
dodfs_n_dir = "./data/dodfs_txt_barra_n"
output = "./results"

# File listings for each input folder (<year>/<month>/<file> layout).
dodfs_space_files = get_txts(dodfs_space_dir)
dodfs_n_files = get_txts(dodfs_n_dir)

## Classes

In [None]:
# Load one sample document (the second entry of the listing) for ad-hoc
# experiments. The context manager closes the handle once the text is read.
with open(dodfs_n_files[1], "r") as test_file:
    test_txt = test_file.read()

### REGEX (Classe Base)

In [None]:
class Regex:
    """Base class holding a text plus helpers to run regex rules against it.

    Subclasses are expected to fill ``_raw_acts``, ``_acts`` and
    ``_columns`` and then call ``_build_dataframe``.
    """

    def __init__(self, text):
        self._text = text
        self._raw_acts = dict()
        self._acts = list()
        self._columns = list()
        self.data_frame = pd.DataFrame()

    def find_all(self, rule, flag=0):
        """Return every match of *rule* in the stored text (``findall`` semantics)."""
        pattern = re.compile(rule, flags=flag)
        return pattern.findall(self._text)

    def find_in_act(self, rule, act):
        """Return the captured groups of the first match of *rule* in *act*.

        When there is no match the string ``"nan"`` is returned instead of
        a tuple.
        """
        found = re.search(rule, act)
        return found.groups() if found else "nan"

    def _build_dataframe(self):
        """Turn the collected acts into a DataFrame labelled with ``_columns``."""
        if len(self._acts) == 0:
            return pd.DataFrame()
        frame = pd.DataFrame(self._acts)
        frame.columns = self._columns
        return frame
    

### Aposentadorias

In [None]:
class Retirements(Regex):
    """Extract retirement acts ("Aposentadoria") from a text.

    An act starts at ``APOSENTAR`` or ``CONCEDER APOSENTADORIA`` and ends at
    the following "Processo ..." reference. Each act is then scanned with the
    per-field patterns in ``rules``. The result is exposed as ``data_frame``,
    one row per act, with the columns listed in ``_columns``.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = ["Tipo do Ato", "SEI", "Nome", "Matrícula", "Tipo de Aposentadoria", "Cargo", "Classe",
                         "Padrao", "Quadro", "Fundamento Legal", "Orgao", "Vigencia", "Matricula SIAPE"]

        # One pattern per field; every usable pattern captures exactly one
        # group. Raw strings keep "\s"/"\S" from being read as (invalid)
        # string escapes. Empty patterns are fields without a rule yet and
        # always produce "nan".
        self.rules = {"nome": r"\s([^,]*?),\smatricula",
                      "matricula": r"matricula\s?n?o?\s([\s\S]*?)[,|\s]",
                      "tipo_ret": r"",
                      "cargo": r"Cargo\s[d|D]?[e|E]?\s([\s\S]*?),",
                      "classe": r"[C|c]lasse\s([\s\S]*?),",
                      "padrao": r"[p|P]adr[a|ã]o\s([\s\S]*?),",
                      "quadro": r"d?[e|a|o]?(Quadro[\s\S]*?)[,|;|.]",
                      "fundamento": r"nos\stermos\sdo\s([\s\S]*?),\sa?\s",
                      "orgao": r"Lotacao: ([\s\S]*?)[.]",
                      "vigencia": r"",
                      "siape": r"[S|s][I|i][A|a][P|p][E|e]\s[N|n]?[o|O]?\s([\s\S]*?)[,| | .]"}

        self._raw_acts = self._extract_instances()
        self._acts = self._acts_props()
        self.data_frame = self._build_dataframe()

    def _act_props(self, sei, act_raw):
        """Build the field dictionary for one act.

        Args:
            sei: process identifier captured for the act.
            act_raw: text of the act body.

        Returns:
            A dict with "tipo_ato", "sei" and one entry per key of ``rules``.
            A field is "nan" when its rule does not match or does not
            capture exactly one group.
        """
        act = {"tipo_ato": "Aposentadoria", "sei": sei}
        for key, rule in self.rules.items():
            groups = self.find_in_act(rule, act_raw)
            # find_in_act returns a tuple of groups on a match and the string
            # "nan" otherwise. Only a single captured group is a usable value.
            # This explicit check replaces a bare ``except`` around tuple
            # unpacking, which also hid unrelated errors.
            if isinstance(groups, tuple) and len(groups) == 1:
                act[key] = groups[0]
            else:
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dictionaries for every extracted act."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Find the acts in the text and return them as ``{sei: body}``.

        NOTE: acts that share the same captured process identifier overwrite
        each other, because the identifier is used as the dictionary key.
        """
        start = r"(APOSENTAR|CONCEDER\sAPOSENTADORIA),?\s?"
        body = r"([\s\S]*?)"
        end = r"[P|p]rocesso:?\s[s|S]?[e|E]?[i|I]?\s?[n|N]?[o|O]?\s?([\s\S]*?)[.]\s"
        found = self.find_all(start + body + end)
        # Each match is (trigger, body, sei); the trigger text is not kept.
        return {sei: act_body for _, act_body, sei in found}

In [None]:
# Run the retirement extraction over every file and gather non-empty results.
res_dfs = []
for txt in dodfs_n_files:
    # The context manager closes each handle instead of leaking one per file.
    with open(txt, "r") as txt_file:
        txt_str = txt_file.read()
    ret = Retirements(txt_str)
    if not ret.data_frame.empty:
        res_dfs.append(ret.data_frame)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no file produced any act.
rets_final = pd.concat(res_dfs, ignore_index=True) if res_dfs else pd.DataFrame()
print_dataframe(rets_final)



### Reversões

In [None]:
class Revertions(Regex):
    """Extract reversion acts ("Reversão") from a text.

    An act starts at "reverter a atividade" (matched case-insensitively) and
    ends at the following "Processo ..." reference. Each act is then scanned
    with the per-field patterns in ``rules``. The result is exposed as
    ``data_frame``, one row per act, with the columns listed in ``_columns``.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = ["Tipo do Ato", "SEI", "Nome", "Matrícula", "Cargo", "Classe",
                         "Padrao", "Quadro", "Fundamento Legal", "Orgao", "Vigencia", "Matricula SIAPE"]

        # One pattern per field; every usable pattern captures exactly one
        # group. Raw strings keep "\s"/"\S" from being read as (invalid)
        # string escapes. Empty patterns are fields without a rule yet and
        # always produce "nan".
        self.rules = {"nome": r"\s([^,]*?),\smatricula",
                      "matricula": r"matricula\s?n?o?\s([\s\S]*?)[,| ]",
                      "cargo": r"[C|c]argo\s[d|D]?[e|E]?\s([\s\S]*?),",
                      "classe": r"[C|c]lasse\s([\s\S]*?),",
                      "padrao": r"[p|P]adr[a|ã]o\s([\s\S]*?),",
                      "quadro": r"d?[e|a|o]?(Quadro[\s\S]*?)[,|;|.]",
                      "fundamento": r"nos\stermos\sdo\s([\s\S]*?),\sa?\s",
                      "orgao": r"Lotacao: ([\s\S]*?)[.]",
                      "vigencia": r"",
                      "siape": r""}

        self._raw_acts = self._extract_instances()
        self._acts = self._acts_props()
        self.data_frame = self._build_dataframe()

    def _act_props(self, sei, act_raw):
        """Build the field dictionary for one act.

        Args:
            sei: process identifier captured for the act.
            act_raw: text of the act body.

        Returns:
            A dict with "tipo_ato", "sei" and one entry per key of ``rules``.
            A field is "nan" when its rule does not match or does not
            capture exactly one group.
        """
        act = {"tipo_ato": "Reversão", "sei": sei}
        for key, rule in self.rules.items():
            groups = self.find_in_act(rule, act_raw)
            # find_in_act returns a tuple of groups on a match and the string
            # "nan" otherwise. Only a single captured group is a usable value.
            # This explicit check replaces a bare ``except`` around tuple
            # unpacking, which also hid unrelated errors.
            if isinstance(groups, tuple) and len(groups) == 1:
                act[key] = groups[0]
            else:
                act[key] = "nan"
        return act

    def _acts_props(self):
        """Return the field dictionaries for every extracted act."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Find the acts in the text and return them as ``{sei: body}``.

        NOTE: acts that share the same captured process identifier overwrite
        each other, because the identifier is used as the dictionary key.
        """
        start = r"(reverter\sa\satividade),?\s?"
        body = r"([\s\S]*?)"
        end = r"[P|p]rocesso:?\s[s|S]?[e|E]?[i|I]?\s?[n|N]?[o|O]?\s?([\s\S]*?)[.]\s"
        # Narrower endings for "Processo de Reversao ..." wordings used to be
        # assigned here as locals, but never took part in the rule. They were
        # dropped as dead code.
        found = self.find_all(start + body + end, re.IGNORECASE)
        # Each match is (trigger, body, sei); the trigger text is not kept.
        return {sei: act_body for _, act_body, sei in found}


### Atos Tornados Sem Efeito (Aposentadoria)

In [None]:
# Run the reversion extraction over every file and gather non-empty results.
res_dfs = []
for txt in dodfs_n_files:
    # The context manager closes each handle instead of leaking one per file.
    with open(txt, "r") as txt_file:
        txt_str = txt_file.read()
    rev = Revertions(txt_str)
    if not rev.data_frame.empty:
        res_dfs.append(rev.data_frame)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no file produced any act.
revs_final = pd.concat(res_dfs, ignore_index=True) if res_dfs else pd.DataFrame()
print_dataframe(revs_final)


In [None]:
class NonEffect(Regex):
    """Placeholder extractor for acts rendered without effect (retirement).

    No columns, field rules or extraction rule are defined yet, so an
    instance always ends up with an empty ``data_frame``. The method bodies
    used to be comment-only copies of the retirement extractor, which made
    the class fail to parse; they are now valid stubs.
    """

    def __init__(self, text):
        super().__init__(text)
        self._columns = []

        self.rules = {}

        self._raw_acts = self._extract_instances()
        self._acts = self._acts_props()
        self.data_frame = self._build_dataframe()

    def _act_props(self, sei, act_raw):
        """Build the field dictionary for one act (not implemented yet).

        Raises:
            NotImplementedError: always, until columns and rules are defined.
        """
        # TODO: define ``_columns``/``rules`` for this act type and build the
        # dictionary the same way the Retirements extractor does.
        raise NotImplementedError(
            "NonEffect has no field rules defined yet"
        )

    def _acts_props(self):
        """Return the field dictionaries for every extracted act."""
        return [self._act_props(sei, raw) for sei, raw in self._raw_acts.items()]

    def _extract_instances(self):
        """Return the extracted acts as ``{sei: body}``.

        No extraction rule exists for this act type yet, so nothing is
        extracted and ``_act_props`` is never reached.
        """
        # TODO: write the start/body/end rule for this act type.
        return {}



#### Testes Unitários