# Preprocessamento:

+ Criar variável que identifique o anotador

+ Mover tags que começam com token ' ' (vazio)

+ Remover linhas com '\n' seguidos

+ REGEX:
    + Garantir letra e números onde tamanho for maior que 1.
    + Passar múltiplos símbolos para outra linha. Exemplo:  §3º -->  § \n 3 \n º
    + Remover pontuação de centenas dos números. Exemplo: 12.200 --> 12200

+ Visualização das sentenças com displacy (from spacy import displacy) 

+ Incluir POS tagging.

+ Tranformar o dado $x_i$ em um $x'_i$ que incorpora os 2 últimos e próximos tokens.

# Classificador

+ Visualização: Separar o conjunto de test em 2 ou 3 arquivos e visualizar o que o modelo classificou e o que os anotadores classificaram (separar por id do anotador).

+ Parâmetros utilizados no classificador.

+ Analisar o formato dos dados que tem maior acerto e menor acerto tambem.

+ Para criar um contexto no erro imprimir 10 palavras antes e depois de dois erros.


# Instruções para os anotadores:

+ Atentar à marcação de tags que envolve espaço 

+ Atentar para não incluir espaço no início da Tag

# Organização do diretório: 

+ Manter toda a análise em somente um diretório

+ Formato do diretório com os datasets: /resources/dataset/

+ Notebooks:
    + '01 - Processamento.ipynb'
        + Gerar 'treino.csv' e teste.csv' para processamento
    + '02 - [CRF].ipynb' - Criando o modelo
        + Usar arquivo dos dados preprocessados gerado pelo notebook 1.
        + Gerar modelo (xxx.model)
    + '03 - Metricas.ipynb'

In [1]:
import os
import glob
import pandas as pd
import numpy as np
import re 
from collections import Counter

from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV
import time
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

In [2]:
# Encontra todos os csv's dentro das pastas de 'mock'

extension = 'csv'
all_filenames = [i for i in glob.glob('mock/*/**/***/****.{}'.format(extension))]

In [3]:
all_filenames[:3]

['mock/161704902/[PRATICA_ETAPA_1]/Documentos/20180510_Rcl_22328_314302526.ner.csv',
 'mock/161704902/[PRATICA_ETAPA_1]/Documentos/20170904_ARE_1062176_312610974.ner.csv',
 'mock/161704902/[PRATICA_ETAPA_1]/Documentos/20161121_ARE_1009041_310751428.ner.csv']

# Preprocessamento 

In [4]:
# Cria uma tag de inicio e fim de arquivo em cada 'csv' antes de apendar todos eles.

frames = []
for all_files in all_filenames:
    df = pd.read_csv(all_files,delimiter=';', na_values='NaN') # Lê o arquivo
    df['Tag'].iloc[0] , df['Tag'].iloc[-1] = ['INICIO_ARQ', 'FIM_ARQ'] # Altera a primeira e ultima Tag desse csv
    frames.append(df) # Adiciona esse dataframe no 'dataframe maior'
    
combined_csv = pd.concat(frames).reset_index(drop=True)
combined_csv.to_csv("combined_csv.csv",index=False,encoding='utf-8') # Cria um arquivo com todas as anotações.
combined_csv['Token'] = combined_csv['Token'].astype('str')

In [5]:
combined_csv.head(15), combined_csv.tail(20)

(         Token         Tag
 0       Ementa  INICIO_ARQ
 1                        O
 2            e           O
 3                        O
 4      Acórdão           O
 5                        O
 6   06/03/2018           O
 7                        O
 8     PRIMEIRA           O
 9                        O
 10       TURMA           O
 11                       O
 12          \n           O
 13          \n           O
 14  RECLAMAÇÃO           O,                               Token      Tag
 967890                           de        O
 967891                                     O
 967892                     Oliveira        O
 967893                                     O
 967894                       Duarte        O
 967895                                     O
 967896                           \n        O
 967897                           \n        O
 967898                    Assessora        O
 967899                            -        O
 967900                        Chefe        O


In [6]:
print("Número de linhas dos arquivos concatenados:", len(combined_csv['Tag']))

Número de linhas dos arquivos concatenados: 967910


In [7]:
combined_csv[-10:] # Conferindo se o index foi resetado

Unnamed: 0,Token,Tag
967900,Chefe,O
967901,,O
967902,do,O
967903,,O
967904,Plenário,O
967905,\n,O
967906,id,O
967907,:,O
967908,,O
967909,20141203_ADI_4350_285683668,FIM_ARQ


# Encontra parágrafo duplo no arquivo. Uma opção de separar por sentenças.

In [8]:
a_df = combined_csv #Simplifica o nome do arquivo para a função nao ficar grande demais.
starts = a_df[a_df['Token']=='\n'].index & a_df[a_df['Token'].shift(-1)=='\n'].index #Identifica os paragrafos duplos
print(u'Padrões(sentenças) encontrados:', len(starts))

Padrões(sentenças) encontrados: 9708


In [9]:
%%time

combined_csv['Sentence #'] = 'Sentence'

combined_csv['Sentence #'][:starts[0]+2] = 'Sentence %d'%(1) # Primeira sentença
combined_csv['Sentence #'][starts[-1]+2:] = 'Sentence %d'%(len(starts)+1) # Última sentença

for i in range(1,len(starts)):
    combined_csv['Sentence #'][starts[i-1]+2:starts[i]+2] = 'Sentence %d'%(i+1) 

combined_csv.head(), combined_csv.tail()

CPU times: user 5min 48s, sys: 271 ms, total: 5min 49s
Wall time: 5min 50s


(     Token         Tag  Sentence #
 0   Ementa  INICIO_ARQ  Sentence 1
 1                    O  Sentence 1
 2        e           O  Sentence 1
 3                    O  Sentence 1
 4  Acórdão           O  Sentence 1,
                               Token      Tag     Sentence #
 967905                           \n        O  Sentence 9709
 967906                           id        O  Sentence 9709
 967907                            :        O  Sentence 9709
 967908                                     O  Sentence 9709
 967909  20141203_ADI_4350_285683668  FIM_ARQ  Sentence 9709)

In [10]:
# Número de sentenças
len(combined_csv['Sentence #'].unique())

9709

# Atualiza a Tag que termina com 'Doutrinador' para 'Doutrina'

In [11]:
combined_csv.Tag.unique()

array(['INICIO_ARQ', 'O', 'B_Pessoa', 'I_Pessoa', 'B_Precedente',
       'I_Precedente', 'B_Ref. Legislativa', 'I_Ref. Legislativa',
       'B_Doutrina', 'I_Doutrina', 'FIM_ARQ', 'B_Doutrinador',
       'I_Doutrinador'], dtype=object)

In [12]:
# Strip o final 'dor' de todo o DataFrame (formato extremo)
# combined_csv.Tag = combined_csv.Tag.str.rstrip('dor')

In [13]:
# indices de onde a tag ocorre
indx = combined_csv[combined_csv.Tag.str.endswith('Doutrinador')].index.values

### Rodar apenas uma vez

In [14]:
%%time
# Demorado e custoso
for i in range(len(indx)):
    combined_csv.Tag.iloc[indx[i]] = combined_csv.Tag.iloc[indx[i]].rstrip('dor')

CPU times: user 4min 21s, sys: 192 ms, total: 4min 21s
Wall time: 4min 22s


In [15]:
combined_csv.Tag.unique()

array(['INICIO_ARQ', 'O', 'B_Pessoa', 'I_Pessoa', 'B_Precedente',
       'I_Precedente', 'B_Ref. Legislativa', 'I_Ref. Legislativa',
       'B_Doutrina', 'I_Doutrina', 'FIM_ARQ'], dtype=object)

# Remove enter duplo depois de criar as sentenças

In [18]:
# Teste para ver os casoso onde ocorre enter duplo.
for i in range(len(starts)):
    print(combined_csv.iloc[starts[i]:starts[i]+2][:5])

   Token Tag  Sentence #
12    \n   O  Sentence 1
13    \n   O  Sentence 1
   Token Tag  Sentence #
24    \n   O  Sentence 2
25    \n   O  Sentence 2
   Token Tag  Sentence #
98    \n   O  Sentence 3
99    \n   O  Sentence 3
    Token Tag  Sentence #
114    \n   O  Sentence 4
115    \n   O  Sentence 4
    Token Tag  Sentence #
161    \n   O  Sentence 5
162    \n   O  Sentence 5
    Token       Tag  Sentence #
204    \n  I_Pessoa  Sentence 6
205    \n  I_Pessoa  Sentence 6
    Token       Tag  Sentence #
208    \n  I_Pessoa  Sentence 7
209    \n  I_Pessoa  Sentence 7
    Token Tag  Sentence #
212    \n   O  Sentence 8
213    \n   O  Sentence 8
    Token Tag  Sentence #
263    \n   O  Sentence 9
264    \n   O  Sentence 9
    Token Tag   Sentence #
327    \n   O  Sentence 10
328    \n   O  Sentence 10
    Token Tag   Sentence #
399    \n   O  Sentence 11
400    \n   O  Sentence 11
    Token Tag   Sentence #
467    \n   O  Sentence 12
468    \n   O  Sentence 12
    Token Tag   Sentence #
5

      Token Tag    Sentence #
14945    \n   O  Sentence 155
14946    \n   O  Sentence 155
      Token Tag    Sentence #
14968    \n   O  Sentence 156
14969    \n   O  Sentence 156
      Token Tag    Sentence #
14991    \n   O  Sentence 157
14992    \n   O  Sentence 157
      Token Tag    Sentence #
15040    \n   O  Sentence 158
15041    \n   O  Sentence 158
      Token Tag    Sentence #
15223    \n   O  Sentence 159
15224    \n   O  Sentence 159
      Token Tag    Sentence #
15381    \n   O  Sentence 160
15382    \n   O  Sentence 160
      Token Tag    Sentence #
15441    \n   O  Sentence 161
15442    \n   O  Sentence 161
      Token Tag    Sentence #
15476    \n   O  Sentence 162
15477    \n   O  Sentence 162
      Token       Tag    Sentence #
15573    \n  I_Pessoa  Sentence 163
15574    \n  I_Pessoa  Sentence 163
      Token Tag    Sentence #
15771    \n   O  Sentence 164
15772    \n   O  Sentence 164
      Token Tag    Sentence #
15845    \n   O  Sentence 165
15846    \n   O  Sente

      Token Tag    Sentence #
33643    \n   O  Sentence 310
33644    \n   O  Sentence 310
      Token Tag    Sentence #
33752    \n   O  Sentence 311
33753    \n   O  Sentence 311
      Token                 Tag    Sentence #
33779    \n  I_Ref. Legislativa  Sentence 312
33780    \n  I_Ref. Legislativa  Sentence 312
      Token Tag    Sentence #
33786    \n   O  Sentence 313
33787    \n   O  Sentence 313
      Token Tag    Sentence #
33857    \n   O  Sentence 314
33858    \n   O  Sentence 314
      Token Tag    Sentence #
33997    \n   O  Sentence 315
33998    \n   O  Sentence 315
      Token Tag    Sentence #
34090    \n   O  Sentence 316
34091    \n   O  Sentence 316
      Token Tag    Sentence #
34215    \n   O  Sentence 317
34216    \n   O  Sentence 317
      Token Tag    Sentence #
34272    \n   O  Sentence 318
34273    \n   O  Sentence 318
      Token Tag    Sentence #
34507    \n   O  Sentence 319
34508    \n   O  Sentence 319
      Token Tag    Sentence #
34679    \n   O  Sente

      Token Tag    Sentence #
50088    \n   O  Sentence 469
50089    \n   O  Sentence 469
      Token Tag    Sentence #
50181    \n   O  Sentence 470
50182    \n   O  Sentence 470
      Token Tag    Sentence #
50234    \n   O  Sentence 471
50235    \n   O  Sentence 471
      Token Tag    Sentence #
50272    \n   O  Sentence 472
50273    \n   O  Sentence 472
      Token Tag    Sentence #
50298    \n   O  Sentence 473
50299    \n   O  Sentence 473
      Token Tag    Sentence #
50446    \n   O  Sentence 474
50447    \n   O  Sentence 474
      Token Tag    Sentence #
50843    \n   O  Sentence 475
50844    \n   O  Sentence 475
      Token Tag    Sentence #
50987    \n   O  Sentence 476
50988    \n   O  Sentence 476
      Token Tag    Sentence #
51094    \n   O  Sentence 477
51095    \n   O  Sentence 477
      Token Tag    Sentence #
51214    \n   O  Sentence 478
51215    \n   O  Sentence 478
      Token Tag    Sentence #
51265    \n   O  Sentence 479
51266    \n   O  Sentence 479
      Toke

      Token Tag    Sentence #
63313    \n   O  Sentence 629
63314    \n   O  Sentence 629
      Token Tag    Sentence #
63345    \n   O  Sentence 630
63346    \n   O  Sentence 630
      Token Tag    Sentence #
63366    \n   O  Sentence 631
63367    \n   O  Sentence 631
      Token Tag    Sentence #
63490    \n   O  Sentence 632
63491    \n   O  Sentence 632
      Token Tag    Sentence #
63587    \n   O  Sentence 633
63588    \n   O  Sentence 633
      Token Tag    Sentence #
63678    \n   O  Sentence 634
63679    \n   O  Sentence 634
      Token Tag    Sentence #
63738    \n   O  Sentence 635
63739    \n   O  Sentence 635
      Token Tag    Sentence #
63754    \n   O  Sentence 636
63755    \n   O  Sentence 636
      Token Tag    Sentence #
63903    \n   O  Sentence 637
63904    \n   O  Sentence 637
      Token Tag    Sentence #
63912    \n   O  Sentence 638
63913    \n   O  Sentence 638
      Token Tag    Sentence #
63920    \n   O  Sentence 639
63921    \n   O  Sentence 639
      Toke

      Token Tag    Sentence #
78719    \n   O  Sentence 789
78720    \n   O  Sentence 789
      Token Tag    Sentence #
78881    \n   O  Sentence 790
78882    \n   O  Sentence 790
      Token Tag    Sentence #
79048    \n   O  Sentence 791
79049    \n   O  Sentence 791
      Token Tag    Sentence #
79267    \n   O  Sentence 792
79268    \n   O  Sentence 792
      Token Tag    Sentence #
79402    \n   O  Sentence 793
79403    \n   O  Sentence 793
      Token Tag    Sentence #
79526    \n   O  Sentence 794
79527    \n   O  Sentence 794
      Token Tag    Sentence #
79607    \n   O  Sentence 795
79608    \n   O  Sentence 795
      Token Tag    Sentence #
79762    \n   O  Sentence 796
79763    \n   O  Sentence 796
      Token Tag    Sentence #
79997    \n   O  Sentence 797
79998    \n   O  Sentence 797
      Token Tag    Sentence #
80203    \n   O  Sentence 798
80204    \n   O  Sentence 798
      Token Tag    Sentence #
80286    \n   O  Sentence 799
80287    \n   O  Sentence 799
      Toke

      Token Tag    Sentence #
93611    \n   O  Sentence 947
93612    \n   O  Sentence 947
      Token Tag    Sentence #
93668    \n   O  Sentence 948
93669    \n   O  Sentence 948
      Token Tag    Sentence #
93903    \n   O  Sentence 949
93904    \n   O  Sentence 949
      Token Tag    Sentence #
94075    \n   O  Sentence 950
94076    \n   O  Sentence 950
      Token Tag    Sentence #
94182    \n   O  Sentence 951
94183    \n   O  Sentence 951
      Token Tag    Sentence #
94274    \n   O  Sentence 952
94275    \n   O  Sentence 952
      Token Tag    Sentence #
94599    \n   O  Sentence 953
94600    \n   O  Sentence 953
      Token Tag    Sentence #
94728    \n   O  Sentence 954
94729    \n   O  Sentence 954
      Token Tag    Sentence #
95055    \n   O  Sentence 955
95056    \n   O  Sentence 955
      Token Tag    Sentence #
95337    \n   O  Sentence 956
95338    \n   O  Sentence 956
      Token Tag    Sentence #
95454    \n   O  Sentence 957
95455    \n   O  Sentence 957
      Toke

       Token Tag     Sentence #
109273    \n   O  Sentence 1106
109274    \n   O  Sentence 1106
       Token Tag     Sentence #
109289    \n   O  Sentence 1107
109290    \n   O  Sentence 1107
       Token Tag     Sentence #
109297    \n   O  Sentence 1108
109298    \n   O  Sentence 1108
       Token Tag     Sentence #
109301    \n   O  Sentence 1109
109302    \n   O  Sentence 1109
       Token Tag     Sentence #
109316    \n   O  Sentence 1110
109317    \n   O  Sentence 1110
       Token Tag     Sentence #
109390    \n   O  Sentence 1111
109391    \n   O  Sentence 1111
       Token Tag     Sentence #
109474    \n   O  Sentence 1112
109475    \n   O  Sentence 1112
       Token Tag     Sentence #
109655    \n   O  Sentence 1113
109656    \n   O  Sentence 1113
       Token Tag     Sentence #
109688    \n   O  Sentence 1114
109689    \n   O  Sentence 1114
       Token Tag     Sentence #
109827    \n   O  Sentence 1115
109828    \n   O  Sentence 1115
       Token Tag     Sentence #
109894  

123637    \n   O  Sentence 1264
       Token Tag     Sentence #
123684    \n   O  Sentence 1265
123685    \n   O  Sentence 1265
       Token       Tag     Sentence #
123702    \n  I_Pessoa  Sentence 1266
123703    \n  I_Pessoa  Sentence 1266
       Token Tag     Sentence #
123707    \n   O  Sentence 1267
123708    \n   O  Sentence 1267
       Token Tag     Sentence #
123719    \n   O  Sentence 1268
123720    \n   O  Sentence 1268
       Token Tag     Sentence #
123745    \n   O  Sentence 1269
123746    \n   O  Sentence 1269
       Token           Tag     Sentence #
123766    \n  I_Precedente  Sentence 1270
123767    \n  I_Precedente  Sentence 1270
       Token Tag     Sentence #
123844    \n   O  Sentence 1271
123845    \n   O  Sentence 1271
       Token Tag     Sentence #
123939    \n   O  Sentence 1272
123940    \n   O  Sentence 1272
       Token Tag     Sentence #
124017    \n   O  Sentence 1273
124018    \n   O  Sentence 1273
       Token Tag     Sentence #
124033    \n   O  Senten

       Token Tag     Sentence #
131970    \n   O  Sentence 1421
131971    \n   O  Sentence 1421
       Token Tag     Sentence #
131989    \n   O  Sentence 1422
131990    \n   O  Sentence 1422
       Token Tag     Sentence #
132033    \n   O  Sentence 1423
132034    \n   O  Sentence 1423
       Token Tag     Sentence #
132087    \n   O  Sentence 1424
132088    \n   O  Sentence 1424
       Token Tag     Sentence #
132205    \n   O  Sentence 1425
132206    \n   O  Sentence 1425
       Token Tag     Sentence #
132513    \n   O  Sentence 1426
132514    \n   O  Sentence 1426
       Token Tag     Sentence #
132562    \n   O  Sentence 1427
132563    \n   O  Sentence 1427
       Token Tag     Sentence #
132821    \n   O  Sentence 1428
132822    \n   O  Sentence 1428
       Token Tag     Sentence #
133106    \n   O  Sentence 1429
133107    \n   O  Sentence 1429
       Token Tag     Sentence #
133195    \n   O  Sentence 1430
133196    \n   O  Sentence 1430
       Token Tag     Sentence #
133205  

148800    \n   O  Sentence 1581
       Token Tag     Sentence #
148919    \n   O  Sentence 1582
148920    \n   O  Sentence 1582
       Token Tag     Sentence #
149025    \n   O  Sentence 1583
149026    \n   O  Sentence 1583
       Token Tag     Sentence #
149152    \n   O  Sentence 1584
149153    \n   O  Sentence 1584
       Token Tag     Sentence #
149269    \n   O  Sentence 1585
149270    \n   O  Sentence 1585
       Token Tag     Sentence #
149333    \n   O  Sentence 1586
149334    \n   O  Sentence 1586
       Token Tag     Sentence #
149385    \n   O  Sentence 1587
149386    \n   O  Sentence 1587
       Token Tag     Sentence #
149478    \n   O  Sentence 1588
149479    \n   O  Sentence 1588
       Token Tag     Sentence #
149577    \n   O  Sentence 1589
149578    \n   O  Sentence 1589
       Token Tag     Sentence #
149651    \n   O  Sentence 1590
149652    \n   O  Sentence 1590
       Token Tag     Sentence #
149776    \n   O  Sentence 1591
149777    \n   O  Sentence 1591
       T

       Token Tag     Sentence #
164048    \n   O  Sentence 1739
164049    \n   O  Sentence 1739
       Token Tag     Sentence #
164089    \n   O  Sentence 1740
164090    \n   O  Sentence 1740
       Token Tag     Sentence #
164145    \n   O  Sentence 1741
164146    \n   O  Sentence 1741
       Token Tag     Sentence #
164195    \n   O  Sentence 1742
164196    \n   O  Sentence 1742
       Token Tag     Sentence #
164231    \n   O  Sentence 1743
164232    \n   O  Sentence 1743
       Token Tag     Sentence #
164318    \n   O  Sentence 1744
164319    \n   O  Sentence 1744
       Token Tag     Sentence #
164380    \n   O  Sentence 1745
164381    \n   O  Sentence 1745
       Token Tag     Sentence #
164432    \n   O  Sentence 1746
164433    \n   O  Sentence 1746
       Token Tag     Sentence #
164489    \n   O  Sentence 1747
164490    \n   O  Sentence 1747
       Token Tag     Sentence #
164566    \n   O  Sentence 1748
164567    \n   O  Sentence 1748
       Token Tag     Sentence #
164582  

       Token Tag     Sentence #
179565    \n   O  Sentence 1897
179566    \n   O  Sentence 1897
       Token Tag     Sentence #
179678    \n   O  Sentence 1898
179679    \n   O  Sentence 1898
       Token Tag     Sentence #
179836    \n   O  Sentence 1899
179837    \n   O  Sentence 1899
       Token Tag     Sentence #
180081    \n   O  Sentence 1900
180082    \n   O  Sentence 1900
       Token Tag     Sentence #
180181    \n   O  Sentence 1901
180182    \n   O  Sentence 1901
       Token Tag     Sentence #
180342    \n   O  Sentence 1902
180343    \n   O  Sentence 1902
       Token Tag     Sentence #
180486    \n   O  Sentence 1903
180487    \n   O  Sentence 1903
       Token Tag     Sentence #
180584    \n   O  Sentence 1904
180585    \n   O  Sentence 1904
       Token Tag     Sentence #
180605    \n   O  Sentence 1905
180606    \n   O  Sentence 1905
       Token Tag     Sentence #
180701    \n   O  Sentence 1906
180702    \n   O  Sentence 1906
       Token Tag     Sentence #
180850  

       Token Tag     Sentence #
198393    \n   O  Sentence 2046
198394    \n   O  Sentence 2046
       Token           Tag     Sentence #
198876    \n  I_Precedente  Sentence 2047
198877    \n  I_Precedente  Sentence 2047
       Token Tag     Sentence #
198881    \n   O  Sentence 2048
198882    \n   O  Sentence 2048
       Token Tag     Sentence #
199063    \n   O  Sentence 2049
199064    \n   O  Sentence 2049
       Token Tag     Sentence #
199144    \n   O  Sentence 2050
199145    \n   O  Sentence 2050
       Token Tag     Sentence #
199223    \n   O  Sentence 2051
199224    \n   O  Sentence 2051
       Token Tag     Sentence #
199333    \n   O  Sentence 2052
199334    \n   O  Sentence 2052
       Token Tag     Sentence #
199428    \n   O  Sentence 2053
199429    \n   O  Sentence 2053
       Token Tag     Sentence #
199520    \n   O  Sentence 2054
199521    \n   O  Sentence 2054
       Token Tag     Sentence #
199636    \n   O  Sentence 2055
199637    \n   O  Sentence 2055
       Tok

       Token Tag     Sentence #
215480    \n   O  Sentence 2196
215481    \n   O  Sentence 2196
       Token Tag     Sentence #
215562    \n   O  Sentence 2197
215563    \n   O  Sentence 2197
       Token Tag     Sentence #
215626    \n   O  Sentence 2198
215627    \n   O  Sentence 2198
       Token Tag     Sentence #
215771    \n   O  Sentence 2199
215772    \n   O  Sentence 2199
       Token Tag     Sentence #
215811    \n   O  Sentence 2200
215812    \n   O  Sentence 2200
       Token Tag     Sentence #
217059    \n   O  Sentence 2201
217060    \n   O  Sentence 2201
       Token Tag     Sentence #
217240    \n   O  Sentence 2202
217241    \n   O  Sentence 2202
       Token Tag     Sentence #
217253    \n   O  Sentence 2203
217254    \n   O  Sentence 2203
       Token Tag     Sentence #
217653    \n   O  Sentence 2204
217654    \n   O  Sentence 2204
       Token Tag     Sentence #
217711    \n   O  Sentence 2205
217712    \n   O  Sentence 2205
       Token Tag     Sentence #
217720  

234661    \n   O  Sentence 2341
       Token Tag     Sentence #
234793    \n   O  Sentence 2342
234794    \n   O  Sentence 2342
       Token Tag     Sentence #
234860    \n   O  Sentence 2343
234861    \n   O  Sentence 2343
       Token Tag     Sentence #
235030    \n   O  Sentence 2344
235031    \n   O  Sentence 2344
       Token Tag     Sentence #
235124    \n   O  Sentence 2345
235125    \n   O  Sentence 2345
       Token Tag     Sentence #
235323    \n   O  Sentence 2346
235324    \n   O  Sentence 2346
       Token Tag     Sentence #
235454    \n   O  Sentence 2347
235455    \n   O  Sentence 2347
       Token Tag     Sentence #
235474    \n   O  Sentence 2348
235475    \n   O  Sentence 2348
       Token Tag     Sentence #
235482    \n   O  Sentence 2349
235483    \n   O  Sentence 2349
       Token Tag     Sentence #
235687    \n   O  Sentence 2350
235688    \n   O  Sentence 2350
       Token Tag     Sentence #
235705    \n   O  Sentence 2351
235706    \n   O  Sentence 2351
       T

252778    \n  I_Doutrina  Sentence 2499
       Token Tag     Sentence #
253159    \n   O  Sentence 2500
253160    \n   O  Sentence 2500
       Token Tag     Sentence #
253278    \n   O  Sentence 2501
253279    \n   O  Sentence 2501
       Token Tag     Sentence #
253481    \n   O  Sentence 2502
253482    \n   O  Sentence 2502
       Token Tag     Sentence #
253754    \n   O  Sentence 2503
253755    \n   O  Sentence 2503
       Token Tag     Sentence #
254127    \n   O  Sentence 2504
254128    \n   O  Sentence 2504
       Token Tag     Sentence #
254215    \n   O  Sentence 2505
254216    \n   O  Sentence 2505
       Token Tag     Sentence #
254523    \n   O  Sentence 2506
254524    \n   O  Sentence 2506
       Token Tag     Sentence #
254691    \n   O  Sentence 2507
254692    \n   O  Sentence 2507
       Token           Tag     Sentence #
255167    \n  I_Precedente  Sentence 2508
255168    \n  I_Precedente  Sentence 2508
       Token Tag     Sentence #
255178    \n   O  Sentence 2509
25

268853    \n   O  Sentence 2643
       Token Tag     Sentence #
268931    \n   O  Sentence 2644
268932    \n   O  Sentence 2644
       Token Tag     Sentence #
269016    \n   O  Sentence 2645
269017    \n   O  Sentence 2645
       Token Tag     Sentence #
269415    \n   O  Sentence 2646
269416    \n   O  Sentence 2646
       Token Tag     Sentence #
269651    \n   O  Sentence 2647
269652    \n   O  Sentence 2647
       Token Tag     Sentence #
269781    \n   O  Sentence 2648
269782    \n   O  Sentence 2648
       Token Tag     Sentence #
270044    \n   O  Sentence 2649
270045    \n   O  Sentence 2649
       Token Tag     Sentence #
270093    \n   O  Sentence 2650
270094    \n   O  Sentence 2650
       Token Tag     Sentence #
270107    \n   O  Sentence 2651
270108    \n   O  Sentence 2651
       Token Tag     Sentence #
270122    \n   O  Sentence 2652
270123    \n   O  Sentence 2652
       Token Tag     Sentence #
270205    \n   O  Sentence 2653
270206    \n   O  Sentence 2653
       T

       Token Tag     Sentence #
280702    \n   O  Sentence 2795
280703    \n   O  Sentence 2795
       Token Tag     Sentence #
280708    \n   O  Sentence 2796
280709    \n   O  Sentence 2796
       Token Tag     Sentence #
280722    \n   O  Sentence 2797
280723    \n   O  Sentence 2797
       Token Tag     Sentence #
280752    \n   O  Sentence 2798
280753    \n   O  Sentence 2798
       Token Tag     Sentence #
280801    \n   O  Sentence 2799
280802    \n   O  Sentence 2799
       Token Tag     Sentence #
280848    \n   O  Sentence 2800
280849    \n   O  Sentence 2800
       Token Tag     Sentence #
280867    \n   O  Sentence 2801
280868    \n   O  Sentence 2801
       Token Tag     Sentence #
280885    \n   O  Sentence 2802
280886    \n   O  Sentence 2802
       Token Tag     Sentence #
280901    \n   O  Sentence 2803
280902    \n   O  Sentence 2803
       Token Tag     Sentence #
280925    \n   O  Sentence 2804
280926    \n   O  Sentence 2804
       Token Tag     Sentence #
281035  

       Token Tag     Sentence #
294071    \n   O  Sentence 2947
294072    \n   O  Sentence 2947
       Token Tag     Sentence #
294079    \n   O  Sentence 2948
294080    \n   O  Sentence 2948
       Token Tag     Sentence #
294210    \n   O  Sentence 2949
294211    \n   O  Sentence 2949
       Token Tag     Sentence #
294216    \n   O  Sentence 2950
294217    \n   O  Sentence 2950
       Token Tag     Sentence #
294230    \n   O  Sentence 2951
294231    \n   O  Sentence 2951
       Token Tag     Sentence #
294234    \n   O  Sentence 2952
294235    \n   O  Sentence 2952
       Token Tag     Sentence #
294326    \n   O  Sentence 2953
294327    \n   O  Sentence 2953
       Token Tag     Sentence #
294392    \n   O  Sentence 2954
294393    \n   O  Sentence 2954
       Token Tag     Sentence #
294457    \n   O  Sentence 2955
294458    \n   O  Sentence 2955
       Token Tag     Sentence #
294465    \n   O  Sentence 2956
294466    \n   O  Sentence 2956
       Token Tag     Sentence #
294469  

       Token Tag     Sentence #
308099    \n   O  Sentence 3100
308100    \n   O  Sentence 3100
       Token Tag     Sentence #
308239    \n   O  Sentence 3101
308240    \n   O  Sentence 3101
       Token Tag     Sentence #
308418    \n   O  Sentence 3102
308419    \n   O  Sentence 3102
       Token Tag     Sentence #
308611    \n   O  Sentence 3103
308612    \n   O  Sentence 3103
       Token Tag     Sentence #
308738    \n   O  Sentence 3104
308739    \n   O  Sentence 3104
       Token Tag     Sentence #
308782    \n   O  Sentence 3105
308783    \n   O  Sentence 3105
       Token Tag     Sentence #
308826    \n   O  Sentence 3106
308827    \n   O  Sentence 3106
       Token Tag     Sentence #
308961    \n   O  Sentence 3107
308962    \n   O  Sentence 3107
       Token Tag     Sentence #
309021    \n   O  Sentence 3108
309022    \n   O  Sentence 3108
       Token Tag     Sentence #
309067    \n   O  Sentence 3109
309068    \n   O  Sentence 3109
       Token Tag     Sentence #
309095  

       Token Tag     Sentence #
328793    \n   O  Sentence 3256
328794    \n   O  Sentence 3256
       Token Tag     Sentence #
329115    \n   O  Sentence 3257
329116    \n   O  Sentence 3257
       Token Tag     Sentence #
329175    \n   O  Sentence 3258
329176    \n   O  Sentence 3258
       Token Tag     Sentence #
329191    \n   O  Sentence 3259
329192    \n   O  Sentence 3259
       Token Tag     Sentence #
329311    \n   O  Sentence 3260
329312    \n   O  Sentence 3260
       Token Tag     Sentence #
329327    \n   O  Sentence 3261
329328    \n   O  Sentence 3261
       Token Tag     Sentence #
329339    \n   O  Sentence 3262
329340    \n   O  Sentence 3262
       Token Tag     Sentence #
329347    \n   O  Sentence 3263
329348    \n   O  Sentence 3263
       Token           Tag     Sentence #
329368    \n  I_Precedente  Sentence 3264
329369    \n  I_Precedente  Sentence 3264
       Token Tag     Sentence #
329424    \n   O  Sentence 3265
329425    \n   O  Sentence 3265
       Tok

       Token Tag     Sentence #
343774    \n   O  Sentence 3415
343775    \n   O  Sentence 3415
       Token Tag     Sentence #
343812    \n   O  Sentence 3416
343813    \n   O  Sentence 3416
       Token Tag     Sentence #
343910    \n   O  Sentence 3417
343911    \n   O  Sentence 3417
       Token Tag     Sentence #
343984    \n   O  Sentence 3418
343985    \n   O  Sentence 3418
       Token Tag     Sentence #
344084    \n   O  Sentence 3419
344085    \n   O  Sentence 3419
       Token Tag     Sentence #
344189    \n   O  Sentence 3420
344190    \n   O  Sentence 3420
       Token Tag     Sentence #
344197    \n   O  Sentence 3421
344198    \n   O  Sentence 3421
       Token Tag     Sentence #
344213    \n   O  Sentence 3422
344214    \n   O  Sentence 3422
       Token Tag     Sentence #
344221    \n   O  Sentence 3423
344222    \n   O  Sentence 3423
       Token Tag     Sentence #
344241    \n   O  Sentence 3424
344242    \n   O  Sentence 3424
       Token Tag     Sentence #
344277  

       Token Tag     Sentence #
357509    \n   O  Sentence 3576
357510    \n   O  Sentence 3576
       Token Tag     Sentence #
357533    \n   O  Sentence 3577
357534    \n   O  Sentence 3577
       Token Tag     Sentence #
357558    \n   O  Sentence 3578
357559    \n   O  Sentence 3578
       Token Tag     Sentence #
357635    \n   O  Sentence 3579
357636    \n   O  Sentence 3579
       Token Tag     Sentence #
357657    \n   O  Sentence 3580
357658    \n   O  Sentence 3580
       Token Tag     Sentence #
357662    \n   O  Sentence 3581
357663    \n   O  Sentence 3581
       Token Tag     Sentence #
357854    \n   O  Sentence 3582
357855    \n   O  Sentence 3582
       Token Tag     Sentence #
357916    \n   O  Sentence 3583
357917    \n   O  Sentence 3583
       Token Tag     Sentence #
357937    \n   O  Sentence 3584
357938    \n   O  Sentence 3584
       Token Tag     Sentence #
357945    \n   O  Sentence 3585
357946    \n   O  Sentence 3585
       Token Tag     Sentence #
357951  

       Token Tag     Sentence #
364319    \n   O  Sentence 3726
364320    \n   O  Sentence 3726
       Token Tag     Sentence #
364354    \n   O  Sentence 3727
364355    \n   O  Sentence 3727
       Token Tag     Sentence #
364530    \n   O  Sentence 3728
364531    \n   O  Sentence 3728
       Token Tag     Sentence #
364534    \n   O  Sentence 3729
364535    \n   O  Sentence 3729
       Token Tag     Sentence #
364544    \n   O  Sentence 3730
364545    \n   O  Sentence 3730
       Token Tag     Sentence #
364656    \n   O  Sentence 3731
364657    \n   O  Sentence 3731
       Token Tag     Sentence #
364672    \n   O  Sentence 3732
364673    \n   O  Sentence 3732
       Token Tag     Sentence #
364680    \n   O  Sentence 3733
364681    \n   O  Sentence 3733
       Token Tag     Sentence #
364684    \n   O  Sentence 3734
364685    \n   O  Sentence 3734
       Token Tag     Sentence #
364692    \n   O  Sentence 3735
364693    \n   O  Sentence 3735
       Token Tag     Sentence #
364706  

       Token Tag     Sentence #
376298    \n   O  Sentence 3873
376299    \n   O  Sentence 3873
       Token Tag     Sentence #
376351    \n   O  Sentence 3874
376352    \n   O  Sentence 3874
       Token Tag     Sentence #
376438    \n   O  Sentence 3875
376439    \n   O  Sentence 3875
       Token Tag     Sentence #
376571    \n   O  Sentence 3876
376572    \n   O  Sentence 3876
       Token Tag     Sentence #
376584    \n   O  Sentence 3877
376585    \n   O  Sentence 3877
       Token Tag     Sentence #
376638    \n   O  Sentence 3878
376639    \n   O  Sentence 3878
       Token Tag     Sentence #
376651    \n   O  Sentence 3879
376652    \n   O  Sentence 3879
       Token Tag     Sentence #
376797    \n   O  Sentence 3880
376798    \n   O  Sentence 3880
       Token Tag     Sentence #
376977    \n   O  Sentence 3881
376978    \n   O  Sentence 3881
       Token Tag     Sentence #
376995    \n   O  Sentence 3882
376996    \n   O  Sentence 3882
       Token Tag     Sentence #
377003  

       Token Tag     Sentence #
387157    \n   O  Sentence 4020
387158    \n   O  Sentence 4020
       Token Tag     Sentence #
387162    \n   O  Sentence 4021
387163    \n   O  Sentence 4021
       Token Tag     Sentence #
387325    \n   O  Sentence 4022
387326    \n   O  Sentence 4022
       Token Tag     Sentence #
387430    \n   O  Sentence 4023
387431    \n   O  Sentence 4023
       Token Tag     Sentence #
387446    \n   O  Sentence 4024
387447    \n   O  Sentence 4024
       Token Tag     Sentence #
387457    \n   O  Sentence 4025
387458    \n   O  Sentence 4025
       Token Tag     Sentence #
387547    \n   O  Sentence 4026
387548    \n   O  Sentence 4026
       Token Tag     Sentence #
387555    \n   O  Sentence 4027
387556    \n   O  Sentence 4027
       Token Tag     Sentence #
387633    \n   O  Sentence 4028
387634    \n   O  Sentence 4028
       Token Tag     Sentence #
387637    \n   O  Sentence 4029
387638    \n   O  Sentence 4029
       Token Tag     Sentence #
387649  

       Token Tag     Sentence #
398669    \n   O  Sentence 4170
398670    \n   O  Sentence 4170
       Token Tag     Sentence #
398679    \n   O  Sentence 4171
398680    \n   O  Sentence 4171
       Token Tag     Sentence #
398696    \n   O  Sentence 4172
398697    \n   O  Sentence 4172
       Token Tag     Sentence #
398714    \n   O  Sentence 4173
398715    \n   O  Sentence 4173
       Token           Tag     Sentence #
398742    \n  I_Precedente  Sentence 4174
398743    \n  I_Precedente  Sentence 4174
       Token Tag     Sentence #
398762    \n   O  Sentence 4175
398763    \n   O  Sentence 4175
       Token Tag     Sentence #
398948    \n   O  Sentence 4176
398949    \n   O  Sentence 4176
       Token Tag     Sentence #
399089    \n   O  Sentence 4177
399090    \n   O  Sentence 4177
       Token Tag     Sentence #
399136    \n   O  Sentence 4178
399137    \n   O  Sentence 4178
       Token Tag     Sentence #
399172    \n   O  Sentence 4179
399173    \n   O  Sentence 4179
       Tok

413537    \n   O  Sentence 4330
       Token Tag     Sentence #
413659    \n   O  Sentence 4331
413660    \n   O  Sentence 4331
       Token Tag     Sentence #
413814    \n   O  Sentence 4332
413815    \n   O  Sentence 4332
       Token Tag     Sentence #
413884    \n   O  Sentence 4333
413885    \n   O  Sentence 4333
       Token Tag     Sentence #
413955    \n   O  Sentence 4334
413956    \n   O  Sentence 4334
       Token Tag     Sentence #
414132    \n   O  Sentence 4335
414133    \n   O  Sentence 4335
       Token Tag     Sentence #
414239    \n   O  Sentence 4336
414240    \n   O  Sentence 4336
       Token Tag     Sentence #
414352    \n   O  Sentence 4337
414353    \n   O  Sentence 4337
       Token Tag     Sentence #
414422    \n   O  Sentence 4338
414423    \n   O  Sentence 4338
       Token Tag     Sentence #
414568    \n   O  Sentence 4339
414569    \n   O  Sentence 4339
       Token Tag     Sentence #
414651    \n   O  Sentence 4340
414652    \n   O  Sentence 4340
       T

       Token Tag     Sentence #
427308    \n   O  Sentence 4490
427309    \n   O  Sentence 4490
       Token Tag     Sentence #
427384    \n   O  Sentence 4491
427385    \n   O  Sentence 4491
       Token Tag     Sentence #
427464    \n   O  Sentence 4492
427465    \n   O  Sentence 4492
       Token Tag     Sentence #
427544    \n   O  Sentence 4493
427545    \n   O  Sentence 4493
       Token Tag     Sentence #
427561    \n   O  Sentence 4494
427562    \n   O  Sentence 4494
       Token Tag     Sentence #
427565    \n   O  Sentence 4495
427566    \n   O  Sentence 4495
       Token Tag     Sentence #
427812    \n   O  Sentence 4496
427813    \n   O  Sentence 4496
       Token Tag     Sentence #
427839    \n   O  Sentence 4497
427840    \n   O  Sentence 4497
       Token Tag     Sentence #
427918    \n   O  Sentence 4498
427919    \n   O  Sentence 4498
       Token Tag     Sentence #
428004    \n   O  Sentence 4499
428005    \n   O  Sentence 4499
       Token Tag     Sentence #
428289  

       Token Tag     Sentence #
446565    \n   O  Sentence 4648
446566    \n   O  Sentence 4648
       Token Tag     Sentence #
446607    \n   O  Sentence 4649
446608    \n   O  Sentence 4649
       Token Tag     Sentence #
446742    \n   O  Sentence 4650
446743    \n   O  Sentence 4650
       Token Tag     Sentence #
446778    \n   O  Sentence 4651
446779    \n   O  Sentence 4651
       Token Tag     Sentence #
447562    \n   O  Sentence 4652
447563    \n   O  Sentence 4652
       Token Tag     Sentence #
447650    \n   O  Sentence 4653
447651    \n   O  Sentence 4653
       Token Tag     Sentence #
447787    \n   O  Sentence 4654
447788    \n   O  Sentence 4654
       Token Tag     Sentence #
447916    \n   O  Sentence 4655
447917    \n   O  Sentence 4655
       Token Tag     Sentence #
447974    \n   O  Sentence 4656
447975    \n   O  Sentence 4656
       Token Tag     Sentence #
447981    \n   O  Sentence 4657
447982    \n   O  Sentence 4657
       Token Tag     Sentence #
447997  

       Token Tag     Sentence #
460111    \n   O  Sentence 4808
460112    \n   O  Sentence 4808
       Token Tag     Sentence #
460187    \n   O  Sentence 4809
460188    \n   O  Sentence 4809
       Token Tag     Sentence #
460222    \n   O  Sentence 4810
460223    \n   O  Sentence 4810
       Token Tag     Sentence #
460276    \n   O  Sentence 4811
460277    \n   O  Sentence 4811
       Token Tag     Sentence #
460309    \n   O  Sentence 4812
460310    \n   O  Sentence 4812
       Token Tag     Sentence #
460365    \n   O  Sentence 4813
460366    \n   O  Sentence 4813
       Token Tag     Sentence #
460388    \n   O  Sentence 4814
460389    \n   O  Sentence 4814
       Token Tag     Sentence #
460403    \n   O  Sentence 4815
460404    \n   O  Sentence 4815
       Token Tag     Sentence #
460461    \n   O  Sentence 4816
460462    \n   O  Sentence 4816
       Token Tag     Sentence #
460467    \n   O  Sentence 4817
460468    \n   O  Sentence 4817
       Token Tag     Sentence #
460481  

       Token Tag     Sentence #
473729    \n   O  Sentence 4967
473730    \n   O  Sentence 4967
       Token Tag     Sentence #
473738    \n   O  Sentence 4968
473739    \n   O  Sentence 4968
       Token Tag     Sentence #
473769    \n   O  Sentence 4969
473770    \n   O  Sentence 4969
       Token Tag     Sentence #
473775    \n   O  Sentence 4970
473776    \n   O  Sentence 4970
       Token           Tag     Sentence #
473789    \n  I_Precedente  Sentence 4971
473790    \n             O  Sentence 4971
       Token Tag     Sentence #
473793    \n   O  Sentence 4972
473794    \n   O  Sentence 4972
       Token Tag     Sentence #
473979    \n   O  Sentence 4973
473980    \n   O  Sentence 4973
       Token Tag     Sentence #
473985    \n   O  Sentence 4974
473986    \n   O  Sentence 4974
       Token Tag     Sentence #
473999    \n   O  Sentence 4975
474000    \n   O  Sentence 4975
       Token Tag     Sentence #
474007    \n   O  Sentence 4976
474008    \n   O  Sentence 4976
       Tok

       Token Tag     Sentence #
491651    \n   O  Sentence 5119
491652    \n   O  Sentence 5119
       Token Tag     Sentence #
491747    \n   O  Sentence 5120
491748    \n   O  Sentence 5120
       Token Tag     Sentence #
491963    \n   O  Sentence 5121
491964    \n   O  Sentence 5121
       Token Tag     Sentence #
492032    \n   O  Sentence 5122
492033    \n   O  Sentence 5122
       Token Tag     Sentence #
492269    \n   O  Sentence 5123
492270    \n   O  Sentence 5123
       Token Tag     Sentence #
492382    \n   O  Sentence 5124
492383    \n   O  Sentence 5124
       Token Tag     Sentence #
492391    \n   O  Sentence 5125
492392    \n   O  Sentence 5125
       Token Tag     Sentence #
492588    \n   O  Sentence 5126
492589    \n   O  Sentence 5126
       Token Tag     Sentence #
492612    \n   O  Sentence 5127
492613    \n   O  Sentence 5127
       Token Tag     Sentence #
492708    \n   O  Sentence 5128
492709    \n   O  Sentence 5128
       Token Tag     Sentence #
492775  

       Token Tag     Sentence #
503840    \n   O  Sentence 5264
503841    \n   O  Sentence 5264
       Token Tag     Sentence #
503948    \n   O  Sentence 5265
503949    \n   O  Sentence 5265
       Token Tag     Sentence #
504027    \n   O  Sentence 5266
504028    \n   O  Sentence 5266
       Token                 Tag     Sentence #
504061    \n  I_Ref. Legislativa  Sentence 5267
504062    \n  I_Ref. Legislativa  Sentence 5267
       Token Tag     Sentence #
504081    \n   O  Sentence 5268
504082    \n   O  Sentence 5268
       Token Tag     Sentence #
504152    \n   O  Sentence 5269
504153    \n   O  Sentence 5269
       Token Tag     Sentence #
504236    \n   O  Sentence 5270
504237    \n   O  Sentence 5270
       Token Tag     Sentence #
504292    \n   O  Sentence 5271
504293    \n   O  Sentence 5271
       Token Tag     Sentence #
504398    \n   O  Sentence 5272
504399    \n   O  Sentence 5272
       Token Tag     Sentence #
504432    \n   O  Sentence 5273
504433    \n   O  Senten

       Token Tag     Sentence #
521056    \n   O  Sentence 5413
521057    \n   O  Sentence 5413
       Token Tag     Sentence #
521074    \n   O  Sentence 5414
521075    \n   O  Sentence 5414
       Token Tag     Sentence #
521164    \n   O  Sentence 5415
521165    \n   O  Sentence 5415
       Token Tag     Sentence #
521573    \n   O  Sentence 5416
521574    \n   O  Sentence 5416
       Token Tag     Sentence #
521639    \n   O  Sentence 5417
521640    \n   O  Sentence 5417
       Token Tag     Sentence #
521744    \n   O  Sentence 5418
521745    \n   O  Sentence 5418
       Token Tag     Sentence #
521940    \n   O  Sentence 5419
521941    \n   O  Sentence 5419
       Token Tag     Sentence #
522137    \n   O  Sentence 5420
522138    \n   O  Sentence 5420
       Token Tag     Sentence #
522155    \n   O  Sentence 5421
522156    \n   O  Sentence 5421
       Token Tag     Sentence #
522165    \n   O  Sentence 5422
522166    \n   O  Sentence 5422
       Token Tag     Sentence #
522296  

       Token Tag     Sentence #
542036    \n   O  Sentence 5574
542037    \n   O  Sentence 5574
       Token Tag     Sentence #
542178    \n   O  Sentence 5575
542179    \n   O  Sentence 5575
       Token Tag     Sentence #
542320    \n   O  Sentence 5576
542321    \n   O  Sentence 5576
       Token Tag     Sentence #
542447    \n   O  Sentence 5577
542448    \n   O  Sentence 5577
       Token Tag     Sentence #
542768    \n   O  Sentence 5578
542769    \n   O  Sentence 5578
       Token Tag     Sentence #
542808    \n   O  Sentence 5579
542809    \n   O  Sentence 5579
       Token Tag     Sentence #
543190    \n   O  Sentence 5580
543191    \n   O  Sentence 5580
       Token Tag     Sentence #
543355    \n   O  Sentence 5581
543356    \n   O  Sentence 5581
       Token Tag     Sentence #
543446    \n   O  Sentence 5582
543447    \n   O  Sentence 5582
       Token Tag     Sentence #
543591    \n   O  Sentence 5583
543592    \n   O  Sentence 5583
       Token Tag     Sentence #
543838  

       Token Tag     Sentence #
560687    \n   O  Sentence 5734
560688    \n   O  Sentence 5734
       Token Tag     Sentence #
560691    \n   O  Sentence 5735
560692    \n   O  Sentence 5735
       Token Tag     Sentence #
560697    \n   O  Sentence 5736
560698    \n   O  Sentence 5736
       Token Tag     Sentence #
560715    \n   O  Sentence 5737
560716    \n   O  Sentence 5737
       Token Tag     Sentence #
560719    \n   O  Sentence 5738
560720    \n   O  Sentence 5738
       Token Tag     Sentence #
560816    \n   O  Sentence 5739
560817    \n   O  Sentence 5739
       Token Tag     Sentence #
560847    \n   O  Sentence 5740
560848    \n   O  Sentence 5740
       Token Tag     Sentence #
561046    \n   O  Sentence 5741
561047    \n   O  Sentence 5741
       Token Tag     Sentence #
561115    \n   O  Sentence 5742
561116    \n   O  Sentence 5742
       Token Tag     Sentence #
561288    \n   O  Sentence 5743
561289    \n   O  Sentence 5743
       Token Tag     Sentence #
561351  

       Token Tag     Sentence #
575312    \n   O  Sentence 5891
575313    \n   O  Sentence 5891
       Token                 Tag     Sentence #
575388    \n  I_Ref. Legislativa  Sentence 5892
575389    \n  I_Ref. Legislativa  Sentence 5892
       Token Tag     Sentence #
575719    \n   O  Sentence 5893
575720    \n   O  Sentence 5893
       Token Tag     Sentence #
575809    \n   O  Sentence 5894
575810    \n   O  Sentence 5894
       Token Tag     Sentence #
575903    \n   O  Sentence 5895
575904    \n   O  Sentence 5895
       Token Tag     Sentence #
575929    \n   O  Sentence 5896
575930    \n   O  Sentence 5896
       Token Tag     Sentence #
575949    \n   O  Sentence 5897
575950    \n   O  Sentence 5897
       Token Tag     Sentence #
575957    \n   O  Sentence 5898
575958    \n   O  Sentence 5898
       Token Tag     Sentence #
576049    \n   O  Sentence 5899
576050    \n   O  Sentence 5899
       Token Tag     Sentence #
576180    \n   O  Sentence 5900
576181    \n   O  Senten

       Token Tag     Sentence #
588948    \n   O  Sentence 6035
588949    \n   O  Sentence 6035
       Token Tag     Sentence #
588953    \n   O  Sentence 6036
588954    \n   O  Sentence 6036
       Token Tag     Sentence #
588972    \n   O  Sentence 6037
588973    \n   O  Sentence 6037
       Token Tag     Sentence #
588979    \n   O  Sentence 6038
588980    \n   O  Sentence 6038
       Token Tag     Sentence #
589092    \n   O  Sentence 6039
589093    \n   O  Sentence 6039
       Token Tag     Sentence #
589099    \n   O  Sentence 6040
589100    \n   O  Sentence 6040
       Token Tag     Sentence #
589257    \n   O  Sentence 6041
589258    \n   O  Sentence 6041
       Token Tag     Sentence #
589456    \n   O  Sentence 6042
589457    \n   O  Sentence 6042
       Token Tag     Sentence #
589601    \n   O  Sentence 6043
589602    \n   O  Sentence 6043
       Token Tag     Sentence #
589807    \n   O  Sentence 6044
589808    \n   O  Sentence 6044
       Token Tag     Sentence #
589882  

       Token Tag     Sentence #
603330    \n   O  Sentence 6187
603331    \n   O  Sentence 6187
       Token Tag     Sentence #
603498    \n   O  Sentence 6188
603499    \n   O  Sentence 6188
       Token Tag     Sentence #
603641    \n   O  Sentence 6189
603642    \n   O  Sentence 6189
       Token Tag     Sentence #
603864    \n   O  Sentence 6190
603865    \n   O  Sentence 6190
       Token Tag     Sentence #
604025    \n   O  Sentence 6191
604026    \n   O  Sentence 6191
       Token Tag     Sentence #
604052    \n   O  Sentence 6192
604053    \n   O  Sentence 6192
       Token Tag     Sentence #
604085    \n   O  Sentence 6193
604086    \n   O  Sentence 6193
       Token Tag     Sentence #
604235    \n   O  Sentence 6194
604236    \n   O  Sentence 6194
       Token Tag     Sentence #
604248    \n   O  Sentence 6195
604249    \n   O  Sentence 6195
       Token Tag     Sentence #
604275    \n   O  Sentence 6196
604276    \n   O  Sentence 6196
       Token Tag     Sentence #
604391  

       Token Tag     Sentence #
616287    \n   O  Sentence 6347
616288    \n   O  Sentence 6347
       Token Tag     Sentence #
616314    \n   O  Sentence 6348
616315    \n   O  Sentence 6348
       Token Tag     Sentence #
616318    \n   O  Sentence 6349
616319    \n   O  Sentence 6349
       Token Tag     Sentence #
616674    \n   O  Sentence 6350
616675    \n   O  Sentence 6350
       Token Tag     Sentence #
616927    \n   O  Sentence 6351
616928    \n   O  Sentence 6351
       Token Tag     Sentence #
617002    \n   O  Sentence 6352
617003    \n   O  Sentence 6352
       Token Tag     Sentence #
617016    \n   O  Sentence 6353
617017    \n   O  Sentence 6353
       Token Tag     Sentence #
617067    \n   O  Sentence 6354
617068    \n   O  Sentence 6354
       Token Tag     Sentence #
617129    \n   O  Sentence 6355
617130    \n   O  Sentence 6355
       Token Tag     Sentence #
617156    \n   O  Sentence 6356
617157    \n   O  Sentence 6356
       Token Tag     Sentence #
617160  

       Token Tag     Sentence #
630073    \n   O  Sentence 6507
630074    \n   O  Sentence 6507
       Token Tag     Sentence #
630109    \n   O  Sentence 6508
630110    \n   O  Sentence 6508
       Token Tag     Sentence #
630237    \n   O  Sentence 6509
630238    \n   O  Sentence 6509
       Token Tag     Sentence #
630324    \n   O  Sentence 6510
630325    \n   O  Sentence 6510
       Token Tag     Sentence #
630454    \n   O  Sentence 6511
630455    \n   O  Sentence 6511
       Token Tag     Sentence #
630553    \n   O  Sentence 6512
630554    \n   O  Sentence 6512
       Token Tag     Sentence #
630684    \n   O  Sentence 6513
630685    \n   O  Sentence 6513
       Token Tag     Sentence #
630787    \n   O  Sentence 6514
630788    \n   O  Sentence 6514
       Token Tag     Sentence #
630999    \n   O  Sentence 6515
631000    \n   O  Sentence 6515
       Token Tag     Sentence #
631076    \n   O  Sentence 6516
631077    \n   O  Sentence 6516
       Token Tag     Sentence #
631179  

       Token Tag     Sentence #
644566    \n   O  Sentence 6669
644567    \n   O  Sentence 6669
       Token Tag     Sentence #
644723    \n   O  Sentence 6670
644724    \n   O  Sentence 6670
       Token Tag     Sentence #
644819    \n   O  Sentence 6671
644820    \n   O  Sentence 6671
       Token Tag     Sentence #
644951    \n   O  Sentence 6672
644952    \n   O  Sentence 6672
       Token Tag     Sentence #
645043    \n   O  Sentence 6673
645044    \n   O  Sentence 6673
       Token Tag     Sentence #
645068    \n   O  Sentence 6674
645069    \n   O  Sentence 6674
       Token Tag     Sentence #
645074    \n   O  Sentence 6675
645075    \n   O  Sentence 6675
       Token Tag     Sentence #
645084    \n   O  Sentence 6676
645085    \n   O  Sentence 6676
       Token Tag     Sentence #
645093    \n   O  Sentence 6677
645094    \n   O  Sentence 6677
       Token Tag     Sentence #
645143    \n   O  Sentence 6678
645144    \n   O  Sentence 6678
       Token Tag     Sentence #
645426  

       Token Tag     Sentence #
658262    \n   O  Sentence 6819
658263    \n   O  Sentence 6819
       Token Tag     Sentence #
658325    \n   O  Sentence 6820
658326    \n   O  Sentence 6820
       Token Tag     Sentence #
658402    \n   O  Sentence 6821
658403    \n   O  Sentence 6821
       Token Tag     Sentence #
658470    \n   O  Sentence 6822
658471    \n   O  Sentence 6822
       Token Tag     Sentence #
658534    \n   O  Sentence 6823
658535    \n   O  Sentence 6823
       Token Tag     Sentence #
658643    \n   O  Sentence 6824
658644    \n   O  Sentence 6824
       Token Tag     Sentence #
658773    \n   O  Sentence 6825
658774    \n   O  Sentence 6825
       Token Tag     Sentence #
658837    \n   O  Sentence 6826
658838    \n   O  Sentence 6826
       Token Tag     Sentence #
658848    \n   O  Sentence 6827
658849    \n   O  Sentence 6827
       Token Tag     Sentence #
658859    \n   O  Sentence 6828
658860    \n   O  Sentence 6828
       Token Tag     Sentence #
658990  

670145    \n   O  Sentence 6967
       Token Tag     Sentence #
670238    \n   O  Sentence 6968
670239    \n   O  Sentence 6968
       Token Tag     Sentence #
670270    \n   O  Sentence 6969
670271    \n   O  Sentence 6969
       Token Tag     Sentence #
670318    \n   O  Sentence 6970
670319    \n   O  Sentence 6970
       Token Tag     Sentence #
670480    \n   O  Sentence 6971
670481    \n   O  Sentence 6971
       Token Tag     Sentence #
670627    \n   O  Sentence 6972
670628    \n   O  Sentence 6972
       Token Tag     Sentence #
670754    \n   O  Sentence 6973
670755    \n   O  Sentence 6973
       Token Tag     Sentence #
670775    \n   O  Sentence 6974
670776    \n   O  Sentence 6974
       Token Tag     Sentence #
670951    \n   O  Sentence 6975
670952    \n   O  Sentence 6975
       Token Tag     Sentence #
671038    \n   O  Sentence 6976
671039    \n   O  Sentence 6976
       Token Tag     Sentence #
671113    \n   O  Sentence 6977
671114    \n   O  Sentence 6977
       T

       Token Tag     Sentence #
685541    \n   O  Sentence 7115
685542    \n   O  Sentence 7115
       Token Tag     Sentence #
685551    \n   O  Sentence 7116
685552    \n   O  Sentence 7116
       Token Tag     Sentence #
685616    \n   O  Sentence 7117
685617    \n   O  Sentence 7117
       Token Tag     Sentence #
685839    \n   O  Sentence 7118
685840    \n   O  Sentence 7118
       Token Tag     Sentence #
685864    \n   O  Sentence 7119
685865    \n   O  Sentence 7119
       Token Tag     Sentence #
685884    \n   O  Sentence 7120
685885    \n   O  Sentence 7120
       Token Tag     Sentence #
686408    \n   O  Sentence 7121
686409    \n   O  Sentence 7121
       Token Tag     Sentence #
686503    \n   O  Sentence 7122
686504    \n   O  Sentence 7122
       Token Tag     Sentence #
686761    \n   O  Sentence 7123
686762    \n   O  Sentence 7123
       Token Tag     Sentence #
686844    \n   O  Sentence 7124
686845    \n   O  Sentence 7124
       Token Tag     Sentence #
686945  

       Token Tag     Sentence #
700122    \n   O  Sentence 7266
700123    \n   O  Sentence 7266
       Token Tag     Sentence #
700388    \n   O  Sentence 7267
700389    \n   O  Sentence 7267
       Token Tag     Sentence #
700473    \n   O  Sentence 7268
700474    \n   O  Sentence 7268
       Token Tag     Sentence #
700966    \n   O  Sentence 7269
700967    \n   O  Sentence 7269
       Token Tag     Sentence #
701200    \n   O  Sentence 7270
701201    \n   O  Sentence 7270
       Token Tag     Sentence #
701314    \n   O  Sentence 7271
701315    \n   O  Sentence 7271
       Token Tag     Sentence #
701402    \n   O  Sentence 7272
701403    \n   O  Sentence 7272
       Token Tag     Sentence #
701751    \n   O  Sentence 7273
701752    \n   O  Sentence 7273
       Token Tag     Sentence #
702053    \n   O  Sentence 7274
702054    \n   O  Sentence 7274
       Token Tag     Sentence #
702340    \n   O  Sentence 7275
702341    \n   O  Sentence 7275
       Token Tag     Sentence #
702436  

       Token Tag     Sentence #
717652    \n   O  Sentence 7413
717653    \n   O  Sentence 7413
       Token Tag     Sentence #
717668    \n   O  Sentence 7414
717669    \n   O  Sentence 7414
       Token Tag     Sentence #
717797    \n   O  Sentence 7415
717798    \n   O  Sentence 7415
       Token Tag     Sentence #
717805    \n   O  Sentence 7416
717806    \n   O  Sentence 7416
       Token Tag     Sentence #
717821    \n   O  Sentence 7417
717822    \n   O  Sentence 7417
       Token Tag     Sentence #
717900    \n   O  Sentence 7418
717901    \n   O  Sentence 7418
       Token Tag     Sentence #
717920    \n   O  Sentence 7419
717921    \n   O  Sentence 7419
       Token Tag     Sentence #
717982    \n   O  Sentence 7420
717983    \n   O  Sentence 7420
       Token Tag     Sentence #
718043    \n   O  Sentence 7421
718044    \n   O  Sentence 7421
       Token Tag     Sentence #
718092    \n   O  Sentence 7422
718093    \n   O  Sentence 7422
       Token Tag     Sentence #
718213  

732014    \n         O  Sentence 7562
       Token Tag     Sentence #
732022    \n   O  Sentence 7563
732023    \n   O  Sentence 7563
       Token Tag     Sentence #
732030    \n   O  Sentence 7564
732031    \n   O  Sentence 7564
       Token Tag     Sentence #
732057    \n   O  Sentence 7565
732058    \n   O  Sentence 7565
       Token Tag     Sentence #
732147    \n   O  Sentence 7566
732148    \n   O  Sentence 7566
       Token Tag     Sentence #
732428    \n   O  Sentence 7567
732429    \n   O  Sentence 7567
       Token Tag     Sentence #
732444    \n   O  Sentence 7568
732445    \n   O  Sentence 7568
       Token Tag     Sentence #
732468    \n   O  Sentence 7569
732469    \n   O  Sentence 7569
       Token Tag     Sentence #
732574    \n   O  Sentence 7570
732575    \n   O  Sentence 7570
       Token Tag     Sentence #
732590    \n   O  Sentence 7571
732591    \n   O  Sentence 7571
       Token Tag     Sentence #
732602    \n   O  Sentence 7572
732603    \n   O  Sentence 7572
  

744342    \n   O  Sentence 7710
       Token Tag     Sentence #
744407    \n   O  Sentence 7711
744408    \n   O  Sentence 7711
       Token Tag     Sentence #
744432    \n   O  Sentence 7712
744433    \n   O  Sentence 7712
       Token Tag     Sentence #
744455    \n   O  Sentence 7713
744456    \n   O  Sentence 7713
       Token Tag     Sentence #
744556    \n   O  Sentence 7714
744557    \n   O  Sentence 7714
       Token Tag     Sentence #
744575    \n   O  Sentence 7715
744576    \n   O  Sentence 7715
       Token Tag     Sentence #
744592    \n   O  Sentence 7716
744593    \n   O  Sentence 7716
       Token Tag     Sentence #
744801    \n   O  Sentence 7717
744802    \n   O  Sentence 7717
       Token Tag     Sentence #
744818    \n   O  Sentence 7718
744819    \n   O  Sentence 7718
       Token Tag     Sentence #
744844    \n   O  Sentence 7719
744845    \n   O  Sentence 7719
       Token Tag     Sentence #
744905    \n   O  Sentence 7720
744906    \n   O  Sentence 7720
       T

       Token Tag     Sentence #
760063    \n   O  Sentence 7858
760064    \n   O  Sentence 7858
       Token Tag     Sentence #
760380    \n   O  Sentence 7859
760381    \n   O  Sentence 7859
       Token Tag     Sentence #
760602    \n   O  Sentence 7860
760603    \n   O  Sentence 7860
       Token                 Tag     Sentence #
760631    \n  I_Ref. Legislativa  Sentence 7861
760632    \n  I_Ref. Legislativa  Sentence 7861
       Token                 Tag     Sentence #
760639    \n  I_Ref. Legislativa  Sentence 7862
760640    \n                   O  Sentence 7862
       Token Tag     Sentence #
760668    \n   O  Sentence 7863
760669    \n   O  Sentence 7863
       Token Tag     Sentence #
760799    \n   O  Sentence 7864
760800    \n   O  Sentence 7864
       Token Tag     Sentence #
760886    \n   O  Sentence 7865
760887    \n   O  Sentence 7865
       Token Tag     Sentence #
760988    \n   O  Sentence 7866
760989    \n   O  Sentence 7866
       Token Tag     Sentence #
761284  

       Token Tag     Sentence #
781587    \n   O  Sentence 8009
781588    \n   O  Sentence 8009
       Token Tag     Sentence #
781689    \n   O  Sentence 8010
781690    \n   O  Sentence 8010
       Token Tag     Sentence #
781791    \n   O  Sentence 8011
781792    \n   O  Sentence 8011
       Token Tag     Sentence #
781802    \n   O  Sentence 8012
781803    \n   O  Sentence 8012
       Token Tag     Sentence #
781808    \n   O  Sentence 8013
781809    \n   O  Sentence 8013
       Token Tag     Sentence #
781824    \n   O  Sentence 8014
781825    \n   O  Sentence 8014
       Token Tag     Sentence #
781828    \n   O  Sentence 8015
781829    \n   O  Sentence 8015
       Token Tag     Sentence #
781888    \n   O  Sentence 8016
781889    \n   O  Sentence 8016
       Token Tag     Sentence #
781961    \n   O  Sentence 8017
781962    \n   O  Sentence 8017
       Token Tag     Sentence #
782156    \n   O  Sentence 8018
782157    \n   O  Sentence 8018
       Token Tag     Sentence #
782235  

       Token Tag     Sentence #
797827    \n   O  Sentence 8164
797828    \n   O  Sentence 8164
       Token Tag     Sentence #
797985    \n   O  Sentence 8165
797986    \n   O  Sentence 8165
       Token Tag     Sentence #
798063    \n   O  Sentence 8166
798064    \n   O  Sentence 8166
       Token Tag     Sentence #
798248    \n   O  Sentence 8167
798249    \n   O  Sentence 8167
       Token Tag     Sentence #
798359    \n   O  Sentence 8168
798360    \n   O  Sentence 8168
       Token Tag     Sentence #
798382    \n   O  Sentence 8169
798383    \n   O  Sentence 8169
       Token Tag     Sentence #
798512    \n   O  Sentence 8170
798513    \n   O  Sentence 8170
       Token Tag     Sentence #
798669    \n   O  Sentence 8171
798670    \n   O  Sentence 8171
       Token Tag     Sentence #
798687    \n   O  Sentence 8172
798688    \n   O  Sentence 8172
       Token Tag     Sentence #
798703    \n   O  Sentence 8173
798704    \n   O  Sentence 8173
       Token Tag     Sentence #
798966  

812499    \n   O  Sentence 8327
       Token Tag     Sentence #
812527    \n   O  Sentence 8328
812528    \n   O  Sentence 8328
       Token Tag     Sentence #
812577    \n   O  Sentence 8329
812578    \n   O  Sentence 8329
       Token Tag     Sentence #
813099    \n   O  Sentence 8330
813100    \n   O  Sentence 8330
       Token Tag     Sentence #
813158    \n   O  Sentence 8331
813159    \n   O  Sentence 8331
       Token Tag     Sentence #
813313    \n   O  Sentence 8332
813314    \n   O  Sentence 8332
       Token Tag     Sentence #
813456    \n   O  Sentence 8333
813457    \n   O  Sentence 8333
       Token Tag     Sentence #
813476    \n   O  Sentence 8334
813477    \n   O  Sentence 8334
       Token Tag     Sentence #
813489    \n   O  Sentence 8335
813490    \n   O  Sentence 8335
       Token Tag     Sentence #
813516    \n   O  Sentence 8336
813517    \n   O  Sentence 8336
       Token Tag     Sentence #
813541    \n   O  Sentence 8337
813542    \n   O  Sentence 8337
       T

       Token Tag     Sentence #
829077    \n   O  Sentence 8485
829078    \n   O  Sentence 8485
       Token Tag     Sentence #
829203    \n   O  Sentence 8486
829204    \n   O  Sentence 8486
       Token Tag     Sentence #
829278    \n   O  Sentence 8487
829279    \n   O  Sentence 8487
       Token Tag     Sentence #
829497    \n   O  Sentence 8488
829498    \n   O  Sentence 8488
       Token Tag     Sentence #
829575    \n   O  Sentence 8489
829576    \n   O  Sentence 8489
       Token Tag     Sentence #
829800    \n   O  Sentence 8490
829801    \n   O  Sentence 8490
       Token Tag     Sentence #
830044    \n   O  Sentence 8491
830045    \n   O  Sentence 8491
       Token Tag     Sentence #
830176    \n   O  Sentence 8492
830177    \n   O  Sentence 8492
       Token Tag     Sentence #
830312    \n   O  Sentence 8493
830313    \n   O  Sentence 8493
       Token Tag     Sentence #
830368    \n   O  Sentence 8494
830369    \n   O  Sentence 8494
       Token Tag     Sentence #
830402  

       Token Tag     Sentence #
848860    \n   O  Sentence 8641
848861    \n   O  Sentence 8641
       Token Tag     Sentence #
848872    \n   O  Sentence 8642
848873    \n   O  Sentence 8642
       Token Tag     Sentence #
848978    \n   O  Sentence 8643
848979    \n   O  Sentence 8643
       Token Tag     Sentence #
848982    \n   O  Sentence 8644
848983    \n   O  Sentence 8644
       Token Tag     Sentence #
849051    \n   O  Sentence 8645
849052    \n   O  Sentence 8645
       Token Tag     Sentence #
849246    \n   O  Sentence 8646
849247    \n   O  Sentence 8646
       Token Tag     Sentence #
849523    \n   O  Sentence 8647
849524    \n   O  Sentence 8647
       Token Tag     Sentence #
849543    \n   O  Sentence 8648
849544    \n   O  Sentence 8648
       Token Tag     Sentence #
849547    \n   O  Sentence 8649
849548    \n   O  Sentence 8649
       Token Tag     Sentence #
849657    \n   O  Sentence 8650
849658    \n   O  Sentence 8650
       Token Tag     Sentence #
849673  

868993    \n   O  Sentence 8800
       Token Tag     Sentence #
869147    \n   O  Sentence 8801
869148    \n   O  Sentence 8801
       Token Tag     Sentence #
869344    \n   O  Sentence 8802
869345    \n   O  Sentence 8802
       Token Tag     Sentence #
869514    \n   O  Sentence 8803
869515    \n   O  Sentence 8803
       Token Tag     Sentence #
869679    \n   O  Sentence 8804
869680    \n   O  Sentence 8804
       Token Tag     Sentence #
869819    \n   O  Sentence 8805
869820    \n   O  Sentence 8805
       Token Tag     Sentence #
870074    \n   O  Sentence 8806
870075    \n   O  Sentence 8806
       Token Tag     Sentence #
870086    \n   O  Sentence 8807
870087    \n   O  Sentence 8807
       Token Tag     Sentence #
870092    \n   O  Sentence 8808
870093    \n   O  Sentence 8808
       Token Tag     Sentence #
870118    \n   O  Sentence 8809
870119    \n   O  Sentence 8809
       Token Tag     Sentence #
870200    \n   O  Sentence 8810
870201    \n   O  Sentence 8810
       T

885101    \n   O  Sentence 8962
       Token Tag     Sentence #
885196    \n   O  Sentence 8963
885197    \n   O  Sentence 8963
       Token Tag     Sentence #
885280    \n   O  Sentence 8964
885281    \n   O  Sentence 8964
       Token Tag     Sentence #
885360    \n   O  Sentence 8965
885361    \n   O  Sentence 8965
       Token Tag     Sentence #
885400    \n   O  Sentence 8966
885401    \n   O  Sentence 8966
       Token Tag     Sentence #
885416    \n   O  Sentence 8967
885417    \n   O  Sentence 8967
       Token Tag     Sentence #
885521    \n   O  Sentence 8968
885522    \n   O  Sentence 8968
       Token Tag     Sentence #
885610    \n   O  Sentence 8969
885611    \n   O  Sentence 8969
       Token Tag     Sentence #
885820    \n   O  Sentence 8970
885821    \n   O  Sentence 8970
       Token Tag     Sentence #
885966    \n   O  Sentence 8971
885967    \n   O  Sentence 8971
       Token Tag     Sentence #
886083    \n   O  Sentence 8972
886084    \n   O  Sentence 8972
       T

899811    \n   O  Sentence 9112
       Token Tag     Sentence #
899986    \n   O  Sentence 9113
899987    \n   O  Sentence 9113
       Token Tag     Sentence #
900325    \n   O  Sentence 9114
900326    \n   O  Sentence 9114
       Token Tag     Sentence #
900456    \n   O  Sentence 9115
900457    \n   O  Sentence 9115
       Token Tag     Sentence #
900685    \n   O  Sentence 9116
900686    \n   O  Sentence 9116
       Token Tag     Sentence #
900963    \n   O  Sentence 9117
900964    \n   O  Sentence 9117
       Token Tag     Sentence #
901111    \n   O  Sentence 9118
901112    \n   O  Sentence 9118
       Token Tag     Sentence #
901202    \n   O  Sentence 9119
901203    \n   O  Sentence 9119
       Token Tag     Sentence #
901272    \n   O  Sentence 9120
901273    \n   O  Sentence 9120
       Token Tag     Sentence #
901339    \n   O  Sentence 9121
901340    \n   O  Sentence 9121
       Token Tag     Sentence #
901405    \n   O  Sentence 9122
901406    \n   O  Sentence 9122
       T

       Token Tag     Sentence #
917319    \n   O  Sentence 9271
917320    \n   O  Sentence 9271
       Token Tag     Sentence #
917477    \n   O  Sentence 9272
917478    \n   O  Sentence 9272
       Token Tag     Sentence #
917523    \n   O  Sentence 9273
917524    \n   O  Sentence 9273
       Token           Tag     Sentence #
918347    \n  I_Precedente  Sentence 9274
918348    \n  I_Precedente  Sentence 9274
       Token Tag     Sentence #
918369    \n   O  Sentence 9275
918370    \n   O  Sentence 9275
       Token Tag     Sentence #
918461    \n   O  Sentence 9276
918462    \n   O  Sentence 9276
       Token Tag     Sentence #
918583    \n   O  Sentence 9277
918584    \n   O  Sentence 9277
       Token Tag     Sentence #
918671    \n   O  Sentence 9278
918672    \n   O  Sentence 9278
       Token Tag     Sentence #
918779    \n   O  Sentence 9279
918780    \n   O  Sentence 9279
       Token Tag     Sentence #
919062    \n   O  Sentence 9280
919063    \n   O  Sentence 9280
       Tok

       Token Tag     Sentence #
932544    \n   O  Sentence 9428
932545    \n   O  Sentence 9428
       Token Tag     Sentence #
933415    \n   O  Sentence 9429
933416    \n   O  Sentence 9429
       Token Tag     Sentence #
933499    \n   O  Sentence 9430
933500    \n   O  Sentence 9430
       Token Tag     Sentence #
933519    \n   O  Sentence 9431
933520    \n   O  Sentence 9431
       Token Tag     Sentence #
933547    \n   O  Sentence 9432
933548    \n   O  Sentence 9432
       Token Tag     Sentence #
933700    \n   O  Sentence 9433
933701    \n   O  Sentence 9433
       Token Tag     Sentence #
933784    \n   O  Sentence 9434
933785    \n   O  Sentence 9434
       Token Tag     Sentence #
933887    \n   O  Sentence 9435
933888    \n   O  Sentence 9435
       Token Tag     Sentence #
934062    \n   O  Sentence 9436
934063    \n   O  Sentence 9436
       Token Tag     Sentence #
934203    \n   O  Sentence 9437
934204    \n   O  Sentence 9437
       Token Tag     Sentence #
934272  

952252    \n   O  Sentence 9589
       Token Tag     Sentence #
952347    \n   O  Sentence 9590
952348    \n   O  Sentence 9590
       Token Tag     Sentence #
952365    \n   O  Sentence 9591
952366    \n   O  Sentence 9591
       Token Tag     Sentence #
952384    \n   O  Sentence 9592
952385    \n   O  Sentence 9592
       Token Tag     Sentence #
952656    \n   O  Sentence 9593
952657    \n   O  Sentence 9593
       Token Tag     Sentence #
952726    \n   O  Sentence 9594
952727    \n   O  Sentence 9594
       Token Tag     Sentence #
952776    \n   O  Sentence 9595
952777    \n   O  Sentence 9595
       Token Tag     Sentence #
952855    \n   O  Sentence 9596
952856    \n   O  Sentence 9596
       Token Tag     Sentence #
952960    \n   O  Sentence 9597
952961    \n   O  Sentence 9597
       Token Tag     Sentence #
953164    \n   O  Sentence 9598
953165    \n   O  Sentence 9598
       Token Tag     Sentence #
953186    \n   O  Sentence 9599
953187    \n   O  Sentence 9599
       T

In [19]:
# Cria um array com as posições a serem retiradas.
pos = []
for i in range(len(starts)):
    pos.append(starts[i])
    pos.append(starts[i]+1)
pos[:5]

[12, 13, 24, 25, 98]

In [20]:
# Remove as linhas do dataframe e reseta os índices.
combined_csv = combined_csv.drop(pos).reset_index(drop=True)

In [21]:
# Confirma se a remoção foi bem sucedida.
combined_csv.head(15)

Unnamed: 0,Token,Tag,Sentence #
0,Ementa,INICIO_ARQ,Sentence 1
1,,O,Sentence 1
2,e,O,Sentence 1
3,,O,Sentence 1
4,Acórdão,O,Sentence 1
5,,O,Sentence 1
6,06/03/2018,O,Sentence 1
7,,O,Sentence 1
8,PRIMEIRA,O,Sentence 1
9,,O,Sentence 1


# Mover B_  com Token vazio para linha abaixo

In [22]:
# 'begins' identifica as situações onde a Tag começa com 'B_' e o Token é vazio, uma situação onde 
# o anotador começou a marcação de um espaço vazio gerando a inconsistância.

begins = combined_csv[(combined_csv['Token']==' ') & 
                      (combined_csv['Tag'].str.startswith('B_'))].index.values

In [23]:
begins[0], len (begins)

(13097, 306)

In [24]:
%%time
# Rodar apenas uma vez
for i in range(len(begins)):
    combined_csv.Tag.iloc[begins[i]+1] = combined_csv.Tag.iloc[begins[i]] #Acertar a Tag do Token para começar
                                                                          #sem espaço
    combined_csv.Tag.iloc[begins[i]] = 'O' #Marca o espaço vazio como 'O'

In [25]:
i, n = 1, 10
combined_csv.iloc[begins[i]-n+7:begins[i]+n]

Unnamed: 0,Token,Tag,Sentence #
33259,termos,O,Sentence 315
33260,,O,Sentence 315
33261,do,O,Sentence 315
33262,,O,Sentence 315
33263,art,B_Ref. Legislativa,Sentence 315
33264,.,I_Ref. Legislativa,Sentence 315
33265,,I_Ref. Legislativa,Sentence 315
33266,102,I_Ref. Legislativa,Sentence 315
33267,",",I_Ref. Legislativa,Sentence 315
33268,,I_Ref. Legislativa,Sentence 315


# Tratar caracteres especiais

In [26]:
df_teste = combined_csv.copy()

In [105]:
# Identifica os tokens que começam com '§' e são maiores que 1 pois queremos pegar os casos onde
# '§' está associado ao numero do paragrafo.
espec_carac = df_teste[(df_teste.Token.str.startswith('§')) & (df_teste.Token.str.len()>1)]
espec_carac.head()

Unnamed: 0,Token,Tag,Sentence #
5083,§§,I_Ref. Legislativa,Sentence 68
29231,§3º,B_Ref. Legislativa,Sentence 286
33309,§3º,B_Ref. Legislativa,Sentence 315
42190,§1º,I_Ref. Legislativa,Sentence 398
42237,§3º,O,Sentence 398


In [123]:
espec_carac.Token.str.len().unique()

array([2, 3])

In [124]:
i = espec_carac.index
df_teste.iloc[i[0]].Token[0]

'§'

In [125]:
df_sentenc = combined_csv[combined_csv['Sentence #']=='Sentence 68'].reset_index(drop=True)
df_sentenc

Unnamed: 0,Token,Tag,Sentence #
0,9,O,Sentence 68
1,.,O,Sentence 68
2,,O,Sentence 68
3,Como,O,Sentence 68
4,,O,Sentence 68
...,...,...,...
146,da,I_Ref. Legislativa,Sentence 68
147,,I_Ref. Legislativa,Sentence 68
148,Constituição,I_Ref. Legislativa,Sentence 68
149,:,I_Ref. Legislativa,Sentence 68


In [110]:
df_sentenc[df_sentenc.Token=='§§'].index.values

array([137])

In [111]:
df_sentenc.iloc[137-3:137+5]

Unnamed: 0,Token,Tag,Sentence #
134,220,I_Ref. Legislativa,Sentence 68
135,",",I_Ref. Legislativa,Sentence 68
136,,I_Ref. Legislativa,Sentence 68
137,§§,I_Ref. Legislativa,Sentence 68
138,,I_Ref. Legislativa,Sentence 68
139,1º,I_Ref. Legislativa,Sentence 68
140,,I_Ref. Legislativa,Sentence 68
141,e,I_Ref. Legislativa,Sentence 68


In [126]:
# Splitting o caracter '§'
list(df_sentenc.iloc[137].Token)

['§', '§']

In [66]:
df_sentenc.index.values[137]

137

In [132]:
line.Tag.startswith('I_')

True

In [139]:
line = df_sentenc.iloc[137]
splt = list(line.Token)
if line.Tag.startswith('I_') == True:
    linha = pd.DataFrame(line).transpose()
linha

Unnamed: 0,Token,Tag,Sentence #
137,§§,I_Ref. Legislativa,Sentence 68


In [114]:
df_sentenc = df_sentenc.append(linha, ignore_index=False)
df_sentenc = df_sentenc.sort_index().reset_index(drop=True)
df_sentenc.iloc[137-3:137+5]

Unnamed: 0,Token,Tag,Sentence #
134,220,I_Ref. Legislativa,Sentence 68
135,",",I_Ref. Legislativa,Sentence 68
136,,I_Ref. Legislativa,Sentence 68
137,§,I_Ref. Legislativa,Sentence 68
138,§,I_Ref. Legislativa,Sentence 68
139,,I_Ref. Legislativa,Sentence 68
140,1º,I_Ref. Legislativa,Sentence 68
141,,I_Ref. Legislativa,Sentence 68


In [113]:
# # for i in df_sentenc.index.values[137]:
# line = df_sentenc.iloc[137]
# splt = list(line.Token)
# if len(splt)==2:
#     df_sentenc.iloc[137].Token = splt[0]
#     df_sentenc.iloc[137.5].Token = splt[1]
#     df_sentenc.sort_index().reset_index(drop=True)
        
# df_sentenc[137-3:137+5]

Unnamed: 0,Token,Tag,Sentence #
134,220,I_Ref. Legislativa,Sentence 68
135,",",I_Ref. Legislativa,Sentence 68
136,,I_Ref. Legislativa,Sentence 68
137,§,I_Ref. Legislativa,Sentence 68
138,,I_Ref. Legislativa,Sentence 68
139,1º,I_Ref. Legislativa,Sentence 68
140,,I_Ref. Legislativa,Sentence 68
141,e,I_Ref. Legislativa,Sentence 68


In [85]:
pd.DataFrame({"onset": 30.0, "length": 1.3}, index=[2.5])

Unnamed: 0,onset,length
2.5,30.0,1.3


In [99]:
line

Token                          §
Tag           I_Ref. Legislativa
Sentence #           Sentence 68
Name: 137, dtype: object

In [103]:
pd.DataFrame(line).transpose()

Unnamed: 0,137.5
137,


In [86]:
pd.DataFrame(line, index=[137+0.5])

Unnamed: 0,137
137.5,


In [109]:
[i for i in espec_carac.Token.iloc[0]]

['§', '§']

In [129]:
espec_carac.index

Int64Index([  5083,  29231,  33309,  42190,  42237,  42326,  57473,  60988,
             65243,  65505,
            ...
            909631, 915349, 921182, 921834, 921989, 922201, 926525, 926752,
            927449, 929647],
           dtype='int64', length=108)

In [181]:
tkn = combined_csv.iloc[espec_carac.index[0]]
tkn_expand = [i for i in tkn.Token]
tkn['Sentence #']

'Sentence 68'

In [190]:
# Function to insert row in the dataframe 
def Insert_row(row_number, df, row_value): 
    start_upper = 0 # Starting value of upper half 
    end_upper = row_number  # End value of upper half 
    start_lower = row_number # Start value of lower half 
    end_lower = df.shape[0] # End value of lower half

    # Create a list of upper_half index 
    upper_half = [*range(start_upper, end_upper, 1)]
	# Create a list of lower_half index 
    lower_half = [*range(start_lower, end_lower, 1)] 
	# Increment the value of lower half by 1 
    lower_half = [x.__add__(1) for x in lower_half]

	# Combine the two lists 
    index_ = upper_half + lower_half 

	# Update the index of the dataframe 
    df.index = index_ 

	# Insert a row at the end 
    df.loc[row_number] = row_value 
	
	# Sort the index labels 
    df = df.sort_index() 

	# return the dataframe 
    return df 

In [192]:
tkn.Token[0]

'§'

In [197]:
# Let's create a row which we want to insert 
row_number = espec_carac.index[0]#2
row_value = [tkn.Token[0], tkn.Tag, tkn['Sentence #']] 

if row_number > df.index.max()+1: 
	print("Invalid row_number") 
else: 
	
	# Let's call the function and insert the row 
	# at the second position 
	df_carac = Insert_row(row_number, combined_csv, row_value) 

	# Print the updated dataframe 
	print(df_carac.iloc[row_number-3:row_number+3]) 

     Token                 Tag   Sentence #
5080   220  I_Ref. Legislativa  Sentence 68
5081     ,  I_Ref. Legislativa  Sentence 68
5082        I_Ref. Legislativa  Sentence 68
5083     §  I_Ref. Legislativa  Sentence 68
5084    §§  I_Ref. Legislativa  Sentence 68
5085        I_Ref. Legislativa  Sentence 68


In [184]:
# Baseline de como funciona
#        onset    length
# 1      2.215    1.3
# 2     23.107    1.3
# 3     41.815    1.3
# 4     61.606    1.3

# line = DataFrame({"onset": 30.0, "length": 1.3}, index=[2.5])
# df = df.append(line, ignore_index=False)
# df = df.sort_index().reset_index(drop=True)

In [173]:
espec_carac.index[0]+0.5

5083.5

In [182]:
line = pd.DataFrame({'Token': tkn_expand[0],"Tag": tkn.Tag, 'Sentence #': tkn['Sentence #']}, index=[espec_carac.index[0]+0.5])

In [183]:
combined_csv.append(line, ignore_index=False).sort_index().reset_index(drop=True).iloc[espec_carac.index[0]-3:espec_carac.index[0]+3]

Unnamed: 0,Token,Tag,Sentence #
5080,220,I_Ref. Legislativa,Sentence 68
5081,",",I_Ref. Legislativa,Sentence 68
5082,,I_Ref. Legislativa,Sentence 68
5083,§§,I_Ref. Legislativa,Sentence 68
5084,§,I_Ref. Legislativa,Sentence 68
5085,,I_Ref. Legislativa,Sentence 68


In [160]:
l = []
for i in range(len(espec_carac)):
    l.append([i for i in espec_carac.Token.iloc[i]])
df_carac = pd.DataFrame(l).stack(0).dropna().reset_index()
df_carac

Unnamed: 0,level_0,level_1,0
0,0,0,§
1,0,1,§
2,1,0,§
3,1,1,3
4,1,2,º
...,...,...,...
292,106,1,2
293,106,2,º
294,107,0,§
295,107,1,7


In [32]:
i, n = 3, 10
combined_csv.iloc[espec_carac.index.values[i]-(n-7):espec_carac.index.values[i]+n]

Unnamed: 0,Token,Tag,Sentence #
42187,B,I_Ref. Legislativa,Sentence 398
42188,",",I_Ref. Legislativa,Sentence 398
42189,,I_Ref. Legislativa,Sentence 398
42190,§1º,I_Ref. Legislativa,Sentence 398
42191,",",I_Ref. Legislativa,Sentence 398
42192,,I_Ref. Legislativa,Sentence 398
42193,do,I_Ref. Legislativa,Sentence 398
42194,,I_Ref. Legislativa,Sentence 398
42195,Código,I_Ref. Legislativa,Sentence 398
42196,,I_Ref. Legislativa,Sentence 398


In [252]:
def explode(df, lst_cols, fill_value='', preserve_index=False):
    # make sure `lst_cols` is list-alike
    if (lst_cols is not None
        and len(lst_cols) > 0
        and not isinstance(lst_cols, (list, tuple, np.ndarray, pd.Series))):
        lst_cols = [lst_cols]
    # all columns except `lst_cols`
    idx_cols = df.columns.difference(lst_cols)
    # calculate lengths of lists
    lens = df[lst_cols[0]].str.len()
    # preserve original index values    
    idx = np.repeat(df.index.values, lens)
    # create "exploded" DF
    res = (pd.DataFrame({
                col:np.repeat(df[col].values, lens)
                for col in idx_cols},
                index=idx)
             .assign(**{col:np.concatenate(df.loc[lens>0, col].values)
                            for col in lst_cols}))
    # append those rows that have empty lists
    if (lens == 0).any():
        # at least one list in cells is empty
        res = (res.append(df.loc[lens==0, idx_cols], sort=False)
                  .fillna(fill_value))
    # revert the original index order
    res = res.sort_index()
    # reset index if requested
    if not preserve_index:        
        res = res.reset_index(drop=False)
    return res

explode(espec_carac.assign(Token=espec_carac.Token.str.split('')), 'Token').iloc[:10]

Unnamed: 0,index,Sentence #,Tag,Token
0,5083,Sentence 68,I_Ref. Legislativa,
1,5083,Sentence 68,I_Ref. Legislativa,§
2,5083,Sentence 68,I_Ref. Legislativa,§
3,5083,Sentence 68,I_Ref. Legislativa,
4,29231,Sentence 286,B_Ref. Legislativa,
5,29231,Sentence 286,B_Ref. Legislativa,§
6,29231,Sentence 286,B_Ref. Legislativa,3
7,29231,Sentence 286,B_Ref. Legislativa,º
8,29231,Sentence 286,B_Ref. Legislativa,
9,33309,Sentence 315,B_Ref. Legislativa,


# Remove pontuação dos números

In [121]:
# Criando um DataFrame onde todas os tokens possuem pontuação 
df_pont = combined_csv[(combined_csv.Token.str.contains("""[.]"""))]

# Tokens de tamanho maior que 1.
df_pont[[len(df_pont.Token.iloc[i]) > 1 for i in range(len(df_pont.Token))]].head()

Unnamed: 0,Token,Tag,Sentence #
14,22.328,O,Sentence 2
781,22.328,O,Sentence 21
1061,26.05.2015,O,Sentence 31
1124,05.06.2013,O,Sentence 31
1349,"R$5.000,00",O,Sentence 32


In [65]:
# Encontra todas as linhas que tem dígito
k = []
for i in range(len(combined_csv.Token)):
    l = re.findall('[\d]', combined_csv.Token.iloc[i])
    if len(l) != 0:
        k.append(l)
# k

In [118]:
# combined_csv.Token.replace(regex=True,inplace=True,to_replace=r'\D',value=r'')
# combined_csv.Token.replace(regex=True,inplace=True,to_replace=r'\D',value=r'')
combine_teste = combined_csv[combined_csv.Token.str.len()>1].index.values
combine_teste2 = combined_csv
[combine_teste2.Token.str.len()>1].Token.replace(regex=True,inplace=True,to_replace=r'\D',value='')
combine_teste2

array([     0,      4,      6, ..., 948488, 948490, 948493])

In [125]:
for i in combined_csv[combined_csv.Token.str.len()>1].index.values:
    combined_csv.iloc[i].Token.replace(regex=True,inplace=True,to_replace=r'\D',value=r'')

TypeError: replace() takes no keyword arguments

# IMPORTANTE: Esse último passo é necessário para salvar todas as alterações feitas no preprocessamento.

In [185]:
# Salvando o processamento feito nos dados
combined_csv.to_csv("preprocessados.csv",index=False,encoding='utf-8')

# Olhando para a predição de cada X

In [22]:
X_test[0][0]['word.lower()'], y_pred[0][0], y_test[0][0]

NameError: name 'X_test' is not defined

In [None]:
# Cria dataframe com o que o modelo predisse para análise.

result = []
for i in range(len(y_test)):
    df = pd.DataFrame(zip([X_test[i][j]['word.lower()'] for j in range(len(X_test[i]))], y_test[i], y_pred[i]))
#     df = pd.DataFrame(zip(X_test[i], y_test[i], y_pred[i]))
    result.append(df)
    
result = pd.concat(result)
result.reset_index(inplace=True, drop=True)

In [None]:
# combined_csv = pd.concat(frames)
result.to_csv("resultado.csv", index=False, encoding='utf-8')

In [None]:
result.shape

In [None]:
result.columns = ['X_test','y_test', 'y_pred']
result.head()

In [None]:
result[result['y_test'] != result['y_pred']]

In [None]:
print('Proporção de erros:',result[result.y_test != result.y_pred].shape[0] / result.shape[0])

----------------------------------------------------------------------------------------

In [None]:
df_res = pd.read_csv('mock/resultado.csv')
df_combined = pd.read_csv('mock/combined_csv.csv')

In [None]:
df_res.head()

In [None]:
df_res.shape

In [None]:
df_erros = df_res[df_res['y_test'] != df_res['y_pred']]
df_erros.head()

In [None]:
df_erros[(df_erros['X_test']==' ') & (df_erros['y_test'].str.startswith('B_'))]
indices = df_erros[(df_erros['X_test']==' ') & (df_erros['y_test'].str.startswith('B_'))].index.values

In [None]:
n = 10
pos = indices[1]

print('Retorna %d linhas antes e depois do ocorrido na linha %d para tag que começa com B_'%(n,pos)),df_res.iloc[pos-n:pos+n]

In [None]:
df_res

In [None]:
for i in indices:
    df.iloc[pos-n:pos+n]

# DUMP

In [None]:
# combined_csv.applymap(np.isreal)
# ord(combined_csv.Token)#<128
# combined_csv.Token[:10]
ord(combined_csv.Token.iloc[0])

In [None]:
import string
alphabet = string.ascii_letters#+string.punctuation
alphabet 

In [None]:
regexPattern = re.compile(alphabet)

In [None]:
combined_csv.Token = combined_csv.Token.astype('str')

In [None]:
combined_csv.iloc[[3,5]]

In [None]:
regexPattern.findall(combined_csv.Token.iloc[i])

In [None]:
# combined_csv.Token[combined_csv.Token.str.contains(alphabet) == False]

# regexPattern.findall(combined_csv.Token)

l = []
for i in range(len(combined_csv.Token)):
#     l.append(regexPattern.findall(combined_csv.Token.iloc[i]).index)
    indx = regexPattern.findall(combined_csv.Token.iloc[i]).index.values
    l.append(indx)
l

In [None]:
alphabet = string.ascii_letters
mainStr = combined_csv.Token.iloc[0]

# Create a regex pattern to match character 's'
regexPattern = re.compile(alphabet)
 
# Iterate over all the matches of regex pattern
iteratorOfMatchObs = regexPattern.finditer(mainStr)
indexPositions = []
count = 0
for matchObj in iteratorOfMatchObs:
    indexPositions.append(matchObj.start())
    count = count + 1
 
# print("Occurrence Count of character 's' : ", count)
print("Index Positions of 's' are : ", indexPositions)


In [None]:
l = []
for i in range(len(combined_csv)):
    iteratorOfMatchObs = regexPattern.finditer(combined_csv.Token.iloc[i])
    indexPositions = []
    count = 0
    for matchObj in iteratorOfMatchObs:
        indexPositions.append(matchObj.start())
        count = count + 1
    l.append(indexPositions)

In [None]:
l.count([])

In [None]:
regexPattern.findall(combined_csv.Token.iloc[0]).index

In [None]:
# combined_csv[combined_csv.Token.str.contains("""[!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~123456789]""")]

# combined_csv[(combined_csv.Token.str.contains("""[./]"""))& (combined_csv.Token.str.len() > 1)]

In [17]:
# No meio do preprocessamento foi identificado tokens que terminam com 'adv' e estão colados ao nome do
# advogado referente ao caso.
# Tokens que terminan com 'adv'.

adv = combined_csv[combined_csv.Token.str.endswith('ADV')]#.reset_index(drop=True)
adv.head()#, adv.shape

Unnamed: 0,Token,Tag,Sentence #
46,ADV,O,Sentence 3
198,ADV,O,Sentence 9
234,ADV,O,Sentence 9
813,ADV,O,Sentence 22
965,ADV,O,Sentence 28


In [18]:
# Desconfiança que o nome do advogado está agregado à palavra 'adv'

adv[adv.Token.str.len() >3]
adv_pos = adv[adv.Token.str.len() >3].index.values

k = 2 # Vê as linhas antes e depois do k-ésimo ocorrido .
combined_csv.iloc[adv_pos[k]-3:adv_pos[k]+3] # Olhando para as linhas anteriores e posteriores o acontecimento.
# combined_csv.iloc[adv_pos[0]].Token[-3:] #Separando a parte 'adv'

Unnamed: 0,Token,Tag,Sentence #
19453,,O,Sentence 203
19454,SOFIA,B_Pessoa,Sentence 203
19455,,I_Pessoa,Sentence 203
19456,CERQUEIRAADV,O,Sentence 203
19457,.,O,Sentence 203
19458,(,O,Sentence 203


In [19]:
print("Quantidade de vezes que esse caso ocorre em todos os arquivos:",len(adv_pos))

Quantidade de vezes que esse caso ocorre em todos os arquivos: 70
