In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# Preprocessing

In [80]:
df = pd.read_csv('mock/161704902/treino_1/Documentos/564.ner.csv', 
                 delimiter=';', na_values='NaN')

In [81]:
# df = df.query("Token not in ['', ' ', '\\n']")
# df = df.query("Token not in ['', ' ']")

In [82]:
df.dropna(how='any', inplace=True)

In [83]:
df.head()
# df

Unnamed: 0,Token,Tag
0,EMENTA,O
1,:,O
2,,O
3,EXECUÇÃO,O
4,,O


In [84]:
df.isnull().sum()

Token    0
Tag      0
dtype: int64

In [85]:
df['Tag'].unique()

array(['O', 'B_Precedente', 'I_Precedente', 'B_Ref. Legislativa',
       'I_Ref. Legislativa', 'B_Pessoa', 'I_Pessoa', 'B_Doutrinador',
       'I_Doutrinador'], dtype=object)

In [86]:
len(df[df['Tag']!='O']) , len(df.query("Tag not in 'O'"))

(1072, 1072)

In [87]:
# df.query("Tag.str.contains('B_')", engine='python')
df.query("Tag not in 'O'")

Unnamed: 0,Token,Tag
225,HHCC,B_Precedente
226,,I_Precedente
227,123.382,I_Precedente
228,,I_Precedente
229,e,I_Precedente
230,,I_Precedente
231,123.425,I_Precedente
232,",",I_Precedente
233,,I_Precedente
234,Relatores,I_Precedente


In [88]:
df.groupby('Tag').size().reset_index(name='Count')

Unnamed: 0,Tag,Count
0,B_Doutrinador,2
1,B_Pessoa,19
2,B_Precedente,20
3,B_Ref. Legislativa,38
4,I_Doutrinador,100
5,I_Pessoa,121
6,I_Precedente,370
7,I_Ref. Legislativa,402
8,O,9348


In [89]:
print('Porção de tags que não são "O": ',len(df[df['Tag']!='O'])/len(df) * 100,'%')

Porção de tags que não são "O":  10.287907869481765 %


# POS

In [77]:
import nltk
nltk.download('all')
from nltk.tokenize import word_tokenize

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/biocreative_ppi.zip.
[nltk_data]    | Downloading package brown to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/brown.zip.
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/brown_tei.zip.
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/cess_cat.zip.
[nltk_data]    | Downloading package cess_esp to
[nltk_data]    |     /home/kaline/n

[nltk_data]    |   Unzipping corpora/shakespeare.zip.
[nltk_data]    | Downloading package sinica_treebank to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/sinica_treebank.zip.
[nltk_data]    | Downloading package smultron to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/smultron.zip.
[nltk_data]    | Downloading package state_union to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/state_union.zip.
[nltk_data]    | Downloading package stopwords to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/stopwords.zip.
[nltk_data]    | Downloading package subjectivity to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/subjectivity.zip.
[nltk_data]    | Downloading package swadesh to
[nltk_data]    |     /home/kaline/nltk_data...
[nltk_data]    |   Unzipping corpora/swadesh.zip.
[nltk_data]    | Downloading package

0            [EMENTA]
1                 [:]
2                  []
3          [EXECUÇÃO]
4                  []
5             [PENAL]
6                 [.]
7                  []
8            [HABEAS]
9                  []
10           [CORPUS]
11                [.]
12                 []
13           [LESÕES]
14                 []
15        [CORPORAIS]
16                 []
17         [CULPOSAS]
18                 []
19                [-]
20                 []
21              [ART]
22                [.]
23                 []
24              [210]
25                [,]
26                 []
27               [DO]
28                 []
29           [CÓDIGO]
             ...     
10390       [Gustavo]
10391              []
10392         [Gonet]
10393              []
10394              []
10395              []
10396        [Branco]
10397             [.]
10398              []
10399              []
10400              []
10401        [Carmen]
10402              []
10403        [Lilian]
10404     

In [90]:
df['POS'] = df['Token'].apply(word_tokenize)
# df['POS'].unique()

In [92]:
df['POS']

0            [EMENTA]
1                 [:]
2                  []
3          [EXECUÇÃO]
4                  []
5             [PENAL]
6                 [.]
7                  []
8            [HABEAS]
9                  []
10           [CORPUS]
11                [.]
12                 []
13           [LESÕES]
14                 []
15        [CORPORAIS]
16                 []
17         [CULPOSAS]
18                 []
19                [-]
20                 []
21              [ART]
22                [.]
23                 []
24              [210]
25                [,]
26                 []
27               [DO]
28                 []
29           [CÓDIGO]
             ...     
10390       [Gustavo]
10391              []
10392         [Gonet]
10393              []
10394              []
10395              []
10396        [Branco]
10397             [.]
10398              []
10399              []
10400              []
10401        [Carmen]
10402              []
10403        [Lilian]
10404     

In [40]:
X = df.drop('Tag', axis=1) # Define o conjunto X
v = DictVectorizer(sparse=False) # Função que transforma listas de features em vetores
X = v.fit_transform(X.to_dict('records')) #Aplica a função de vetorização no conjunto 
                                          #X que foi colocado no formato 'records' (informa o que preenche cada coluna 
                                          # da linha i)
y = df.Tag.values # Define o conjunto y

classes = np.unique(y) # Define quais serão as classes baseado nos valores únicos da coluna y
classes = classes.tolist() # Tranforma as classes de array para lista

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state=0) # Divide o conjunto em treino
                                                                                            #e teste
X_train.shape, y_train.shape # Formato dos dados

((6981, 1263), (6981,))

In [13]:
classes

['B_Doutrinador',
 'B_Pessoa',
 'B_Precedente',
 'B_Ref. Legislativa',
 'I_Doutrinador',
 'I_Pessoa',
 'I_Precedente',
 'I_Ref. Legislativa',
 'O']

# Machine Learning

## Out of core Algorithms

We will try some of the out-of-core algorithms that are designed to process data that is too large to fit into a single computer memory that support partial_fit method.

In [14]:
# Perceptron 

per = Perceptron(verbose=10) # ou per = Perceptron(verbose=10, n_jobs=-1, max_iter=5)
per.partial_fit(X_train, y_train, classes)

-- Epoch 1
Norm: 1.73, NNZs: 3, Bias: -1.000000, T: 6981, Avg. loss: 0.000430
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 4.58, NNZs: 18, Bias: -1.000000, T: 6981, Avg. loss: 0.002292
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 4.58, NNZs: 15, Bias: -1.000000, T: 6981, Avg. loss: 0.000859
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 5.00, NNZs: 14, Bias: -1.000000, T: 6981, Avg. loss: 0.002292
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 5.10, NNZs: 23, Bias: -2.000000, T: 6981, Avg. loss: 0.015041
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 8.83, NNZs: 50, Bias: -2.000000, T: 6981, Avg. loss: 0.016473
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 13.96, NNZs: 105, Bias: -3.000000, T: 6981, Avg. loss: 0.049993
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 13.08, NNZs: 91, Bias: -1.000000, T: 6981, Avg. loss: 0.061453
Total training time: 0.01 seconds.
-- Epoch 1
Norm: 22.25, NNZs: 304, Bias: 1.000000, T: 6981, Avg. loss: 0.139665
Total 

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s finished


Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
           fit_intercept=True, max_iter=1000, n_iter_no_change=5, n_jobs=None,
           penalty=None, random_state=0, shuffle=True, tol=0.001,
           validation_fraction=0.1, verbose=10, warm_start=False)

In [15]:
# Removendo a tag 'O'
new_classes = classes.copy()
new_classes.pop()
new_classes

['B_Doutrinador',
 'B_Pessoa',
 'B_Precedente',
 'B_Ref. Legislativa',
 'I_Doutrinador',
 'I_Pessoa',
 'I_Precedente',
 'I_Ref. Legislativa']

In [16]:
print(classification_report(y_pred=per.predict(X_test), y_true=y_test, labels=new_classes))

                    precision    recall  f1-score   support

     B_Doutrinador       0.00      0.00      0.00         0
          B_Pessoa       0.33      0.17      0.22         6
      B_Precedente       0.40      1.00      0.57         2
B_Ref. Legislativa       0.69      0.69      0.69        16
     I_Doutrinador       0.00      0.00      0.00        41
          I_Pessoa       0.00      0.00      0.00        43
      I_Precedente       0.93      0.10      0.18       130
I_Ref. Legislativa       0.62      0.20      0.30       123

         micro avg       0.56      0.14      0.23       361
         macro avg       0.37      0.27      0.24       361
      weighted avg       0.58      0.14      0.20       361



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [17]:
# Classificador Linear com Stochastic Gradient Descent

sgd = SGDClassifier()
sgd.partial_fit(X_train, y_train, classes)

print(classification_report(y_pred=sgd.predict(X_test), y_true=y_test, labels=new_classes))

                    precision    recall  f1-score   support

     B_Doutrinador       0.00      0.00      0.00         0
          B_Pessoa       0.00      0.00      0.00         6
      B_Precedente       1.00      1.00      1.00         2
B_Ref. Legislativa       0.69      0.69      0.69        16
     I_Doutrinador       0.05      0.12      0.07        41
          I_Pessoa       0.20      0.02      0.04        43
      I_Precedente       0.95      0.15      0.26       130
I_Ref. Legislativa       0.65      0.11      0.18       123

         micro avg       0.33      0.14      0.20       361
         macro avg       0.44      0.26      0.28       361
      weighted avg       0.63      0.14      0.21       361



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [18]:
# Naive Bayes classifier for multinomial models

nb = MultinomialNB(alpha=0.01)
nb.partial_fit(X_train, y_train, classes)

print(classification_report(y_pred=nb.predict(X_test), y_true=y_test, labels = new_classes))

                    precision    recall  f1-score   support

     B_Doutrinador       0.00      0.00      0.00         0
          B_Pessoa       0.33      0.17      0.22         6
      B_Precedente       0.67      1.00      0.80         2
B_Ref. Legislativa       0.69      0.69      0.69        16
     I_Doutrinador       1.00      0.02      0.05        41
          I_Pessoa       0.33      0.05      0.08        43
      I_Precedente       0.83      0.22      0.35       130
I_Ref. Legislativa       0.74      0.19      0.30       123

         micro avg       0.73      0.19      0.30       361
         macro avg       0.57      0.29      0.31       361
      weighted avg       0.74      0.19      0.28       361



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


## Conditional Random Fields (CRFs) (necessita de POS)

CRFs is often used for labeling or parsing of sequential data, such as natural language processing and CRFs find applications in POS Tagging, named entity recognition, among others.

In [19]:
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

In [20]:
# Obter sentenças 

class SentenceGetter(object):
    
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w, t) for w, t in zip(s['Token'].values.tolist(), s['Tag'].values.tolist())]
#         self.grouped = self.data.groupby('Token').apply(agg_func)
        self.sentences = self.data.groupby('Token').apply(agg_func)
        
    def get_next(self):
        try: 
            s = self.grouped['Sentence: {}'.format(self.n_sent)]
            self.n_sent += 1
            return s 
        except:
            return None

In [21]:
getter = SentenceGetter(df)

sentences = getter.sentences

# Feature extraction

def word2features(sent, i):
    word = sent[i][0]
    postag = sent[i][1]
    
    features = {
        'bias': 1.0, 
        'word.lower()': word.lower(), 
        'word[-3:]': word[-3:],
        'word[-2:]': word[-2:],
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'postag': postag,
        'postag[:2]': postag[:2],
    }
    if i > 0:
        word1 = sent[i-1][0]
        postag1 = sent[i-1][1]
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
            '-1:postag': postag1,
            '-1:postag[:2]': postag1[:2],
        })
    else:
        features['BOS'] = True
    if i < len(sent)-1:
        word1 = sent[i+1][0]
        postag1 = sent[i+1][1]
        features.update({
            '+1:word.lower()': word1.lower(),
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
            '+1:postag': postag1,
            '+1:postag[:2]': postag1[:2],
        })
    else:
        features['EOS'] = True

    return features

def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [label for token, label in sent]

def sent2tokens(sent):
    return [token for token, label in sent]

In [59]:
def word2features(sent, i):
    word = sent[i][0]
    postag = sent[i][1]

    features = {
        'bias': 1.0,
        'word.lower()': word.lower(),
        'word[-3:]': word[-3:],
        'word[-2:]': word[-2:],
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'postag': postag,
        'postag[:2]': postag[:2],
    }
    if i > 0:
        word1 = sent[i-1][0]
        postag1 = sent[i-1][1]
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
            '-1:postag': postag1,
            '-1:postag[:2]': postag1[:2],
        })
    else:
        features['BOS'] = True

    if i < len(sent)-1:
        word1 = sent[i+1][0]
        postag1 = sent[i+1][1]
        features.update({
            '+1:word.lower()': word1.lower(),
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
            '+1:postag': postag1,
            '+1:postag[:2]': postag1[:2],
        })
    else:
        features['EOS'] = True

    return features


def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [label for token, postag, label in sent]

def sent2tokens(sent):
    return [token for token, postag, label in sent]

In [60]:
X_crf = [sent2features(s) for s in X_train]
y_crf = [sent2labels(s) for s in y_train]

IndexError: invalid index to scalar variable.

In [24]:
X_crf[3:5]

[[{'bias': 1.0,
   'word.lower()': '"art',
   'word[-3:]': 'Art',
   'word[-2:]': 'rt',
   'word.isupper()': False,
   'word.istitle()': True,
   'word.isdigit()': False,
   'postag': 'O',
   'postag[:2]': 'O',
   'BOS': True,
   'EOS': True}],
 [{'bias': 1.0,
   'word.lower()': '"e',
   'word[-3:]': '"e',
   'word[-2:]': '"e',
   'word.isupper()': False,
   'word.istitle()': False,
   'word.isdigit()': False,
   'postag': 'O',
   'postag[:2]': 'O',
   'BOS': True,
   'EOS': True}]]

In [35]:
X_train_crf, X_test_crf, y_train_crf, y_test_crf = train_test_split(X_crf, y_crf, test_size=0.33, random_state=0)

In [36]:
# Obtendo as tags que aparecem
l=[]
for i in range(len(X_train_crf)):
    k = X_train_crf[i][0]['postag']
    l.append(k)

In [37]:
from collections import Counter

Counter(l)

Counter({'O': 747,
         'I_Ref. Legislativa': 28,
         'I_Precedente': 36,
         'I_Doutrinador': 15,
         'B_Ref. Legislativa': 3,
         'I_Pessoa': 12,
         'B_Doutrinador': 2,
         'B_Precedente': 2,
         'B_Pessoa': 1})

In [38]:
len(l), 747+28+36+15+3+12+2+2+1

(846, 846)

In [50]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    all_possible_transitions=True)
crf.fit(X_train_crf, y_train_crf)

y_pred_crf = crf.predict(X_test_crf)
metrics.flat_f1_score(y_test_crf, y_pred_crf, average='weighted', labels=new_classes)

0.9985324564608452

In [57]:
X_train.shape, y_train.shape

((6981, 1263), (6981,))

In [48]:
y_train_crf[0]

['O']

In [42]:
crf = sklearn_crfsuite.CRF(all_possible_transitions=True)
crf.fit(X_train, y_train)

y_pred = crf.predict(X_test)
metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=new_classes)

ValueError: The numbers of items and labels differ: |x| = 1263, |y| = 1

In [48]:
print(metrics.flat_classification_report(y_test, y_pred, labels = new_classes))

                    precision    recall  f1-score   support

     B_Doutrinador       0.00      0.00      0.00         0
          B_Pessoa       0.91      1.00      0.95        10
      B_Precedente       1.00      0.92      0.96        12
B_Ref. Legislativa       1.00      1.00      1.00         3
     I_Doutrinador       1.00      1.00      1.00        65
          I_Pessoa       1.00      1.00      1.00        82
      I_Precedente       1.00      1.00      1.00       234
I_Ref. Legislativa       1.00      1.00      1.00       274

         micro avg       1.00      1.00      1.00       680
         macro avg       0.86      0.86      0.86       680
      weighted avg       1.00      1.00      1.00       680



In [49]:
import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV
# from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    max_iterations=100,
    all_possible_transitions=True
)
params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.05),
}

# use the same metric for evaluation
f1_scorer = make_scorer(metrics.flat_f1_score,
                        average='weighted', labels=new_classes)

# search
rs = RandomizedSearchCV(crf, params_space,
                        cv=3,
                        scoring=f1_scorer)
rs.fit(X_train, y_train)

RandomizedSearchCV(cv=3, error_score='raise-deprecating',
                   estimator=CRF(algorithm='lbfgs', all_possible_states=None,
                                 all_possible_transitions=True, averaging=None,
                                 c=None, c1=None, c2=None,
                                 calibration_candidates=None,
                                 calibration_eta=None,
                                 calibration_max_trials=None,
                                 calibration_rate=None,
                                 calibration_samples=None, delta=None,
                                 epsilon=None, error_sensitive=None,...
                   param_distributions={'c1': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fdeb40c5128>,
                                        'c2': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fdeb406bf60>},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_sco

In [50]:
print('best params:', rs.best_params_)
print('best CV score:', rs.best_score_)
print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ / 1000000))

best params: {'c1': 0.11648671857266922, 'c2': 0.0027927110875775084}
best CV score: 0.9956709956709957
model size: 0.01M


In [51]:
crf = rs.best_estimator_
y_pred = crf.predict(X_test)
print(metrics.flat_classification_report(y_test, y_pred, labels=new_classes))

                    precision    recall  f1-score   support

     B_Doutrinador       0.00      0.00      0.00         0
          B_Pessoa       1.00      1.00      1.00        10
      B_Precedente       1.00      1.00      1.00        12
B_Ref. Legislativa       1.00      1.00      1.00         3
     I_Doutrinador       1.00      1.00      1.00        65
          I_Pessoa       1.00      1.00      1.00        82
      I_Precedente       1.00      1.00      1.00       234
I_Ref. Legislativa       1.00      1.00      1.00       274

         micro avg       1.00      1.00      1.00       680
         macro avg       0.88      0.88      0.88       680
      weighted avg       1.00      1.00      1.00       680



In [52]:
crf.transition_features_

{('O', 'O'): 0.166016,
 ('I_Precedente', 'I_Precedente'): 0.408497,
 ('B_Ref. Legislativa', 'B_Ref. Legislativa'): 0.746697,
 ('B_Precedente', 'B_Precedente'): 1.049987}

In [56]:
# Transições mais prováveis

from collections import Counter

def print_transitions(trans_features):
    for (label_from, label_to), weight in trans_features:
        print("%s -> %s %s" % (label_from, label_to, weight))

print("Top likely transitions:")
print_transitions(Counter(crf.transition_features_).most_common(20))

print("\nTop unlikely transitions:")
print_transitions(Counter(crf.transition_features_).most_common()[-20:])

Top likely transitions:
B_Precedente -> B_Precedente 1.049987
B_Ref. Legislativa -> B_Ref. Legislativa 0.746697
I_Precedente -> I_Precedente 0.408497
O -> O 0.166016

Top unlikely transitions:
B_Precedente -> B_Precedente 1.049987
B_Ref. Legislativa -> B_Ref. Legislativa 0.746697
I_Precedente -> I_Precedente 0.408497
O -> O 0.166016
